if_em.c revision 250458
1/******************************************************************************
2
3  Copyright (c) 2001-2013, Intel Corporation
4  All rights reserved.
5
6  Redistribution and use in source and binary forms, with or without
7  modification, are permitted provided that the following conditions are met:
8
9   1. Redistributions of source code must retain the above copyright notice,
10      this list of conditions and the following disclaimer.
11
12   2. Redistributions in binary form must reproduce the above copyright
13      notice, this list of conditions and the following disclaimer in the
14      documentation and/or other materials provided with the distribution.
15
16   3. Neither the name of the Intel Corporation nor the names of its
17      contributors may be used to endorse or promote products derived from
18      this software without specific prior written permission.
19
20  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
24  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30  POSSIBILITY OF SUCH DAMAGE.
31
32******************************************************************************/
33/*$FreeBSD: stable/9/sys/dev/e1000/if_em.c 250458 2013-05-10 16:16:33Z luigi $*/
34
35#ifdef HAVE_KERNEL_OPTION_HEADERS
36#include "opt_device_polling.h"
37#include "opt_inet.h"
38#include "opt_inet6.h"
39#endif
40
41#include <sys/param.h>
42#include <sys/systm.h>
43#if __FreeBSD_version >= 800000
44#include <sys/buf_ring.h>
45#endif
46#include <sys/bus.h>
47#include <sys/endian.h>
48#include <sys/kernel.h>
49#include <sys/kthread.h>
50#include <sys/malloc.h>
51#include <sys/mbuf.h>
52#include <sys/module.h>
53#include <sys/rman.h>
54#include <sys/socket.h>
55#include <sys/sockio.h>
56#include <sys/sysctl.h>
57#include <sys/taskqueue.h>
58#include <sys/eventhandler.h>
59#include <machine/bus.h>
60#include <machine/resource.h>
61
62#include <net/bpf.h>
63#include <net/ethernet.h>
64#include <net/if.h>
65#include <net/if_arp.h>
66#include <net/if_dl.h>
67#include <net/if_media.h>
68
69#include <net/if_types.h>
70#include <net/if_vlan_var.h>
71
72#include <netinet/in_systm.h>
73#include <netinet/in.h>
74#include <netinet/if_ether.h>
75#include <netinet/ip.h>
76#include <netinet/ip6.h>
77#include <netinet/tcp.h>
78#include <netinet/udp.h>
79
80#include <machine/in_cksum.h>
81#include <dev/led/led.h>
82#include <dev/pci/pcivar.h>
83#include <dev/pci/pcireg.h>
84
85#include "e1000_api.h"
86#include "e1000_82571.h"
87#include "if_em.h"
88
89/*********************************************************************
90 *  Set this to one to display debug statistics
91 *********************************************************************/
92int	em_display_debug_stats = 0;
93
94/*********************************************************************
95 *  Driver version:
96 *********************************************************************/
97char em_driver_version[] = "7.3.7";
98
99/*********************************************************************
100 *  PCI Device ID Table
101 *
102 *  Used by probe to select devices to load on
103 *  Last field stores an index into e1000_strings
104 *  Last entry must be all 0s
105 *
106 *  { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
107 *********************************************************************/
108
109static em_vendor_info_t em_vendor_info_array[] =
110{
111	/* Intel(R) PRO/1000 Network Connection */
112	{ 0x8086, E1000_DEV_ID_82571EB_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
113	{ 0x8086, E1000_DEV_ID_82571EB_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
114	{ 0x8086, E1000_DEV_ID_82571EB_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
115	{ 0x8086, E1000_DEV_ID_82571EB_SERDES_DUAL,
116						PCI_ANY_ID, PCI_ANY_ID, 0},
117	{ 0x8086, E1000_DEV_ID_82571EB_SERDES_QUAD,
118						PCI_ANY_ID, PCI_ANY_ID, 0},
119	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER,
120						PCI_ANY_ID, PCI_ANY_ID, 0},
121	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER_LP,
122						PCI_ANY_ID, PCI_ANY_ID, 0},
123	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_FIBER,
124						PCI_ANY_ID, PCI_ANY_ID, 0},
125	{ 0x8086, E1000_DEV_ID_82571PT_QUAD_COPPER,
126						PCI_ANY_ID, PCI_ANY_ID, 0},
127	{ 0x8086, E1000_DEV_ID_82572EI_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
128	{ 0x8086, E1000_DEV_ID_82572EI_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
129	{ 0x8086, E1000_DEV_ID_82572EI_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
130	{ 0x8086, E1000_DEV_ID_82572EI,		PCI_ANY_ID, PCI_ANY_ID, 0},
131
132	{ 0x8086, E1000_DEV_ID_82573E,		PCI_ANY_ID, PCI_ANY_ID, 0},
133	{ 0x8086, E1000_DEV_ID_82573E_IAMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
134	{ 0x8086, E1000_DEV_ID_82573L,		PCI_ANY_ID, PCI_ANY_ID, 0},
135	{ 0x8086, E1000_DEV_ID_82583V,		PCI_ANY_ID, PCI_ANY_ID, 0},
136	{ 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_SPT,
137						PCI_ANY_ID, PCI_ANY_ID, 0},
138	{ 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_SPT,
139						PCI_ANY_ID, PCI_ANY_ID, 0},
140	{ 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_DPT,
141						PCI_ANY_ID, PCI_ANY_ID, 0},
142	{ 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_DPT,
143						PCI_ANY_ID, PCI_ANY_ID, 0},
144	{ 0x8086, E1000_DEV_ID_ICH8_IGP_M_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
145	{ 0x8086, E1000_DEV_ID_ICH8_IGP_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
146	{ 0x8086, E1000_DEV_ID_ICH8_IGP_C,	PCI_ANY_ID, PCI_ANY_ID, 0},
147	{ 0x8086, E1000_DEV_ID_ICH8_IFE,	PCI_ANY_ID, PCI_ANY_ID, 0},
148	{ 0x8086, E1000_DEV_ID_ICH8_IFE_GT,	PCI_ANY_ID, PCI_ANY_ID, 0},
149	{ 0x8086, E1000_DEV_ID_ICH8_IFE_G,	PCI_ANY_ID, PCI_ANY_ID, 0},
150	{ 0x8086, E1000_DEV_ID_ICH8_IGP_M,	PCI_ANY_ID, PCI_ANY_ID, 0},
151	{ 0x8086, E1000_DEV_ID_ICH8_82567V_3,	PCI_ANY_ID, PCI_ANY_ID, 0},
152	{ 0x8086, E1000_DEV_ID_ICH9_IGP_M_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
153	{ 0x8086, E1000_DEV_ID_ICH9_IGP_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
154	{ 0x8086, E1000_DEV_ID_ICH9_IGP_C,	PCI_ANY_ID, PCI_ANY_ID, 0},
155	{ 0x8086, E1000_DEV_ID_ICH9_IGP_M,	PCI_ANY_ID, PCI_ANY_ID, 0},
156	{ 0x8086, E1000_DEV_ID_ICH9_IGP_M_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
157	{ 0x8086, E1000_DEV_ID_ICH9_IFE,	PCI_ANY_ID, PCI_ANY_ID, 0},
158	{ 0x8086, E1000_DEV_ID_ICH9_IFE_GT,	PCI_ANY_ID, PCI_ANY_ID, 0},
159	{ 0x8086, E1000_DEV_ID_ICH9_IFE_G,	PCI_ANY_ID, PCI_ANY_ID, 0},
160	{ 0x8086, E1000_DEV_ID_ICH9_BM,		PCI_ANY_ID, PCI_ANY_ID, 0},
161	{ 0x8086, E1000_DEV_ID_82574L,		PCI_ANY_ID, PCI_ANY_ID, 0},
162	{ 0x8086, E1000_DEV_ID_82574LA,		PCI_ANY_ID, PCI_ANY_ID, 0},
163	{ 0x8086, E1000_DEV_ID_ICH10_R_BM_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
164	{ 0x8086, E1000_DEV_ID_ICH10_R_BM_LF,	PCI_ANY_ID, PCI_ANY_ID, 0},
165	{ 0x8086, E1000_DEV_ID_ICH10_R_BM_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
166	{ 0x8086, E1000_DEV_ID_ICH10_D_BM_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
167	{ 0x8086, E1000_DEV_ID_ICH10_D_BM_LF,	PCI_ANY_ID, PCI_ANY_ID, 0},
168	{ 0x8086, E1000_DEV_ID_ICH10_D_BM_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
169	{ 0x8086, E1000_DEV_ID_PCH_M_HV_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
170	{ 0x8086, E1000_DEV_ID_PCH_M_HV_LC,	PCI_ANY_ID, PCI_ANY_ID, 0},
171	{ 0x8086, E1000_DEV_ID_PCH_D_HV_DM,	PCI_ANY_ID, PCI_ANY_ID, 0},
172	{ 0x8086, E1000_DEV_ID_PCH_D_HV_DC,	PCI_ANY_ID, PCI_ANY_ID, 0},
173	{ 0x8086, E1000_DEV_ID_PCH2_LV_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
174	{ 0x8086, E1000_DEV_ID_PCH2_LV_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
175	{ 0x8086, E1000_DEV_ID_PCH_LPT_I217_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
176	{ 0x8086, E1000_DEV_ID_PCH_LPT_I217_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
177	{ 0x8086, E1000_DEV_ID_PCH_LPTLP_I218_LM,
178						PCI_ANY_ID, PCI_ANY_ID, 0},
179	{ 0x8086, E1000_DEV_ID_PCH_LPTLP_I218_V,
180						PCI_ANY_ID, PCI_ANY_ID, 0},
181	/* required last entry */
182	{ 0, 0, 0, 0, 0}
183};
184
185/*********************************************************************
186 *  Table of branding strings for all supported NICs.
187 *********************************************************************/
188
189static char *em_strings[] = {
190	"Intel(R) PRO/1000 Network Connection"
191};
192
193/*********************************************************************
194 *  Function prototypes
195 *********************************************************************/
196static int	em_probe(device_t);
197static int	em_attach(device_t);
198static int	em_detach(device_t);
199static int	em_shutdown(device_t);
200static int	em_suspend(device_t);
201static int	em_resume(device_t);
202#ifdef EM_MULTIQUEUE
203static int	em_mq_start(struct ifnet *, struct mbuf *);
204static int	em_mq_start_locked(struct ifnet *,
205		    struct tx_ring *, struct mbuf *);
206static void	em_qflush(struct ifnet *);
207#else
208static void	em_start(struct ifnet *);
209static void	em_start_locked(struct ifnet *, struct tx_ring *);
210#endif
211static int	em_ioctl(struct ifnet *, u_long, caddr_t);
212static void	em_init(void *);
213static void	em_init_locked(struct adapter *);
214static void	em_stop(void *);
215static void	em_media_status(struct ifnet *, struct ifmediareq *);
216static int	em_media_change(struct ifnet *);
217static void	em_identify_hardware(struct adapter *);
218static int	em_allocate_pci_resources(struct adapter *);
219static int	em_allocate_legacy(struct adapter *);
220static int	em_allocate_msix(struct adapter *);
221static int	em_allocate_queues(struct adapter *);
222static int	em_setup_msix(struct adapter *);
223static void	em_free_pci_resources(struct adapter *);
224static void	em_local_timer(void *);
225static void	em_reset(struct adapter *);
226static int	em_setup_interface(device_t, struct adapter *);
227
228static void	em_setup_transmit_structures(struct adapter *);
229static void	em_initialize_transmit_unit(struct adapter *);
230static int	em_allocate_transmit_buffers(struct tx_ring *);
231static void	em_free_transmit_structures(struct adapter *);
232static void	em_free_transmit_buffers(struct tx_ring *);
233
234static int	em_setup_receive_structures(struct adapter *);
235static int	em_allocate_receive_buffers(struct rx_ring *);
236static void	em_initialize_receive_unit(struct adapter *);
237static void	em_free_receive_structures(struct adapter *);
238static void	em_free_receive_buffers(struct rx_ring *);
239
240static void	em_enable_intr(struct adapter *);
241static void	em_disable_intr(struct adapter *);
242static void	em_update_stats_counters(struct adapter *);
243static void	em_add_hw_stats(struct adapter *adapter);
244static void	em_txeof(struct tx_ring *);
245static bool	em_rxeof(struct rx_ring *, int, int *);
246#ifndef __NO_STRICT_ALIGNMENT
247static int	em_fixup_rx(struct rx_ring *);
248#endif
249static void	em_receive_checksum(struct e1000_rx_desc *, struct mbuf *);
250static void	em_transmit_checksum_setup(struct tx_ring *, struct mbuf *, int,
251		    struct ip *, u32 *, u32 *);
252static void	em_tso_setup(struct tx_ring *, struct mbuf *, int, struct ip *,
253		    struct tcphdr *, u32 *, u32 *);
254static void	em_set_promisc(struct adapter *);
255static void	em_disable_promisc(struct adapter *);
256static void	em_set_multi(struct adapter *);
257static void	em_update_link_status(struct adapter *);
258static void	em_refresh_mbufs(struct rx_ring *, int);
259static void	em_register_vlan(void *, struct ifnet *, u16);
260static void	em_unregister_vlan(void *, struct ifnet *, u16);
261static void	em_setup_vlan_hw_support(struct adapter *);
262static int	em_xmit(struct tx_ring *, struct mbuf **);
263static int	em_dma_malloc(struct adapter *, bus_size_t,
264		    struct em_dma_alloc *, int);
265static void	em_dma_free(struct adapter *, struct em_dma_alloc *);
266static int	em_sysctl_nvm_info(SYSCTL_HANDLER_ARGS);
267static void	em_print_nvm_info(struct adapter *);
268static int	em_sysctl_debug_info(SYSCTL_HANDLER_ARGS);
269static void	em_print_debug_info(struct adapter *);
270static int 	em_is_valid_ether_addr(u8 *);
271static int	em_sysctl_int_delay(SYSCTL_HANDLER_ARGS);
272static void	em_add_int_delay_sysctl(struct adapter *, const char *,
273		    const char *, struct em_int_delay_info *, int, int);
274/* Management and WOL Support */
275static void	em_init_manageability(struct adapter *);
276static void	em_release_manageability(struct adapter *);
277static void     em_get_hw_control(struct adapter *);
278static void     em_release_hw_control(struct adapter *);
279static void	em_get_wakeup(device_t);
280static void     em_enable_wakeup(device_t);
281static int	em_enable_phy_wakeup(struct adapter *);
282static void	em_led_func(void *, int);
283static void	em_disable_aspm(struct adapter *);
284
285static int	em_irq_fast(void *);
286
287/* MSIX handlers */
288static void	em_msix_tx(void *);
289static void	em_msix_rx(void *);
290static void	em_msix_link(void *);
291static void	em_handle_tx(void *context, int pending);
292static void	em_handle_rx(void *context, int pending);
293static void	em_handle_link(void *context, int pending);
294
295static void	em_set_sysctl_value(struct adapter *, const char *,
296		    const char *, int *, int);
297static int	em_set_flowcntl(SYSCTL_HANDLER_ARGS);
298static int	em_sysctl_eee(SYSCTL_HANDLER_ARGS);
299
300static __inline void em_rx_discard(struct rx_ring *, int);
301
302#ifdef DEVICE_POLLING
303static poll_handler_t em_poll;
304#endif /* POLLING */
305
306/*********************************************************************
307 *  FreeBSD Device Interface Entry Points
308 *********************************************************************/
309
310static device_method_t em_methods[] = {
311	/* Device interface */
312	DEVMETHOD(device_probe, em_probe),
313	DEVMETHOD(device_attach, em_attach),
314	DEVMETHOD(device_detach, em_detach),
315	DEVMETHOD(device_shutdown, em_shutdown),
316	DEVMETHOD(device_suspend, em_suspend),
317	DEVMETHOD(device_resume, em_resume),
318	DEVMETHOD_END
319};
320
321static driver_t em_driver = {
322	"em", em_methods, sizeof(struct adapter),
323};
324
325devclass_t em_devclass;
326DRIVER_MODULE(em, pci, em_driver, em_devclass, 0, 0);
327MODULE_DEPEND(em, pci, 1, 1, 1);
328MODULE_DEPEND(em, ether, 1, 1, 1);
329
330/*********************************************************************
331 *  Tunable default values.
332 *********************************************************************/
333
334#define EM_TICKS_TO_USECS(ticks)	((1024 * (ticks) + 500) / 1000)
335#define EM_USECS_TO_TICKS(usecs)	((1000 * (usecs) + 512) / 1024)
336#define M_TSO_LEN			66
337
338#define MAX_INTS_PER_SEC	8000
339#define DEFAULT_ITR		(1000000000/(MAX_INTS_PER_SEC * 256))
340
341/* Allow common code without TSO */
342#ifndef CSUM_TSO
343#define CSUM_TSO	0
344#endif
345
346static SYSCTL_NODE(_hw, OID_AUTO, em, CTLFLAG_RD, 0, "EM driver parameters");
347
348static int em_tx_int_delay_dflt = EM_TICKS_TO_USECS(EM_TIDV);
349static int em_rx_int_delay_dflt = EM_TICKS_TO_USECS(EM_RDTR);
350TUNABLE_INT("hw.em.tx_int_delay", &em_tx_int_delay_dflt);
351TUNABLE_INT("hw.em.rx_int_delay", &em_rx_int_delay_dflt);
352SYSCTL_INT(_hw_em, OID_AUTO, tx_int_delay, CTLFLAG_RDTUN, &em_tx_int_delay_dflt,
353    0, "Default transmit interrupt delay in usecs");
354SYSCTL_INT(_hw_em, OID_AUTO, rx_int_delay, CTLFLAG_RDTUN, &em_rx_int_delay_dflt,
355    0, "Default receive interrupt delay in usecs");
356
357static int em_tx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_TADV);
358static int em_rx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_RADV);
359TUNABLE_INT("hw.em.tx_abs_int_delay", &em_tx_abs_int_delay_dflt);
360TUNABLE_INT("hw.em.rx_abs_int_delay", &em_rx_abs_int_delay_dflt);
361SYSCTL_INT(_hw_em, OID_AUTO, tx_abs_int_delay, CTLFLAG_RDTUN,
362    &em_tx_abs_int_delay_dflt, 0,
363    "Default transmit interrupt delay limit in usecs");
364SYSCTL_INT(_hw_em, OID_AUTO, rx_abs_int_delay, CTLFLAG_RDTUN,
365    &em_rx_abs_int_delay_dflt, 0,
366    "Default receive interrupt delay limit in usecs");
367
368static int em_rxd = EM_DEFAULT_RXD;
369static int em_txd = EM_DEFAULT_TXD;
370TUNABLE_INT("hw.em.rxd", &em_rxd);
371TUNABLE_INT("hw.em.txd", &em_txd);
372SYSCTL_INT(_hw_em, OID_AUTO, rxd, CTLFLAG_RDTUN, &em_rxd, 0,
373    "Number of receive descriptors per queue");
374SYSCTL_INT(_hw_em, OID_AUTO, txd, CTLFLAG_RDTUN, &em_txd, 0,
375    "Number of transmit descriptors per queue");
376
377static int em_smart_pwr_down = FALSE;
378TUNABLE_INT("hw.em.smart_pwr_down", &em_smart_pwr_down);
379SYSCTL_INT(_hw_em, OID_AUTO, smart_pwr_down, CTLFLAG_RDTUN, &em_smart_pwr_down,
380    0, "Set to true to leave smart power down enabled on newer adapters");
381
382/* Controls whether promiscuous also shows bad packets */
383static int em_debug_sbp = FALSE;
384TUNABLE_INT("hw.em.sbp", &em_debug_sbp);
385SYSCTL_INT(_hw_em, OID_AUTO, sbp, CTLFLAG_RDTUN, &em_debug_sbp, 0,
386    "Show bad packets in promiscuous mode");
387
388static int em_enable_msix = TRUE;
389TUNABLE_INT("hw.em.enable_msix", &em_enable_msix);
390SYSCTL_INT(_hw_em, OID_AUTO, enable_msix, CTLFLAG_RDTUN, &em_enable_msix, 0,
391    "Enable MSI-X interrupts");
392
393/* How many packets rxeof tries to clean at a time */
394static int em_rx_process_limit = 100;
395TUNABLE_INT("hw.em.rx_process_limit", &em_rx_process_limit);
396SYSCTL_INT(_hw_em, OID_AUTO, rx_process_limit, CTLFLAG_RDTUN,
397    &em_rx_process_limit, 0,
398    "Maximum number of received packets to process "
399    "at a time, -1 means unlimited");
400
401/* Energy efficient ethernet - default to OFF */
402static int eee_setting = 1;
403TUNABLE_INT("hw.em.eee_setting", &eee_setting);
404SYSCTL_INT(_hw_em, OID_AUTO, eee_setting, CTLFLAG_RDTUN, &eee_setting, 0,
405    "Enable Energy Efficient Ethernet");
406
407/* Global used in WOL setup with multiport cards */
408static int global_quad_port_a = 0;
409
410#ifdef DEV_NETMAP	/* see ixgbe.c for details */
411#include <dev/netmap/if_em_netmap.h>
412#endif /* DEV_NETMAP */
413
414/*********************************************************************
415 *  Device identification routine
416 *
417 *  em_probe determines if the driver should be loaded on
418 *  adapter based on PCI vendor/device id of the adapter.
419 *
420 *  return BUS_PROBE_DEFAULT on success, positive on failure
421 *********************************************************************/
422
423static int
424em_probe(device_t dev)
425{
426	char		adapter_name[60];
427	u16		pci_vendor_id = 0;
428	u16		pci_device_id = 0;
429	u16		pci_subvendor_id = 0;
430	u16		pci_subdevice_id = 0;
431	em_vendor_info_t *ent;
432
433	INIT_DEBUGOUT("em_probe: begin");
434
435	pci_vendor_id = pci_get_vendor(dev);
436	if (pci_vendor_id != EM_VENDOR_ID)
437		return (ENXIO);
438
439	pci_device_id = pci_get_device(dev);
440	pci_subvendor_id = pci_get_subvendor(dev);
441	pci_subdevice_id = pci_get_subdevice(dev);
442
443	ent = em_vendor_info_array;
444	while (ent->vendor_id != 0) {
445		if ((pci_vendor_id == ent->vendor_id) &&
446		    (pci_device_id == ent->device_id) &&
447
448		    ((pci_subvendor_id == ent->subvendor_id) ||
449		    (ent->subvendor_id == PCI_ANY_ID)) &&
450
451		    ((pci_subdevice_id == ent->subdevice_id) ||
452		    (ent->subdevice_id == PCI_ANY_ID))) {
453			sprintf(adapter_name, "%s %s",
454				em_strings[ent->index],
455				em_driver_version);
456			device_set_desc_copy(dev, adapter_name);
457			return (BUS_PROBE_DEFAULT);
458		}
459		ent++;
460	}
461
462	return (ENXIO);
463}
464
465/*********************************************************************
466 *  Device initialization routine
467 *
468 *  The attach entry point is called when the driver is being loaded.
469 *  This routine identifies the type of hardware, allocates all resources
470 *  and initializes the hardware.
471 *
472 *  return 0 on success, positive on failure
473 *********************************************************************/
474
475static int
476em_attach(device_t dev)
477{
478	struct adapter	*adapter;
479	struct e1000_hw	*hw;
480	int		error = 0;
481
482	INIT_DEBUGOUT("em_attach: begin");
483
484	if (resource_disabled("em", device_get_unit(dev))) {
485		device_printf(dev, "Disabled by device hint\n");
486		return (ENXIO);
487	}
488
489	adapter = device_get_softc(dev);
490	adapter->dev = adapter->osdep.dev = dev;
491	hw = &adapter->hw;
492	EM_CORE_LOCK_INIT(adapter, device_get_nameunit(dev));
493
494	/* SYSCTL stuff */
495	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
496	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
497	    OID_AUTO, "nvm", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
498	    em_sysctl_nvm_info, "I", "NVM Information");
499
500	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
501	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
502	    OID_AUTO, "debug", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
503	    em_sysctl_debug_info, "I", "Debug Information");
504
505	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
506	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
507	    OID_AUTO, "fc", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
508	    em_set_flowcntl, "I", "Flow Control");
509
510	callout_init_mtx(&adapter->timer, &adapter->core_mtx, 0);
511
512	/* Determine hardware and mac info */
513	em_identify_hardware(adapter);
514
515	/* Setup PCI resources */
516	if (em_allocate_pci_resources(adapter)) {
517		device_printf(dev, "Allocation of PCI resources failed\n");
518		error = ENXIO;
519		goto err_pci;
520	}
521
522	/*
523	** For ICH8 and family we need to
524	** map the flash memory, and this
525	** must happen after the MAC is
526	** identified
527	*/
528	if ((hw->mac.type == e1000_ich8lan) ||
529	    (hw->mac.type == e1000_ich9lan) ||
530	    (hw->mac.type == e1000_ich10lan) ||
531	    (hw->mac.type == e1000_pchlan) ||
532	    (hw->mac.type == e1000_pch2lan) ||
533	    (hw->mac.type == e1000_pch_lpt)) {
534		int rid = EM_BAR_TYPE_FLASH;
535		adapter->flash = bus_alloc_resource_any(dev,
536		    SYS_RES_MEMORY, &rid, RF_ACTIVE);
537		if (adapter->flash == NULL) {
538			device_printf(dev, "Mapping of Flash failed\n");
539			error = ENXIO;
540			goto err_pci;
541		}
542		/* This is used in the shared code */
543		hw->flash_address = (u8 *)adapter->flash;
544		adapter->osdep.flash_bus_space_tag =
545		    rman_get_bustag(adapter->flash);
546		adapter->osdep.flash_bus_space_handle =
547		    rman_get_bushandle(adapter->flash);
548	}
549
550	/* Do Shared Code initialization */
551	if (e1000_setup_init_funcs(hw, TRUE)) {
552		device_printf(dev, "Setup of Shared code failed\n");
553		error = ENXIO;
554		goto err_pci;
555	}
556
557	e1000_get_bus_info(hw);
558
559	/* Set up some sysctls for the tunable interrupt delays */
560	em_add_int_delay_sysctl(adapter, "rx_int_delay",
561	    "receive interrupt delay in usecs", &adapter->rx_int_delay,
562	    E1000_REGISTER(hw, E1000_RDTR), em_rx_int_delay_dflt);
563	em_add_int_delay_sysctl(adapter, "tx_int_delay",
564	    "transmit interrupt delay in usecs", &adapter->tx_int_delay,
565	    E1000_REGISTER(hw, E1000_TIDV), em_tx_int_delay_dflt);
566	em_add_int_delay_sysctl(adapter, "rx_abs_int_delay",
567	    "receive interrupt delay limit in usecs",
568	    &adapter->rx_abs_int_delay,
569	    E1000_REGISTER(hw, E1000_RADV),
570	    em_rx_abs_int_delay_dflt);
571	em_add_int_delay_sysctl(adapter, "tx_abs_int_delay",
572	    "transmit interrupt delay limit in usecs",
573	    &adapter->tx_abs_int_delay,
574	    E1000_REGISTER(hw, E1000_TADV),
575	    em_tx_abs_int_delay_dflt);
576	em_add_int_delay_sysctl(adapter, "itr",
577	    "interrupt delay limit in usecs/4",
578	    &adapter->tx_itr,
579	    E1000_REGISTER(hw, E1000_ITR),
580	    DEFAULT_ITR);
581
582	/* Sysctl for limiting the amount of work done in the taskqueue */
583	em_set_sysctl_value(adapter, "rx_processing_limit",
584	    "max number of rx packets to process", &adapter->rx_process_limit,
585	    em_rx_process_limit);
586
587	/*
588	 * Validate number of transmit and receive descriptors. It
589	 * must not exceed hardware maximum, and must be multiple
590	 * of E1000_DBA_ALIGN.
591	 */
592	if (((em_txd * sizeof(struct e1000_tx_desc)) % EM_DBA_ALIGN) != 0 ||
593	    (em_txd > EM_MAX_TXD) || (em_txd < EM_MIN_TXD)) {
594		device_printf(dev, "Using %d TX descriptors instead of %d!\n",
595		    EM_DEFAULT_TXD, em_txd);
596		adapter->num_tx_desc = EM_DEFAULT_TXD;
597	} else
598		adapter->num_tx_desc = em_txd;
599
600	if (((em_rxd * sizeof(struct e1000_rx_desc)) % EM_DBA_ALIGN) != 0 ||
601	    (em_rxd > EM_MAX_RXD) || (em_rxd < EM_MIN_RXD)) {
602		device_printf(dev, "Using %d RX descriptors instead of %d!\n",
603		    EM_DEFAULT_RXD, em_rxd);
604		adapter->num_rx_desc = EM_DEFAULT_RXD;
605	} else
606		adapter->num_rx_desc = em_rxd;
607
608	hw->mac.autoneg = DO_AUTO_NEG;
609	hw->phy.autoneg_wait_to_complete = FALSE;
610	hw->phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
611
612	/* Copper options */
613	if (hw->phy.media_type == e1000_media_type_copper) {
614		hw->phy.mdix = AUTO_ALL_MODES;
615		hw->phy.disable_polarity_correction = FALSE;
616		hw->phy.ms_type = EM_MASTER_SLAVE;
617	}
618
619	/*
620	 * Set the frame limits assuming
621	 * standard ethernet sized frames.
622	 */
623	adapter->hw.mac.max_frame_size =
624	    ETHERMTU + ETHER_HDR_LEN + ETHERNET_FCS_SIZE;
625
626	/*
627	 * This controls when hardware reports transmit completion
628	 * status.
629	 */
630	hw->mac.report_tx_early = 1;
631
632	/*
633	** Get queue/ring memory
634	*/
635	if (em_allocate_queues(adapter)) {
636		error = ENOMEM;
637		goto err_pci;
638	}
639
640	/* Allocate multicast array memory. */
641	adapter->mta = malloc(sizeof(u8) * ETH_ADDR_LEN *
642	    MAX_NUM_MULTICAST_ADDRESSES, M_DEVBUF, M_NOWAIT);
643	if (adapter->mta == NULL) {
644		device_printf(dev, "Can not allocate multicast setup array\n");
645		error = ENOMEM;
646		goto err_late;
647	}
648
649	/* Check SOL/IDER usage */
650	if (e1000_check_reset_block(hw))
651		device_printf(dev, "PHY reset is blocked"
652		    " due to SOL/IDER session.\n");
653
654	/* Sysctl for setting Energy Efficient Ethernet */
655	hw->dev_spec.ich8lan.eee_disable = eee_setting;
656	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
657	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
658	    OID_AUTO, "eee_control", CTLTYPE_INT|CTLFLAG_RW,
659	    adapter, 0, em_sysctl_eee, "I",
660	    "Disable Energy Efficient Ethernet");
661
662	/*
663	** Start from a known state, this is
664	** important in reading the nvm and
665	** mac from that.
666	*/
667	e1000_reset_hw(hw);
668
669
670	/* Make sure we have a good EEPROM before we read from it */
671	if (e1000_validate_nvm_checksum(hw) < 0) {
672		/*
673		** Some PCI-E parts fail the first check due to
674		** the link being in sleep state, call it again,
675		** if it fails a second time its a real issue.
676		*/
677		if (e1000_validate_nvm_checksum(hw) < 0) {
678			device_printf(dev,
679			    "The EEPROM Checksum Is Not Valid\n");
680			error = EIO;
681			goto err_late;
682		}
683	}
684
685	/* Copy the permanent MAC address out of the EEPROM */
686	if (e1000_read_mac_addr(hw) < 0) {
687		device_printf(dev, "EEPROM read error while reading MAC"
688		    " address\n");
689		error = EIO;
690		goto err_late;
691	}
692
693	if (!em_is_valid_ether_addr(hw->mac.addr)) {
694		device_printf(dev, "Invalid MAC address\n");
695		error = EIO;
696		goto err_late;
697	}
698
699	/*
700	**  Do interrupt configuration
701	*/
702	if (adapter->msix > 1) /* Do MSIX */
703		error = em_allocate_msix(adapter);
704	else  /* MSI or Legacy */
705		error = em_allocate_legacy(adapter);
706	if (error)
707		goto err_late;
708
709	/*
710	 * Get Wake-on-Lan and Management info for later use
711	 */
712	em_get_wakeup(dev);
713
714	/* Setup OS specific network interface */
715	if (em_setup_interface(dev, adapter) != 0)
716		goto err_late;
717
718	em_reset(adapter);
719
720	/* Initialize statistics */
721	em_update_stats_counters(adapter);
722
723	hw->mac.get_link_status = 1;
724	em_update_link_status(adapter);
725
726	/* Register for VLAN events */
727	adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
728	    em_register_vlan, adapter, EVENTHANDLER_PRI_FIRST);
729	adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
730	    em_unregister_vlan, adapter, EVENTHANDLER_PRI_FIRST);
731
732	em_add_hw_stats(adapter);
733
734	/* Non-AMT based hardware can now take control from firmware */
735	if (adapter->has_manage && !adapter->has_amt)
736		em_get_hw_control(adapter);
737
738	/* Tell the stack that the interface is not active */
739	adapter->ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
740	adapter->ifp->if_drv_flags |= IFF_DRV_OACTIVE;
741
742	adapter->led_dev = led_create(em_led_func, adapter,
743	    device_get_nameunit(dev));
744#ifdef DEV_NETMAP
745	em_netmap_attach(adapter);
746#endif /* DEV_NETMAP */
747
748	INIT_DEBUGOUT("em_attach: end");
749
750	return (0);
751
752err_late:
753	em_free_transmit_structures(adapter);
754	em_free_receive_structures(adapter);
755	em_release_hw_control(adapter);
756	if (adapter->ifp != NULL)
757		if_free(adapter->ifp);
758err_pci:
759	em_free_pci_resources(adapter);
760	free(adapter->mta, M_DEVBUF);
761	EM_CORE_LOCK_DESTROY(adapter);
762
763	return (error);
764}
765
766/*********************************************************************
767 *  Device removal routine
768 *
769 *  The detach entry point is called when the driver is being removed.
770 *  This routine stops the adapter and deallocates all the resources
771 *  that were allocated for driver operation.
772 *
773 *  return 0 on success, positive on failure
774 *********************************************************************/
775
776static int
777em_detach(device_t dev)
778{
779	struct adapter	*adapter = device_get_softc(dev);
780	struct ifnet	*ifp = adapter->ifp;
781
782	INIT_DEBUGOUT("em_detach: begin");
783
784	/* Make sure VLANS are not using driver */
785	if (adapter->ifp->if_vlantrunk != NULL) {
786		device_printf(dev,"Vlan in use, detach first\n");
787		return (EBUSY);
788	}
789
790#ifdef DEVICE_POLLING
791	if (ifp->if_capenable & IFCAP_POLLING)
792		ether_poll_deregister(ifp);
793#endif
794
795	if (adapter->led_dev != NULL)
796		led_destroy(adapter->led_dev);
797
798	EM_CORE_LOCK(adapter);
799	adapter->in_detach = 1;
800	em_stop(adapter);
801	EM_CORE_UNLOCK(adapter);
802	EM_CORE_LOCK_DESTROY(adapter);
803
804	e1000_phy_hw_reset(&adapter->hw);
805
806	em_release_manageability(adapter);
807	em_release_hw_control(adapter);
808
809	/* Unregister VLAN events */
810	if (adapter->vlan_attach != NULL)
811		EVENTHANDLER_DEREGISTER(vlan_config, adapter->vlan_attach);
812	if (adapter->vlan_detach != NULL)
813		EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach);
814
815	ether_ifdetach(adapter->ifp);
816	callout_drain(&adapter->timer);
817
818#ifdef DEV_NETMAP
819	netmap_detach(ifp);
820#endif /* DEV_NETMAP */
821
822	em_free_pci_resources(adapter);
823	bus_generic_detach(dev);
824	if_free(ifp);
825
826	em_free_transmit_structures(adapter);
827	em_free_receive_structures(adapter);
828
829	em_release_hw_control(adapter);
830	free(adapter->mta, M_DEVBUF);
831
832	return (0);
833}
834
835/*********************************************************************
836 *
837 *  Shutdown entry point
838 *
839 **********************************************************************/
840
841static int
842em_shutdown(device_t dev)
843{
844	return em_suspend(dev);
845}
846
847/*
848 * Suspend/resume device methods.
849 */
850static int
851em_suspend(device_t dev)
852{
853	struct adapter *adapter = device_get_softc(dev);
854
855	EM_CORE_LOCK(adapter);
856
857        em_release_manageability(adapter);
858	em_release_hw_control(adapter);
859	em_enable_wakeup(dev);
860
861	EM_CORE_UNLOCK(adapter);
862
863	return bus_generic_suspend(dev);
864}
865
866static int
867em_resume(device_t dev)
868{
869	struct adapter *adapter = device_get_softc(dev);
870	struct tx_ring	*txr = adapter->tx_rings;
871	struct ifnet *ifp = adapter->ifp;
872
873	EM_CORE_LOCK(adapter);
874	if (adapter->hw.mac.type == e1000_pch2lan)
875		e1000_resume_workarounds_pchlan(&adapter->hw);
876	em_init_locked(adapter);
877	em_init_manageability(adapter);
878
879	if ((ifp->if_flags & IFF_UP) &&
880	    (ifp->if_drv_flags & IFF_DRV_RUNNING) && adapter->link_active) {
881		for (int i = 0; i < adapter->num_queues; i++, txr++) {
882			EM_TX_LOCK(txr);
883#ifdef EM_MULTIQUEUE
884			if (!drbr_empty(ifp, txr->br))
885				em_mq_start_locked(ifp, txr, NULL);
886#else
887			if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
888				em_start_locked(ifp, txr);
889#endif
890			EM_TX_UNLOCK(txr);
891		}
892	}
893	EM_CORE_UNLOCK(adapter);
894
895	return bus_generic_resume(dev);
896}
897
898
899#ifdef EM_MULTIQUEUE
900/*********************************************************************
901 *  Multiqueue Transmit routines
902 *
903 *  em_mq_start is called by the stack to initiate a transmit.
904 *  however, if busy the driver can queue the request rather
905 *  than do an immediate send. It is this that is an advantage
906 *  in this driver, rather than also having multiple tx queues.
907 **********************************************************************/
908static int
909em_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr, struct mbuf *m)
910{
911	struct adapter  *adapter = txr->adapter;
912        struct mbuf     *next;
913        int             err = 0, enq = 0;
914
915	if ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) !=
916	    IFF_DRV_RUNNING || adapter->link_active == 0) {
917		if (m != NULL)
918			err = drbr_enqueue(ifp, txr->br, m);
919		return (err);
920	}
921
922	enq = 0;
923	if (m != NULL) {
924		err = drbr_enqueue(ifp, txr->br, m);
925		if (err)
926			return (err);
927	}
928
929	/* Process the queue */
930	while ((next = drbr_peek(ifp, txr->br)) != NULL) {
931		if ((err = em_xmit(txr, &next)) != 0) {
932			if (next == NULL)
933				drbr_advance(ifp, txr->br);
934			else
935				drbr_putback(ifp, txr->br, next);
936			break;
937		}
938		drbr_advance(ifp, txr->br);
939		enq++;
940		ifp->if_obytes += next->m_pkthdr.len;
941		if (next->m_flags & M_MCAST)
942			ifp->if_omcasts++;
943		ETHER_BPF_MTAP(ifp, next);
944		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
945                        break;
946	}
947
948	if (enq > 0) {
949                /* Set the watchdog */
950                txr->queue_status = EM_QUEUE_WORKING;
951		txr->watchdog_time = ticks;
952	}
953
954	if (txr->tx_avail < EM_MAX_SCATTER)
955		em_txeof(txr);
956	if (txr->tx_avail < EM_MAX_SCATTER)
957		ifp->if_drv_flags |= IFF_DRV_OACTIVE;
958	return (err);
959}
960
961/*
962** Multiqueue capable stack interface
963*/
964static int
965em_mq_start(struct ifnet *ifp, struct mbuf *m)
966{
967	struct adapter	*adapter = ifp->if_softc;
968	struct tx_ring	*txr = adapter->tx_rings;
969	int 		error;
970
971	if (EM_TX_TRYLOCK(txr)) {
972		error = em_mq_start_locked(ifp, txr, m);
973		EM_TX_UNLOCK(txr);
974	} else
975		error = drbr_enqueue(ifp, txr->br, m);
976
977	return (error);
978}
979
980/*
981** Flush all ring buffers
982*/
983static void
984em_qflush(struct ifnet *ifp)
985{
986	struct adapter  *adapter = ifp->if_softc;
987	struct tx_ring  *txr = adapter->tx_rings;
988	struct mbuf     *m;
989
990	for (int i = 0; i < adapter->num_queues; i++, txr++) {
991		EM_TX_LOCK(txr);
992		while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
993			m_freem(m);
994		EM_TX_UNLOCK(txr);
995	}
996	if_qflush(ifp);
997}
998#else  /* !EM_MULTIQUEUE */
999
1000static void
1001em_start_locked(struct ifnet *ifp, struct tx_ring *txr)
1002{
1003	struct adapter	*adapter = ifp->if_softc;
1004	struct mbuf	*m_head;
1005
1006	EM_TX_LOCK_ASSERT(txr);
1007
1008	if ((ifp->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
1009	    IFF_DRV_RUNNING)
1010		return;
1011
1012	if (!adapter->link_active)
1013		return;
1014
1015	while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
1016        	/* Call cleanup if number of TX descriptors low */
1017		if (txr->tx_avail <= EM_TX_CLEANUP_THRESHOLD)
1018			em_txeof(txr);
1019		if (txr->tx_avail < EM_MAX_SCATTER) {
1020			ifp->if_drv_flags |= IFF_DRV_OACTIVE;
1021			break;
1022		}
1023                IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
1024		if (m_head == NULL)
1025			break;
1026		/*
1027		 *  Encapsulation can modify our pointer, and or make it
1028		 *  NULL on failure.  In that event, we can't requeue.
1029		 */
1030		if (em_xmit(txr, &m_head)) {
1031			if (m_head == NULL)
1032				break;
1033			IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
1034			break;
1035		}
1036
1037		/* Send a copy of the frame to the BPF listener */
1038		ETHER_BPF_MTAP(ifp, m_head);
1039
1040		/* Set timeout in case hardware has problems transmitting. */
1041		txr->watchdog_time = ticks;
1042                txr->queue_status = EM_QUEUE_WORKING;
1043	}
1044
1045	return;
1046}
1047
1048static void
1049em_start(struct ifnet *ifp)
1050{
1051	struct adapter	*adapter = ifp->if_softc;
1052	struct tx_ring	*txr = adapter->tx_rings;
1053
1054	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1055		EM_TX_LOCK(txr);
1056		em_start_locked(ifp, txr);
1057		EM_TX_UNLOCK(txr);
1058	}
1059	return;
1060}
1061#endif /* EM_MULTIQUEUE */
1062
1063/*********************************************************************
1064 *  Ioctl entry point
1065 *
1066 *  em_ioctl is called when the user wants to configure the
1067 *  interface.
1068 *
1069 *  return 0 on success, positive on failure
1070 **********************************************************************/
1071
1072static int
1073em_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
1074{
1075	struct adapter	*adapter = ifp->if_softc;
1076	struct ifreq	*ifr = (struct ifreq *)data;
1077#if defined(INET) || defined(INET6)
1078	struct ifaddr	*ifa = (struct ifaddr *)data;
1079#endif
1080	bool		avoid_reset = FALSE;
1081	int		error = 0;
1082
1083	if (adapter->in_detach)
1084		return (error);
1085
1086	switch (command) {
1087	case SIOCSIFADDR:
1088#ifdef INET
1089		if (ifa->ifa_addr->sa_family == AF_INET)
1090			avoid_reset = TRUE;
1091#endif
1092#ifdef INET6
1093		if (ifa->ifa_addr->sa_family == AF_INET6)
1094			avoid_reset = TRUE;
1095#endif
1096		/*
1097		** Calling init results in link renegotiation,
1098		** so we avoid doing it when possible.
1099		*/
1100		if (avoid_reset) {
1101			ifp->if_flags |= IFF_UP;
1102			if (!(ifp->if_drv_flags & IFF_DRV_RUNNING))
1103				em_init(adapter);
1104#ifdef INET
1105			if (!(ifp->if_flags & IFF_NOARP))
1106				arp_ifinit(ifp, ifa);
1107#endif
1108		} else
1109			error = ether_ioctl(ifp, command, data);
1110		break;
1111	case SIOCSIFMTU:
1112	    {
1113		int max_frame_size;
1114
1115		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)");
1116
1117		EM_CORE_LOCK(adapter);
1118		switch (adapter->hw.mac.type) {
1119		case e1000_82571:
1120		case e1000_82572:
1121		case e1000_ich9lan:
1122		case e1000_ich10lan:
1123		case e1000_pch2lan:
1124		case e1000_pch_lpt:
1125		case e1000_82574:
1126		case e1000_82583:
1127		case e1000_80003es2lan:	/* 9K Jumbo Frame size */
1128			max_frame_size = 9234;
1129			break;
1130		case e1000_pchlan:
1131			max_frame_size = 4096;
1132			break;
1133			/* Adapters that do not support jumbo frames */
1134		case e1000_ich8lan:
1135			max_frame_size = ETHER_MAX_LEN;
1136			break;
1137		default:
1138			max_frame_size = MAX_JUMBO_FRAME_SIZE;
1139		}
1140		if (ifr->ifr_mtu > max_frame_size - ETHER_HDR_LEN -
1141		    ETHER_CRC_LEN) {
1142			EM_CORE_UNLOCK(adapter);
1143			error = EINVAL;
1144			break;
1145		}
1146
1147		ifp->if_mtu = ifr->ifr_mtu;
1148		adapter->hw.mac.max_frame_size =
1149		    ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
1150		em_init_locked(adapter);
1151		EM_CORE_UNLOCK(adapter);
1152		break;
1153	    }
1154	case SIOCSIFFLAGS:
1155		IOCTL_DEBUGOUT("ioctl rcv'd:\
1156		    SIOCSIFFLAGS (Set Interface Flags)");
1157		EM_CORE_LOCK(adapter);
1158		if (ifp->if_flags & IFF_UP) {
1159			if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) {
1160				if ((ifp->if_flags ^ adapter->if_flags) &
1161				    (IFF_PROMISC | IFF_ALLMULTI)) {
1162					em_disable_promisc(adapter);
1163					em_set_promisc(adapter);
1164				}
1165			} else
1166				em_init_locked(adapter);
1167		} else
1168			if (ifp->if_drv_flags & IFF_DRV_RUNNING)
1169				em_stop(adapter);
1170		adapter->if_flags = ifp->if_flags;
1171		EM_CORE_UNLOCK(adapter);
1172		break;
1173	case SIOCADDMULTI:
1174	case SIOCDELMULTI:
1175		IOCTL_DEBUGOUT("ioctl rcv'd: SIOC(ADD|DEL)MULTI");
1176		if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1177			EM_CORE_LOCK(adapter);
1178			em_disable_intr(adapter);
1179			em_set_multi(adapter);
1180#ifdef DEVICE_POLLING
1181			if (!(ifp->if_capenable & IFCAP_POLLING))
1182#endif
1183				em_enable_intr(adapter);
1184			EM_CORE_UNLOCK(adapter);
1185		}
1186		break;
1187	case SIOCSIFMEDIA:
1188		/* Check SOL/IDER usage */
1189		EM_CORE_LOCK(adapter);
1190		if (e1000_check_reset_block(&adapter->hw)) {
1191			EM_CORE_UNLOCK(adapter);
1192			device_printf(adapter->dev, "Media change is"
1193			    " blocked due to SOL/IDER session.\n");
1194			break;
1195		}
1196		EM_CORE_UNLOCK(adapter);
1197		/* falls thru */
1198	case SIOCGIFMEDIA:
1199		IOCTL_DEBUGOUT("ioctl rcv'd: \
1200		    SIOCxIFMEDIA (Get/Set Interface Media)");
1201		error = ifmedia_ioctl(ifp, ifr, &adapter->media, command);
1202		break;
1203	case SIOCSIFCAP:
1204	    {
1205		int mask, reinit;
1206
1207		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFCAP (Set Capabilities)");
1208		reinit = 0;
1209		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
1210#ifdef DEVICE_POLLING
1211		if (mask & IFCAP_POLLING) {
1212			if (ifr->ifr_reqcap & IFCAP_POLLING) {
1213				error = ether_poll_register(em_poll, ifp);
1214				if (error)
1215					return (error);
1216				EM_CORE_LOCK(adapter);
1217				em_disable_intr(adapter);
1218				ifp->if_capenable |= IFCAP_POLLING;
1219				EM_CORE_UNLOCK(adapter);
1220			} else {
1221				error = ether_poll_deregister(ifp);
1222				/* Enable interrupt even in error case */
1223				EM_CORE_LOCK(adapter);
1224				em_enable_intr(adapter);
1225				ifp->if_capenable &= ~IFCAP_POLLING;
1226				EM_CORE_UNLOCK(adapter);
1227			}
1228		}
1229#endif
1230		if (mask & IFCAP_HWCSUM) {
1231			ifp->if_capenable ^= IFCAP_HWCSUM;
1232			reinit = 1;
1233		}
1234		if (mask & IFCAP_TSO4) {
1235			ifp->if_capenable ^= IFCAP_TSO4;
1236			reinit = 1;
1237		}
1238		if (mask & IFCAP_VLAN_HWTAGGING) {
1239			ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
1240			reinit = 1;
1241		}
1242		if (mask & IFCAP_VLAN_HWFILTER) {
1243			ifp->if_capenable ^= IFCAP_VLAN_HWFILTER;
1244			reinit = 1;
1245		}
1246		if (mask & IFCAP_VLAN_HWTSO) {
1247			ifp->if_capenable ^= IFCAP_VLAN_HWTSO;
1248			reinit = 1;
1249		}
1250		if ((mask & IFCAP_WOL) &&
1251		    (ifp->if_capabilities & IFCAP_WOL) != 0) {
1252			if (mask & IFCAP_WOL_MCAST)
1253				ifp->if_capenable ^= IFCAP_WOL_MCAST;
1254			if (mask & IFCAP_WOL_MAGIC)
1255				ifp->if_capenable ^= IFCAP_WOL_MAGIC;
1256		}
1257		if (reinit && (ifp->if_drv_flags & IFF_DRV_RUNNING))
1258			em_init(adapter);
1259		VLAN_CAPABILITIES(ifp);
1260		break;
1261	    }
1262
1263	default:
1264		error = ether_ioctl(ifp, command, data);
1265		break;
1266	}
1267
1268	return (error);
1269}
1270
1271
1272/*********************************************************************
1273 *  Init entry point
1274 *
1275 *  This routine is used in two ways. It is used by the stack as
1276 *  init entry point in network interface structure. It is also used
1277 *  by the driver as a hw/sw initialization routine to get to a
1278 *  consistent state.
1279 *
1280 *  return 0 on success, positive on failure
1281 **********************************************************************/
1282
1283static void
1284em_init_locked(struct adapter *adapter)
1285{
1286	struct ifnet	*ifp = adapter->ifp;
1287	device_t	dev = adapter->dev;
1288
1289	INIT_DEBUGOUT("em_init: begin");
1290
1291	EM_CORE_LOCK_ASSERT(adapter);
1292
1293	em_disable_intr(adapter);
1294	callout_stop(&adapter->timer);
1295
1296	/* Get the latest mac address, User can use a LAA */
1297        bcopy(IF_LLADDR(adapter->ifp), adapter->hw.mac.addr,
1298              ETHER_ADDR_LEN);
1299
1300	/* Put the address into the Receive Address Array */
1301	e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);
1302
1303	/*
1304	 * With the 82571 adapter, RAR[0] may be overwritten
1305	 * when the other port is reset, we make a duplicate
1306	 * in RAR[14] for that eventuality, this assures
1307	 * the interface continues to function.
1308	 */
1309	if (adapter->hw.mac.type == e1000_82571) {
1310		e1000_set_laa_state_82571(&adapter->hw, TRUE);
1311		e1000_rar_set(&adapter->hw, adapter->hw.mac.addr,
1312		    E1000_RAR_ENTRIES - 1);
1313	}
1314
1315	/* Initialize the hardware */
1316	em_reset(adapter);
1317	em_update_link_status(adapter);
1318
1319	/* Setup VLAN support, basic and offload if available */
1320	E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);
1321
1322	/* Set hardware offload abilities */
1323	ifp->if_hwassist = 0;
1324	if (ifp->if_capenable & IFCAP_TXCSUM)
1325		ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
1326	if (ifp->if_capenable & IFCAP_TSO4)
1327		ifp->if_hwassist |= CSUM_TSO;
1328
1329	/* Configure for OS presence */
1330	em_init_manageability(adapter);
1331
1332	/* Prepare transmit descriptors and buffers */
1333	em_setup_transmit_structures(adapter);
1334	em_initialize_transmit_unit(adapter);
1335
1336	/* Setup Multicast table */
1337	em_set_multi(adapter);
1338
1339	/*
1340	** Figure out the desired mbuf
1341	** pool for doing jumbos
1342	*/
1343	if (adapter->hw.mac.max_frame_size <= 2048)
1344		adapter->rx_mbuf_sz = MCLBYTES;
1345	else if (adapter->hw.mac.max_frame_size <= 4096)
1346		adapter->rx_mbuf_sz = MJUMPAGESIZE;
1347	else
1348		adapter->rx_mbuf_sz = MJUM9BYTES;
1349
1350	/* Prepare receive descriptors and buffers */
1351	if (em_setup_receive_structures(adapter)) {
1352		device_printf(dev, "Could not setup receive structures\n");
1353		em_stop(adapter);
1354		return;
1355	}
1356	em_initialize_receive_unit(adapter);
1357
1358	/* Use real VLAN Filter support? */
1359	if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) {
1360		if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
1361			/* Use real VLAN Filter support */
1362			em_setup_vlan_hw_support(adapter);
1363		else {
1364			u32 ctrl;
1365			ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
1366			ctrl |= E1000_CTRL_VME;
1367			E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
1368		}
1369	}
1370
1371	/* Don't lose promiscuous settings */
1372	em_set_promisc(adapter);
1373
1374	/* Set the interface as ACTIVE */
1375	ifp->if_drv_flags |= IFF_DRV_RUNNING;
1376	ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
1377
1378	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
1379	e1000_clear_hw_cntrs_base_generic(&adapter->hw);
1380
1381	/* MSI/X configuration for 82574 */
1382	if (adapter->hw.mac.type == e1000_82574) {
1383		int tmp;
1384		tmp = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
1385		tmp |= E1000_CTRL_EXT_PBA_CLR;
1386		E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, tmp);
1387		/* Set the IVAR - interrupt vector routing. */
1388		E1000_WRITE_REG(&adapter->hw, E1000_IVAR, adapter->ivars);
1389	}
1390
1391#ifdef DEVICE_POLLING
1392	/*
1393	 * Only enable interrupts if we are not polling, make sure
1394	 * they are off otherwise.
1395	 */
1396	if (ifp->if_capenable & IFCAP_POLLING)
1397		em_disable_intr(adapter);
1398	else
1399#endif /* DEVICE_POLLING */
1400		em_enable_intr(adapter);
1401
1402	/* AMT based hardware can now take control from firmware */
1403	if (adapter->has_manage && adapter->has_amt)
1404		em_get_hw_control(adapter);
1405}
1406
1407static void
1408em_init(void *arg)
1409{
1410	struct adapter *adapter = arg;
1411
1412	EM_CORE_LOCK(adapter);
1413	em_init_locked(adapter);
1414	EM_CORE_UNLOCK(adapter);
1415}
1416
1417
1418#ifdef DEVICE_POLLING
1419/*********************************************************************
1420 *
1421 *  Legacy polling routine: note this only works with single queue
1422 *
1423 *********************************************************************/
1424static int
1425em_poll(struct ifnet *ifp, enum poll_cmd cmd, int count)
1426{
1427	struct adapter *adapter = ifp->if_softc;
1428	struct tx_ring	*txr = adapter->tx_rings;
1429	struct rx_ring	*rxr = adapter->rx_rings;
1430	u32		reg_icr;
1431	int		rx_done;
1432
1433	EM_CORE_LOCK(adapter);
1434	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
1435		EM_CORE_UNLOCK(adapter);
1436		return (0);
1437	}
1438
1439	if (cmd == POLL_AND_CHECK_STATUS) {
1440		reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1441		if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
1442			callout_stop(&adapter->timer);
1443			adapter->hw.mac.get_link_status = 1;
1444			em_update_link_status(adapter);
1445			callout_reset(&adapter->timer, hz,
1446			    em_local_timer, adapter);
1447		}
1448	}
1449	EM_CORE_UNLOCK(adapter);
1450
1451	em_rxeof(rxr, count, &rx_done);
1452
1453	EM_TX_LOCK(txr);
1454	em_txeof(txr);
1455#ifdef EM_MULTIQUEUE
1456	if (!drbr_empty(ifp, txr->br))
1457		em_mq_start_locked(ifp, txr, NULL);
1458#else
1459	if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1460		em_start_locked(ifp, txr);
1461#endif
1462	EM_TX_UNLOCK(txr);
1463
1464	return (rx_done);
1465}
1466#endif /* DEVICE_POLLING */
1467
1468
1469/*********************************************************************
1470 *
1471 *  Fast Legacy/MSI Combined Interrupt Service routine
1472 *
1473 *********************************************************************/
1474static int
1475em_irq_fast(void *arg)
1476{
1477	struct adapter	*adapter = arg;
1478	struct ifnet	*ifp;
1479	u32		reg_icr;
1480
1481	ifp = adapter->ifp;
1482
1483	reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1484
1485	/* Hot eject?  */
1486	if (reg_icr == 0xffffffff)
1487		return FILTER_STRAY;
1488
1489	/* Definitely not our interrupt.  */
1490	if (reg_icr == 0x0)
1491		return FILTER_STRAY;
1492
1493	/*
1494	 * Starting with the 82571 chip, bit 31 should be used to
1495	 * determine whether the interrupt belongs to us.
1496	 */
1497	if (adapter->hw.mac.type >= e1000_82571 &&
1498	    (reg_icr & E1000_ICR_INT_ASSERTED) == 0)
1499		return FILTER_STRAY;
1500
1501	em_disable_intr(adapter);
1502	taskqueue_enqueue(adapter->tq, &adapter->que_task);
1503
1504	/* Link status change */
1505	if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
1506		adapter->hw.mac.get_link_status = 1;
1507		taskqueue_enqueue(taskqueue_fast, &adapter->link_task);
1508	}
1509
1510	if (reg_icr & E1000_ICR_RXO)
1511		adapter->rx_overruns++;
1512	return FILTER_HANDLED;
1513}
1514
1515/* Combined RX/TX handler, used by Legacy and MSI */
1516static void
1517em_handle_que(void *context, int pending)
1518{
1519	struct adapter	*adapter = context;
1520	struct ifnet	*ifp = adapter->ifp;
1521	struct tx_ring	*txr = adapter->tx_rings;
1522	struct rx_ring	*rxr = adapter->rx_rings;
1523
1524
1525	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1526		bool more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
1527		EM_TX_LOCK(txr);
1528		em_txeof(txr);
1529#ifdef EM_MULTIQUEUE
1530		if (!drbr_empty(ifp, txr->br))
1531			em_mq_start_locked(ifp, txr, NULL);
1532#else
1533		if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1534			em_start_locked(ifp, txr);
1535#endif
1536		EM_TX_UNLOCK(txr);
1537		if (more) {
1538			taskqueue_enqueue(adapter->tq, &adapter->que_task);
1539			return;
1540		}
1541	}
1542
1543	em_enable_intr(adapter);
1544	return;
1545}
1546
1547
1548/*********************************************************************
1549 *
1550 *  MSIX Interrupt Service Routines
1551 *
1552 **********************************************************************/
1553static void
1554em_msix_tx(void *arg)
1555{
1556	struct tx_ring *txr = arg;
1557	struct adapter *adapter = txr->adapter;
1558	struct ifnet	*ifp = adapter->ifp;
1559
1560	++txr->tx_irq;
1561	EM_TX_LOCK(txr);
1562	em_txeof(txr);
1563#ifdef EM_MULTIQUEUE
1564	if (!drbr_empty(ifp, txr->br))
1565		em_mq_start_locked(ifp, txr, NULL);
1566#else
1567	if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1568		em_start_locked(ifp, txr);
1569#endif
1570	/* Reenable this interrupt */
1571	E1000_WRITE_REG(&adapter->hw, E1000_IMS, txr->ims);
1572	EM_TX_UNLOCK(txr);
1573	return;
1574}
1575
1576/*********************************************************************
1577 *
1578 *  MSIX RX Interrupt Service routine
1579 *
1580 **********************************************************************/
1581
1582static void
1583em_msix_rx(void *arg)
1584{
1585	struct rx_ring	*rxr = arg;
1586	struct adapter	*adapter = rxr->adapter;
1587	bool		more;
1588
1589	++rxr->rx_irq;
1590	if (!(adapter->ifp->if_drv_flags & IFF_DRV_RUNNING))
1591		return;
1592	more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
1593	if (more)
1594		taskqueue_enqueue(rxr->tq, &rxr->rx_task);
1595	else
1596		/* Reenable this interrupt */
1597		E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxr->ims);
1598	return;
1599}
1600
1601/*********************************************************************
1602 *
1603 *  MSIX Link Fast Interrupt Service routine
1604 *
1605 **********************************************************************/
1606static void
1607em_msix_link(void *arg)
1608{
1609	struct adapter	*adapter = arg;
1610	u32		reg_icr;
1611
1612	++adapter->link_irq;
1613	reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1614
1615	if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
1616		adapter->hw.mac.get_link_status = 1;
1617		em_handle_link(adapter, 0);
1618	} else
1619		E1000_WRITE_REG(&adapter->hw, E1000_IMS,
1620		    EM_MSIX_LINK | E1000_IMS_LSC);
1621	return;
1622}
1623
1624static void
1625em_handle_rx(void *context, int pending)
1626{
1627	struct rx_ring	*rxr = context;
1628	struct adapter	*adapter = rxr->adapter;
1629        bool            more;
1630
1631	more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
1632	if (more)
1633		taskqueue_enqueue(rxr->tq, &rxr->rx_task);
1634	else
1635		/* Reenable this interrupt */
1636		E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxr->ims);
1637}
1638
1639static void
1640em_handle_tx(void *context, int pending)
1641{
1642	struct tx_ring	*txr = context;
1643	struct adapter	*adapter = txr->adapter;
1644	struct ifnet	*ifp = adapter->ifp;
1645
1646	EM_TX_LOCK(txr);
1647	em_txeof(txr);
1648#ifdef EM_MULTIQUEUE
1649	if (!drbr_empty(ifp, txr->br))
1650		em_mq_start_locked(ifp, txr, NULL);
1651#else
1652	if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1653		em_start_locked(ifp, txr);
1654#endif
1655	E1000_WRITE_REG(&adapter->hw, E1000_IMS, txr->ims);
1656	EM_TX_UNLOCK(txr);
1657}
1658
1659static void
1660em_handle_link(void *context, int pending)
1661{
1662	struct adapter	*adapter = context;
1663	struct tx_ring	*txr = adapter->tx_rings;
1664	struct ifnet *ifp = adapter->ifp;
1665
1666	if (!(ifp->if_drv_flags & IFF_DRV_RUNNING))
1667		return;
1668
1669	EM_CORE_LOCK(adapter);
1670	callout_stop(&adapter->timer);
1671	em_update_link_status(adapter);
1672	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
1673	E1000_WRITE_REG(&adapter->hw, E1000_IMS,
1674	    EM_MSIX_LINK | E1000_IMS_LSC);
1675	if (adapter->link_active) {
1676		for (int i = 0; i < adapter->num_queues; i++, txr++) {
1677			EM_TX_LOCK(txr);
1678#ifdef EM_MULTIQUEUE
1679			if (!drbr_empty(ifp, txr->br))
1680				em_mq_start_locked(ifp, txr, NULL);
1681#else
1682			if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1683				em_start_locked(ifp, txr);
1684#endif
1685			EM_TX_UNLOCK(txr);
1686		}
1687	}
1688	EM_CORE_UNLOCK(adapter);
1689}
1690
1691
1692/*********************************************************************
1693 *
1694 *  Media Ioctl callback
1695 *
1696 *  This routine is called whenever the user queries the status of
1697 *  the interface using ifconfig.
1698 *
1699 **********************************************************************/
1700static void
1701em_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
1702{
1703	struct adapter *adapter = ifp->if_softc;
1704	u_char fiber_type = IFM_1000_SX;
1705
1706	INIT_DEBUGOUT("em_media_status: begin");
1707
1708	EM_CORE_LOCK(adapter);
1709	em_update_link_status(adapter);
1710
1711	ifmr->ifm_status = IFM_AVALID;
1712	ifmr->ifm_active = IFM_ETHER;
1713
1714	if (!adapter->link_active) {
1715		EM_CORE_UNLOCK(adapter);
1716		return;
1717	}
1718
1719	ifmr->ifm_status |= IFM_ACTIVE;
1720
1721	if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
1722	    (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
1723		ifmr->ifm_active |= fiber_type | IFM_FDX;
1724	} else {
1725		switch (adapter->link_speed) {
1726		case 10:
1727			ifmr->ifm_active |= IFM_10_T;
1728			break;
1729		case 100:
1730			ifmr->ifm_active |= IFM_100_TX;
1731			break;
1732		case 1000:
1733			ifmr->ifm_active |= IFM_1000_T;
1734			break;
1735		}
1736		if (adapter->link_duplex == FULL_DUPLEX)
1737			ifmr->ifm_active |= IFM_FDX;
1738		else
1739			ifmr->ifm_active |= IFM_HDX;
1740	}
1741	EM_CORE_UNLOCK(adapter);
1742}
1743
1744/*********************************************************************
1745 *
1746 *  Media Ioctl callback
1747 *
1748 *  This routine is called when the user changes speed/duplex using
1749 *  media/mediopt option with ifconfig.
1750 *
1751 **********************************************************************/
1752static int
1753em_media_change(struct ifnet *ifp)
1754{
1755	struct adapter *adapter = ifp->if_softc;
1756	struct ifmedia  *ifm = &adapter->media;
1757
1758	INIT_DEBUGOUT("em_media_change: begin");
1759
1760	if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
1761		return (EINVAL);
1762
1763	EM_CORE_LOCK(adapter);
1764	switch (IFM_SUBTYPE(ifm->ifm_media)) {
1765	case IFM_AUTO:
1766		adapter->hw.mac.autoneg = DO_AUTO_NEG;
1767		adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
1768		break;
1769	case IFM_1000_LX:
1770	case IFM_1000_SX:
1771	case IFM_1000_T:
1772		adapter->hw.mac.autoneg = DO_AUTO_NEG;
1773		adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
1774		break;
1775	case IFM_100_TX:
1776		adapter->hw.mac.autoneg = FALSE;
1777		adapter->hw.phy.autoneg_advertised = 0;
1778		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1779			adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_FULL;
1780		else
1781			adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_HALF;
1782		break;
1783	case IFM_10_T:
1784		adapter->hw.mac.autoneg = FALSE;
1785		adapter->hw.phy.autoneg_advertised = 0;
1786		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1787			adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_FULL;
1788		else
1789			adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_HALF;
1790		break;
1791	default:
1792		device_printf(adapter->dev, "Unsupported media type\n");
1793	}
1794
1795	em_init_locked(adapter);
1796	EM_CORE_UNLOCK(adapter);
1797
1798	return (0);
1799}
1800
1801/*********************************************************************
1802 *
1803 *  This routine maps the mbufs to tx descriptors.
1804 *
1805 *  return 0 on success, positive on failure
1806 **********************************************************************/
1807
1808static int
1809em_xmit(struct tx_ring *txr, struct mbuf **m_headp)
1810{
1811	struct adapter		*adapter = txr->adapter;
1812	bus_dma_segment_t	segs[EM_MAX_SCATTER];
1813	bus_dmamap_t		map;
1814	struct em_buffer	*tx_buffer, *tx_buffer_mapped;
1815	struct e1000_tx_desc	*ctxd = NULL;
1816	struct mbuf		*m_head;
1817	struct ether_header	*eh;
1818	struct ip		*ip = NULL;
1819	struct tcphdr		*tp = NULL;
1820	u32			txd_upper, txd_lower, txd_used, txd_saved;
1821	int			ip_off, poff;
1822	int			nsegs, i, j, first, last = 0;
1823	int			error, do_tso, tso_desc = 0, remap = 1;
1824
1825retry:
1826	m_head = *m_headp;
1827	txd_upper = txd_lower = txd_used = txd_saved = 0;
1828	do_tso = ((m_head->m_pkthdr.csum_flags & CSUM_TSO) != 0);
1829	ip_off = poff = 0;
1830
1831	/*
1832	 * Intel recommends entire IP/TCP header length reside in a single
1833	 * buffer. If multiple descriptors are used to describe the IP and
1834	 * TCP header, each descriptor should describe one or more
1835	 * complete headers; descriptors referencing only parts of headers
1836	 * are not supported. If all layer headers are not coalesced into
1837	 * a single buffer, each buffer should not cross a 4KB boundary,
1838	 * or be larger than the maximum read request size.
1839	 * Controller also requires modifing IP/TCP header to make TSO work
1840	 * so we firstly get a writable mbuf chain then coalesce ethernet/
1841	 * IP/TCP header into a single buffer to meet the requirement of
1842	 * controller. This also simplifies IP/TCP/UDP checksum offloading
1843	 * which also has similiar restrictions.
1844	 */
1845	if (do_tso || m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD) {
1846		if (do_tso || (m_head->m_next != NULL &&
1847		    m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD)) {
1848			if (M_WRITABLE(*m_headp) == 0) {
1849				m_head = m_dup(*m_headp, M_NOWAIT);
1850				m_freem(*m_headp);
1851				if (m_head == NULL) {
1852					*m_headp = NULL;
1853					return (ENOBUFS);
1854				}
1855				*m_headp = m_head;
1856			}
1857		}
1858		/*
1859		 * XXX
1860		 * Assume IPv4, we don't have TSO/checksum offload support
1861		 * for IPv6 yet.
1862		 */
1863		ip_off = sizeof(struct ether_header);
1864		m_head = m_pullup(m_head, ip_off);
1865		if (m_head == NULL) {
1866			*m_headp = NULL;
1867			return (ENOBUFS);
1868		}
1869		eh = mtod(m_head, struct ether_header *);
1870		if (eh->ether_type == htons(ETHERTYPE_VLAN)) {
1871			ip_off = sizeof(struct ether_vlan_header);
1872			m_head = m_pullup(m_head, ip_off);
1873			if (m_head == NULL) {
1874				*m_headp = NULL;
1875				return (ENOBUFS);
1876			}
1877		}
1878		m_head = m_pullup(m_head, ip_off + sizeof(struct ip));
1879		if (m_head == NULL) {
1880			*m_headp = NULL;
1881			return (ENOBUFS);
1882		}
1883		ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1884		poff = ip_off + (ip->ip_hl << 2);
1885		if (do_tso) {
1886			m_head = m_pullup(m_head, poff + sizeof(struct tcphdr));
1887			if (m_head == NULL) {
1888				*m_headp = NULL;
1889				return (ENOBUFS);
1890			}
1891			tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1892			/*
1893			 * TSO workaround:
1894			 *   pull 4 more bytes of data into it.
1895			 */
1896			m_head = m_pullup(m_head, poff + (tp->th_off << 2) + 4);
1897			if (m_head == NULL) {
1898				*m_headp = NULL;
1899				return (ENOBUFS);
1900			}
1901			ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1902			ip->ip_len = 0;
1903			ip->ip_sum = 0;
1904			/*
1905			 * The pseudo TCP checksum does not include TCP payload
1906			 * length so driver should recompute the checksum here
1907			 * what hardware expect to see. This is adherence of
1908			 * Microsoft's Large Send specification.
1909			 */
1910			tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1911			tp->th_sum = in_pseudo(ip->ip_src.s_addr,
1912			    ip->ip_dst.s_addr, htons(IPPROTO_TCP));
1913		} else if (m_head->m_pkthdr.csum_flags & CSUM_TCP) {
1914			m_head = m_pullup(m_head, poff + sizeof(struct tcphdr));
1915			if (m_head == NULL) {
1916				*m_headp = NULL;
1917				return (ENOBUFS);
1918			}
1919			tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1920			m_head = m_pullup(m_head, poff + (tp->th_off << 2));
1921			if (m_head == NULL) {
1922				*m_headp = NULL;
1923				return (ENOBUFS);
1924			}
1925			ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1926			tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1927		} else if (m_head->m_pkthdr.csum_flags & CSUM_UDP) {
1928			m_head = m_pullup(m_head, poff + sizeof(struct udphdr));
1929			if (m_head == NULL) {
1930				*m_headp = NULL;
1931				return (ENOBUFS);
1932			}
1933			ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1934		}
1935		*m_headp = m_head;
1936	}
1937
1938	/*
1939	 * Map the packet for DMA
1940	 *
1941	 * Capture the first descriptor index,
1942	 * this descriptor will have the index
1943	 * of the EOP which is the only one that
1944	 * now gets a DONE bit writeback.
1945	 */
1946	first = txr->next_avail_desc;
1947	tx_buffer = &txr->tx_buffers[first];
1948	tx_buffer_mapped = tx_buffer;
1949	map = tx_buffer->map;
1950
1951	error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
1952	    *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
1953
1954	/*
1955	 * There are two types of errors we can (try) to handle:
1956	 * - EFBIG means the mbuf chain was too long and bus_dma ran
1957	 *   out of segments.  Defragment the mbuf chain and try again.
1958	 * - ENOMEM means bus_dma could not obtain enough bounce buffers
1959	 *   at this point in time.  Defer sending and try again later.
1960	 * All other errors, in particular EINVAL, are fatal and prevent the
1961	 * mbuf chain from ever going through.  Drop it and report error.
1962	 */
1963	if (error == EFBIG && remap) {
1964		struct mbuf *m;
1965
1966		m = m_defrag(*m_headp, M_NOWAIT);
1967		if (m == NULL) {
1968			adapter->mbuf_alloc_failed++;
1969			m_freem(*m_headp);
1970			*m_headp = NULL;
1971			return (ENOBUFS);
1972		}
1973		*m_headp = m;
1974
1975		/* Try it again, but only once */
1976		remap = 0;
1977		goto retry;
1978	} else if (error == ENOMEM) {
1979		adapter->no_tx_dma_setup++;
1980		return (error);
1981	} else if (error != 0) {
1982		adapter->no_tx_dma_setup++;
1983		m_freem(*m_headp);
1984		*m_headp = NULL;
1985		return (error);
1986	}
1987
1988	/*
1989	 * TSO Hardware workaround, if this packet is not
1990	 * TSO, and is only a single descriptor long, and
1991	 * it follows a TSO burst, then we need to add a
1992	 * sentinel descriptor to prevent premature writeback.
1993	 */
1994	if ((do_tso == 0) && (txr->tx_tso == TRUE)) {
1995		if (nsegs == 1)
1996			tso_desc = TRUE;
1997		txr->tx_tso = FALSE;
1998	}
1999
2000        if (nsegs > (txr->tx_avail - 2)) {
2001                txr->no_desc_avail++;
2002		bus_dmamap_unload(txr->txtag, map);
2003		return (ENOBUFS);
2004        }
2005	m_head = *m_headp;
2006
2007	/* Do hardware assists */
2008	if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
2009		em_tso_setup(txr, m_head, ip_off, ip, tp,
2010		    &txd_upper, &txd_lower);
2011		/* we need to make a final sentinel transmit desc */
2012		tso_desc = TRUE;
2013	} else if (m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD)
2014		em_transmit_checksum_setup(txr, m_head,
2015		    ip_off, ip, &txd_upper, &txd_lower);
2016
2017	if (m_head->m_flags & M_VLANTAG) {
2018		/* Set the vlan id. */
2019		txd_upper |=
2020		    (htole16(m_head->m_pkthdr.ether_vtag) << 16);
2021                /* Tell hardware to add tag */
2022                txd_lower |= htole32(E1000_TXD_CMD_VLE);
2023        }
2024
2025	i = txr->next_avail_desc;
2026
2027	/* Set up our transmit descriptors */
2028	for (j = 0; j < nsegs; j++) {
2029		bus_size_t seg_len;
2030		bus_addr_t seg_addr;
2031
2032		tx_buffer = &txr->tx_buffers[i];
2033		ctxd = &txr->tx_base[i];
2034		seg_addr = segs[j].ds_addr;
2035		seg_len  = segs[j].ds_len;
2036		/*
2037		** TSO Workaround:
2038		** If this is the last descriptor, we want to
2039		** split it so we have a small final sentinel
2040		*/
2041		if (tso_desc && (j == (nsegs -1)) && (seg_len > 8)) {
2042			seg_len -= 4;
2043			ctxd->buffer_addr = htole64(seg_addr);
2044			ctxd->lower.data = htole32(
2045			adapter->txd_cmd | txd_lower | seg_len);
2046			ctxd->upper.data =
2047			    htole32(txd_upper);
2048			if (++i == adapter->num_tx_desc)
2049				i = 0;
2050			/* Now make the sentinel */
2051			++txd_used; /* using an extra txd */
2052			ctxd = &txr->tx_base[i];
2053			tx_buffer = &txr->tx_buffers[i];
2054			ctxd->buffer_addr =
2055			    htole64(seg_addr + seg_len);
2056			ctxd->lower.data = htole32(
2057			adapter->txd_cmd | txd_lower | 4);
2058			ctxd->upper.data =
2059			    htole32(txd_upper);
2060			last = i;
2061			if (++i == adapter->num_tx_desc)
2062				i = 0;
2063		} else {
2064			ctxd->buffer_addr = htole64(seg_addr);
2065			ctxd->lower.data = htole32(
2066			adapter->txd_cmd | txd_lower | seg_len);
2067			ctxd->upper.data =
2068			    htole32(txd_upper);
2069			last = i;
2070			if (++i == adapter->num_tx_desc)
2071				i = 0;
2072		}
2073		tx_buffer->m_head = NULL;
2074		tx_buffer->next_eop = -1;
2075	}
2076
2077	txr->next_avail_desc = i;
2078	txr->tx_avail -= nsegs;
2079	if (tso_desc) /* TSO used an extra for sentinel */
2080		txr->tx_avail -= txd_used;
2081
2082        tx_buffer->m_head = m_head;
2083	/*
2084	** Here we swap the map so the last descriptor,
2085	** which gets the completion interrupt has the
2086	** real map, and the first descriptor gets the
2087	** unused map from this descriptor.
2088	*/
2089	tx_buffer_mapped->map = tx_buffer->map;
2090	tx_buffer->map = map;
2091        bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);
2092
2093        /*
2094         * Last Descriptor of Packet
2095	 * needs End Of Packet (EOP)
2096	 * and Report Status (RS)
2097         */
2098        ctxd->lower.data |=
2099	    htole32(E1000_TXD_CMD_EOP | E1000_TXD_CMD_RS);
2100	/*
2101	 * Keep track in the first buffer which
2102	 * descriptor will be written back
2103	 */
2104	tx_buffer = &txr->tx_buffers[first];
2105	tx_buffer->next_eop = last;
2106	/* Update the watchdog time early and often */
2107	txr->watchdog_time = ticks;
2108
2109	/*
2110	 * Advance the Transmit Descriptor Tail (TDT), this tells the E1000
2111	 * that this frame is available to transmit.
2112	 */
2113	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
2114	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
2115	E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), i);
2116
2117	return (0);
2118}
2119
2120static void
2121em_set_promisc(struct adapter *adapter)
2122{
2123	struct ifnet	*ifp = adapter->ifp;
2124	u32		reg_rctl;
2125
2126	reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2127
2128	if (ifp->if_flags & IFF_PROMISC) {
2129		reg_rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
2130		/* Turn this on if you want to see bad packets */
2131		if (em_debug_sbp)
2132			reg_rctl |= E1000_RCTL_SBP;
2133		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2134	} else if (ifp->if_flags & IFF_ALLMULTI) {
2135		reg_rctl |= E1000_RCTL_MPE;
2136		reg_rctl &= ~E1000_RCTL_UPE;
2137		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2138	}
2139}
2140
2141static void
2142em_disable_promisc(struct adapter *adapter)
2143{
2144	u32	reg_rctl;
2145
2146	reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2147
2148	reg_rctl &=  (~E1000_RCTL_UPE);
2149	reg_rctl &=  (~E1000_RCTL_MPE);
2150	reg_rctl &=  (~E1000_RCTL_SBP);
2151	E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2152}
2153
2154
2155/*********************************************************************
2156 *  Multicast Update
2157 *
2158 *  This routine is called whenever multicast address list is updated.
2159 *
2160 **********************************************************************/
2161
2162static void
2163em_set_multi(struct adapter *adapter)
2164{
2165	struct ifnet	*ifp = adapter->ifp;
2166	struct ifmultiaddr *ifma;
2167	u32 reg_rctl = 0;
2168	u8  *mta; /* Multicast array memory */
2169	int mcnt = 0;
2170
2171	IOCTL_DEBUGOUT("em_set_multi: begin");
2172
2173	mta = adapter->mta;
2174	bzero(mta, sizeof(u8) * ETH_ADDR_LEN * MAX_NUM_MULTICAST_ADDRESSES);
2175
2176	if (adapter->hw.mac.type == e1000_82542 &&
2177	    adapter->hw.revision_id == E1000_REVISION_2) {
2178		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2179		if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
2180			e1000_pci_clear_mwi(&adapter->hw);
2181		reg_rctl |= E1000_RCTL_RST;
2182		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2183		msec_delay(5);
2184	}
2185
2186#if __FreeBSD_version < 800000
2187	IF_ADDR_LOCK(ifp);
2188#else
2189	if_maddr_rlock(ifp);
2190#endif
2191	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
2192		if (ifma->ifma_addr->sa_family != AF_LINK)
2193			continue;
2194
2195		if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
2196			break;
2197
2198		bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
2199		    &mta[mcnt * ETH_ADDR_LEN], ETH_ADDR_LEN);
2200		mcnt++;
2201	}
2202#if __FreeBSD_version < 800000
2203	IF_ADDR_UNLOCK(ifp);
2204#else
2205	if_maddr_runlock(ifp);
2206#endif
2207	if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) {
2208		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2209		reg_rctl |= E1000_RCTL_MPE;
2210		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2211	} else
2212		e1000_update_mc_addr_list(&adapter->hw, mta, mcnt);
2213
2214	if (adapter->hw.mac.type == e1000_82542 &&
2215	    adapter->hw.revision_id == E1000_REVISION_2) {
2216		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2217		reg_rctl &= ~E1000_RCTL_RST;
2218		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2219		msec_delay(5);
2220		if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
2221			e1000_pci_set_mwi(&adapter->hw);
2222	}
2223}
2224
2225
2226/*********************************************************************
2227 *  Timer routine
2228 *
2229 *  This routine checks for link status and updates statistics.
2230 *
2231 **********************************************************************/
2232
2233static void
2234em_local_timer(void *arg)
2235{
2236	struct adapter	*adapter = arg;
2237	struct ifnet	*ifp = adapter->ifp;
2238	struct tx_ring	*txr = adapter->tx_rings;
2239	struct rx_ring	*rxr = adapter->rx_rings;
2240	u32		trigger;
2241
2242	EM_CORE_LOCK_ASSERT(adapter);
2243
2244	em_update_link_status(adapter);
2245	em_update_stats_counters(adapter);
2246
2247	/* Reset LAA into RAR[0] on 82571 */
2248	if ((adapter->hw.mac.type == e1000_82571) &&
2249	    e1000_get_laa_state_82571(&adapter->hw))
2250		e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);
2251
2252	/* Mask to use in the irq trigger */
2253	if (adapter->msix_mem)
2254		trigger = rxr->ims; /* RX for 82574 */
2255	else
2256		trigger = E1000_ICS_RXDMT0;
2257
2258	/*
2259	** Check on the state of the TX queue(s), this
2260	** can be done without the lock because its RO
2261	** and the HUNG state will be static if set.
2262	*/
2263	for (int i = 0; i < adapter->num_queues; i++, txr++) {
2264		if ((txr->queue_status == EM_QUEUE_HUNG) &&
2265		    (adapter->pause_frames == 0))
2266			goto hung;
2267		/* Schedule a TX tasklet if needed */
2268		if (txr->tx_avail <= EM_MAX_SCATTER)
2269			taskqueue_enqueue(txr->tq, &txr->tx_task);
2270	}
2271
2272	adapter->pause_frames = 0;
2273	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
2274#ifndef DEVICE_POLLING
2275	/* Trigger an RX interrupt to guarantee mbuf refresh */
2276	E1000_WRITE_REG(&adapter->hw, E1000_ICS, trigger);
2277#endif
2278	return;
2279hung:
2280	/* Looks like we're hung */
2281	device_printf(adapter->dev, "Watchdog timeout -- resetting\n");
2282	device_printf(adapter->dev,
2283	    "Queue(%d) tdh = %d, hw tdt = %d\n", txr->me,
2284	    E1000_READ_REG(&adapter->hw, E1000_TDH(txr->me)),
2285	    E1000_READ_REG(&adapter->hw, E1000_TDT(txr->me)));
2286	device_printf(adapter->dev,"TX(%d) desc avail = %d,"
2287	    "Next TX to Clean = %d\n",
2288	    txr->me, txr->tx_avail, txr->next_to_clean);
2289	ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2290	adapter->watchdog_events++;
2291	adapter->pause_frames = 0;
2292	em_init_locked(adapter);
2293}
2294
2295
2296static void
2297em_update_link_status(struct adapter *adapter)
2298{
2299	struct e1000_hw *hw = &adapter->hw;
2300	struct ifnet *ifp = adapter->ifp;
2301	device_t dev = adapter->dev;
2302	struct tx_ring *txr = adapter->tx_rings;
2303	u32 link_check = 0;
2304
2305	/* Get the cached link value or read phy for real */
2306	switch (hw->phy.media_type) {
2307	case e1000_media_type_copper:
2308		if (hw->mac.get_link_status) {
2309			/* Do the work to read phy */
2310			e1000_check_for_link(hw);
2311			link_check = !hw->mac.get_link_status;
2312			if (link_check) /* ESB2 fix */
2313				e1000_cfg_on_link_up(hw);
2314		} else
2315			link_check = TRUE;
2316		break;
2317	case e1000_media_type_fiber:
2318		e1000_check_for_link(hw);
2319		link_check = (E1000_READ_REG(hw, E1000_STATUS) &
2320                                 E1000_STATUS_LU);
2321		break;
2322	case e1000_media_type_internal_serdes:
2323		e1000_check_for_link(hw);
2324		link_check = adapter->hw.mac.serdes_has_link;
2325		break;
2326	default:
2327	case e1000_media_type_unknown:
2328		break;
2329	}
2330
2331	/* Now check for a transition */
2332	if (link_check && (adapter->link_active == 0)) {
2333		e1000_get_speed_and_duplex(hw, &adapter->link_speed,
2334		    &adapter->link_duplex);
2335		/* Check if we must disable SPEED_MODE bit on PCI-E */
2336		if ((adapter->link_speed != SPEED_1000) &&
2337		    ((hw->mac.type == e1000_82571) ||
2338		    (hw->mac.type == e1000_82572))) {
2339			int tarc0;
2340			tarc0 = E1000_READ_REG(hw, E1000_TARC(0));
2341			tarc0 &= ~SPEED_MODE_BIT;
2342			E1000_WRITE_REG(hw, E1000_TARC(0), tarc0);
2343		}
2344		if (bootverbose)
2345			device_printf(dev, "Link is up %d Mbps %s\n",
2346			    adapter->link_speed,
2347			    ((adapter->link_duplex == FULL_DUPLEX) ?
2348			    "Full Duplex" : "Half Duplex"));
2349		adapter->link_active = 1;
2350		adapter->smartspeed = 0;
2351		ifp->if_baudrate = adapter->link_speed * 1000000;
2352		if_link_state_change(ifp, LINK_STATE_UP);
2353	} else if (!link_check && (adapter->link_active == 1)) {
2354		ifp->if_baudrate = adapter->link_speed = 0;
2355		adapter->link_duplex = 0;
2356		if (bootverbose)
2357			device_printf(dev, "Link is Down\n");
2358		adapter->link_active = 0;
2359		/* Link down, disable watchdog */
2360		for (int i = 0; i < adapter->num_queues; i++, txr++)
2361			txr->queue_status = EM_QUEUE_IDLE;
2362		if_link_state_change(ifp, LINK_STATE_DOWN);
2363	}
2364}
2365
2366/*********************************************************************
2367 *
2368 *  This routine disables all traffic on the adapter by issuing a
2369 *  global reset on the MAC and deallocates TX/RX buffers.
2370 *
2371 *  This routine should always be called with BOTH the CORE
2372 *  and TX locks.
2373 **********************************************************************/
2374
2375static void
2376em_stop(void *arg)
2377{
2378	struct adapter	*adapter = arg;
2379	struct ifnet	*ifp = adapter->ifp;
2380	struct tx_ring	*txr = adapter->tx_rings;
2381
2382	EM_CORE_LOCK_ASSERT(adapter);
2383
2384	INIT_DEBUGOUT("em_stop: begin");
2385
2386	em_disable_intr(adapter);
2387	callout_stop(&adapter->timer);
2388
2389	/* Tell the stack that the interface is no longer active */
2390	ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2391	ifp->if_drv_flags |= IFF_DRV_OACTIVE;
2392
2393        /* Unarm watchdog timer. */
2394	for (int i = 0; i < adapter->num_queues; i++, txr++) {
2395		EM_TX_LOCK(txr);
2396		txr->queue_status = EM_QUEUE_IDLE;
2397		EM_TX_UNLOCK(txr);
2398	}
2399
2400	e1000_reset_hw(&adapter->hw);
2401	E1000_WRITE_REG(&adapter->hw, E1000_WUC, 0);
2402
2403	e1000_led_off(&adapter->hw);
2404	e1000_cleanup_led(&adapter->hw);
2405}
2406
2407
2408/*********************************************************************
2409 *
2410 *  Determine hardware revision.
2411 *
2412 **********************************************************************/
2413static void
2414em_identify_hardware(struct adapter *adapter)
2415{
2416	device_t dev = adapter->dev;
2417
2418	/* Make sure our PCI config space has the necessary stuff set */
2419	adapter->hw.bus.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2);
2420	if (!((adapter->hw.bus.pci_cmd_word & PCIM_CMD_BUSMASTEREN) &&
2421	    (adapter->hw.bus.pci_cmd_word & PCIM_CMD_MEMEN))) {
2422		device_printf(dev, "Memory Access and/or Bus Master bits "
2423		    "were not set!\n");
2424		adapter->hw.bus.pci_cmd_word |=
2425		(PCIM_CMD_BUSMASTEREN | PCIM_CMD_MEMEN);
2426		pci_write_config(dev, PCIR_COMMAND,
2427		    adapter->hw.bus.pci_cmd_word, 2);
2428	}
2429
2430	/* Save off the information about this board */
2431	adapter->hw.vendor_id = pci_get_vendor(dev);
2432	adapter->hw.device_id = pci_get_device(dev);
2433	adapter->hw.revision_id = pci_read_config(dev, PCIR_REVID, 1);
2434	adapter->hw.subsystem_vendor_id =
2435	    pci_read_config(dev, PCIR_SUBVEND_0, 2);
2436	adapter->hw.subsystem_device_id =
2437	    pci_read_config(dev, PCIR_SUBDEV_0, 2);
2438
2439	/* Do Shared Code Init and Setup */
2440	if (e1000_set_mac_type(&adapter->hw)) {
2441		device_printf(dev, "Setup init failure\n");
2442		return;
2443	}
2444}
2445
2446static int
2447em_allocate_pci_resources(struct adapter *adapter)
2448{
2449	device_t	dev = adapter->dev;
2450	int		rid;
2451
2452	rid = PCIR_BAR(0);
2453	adapter->memory = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
2454	    &rid, RF_ACTIVE);
2455	if (adapter->memory == NULL) {
2456		device_printf(dev, "Unable to allocate bus resource: memory\n");
2457		return (ENXIO);
2458	}
2459	adapter->osdep.mem_bus_space_tag =
2460	    rman_get_bustag(adapter->memory);
2461	adapter->osdep.mem_bus_space_handle =
2462	    rman_get_bushandle(adapter->memory);
2463	adapter->hw.hw_addr = (u8 *)&adapter->osdep.mem_bus_space_handle;
2464
2465	/* Default to a single queue */
2466	adapter->num_queues = 1;
2467
2468	/*
2469	 * Setup MSI/X or MSI if PCI Express
2470	 */
2471	adapter->msix = em_setup_msix(adapter);
2472
2473	adapter->hw.back = &adapter->osdep;
2474
2475	return (0);
2476}
2477
2478/*********************************************************************
2479 *
2480 *  Setup the Legacy or MSI Interrupt handler
2481 *
2482 **********************************************************************/
2483int
2484em_allocate_legacy(struct adapter *adapter)
2485{
2486	device_t dev = adapter->dev;
2487	struct tx_ring	*txr = adapter->tx_rings;
2488	int error, rid = 0;
2489
2490	/* Manually turn off all interrupts */
2491	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2492
2493	if (adapter->msix == 1) /* using MSI */
2494		rid = 1;
2495	/* We allocate a single interrupt resource */
2496	adapter->res = bus_alloc_resource_any(dev,
2497	    SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2498	if (adapter->res == NULL) {
2499		device_printf(dev, "Unable to allocate bus resource: "
2500		    "interrupt\n");
2501		return (ENXIO);
2502	}
2503
2504	/*
2505	 * Allocate a fast interrupt and the associated
2506	 * deferred processing contexts.
2507	 */
2508	TASK_INIT(&adapter->que_task, 0, em_handle_que, adapter);
2509	adapter->tq = taskqueue_create_fast("em_taskq", M_NOWAIT,
2510	    taskqueue_thread_enqueue, &adapter->tq);
2511	taskqueue_start_threads(&adapter->tq, 1, PI_NET, "%s que",
2512	    device_get_nameunit(adapter->dev));
2513	/* Use a TX only tasklet for local timer */
2514	TASK_INIT(&txr->tx_task, 0, em_handle_tx, txr);
2515	txr->tq = taskqueue_create_fast("em_txq", M_NOWAIT,
2516	    taskqueue_thread_enqueue, &txr->tq);
2517	taskqueue_start_threads(&txr->tq, 1, PI_NET, "%s txq",
2518	    device_get_nameunit(adapter->dev));
2519	TASK_INIT(&adapter->link_task, 0, em_handle_link, adapter);
2520	if ((error = bus_setup_intr(dev, adapter->res, INTR_TYPE_NET,
2521	    em_irq_fast, NULL, adapter, &adapter->tag)) != 0) {
2522		device_printf(dev, "Failed to register fast interrupt "
2523			    "handler: %d\n", error);
2524		taskqueue_free(adapter->tq);
2525		adapter->tq = NULL;
2526		return (error);
2527	}
2528
2529	return (0);
2530}
2531
2532/*********************************************************************
2533 *
2534 *  Setup the MSIX Interrupt handlers
2535 *   This is not really Multiqueue, rather
2536 *   its just seperate interrupt vectors
2537 *   for TX, RX, and Link.
2538 *
2539 **********************************************************************/
2540int
2541em_allocate_msix(struct adapter *adapter)
2542{
2543	device_t	dev = adapter->dev;
2544	struct		tx_ring *txr = adapter->tx_rings;
2545	struct		rx_ring *rxr = adapter->rx_rings;
2546	int		error, rid, vector = 0;
2547
2548
2549	/* Make sure all interrupts are disabled */
2550	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2551
2552	/* First set up ring resources */
2553	for (int i = 0; i < adapter->num_queues; i++, txr++, rxr++) {
2554
2555		/* RX ring */
2556		rid = vector + 1;
2557
2558		rxr->res = bus_alloc_resource_any(dev,
2559		    SYS_RES_IRQ, &rid, RF_ACTIVE);
2560		if (rxr->res == NULL) {
2561			device_printf(dev,
2562			    "Unable to allocate bus resource: "
2563			    "RX MSIX Interrupt %d\n", i);
2564			return (ENXIO);
2565		}
2566		if ((error = bus_setup_intr(dev, rxr->res,
2567		    INTR_TYPE_NET | INTR_MPSAFE, NULL, em_msix_rx,
2568		    rxr, &rxr->tag)) != 0) {
2569			device_printf(dev, "Failed to register RX handler");
2570			return (error);
2571		}
2572#if __FreeBSD_version >= 800504
2573		bus_describe_intr(dev, rxr->res, rxr->tag, "rx %d", i);
2574#endif
2575		rxr->msix = vector++; /* NOTE increment vector for TX */
2576		TASK_INIT(&rxr->rx_task, 0, em_handle_rx, rxr);
2577		rxr->tq = taskqueue_create_fast("em_rxq", M_NOWAIT,
2578		    taskqueue_thread_enqueue, &rxr->tq);
2579		taskqueue_start_threads(&rxr->tq, 1, PI_NET, "%s rxq",
2580		    device_get_nameunit(adapter->dev));
2581		/*
2582		** Set the bit to enable interrupt
2583		** in E1000_IMS -- bits 20 and 21
2584		** are for RX0 and RX1, note this has
2585		** NOTHING to do with the MSIX vector
2586		*/
2587		rxr->ims = 1 << (20 + i);
2588		adapter->ivars |= (8 | rxr->msix) << (i * 4);
2589
2590		/* TX ring */
2591		rid = vector + 1;
2592		txr->res = bus_alloc_resource_any(dev,
2593		    SYS_RES_IRQ, &rid, RF_ACTIVE);
2594		if (txr->res == NULL) {
2595			device_printf(dev,
2596			    "Unable to allocate bus resource: "
2597			    "TX MSIX Interrupt %d\n", i);
2598			return (ENXIO);
2599		}
2600		if ((error = bus_setup_intr(dev, txr->res,
2601		    INTR_TYPE_NET | INTR_MPSAFE, NULL, em_msix_tx,
2602		    txr, &txr->tag)) != 0) {
2603			device_printf(dev, "Failed to register TX handler");
2604			return (error);
2605		}
2606#if __FreeBSD_version >= 800504
2607		bus_describe_intr(dev, txr->res, txr->tag, "tx %d", i);
2608#endif
2609		txr->msix = vector++; /* Increment vector for next pass */
2610		TASK_INIT(&txr->tx_task, 0, em_handle_tx, txr);
2611		txr->tq = taskqueue_create_fast("em_txq", M_NOWAIT,
2612		    taskqueue_thread_enqueue, &txr->tq);
2613		taskqueue_start_threads(&txr->tq, 1, PI_NET, "%s txq",
2614		    device_get_nameunit(adapter->dev));
2615		/*
2616		** Set the bit to enable interrupt
2617		** in E1000_IMS -- bits 22 and 23
2618		** are for TX0 and TX1, note this has
2619		** NOTHING to do with the MSIX vector
2620		*/
2621		txr->ims = 1 << (22 + i);
2622		adapter->ivars |= (8 | txr->msix) << (8 + (i * 4));
2623	}
2624
2625	/* Link interrupt */
2626	++rid;
2627	adapter->res = bus_alloc_resource_any(dev,
2628	    SYS_RES_IRQ, &rid, RF_ACTIVE);
2629	if (!adapter->res) {
2630		device_printf(dev,"Unable to allocate "
2631		    "bus resource: Link interrupt [%d]\n", rid);
2632		return (ENXIO);
2633        }
2634	/* Set the link handler function */
2635	error = bus_setup_intr(dev, adapter->res,
2636	    INTR_TYPE_NET | INTR_MPSAFE, NULL,
2637	    em_msix_link, adapter, &adapter->tag);
2638	if (error) {
2639		adapter->res = NULL;
2640		device_printf(dev, "Failed to register LINK handler");
2641		return (error);
2642	}
2643#if __FreeBSD_version >= 800504
2644		bus_describe_intr(dev, adapter->res, adapter->tag, "link");
2645#endif
2646	adapter->linkvec = vector;
2647	adapter->ivars |=  (8 | vector) << 16;
2648	adapter->ivars |= 0x80000000;
2649
2650	return (0);
2651}
2652
2653
2654static void
2655em_free_pci_resources(struct adapter *adapter)
2656{
2657	device_t	dev = adapter->dev;
2658	struct tx_ring	*txr;
2659	struct rx_ring	*rxr;
2660	int		rid;
2661
2662
2663	/*
2664	** Release all the queue interrupt resources:
2665	*/
2666	for (int i = 0; i < adapter->num_queues; i++) {
2667		txr = &adapter->tx_rings[i];
2668		rxr = &adapter->rx_rings[i];
2669		/* an early abort? */
2670		if ((txr == NULL) || (rxr == NULL))
2671			break;
2672		rid = txr->msix +1;
2673		if (txr->tag != NULL) {
2674			bus_teardown_intr(dev, txr->res, txr->tag);
2675			txr->tag = NULL;
2676		}
2677		if (txr->res != NULL)
2678			bus_release_resource(dev, SYS_RES_IRQ,
2679			    rid, txr->res);
2680		rid = rxr->msix +1;
2681		if (rxr->tag != NULL) {
2682			bus_teardown_intr(dev, rxr->res, rxr->tag);
2683			rxr->tag = NULL;
2684		}
2685		if (rxr->res != NULL)
2686			bus_release_resource(dev, SYS_RES_IRQ,
2687			    rid, rxr->res);
2688	}
2689
2690        if (adapter->linkvec) /* we are doing MSIX */
2691                rid = adapter->linkvec + 1;
2692        else
2693                (adapter->msix != 0) ? (rid = 1):(rid = 0);
2694
2695	if (adapter->tag != NULL) {
2696		bus_teardown_intr(dev, adapter->res, adapter->tag);
2697		adapter->tag = NULL;
2698	}
2699
2700	if (adapter->res != NULL)
2701		bus_release_resource(dev, SYS_RES_IRQ, rid, adapter->res);
2702
2703
2704	if (adapter->msix)
2705		pci_release_msi(dev);
2706
2707	if (adapter->msix_mem != NULL)
2708		bus_release_resource(dev, SYS_RES_MEMORY,
2709		    PCIR_BAR(EM_MSIX_BAR), adapter->msix_mem);
2710
2711	if (adapter->memory != NULL)
2712		bus_release_resource(dev, SYS_RES_MEMORY,
2713		    PCIR_BAR(0), adapter->memory);
2714
2715	if (adapter->flash != NULL)
2716		bus_release_resource(dev, SYS_RES_MEMORY,
2717		    EM_FLASH, adapter->flash);
2718}
2719
2720/*
2721 * Setup MSI or MSI/X
2722 */
2723static int
2724em_setup_msix(struct adapter *adapter)
2725{
2726	device_t dev = adapter->dev;
2727	int val = 0;
2728
2729	/*
2730	** Setup MSI/X for Hartwell: tests have shown
2731	** use of two queues to be unstable, and to
2732	** provide no great gain anyway, so we simply
2733	** seperate the interrupts and use a single queue.
2734	*/
2735	if ((adapter->hw.mac.type == e1000_82574) &&
2736	    (em_enable_msix == TRUE)) {
2737		/* Map the MSIX BAR */
2738		int rid = PCIR_BAR(EM_MSIX_BAR);
2739		adapter->msix_mem = bus_alloc_resource_any(dev,
2740		    SYS_RES_MEMORY, &rid, RF_ACTIVE);
2741       		if (!adapter->msix_mem) {
2742			/* May not be enabled */
2743               		device_printf(adapter->dev,
2744			    "Unable to map MSIX table \n");
2745			goto msi;
2746       		}
2747		val = pci_msix_count(dev);
2748		/* We only need 3 vectors */
2749		if (val > 3)
2750			val = 3;
2751		if ((val != 3) && (val != 5)) {
2752			bus_release_resource(dev, SYS_RES_MEMORY,
2753			    PCIR_BAR(EM_MSIX_BAR), adapter->msix_mem);
2754			adapter->msix_mem = NULL;
2755               		device_printf(adapter->dev,
2756			    "MSIX: incorrect vectors, using MSI\n");
2757			goto msi;
2758		}
2759
2760		if (pci_alloc_msix(dev, &val) == 0) {
2761			device_printf(adapter->dev,
2762			    "Using MSIX interrupts "
2763			    "with %d vectors\n", val);
2764		}
2765
2766		return (val);
2767	}
2768msi:
2769       	val = pci_msi_count(dev);
2770       	if (val == 1 && pci_alloc_msi(dev, &val) == 0) {
2771               	adapter->msix = 1;
2772               	device_printf(adapter->dev,"Using an MSI interrupt\n");
2773		return (val);
2774	}
2775	/* Should only happen due to manual configuration */
2776	device_printf(adapter->dev,"No MSI/MSIX using a Legacy IRQ\n");
2777	return (0);
2778}
2779
2780
2781/*********************************************************************
2782 *
2783 *  Initialize the hardware to a configuration
2784 *  as specified by the adapter structure.
2785 *
2786 **********************************************************************/
2787static void
2788em_reset(struct adapter *adapter)
2789{
2790	device_t	dev = adapter->dev;
2791	struct ifnet	*ifp = adapter->ifp;
2792	struct e1000_hw	*hw = &adapter->hw;
2793	u16		rx_buffer_size;
2794	u32		pba;
2795
2796	INIT_DEBUGOUT("em_reset: begin");
2797
2798	/* Set up smart power down as default off on newer adapters. */
2799	if (!em_smart_pwr_down && (hw->mac.type == e1000_82571 ||
2800	    hw->mac.type == e1000_82572)) {
2801		u16 phy_tmp = 0;
2802
2803		/* Speed up time to link by disabling smart power down. */
2804		e1000_read_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, &phy_tmp);
2805		phy_tmp &= ~IGP02E1000_PM_SPD;
2806		e1000_write_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, phy_tmp);
2807	}
2808
2809	/*
2810	 * Packet Buffer Allocation (PBA)
2811	 * Writing PBA sets the receive portion of the buffer
2812	 * the remainder is used for the transmit buffer.
2813	 */
2814	switch (hw->mac.type) {
2815	/* Total Packet Buffer on these is 48K */
2816	case e1000_82571:
2817	case e1000_82572:
2818	case e1000_80003es2lan:
2819			pba = E1000_PBA_32K; /* 32K for Rx, 16K for Tx */
2820		break;
2821	case e1000_82573: /* 82573: Total Packet Buffer is 32K */
2822			pba = E1000_PBA_12K; /* 12K for Rx, 20K for Tx */
2823		break;
2824	case e1000_82574:
2825	case e1000_82583:
2826			pba = E1000_PBA_20K; /* 20K for Rx, 20K for Tx */
2827		break;
2828	case e1000_ich8lan:
2829		pba = E1000_PBA_8K;
2830		break;
2831	case e1000_ich9lan:
2832	case e1000_ich10lan:
2833		/* Boost Receive side for jumbo frames */
2834		if (adapter->hw.mac.max_frame_size > 4096)
2835			pba = E1000_PBA_14K;
2836		else
2837			pba = E1000_PBA_10K;
2838		break;
2839	case e1000_pchlan:
2840	case e1000_pch2lan:
2841	case e1000_pch_lpt:
2842		pba = E1000_PBA_26K;
2843		break;
2844	default:
2845		if (adapter->hw.mac.max_frame_size > 8192)
2846			pba = E1000_PBA_40K; /* 40K for Rx, 24K for Tx */
2847		else
2848			pba = E1000_PBA_48K; /* 48K for Rx, 16K for Tx */
2849	}
2850	E1000_WRITE_REG(&adapter->hw, E1000_PBA, pba);
2851
2852	/*
2853	 * These parameters control the automatic generation (Tx) and
2854	 * response (Rx) to Ethernet PAUSE frames.
2855	 * - High water mark should allow for at least two frames to be
2856	 *   received after sending an XOFF.
2857	 * - Low water mark works best when it is very near the high water mark.
2858	 *   This allows the receiver to restart by sending XON when it has
2859	 *   drained a bit. Here we use an arbitary value of 1500 which will
2860	 *   restart after one full frame is pulled from the buffer. There
2861	 *   could be several smaller frames in the buffer and if so they will
2862	 *   not trigger the XON until their total number reduces the buffer
2863	 *   by 1500.
2864	 * - The pause time is fairly large at 1000 x 512ns = 512 usec.
2865	 */
2866	rx_buffer_size = ((E1000_READ_REG(hw, E1000_PBA) & 0xffff) << 10 );
2867	hw->fc.high_water = rx_buffer_size -
2868	    roundup2(adapter->hw.mac.max_frame_size, 1024);
2869	hw->fc.low_water = hw->fc.high_water - 1500;
2870
2871	if (adapter->fc) /* locally set flow control value? */
2872		hw->fc.requested_mode = adapter->fc;
2873	else
2874		hw->fc.requested_mode = e1000_fc_full;
2875
2876	if (hw->mac.type == e1000_80003es2lan)
2877		hw->fc.pause_time = 0xFFFF;
2878	else
2879		hw->fc.pause_time = EM_FC_PAUSE_TIME;
2880
2881	hw->fc.send_xon = TRUE;
2882
2883	/* Device specific overrides/settings */
2884	switch (hw->mac.type) {
2885	case e1000_pchlan:
2886		/* Workaround: no TX flow ctrl for PCH */
2887                hw->fc.requested_mode = e1000_fc_rx_pause;
2888		hw->fc.pause_time = 0xFFFF; /* override */
2889		if (ifp->if_mtu > ETHERMTU) {
2890			hw->fc.high_water = 0x3500;
2891			hw->fc.low_water = 0x1500;
2892		} else {
2893			hw->fc.high_water = 0x5000;
2894			hw->fc.low_water = 0x3000;
2895		}
2896		hw->fc.refresh_time = 0x1000;
2897		break;
2898	case e1000_pch2lan:
2899	case e1000_pch_lpt:
2900		hw->fc.high_water = 0x5C20;
2901		hw->fc.low_water = 0x5048;
2902		hw->fc.pause_time = 0x0650;
2903		hw->fc.refresh_time = 0x0400;
2904		/* Jumbos need adjusted PBA */
2905		if (ifp->if_mtu > ETHERMTU)
2906			E1000_WRITE_REG(hw, E1000_PBA, 12);
2907		else
2908			E1000_WRITE_REG(hw, E1000_PBA, 26);
2909		break;
2910        case e1000_ich9lan:
2911        case e1000_ich10lan:
2912		if (ifp->if_mtu > ETHERMTU) {
2913			hw->fc.high_water = 0x2800;
2914			hw->fc.low_water = hw->fc.high_water - 8;
2915			break;
2916		}
2917		/* else fall thru */
2918	default:
2919		if (hw->mac.type == e1000_80003es2lan)
2920			hw->fc.pause_time = 0xFFFF;
2921		break;
2922	}
2923
2924	/* Issue a global reset */
2925	e1000_reset_hw(hw);
2926	E1000_WRITE_REG(hw, E1000_WUC, 0);
2927	em_disable_aspm(adapter);
2928	/* and a re-init */
2929	if (e1000_init_hw(hw) < 0) {
2930		device_printf(dev, "Hardware Initialization Failed\n");
2931		return;
2932	}
2933
2934	E1000_WRITE_REG(hw, E1000_VET, ETHERTYPE_VLAN);
2935	e1000_get_phy_info(hw);
2936	e1000_check_for_link(hw);
2937	return;
2938}
2939
2940/*********************************************************************
2941 *
2942 *  Setup networking device structure and register an interface.
2943 *
2944 **********************************************************************/
2945static int
2946em_setup_interface(device_t dev, struct adapter *adapter)
2947{
2948	struct ifnet   *ifp;
2949
2950	INIT_DEBUGOUT("em_setup_interface: begin");
2951
2952	ifp = adapter->ifp = if_alloc(IFT_ETHER);
2953	if (ifp == NULL) {
2954		device_printf(dev, "can not allocate ifnet structure\n");
2955		return (-1);
2956	}
2957	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
2958	ifp->if_init =  em_init;
2959	ifp->if_softc = adapter;
2960	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
2961	ifp->if_ioctl = em_ioctl;
2962#ifdef EM_MULTIQUEUE
2963	/* Multiqueue stack interface */
2964	ifp->if_transmit = em_mq_start;
2965	ifp->if_qflush = em_qflush;
2966#else
2967	ifp->if_start = em_start;
2968	IFQ_SET_MAXLEN(&ifp->if_snd, adapter->num_tx_desc - 1);
2969	ifp->if_snd.ifq_drv_maxlen = adapter->num_tx_desc - 1;
2970	IFQ_SET_READY(&ifp->if_snd);
2971#endif
2972
2973	ether_ifattach(ifp, adapter->hw.mac.addr);
2974
2975	ifp->if_capabilities = ifp->if_capenable = 0;
2976
2977
2978	ifp->if_capabilities |= IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM;
2979	ifp->if_capabilities |= IFCAP_TSO4;
2980	/*
2981	 * Tell the upper layer(s) we
2982	 * support full VLAN capability
2983	 */
2984	ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);
2985	ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING
2986			     |  IFCAP_VLAN_HWTSO
2987			     |  IFCAP_VLAN_MTU;
2988	ifp->if_capenable = ifp->if_capabilities;
2989
2990	/*
2991	** Don't turn this on by default, if vlans are
2992	** created on another pseudo device (eg. lagg)
2993	** then vlan events are not passed thru, breaking
2994	** operation, but with HW FILTER off it works. If
2995	** using vlans directly on the em driver you can
2996	** enable this and get full hardware tag filtering.
2997	*/
2998	ifp->if_capabilities |= IFCAP_VLAN_HWFILTER;
2999
3000#ifdef DEVICE_POLLING
3001	ifp->if_capabilities |= IFCAP_POLLING;
3002#endif
3003
3004	/* Enable only WOL MAGIC by default */
3005	if (adapter->wol) {
3006		ifp->if_capabilities |= IFCAP_WOL;
3007		ifp->if_capenable |= IFCAP_WOL_MAGIC;
3008	}
3009
3010	/*
3011	 * Specify the media types supported by this adapter and register
3012	 * callbacks to update media and link information
3013	 */
3014	ifmedia_init(&adapter->media, IFM_IMASK,
3015	    em_media_change, em_media_status);
3016	if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
3017	    (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
3018		u_char fiber_type = IFM_1000_SX;	/* default type */
3019
3020		ifmedia_add(&adapter->media, IFM_ETHER | fiber_type | IFM_FDX,
3021			    0, NULL);
3022		ifmedia_add(&adapter->media, IFM_ETHER | fiber_type, 0, NULL);
3023	} else {
3024		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T, 0, NULL);
3025		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T | IFM_FDX,
3026			    0, NULL);
3027		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX,
3028			    0, NULL);
3029		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
3030			    0, NULL);
3031		if (adapter->hw.phy.type != e1000_phy_ife) {
3032			ifmedia_add(&adapter->media,
3033				IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
3034			ifmedia_add(&adapter->media,
3035				IFM_ETHER | IFM_1000_T, 0, NULL);
3036		}
3037	}
3038	ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
3039	ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
3040	return (0);
3041}
3042
3043
3044/*
3045 * Manage DMA'able memory.
3046 */
3047static void
3048em_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
3049{
3050	if (error)
3051		return;
3052	*(bus_addr_t *) arg = segs[0].ds_addr;
3053}
3054
3055static int
3056em_dma_malloc(struct adapter *adapter, bus_size_t size,
3057        struct em_dma_alloc *dma, int mapflags)
3058{
3059	int error;
3060
3061	error = bus_dma_tag_create(bus_get_dma_tag(adapter->dev), /* parent */
3062				EM_DBA_ALIGN, 0,	/* alignment, bounds */
3063				BUS_SPACE_MAXADDR,	/* lowaddr */
3064				BUS_SPACE_MAXADDR,	/* highaddr */
3065				NULL, NULL,		/* filter, filterarg */
3066				size,			/* maxsize */
3067				1,			/* nsegments */
3068				size,			/* maxsegsize */
3069				0,			/* flags */
3070				NULL,			/* lockfunc */
3071				NULL,			/* lockarg */
3072				&dma->dma_tag);
3073	if (error) {
3074		device_printf(adapter->dev,
3075		    "%s: bus_dma_tag_create failed: %d\n",
3076		    __func__, error);
3077		goto fail_0;
3078	}
3079
3080	error = bus_dmamem_alloc(dma->dma_tag, (void**) &dma->dma_vaddr,
3081	    BUS_DMA_NOWAIT | BUS_DMA_COHERENT, &dma->dma_map);
3082	if (error) {
3083		device_printf(adapter->dev,
3084		    "%s: bus_dmamem_alloc(%ju) failed: %d\n",
3085		    __func__, (uintmax_t)size, error);
3086		goto fail_2;
3087	}
3088
3089	dma->dma_paddr = 0;
3090	error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
3091	    size, em_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT);
3092	if (error || dma->dma_paddr == 0) {
3093		device_printf(adapter->dev,
3094		    "%s: bus_dmamap_load failed: %d\n",
3095		    __func__, error);
3096		goto fail_3;
3097	}
3098
3099	return (0);
3100
3101fail_3:
3102	bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3103fail_2:
3104	bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
3105	bus_dma_tag_destroy(dma->dma_tag);
3106fail_0:
3107	dma->dma_map = NULL;
3108	dma->dma_tag = NULL;
3109
3110	return (error);
3111}
3112
3113static void
3114em_dma_free(struct adapter *adapter, struct em_dma_alloc *dma)
3115{
3116	if (dma->dma_tag == NULL)
3117		return;
3118	if (dma->dma_map != NULL) {
3119		bus_dmamap_sync(dma->dma_tag, dma->dma_map,
3120		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
3121		bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3122		bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
3123		dma->dma_map = NULL;
3124	}
3125	bus_dma_tag_destroy(dma->dma_tag);
3126	dma->dma_tag = NULL;
3127}
3128
3129
3130/*********************************************************************
3131 *
3132 *  Allocate memory for the transmit and receive rings, and then
3133 *  the descriptors associated with each, called only once at attach.
3134 *
3135 **********************************************************************/
3136static int
3137em_allocate_queues(struct adapter *adapter)
3138{
3139	device_t		dev = adapter->dev;
3140	struct tx_ring		*txr = NULL;
3141	struct rx_ring		*rxr = NULL;
3142	int rsize, tsize, error = E1000_SUCCESS;
3143	int txconf = 0, rxconf = 0;
3144
3145
3146	/* Allocate the TX ring struct memory */
3147	if (!(adapter->tx_rings =
3148	    (struct tx_ring *) malloc(sizeof(struct tx_ring) *
3149	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3150		device_printf(dev, "Unable to allocate TX ring memory\n");
3151		error = ENOMEM;
3152		goto fail;
3153	}
3154
3155	/* Now allocate the RX */
3156	if (!(adapter->rx_rings =
3157	    (struct rx_ring *) malloc(sizeof(struct rx_ring) *
3158	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3159		device_printf(dev, "Unable to allocate RX ring memory\n");
3160		error = ENOMEM;
3161		goto rx_fail;
3162	}
3163
3164	tsize = roundup2(adapter->num_tx_desc *
3165	    sizeof(struct e1000_tx_desc), EM_DBA_ALIGN);
3166	/*
3167	 * Now set up the TX queues, txconf is needed to handle the
3168	 * possibility that things fail midcourse and we need to
3169	 * undo memory gracefully
3170	 */
3171	for (int i = 0; i < adapter->num_queues; i++, txconf++) {
3172		/* Set up some basics */
3173		txr = &adapter->tx_rings[i];
3174		txr->adapter = adapter;
3175		txr->me = i;
3176
3177		/* Initialize the TX lock */
3178		snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
3179		    device_get_nameunit(dev), txr->me);
3180		mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF);
3181
3182		if (em_dma_malloc(adapter, tsize,
3183			&txr->txdma, BUS_DMA_NOWAIT)) {
3184			device_printf(dev,
3185			    "Unable to allocate TX Descriptor memory\n");
3186			error = ENOMEM;
3187			goto err_tx_desc;
3188		}
3189		txr->tx_base = (struct e1000_tx_desc *)txr->txdma.dma_vaddr;
3190		bzero((void *)txr->tx_base, tsize);
3191
3192        	if (em_allocate_transmit_buffers(txr)) {
3193			device_printf(dev,
3194			    "Critical Failure setting up transmit buffers\n");
3195			error = ENOMEM;
3196			goto err_tx_desc;
3197        	}
3198#if __FreeBSD_version >= 800000
3199		/* Allocate a buf ring */
3200		txr->br = buf_ring_alloc(4096, M_DEVBUF,
3201		    M_WAITOK, &txr->tx_mtx);
3202#endif
3203	}
3204
3205	/*
3206	 * Next the RX queues...
3207	 */
3208	rsize = roundup2(adapter->num_rx_desc *
3209	    sizeof(struct e1000_rx_desc), EM_DBA_ALIGN);
3210	for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
3211		rxr = &adapter->rx_rings[i];
3212		rxr->adapter = adapter;
3213		rxr->me = i;
3214
3215		/* Initialize the RX lock */
3216		snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
3217		    device_get_nameunit(dev), txr->me);
3218		mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF);
3219
3220		if (em_dma_malloc(adapter, rsize,
3221			&rxr->rxdma, BUS_DMA_NOWAIT)) {
3222			device_printf(dev,
3223			    "Unable to allocate RxDescriptor memory\n");
3224			error = ENOMEM;
3225			goto err_rx_desc;
3226		}
3227		rxr->rx_base = (struct e1000_rx_desc *)rxr->rxdma.dma_vaddr;
3228		bzero((void *)rxr->rx_base, rsize);
3229
3230        	/* Allocate receive buffers for the ring*/
3231		if (em_allocate_receive_buffers(rxr)) {
3232			device_printf(dev,
3233			    "Critical Failure setting up receive buffers\n");
3234			error = ENOMEM;
3235			goto err_rx_desc;
3236		}
3237	}
3238
3239	return (0);
3240
3241err_rx_desc:
3242	for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
3243		em_dma_free(adapter, &rxr->rxdma);
3244err_tx_desc:
3245	for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
3246		em_dma_free(adapter, &txr->txdma);
3247	free(adapter->rx_rings, M_DEVBUF);
3248rx_fail:
3249#if __FreeBSD_version >= 800000
3250	buf_ring_free(txr->br, M_DEVBUF);
3251#endif
3252	free(adapter->tx_rings, M_DEVBUF);
3253fail:
3254	return (error);
3255}
3256
3257
3258/*********************************************************************
3259 *
3260 *  Allocate memory for tx_buffer structures. The tx_buffer stores all
3261 *  the information needed to transmit a packet on the wire. This is
3262 *  called only once at attach, setup is done every reset.
3263 *
3264 **********************************************************************/
3265static int
3266em_allocate_transmit_buffers(struct tx_ring *txr)
3267{
3268	struct adapter *adapter = txr->adapter;
3269	device_t dev = adapter->dev;
3270	struct em_buffer *txbuf;
3271	int error, i;
3272
3273	/*
3274	 * Setup DMA descriptor areas.
3275	 */
3276	if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
3277			       1, 0,			/* alignment, bounds */
3278			       BUS_SPACE_MAXADDR,	/* lowaddr */
3279			       BUS_SPACE_MAXADDR,	/* highaddr */
3280			       NULL, NULL,		/* filter, filterarg */
3281			       EM_TSO_SIZE,		/* maxsize */
3282			       EM_MAX_SCATTER,		/* nsegments */
3283			       PAGE_SIZE,		/* maxsegsize */
3284			       0,			/* flags */
3285			       NULL,			/* lockfunc */
3286			       NULL,			/* lockfuncarg */
3287			       &txr->txtag))) {
3288		device_printf(dev,"Unable to allocate TX DMA tag\n");
3289		goto fail;
3290	}
3291
3292	if (!(txr->tx_buffers =
3293	    (struct em_buffer *) malloc(sizeof(struct em_buffer) *
3294	    adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3295		device_printf(dev, "Unable to allocate tx_buffer memory\n");
3296		error = ENOMEM;
3297		goto fail;
3298	}
3299
3300        /* Create the descriptor buffer dma maps */
3301	txbuf = txr->tx_buffers;
3302	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3303		error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
3304		if (error != 0) {
3305			device_printf(dev, "Unable to create TX DMA map\n");
3306			goto fail;
3307		}
3308	}
3309
3310	return 0;
3311fail:
3312	/* We free all, it handles case where we are in the middle */
3313	em_free_transmit_structures(adapter);
3314	return (error);
3315}
3316
3317/*********************************************************************
3318 *
3319 *  Initialize a transmit ring.
3320 *
3321 **********************************************************************/
3322static void
3323em_setup_transmit_ring(struct tx_ring *txr)
3324{
3325	struct adapter *adapter = txr->adapter;
3326	struct em_buffer *txbuf;
3327	int i;
3328#ifdef DEV_NETMAP
3329	struct netmap_adapter *na = NA(adapter->ifp);
3330	struct netmap_slot *slot;
3331#endif /* DEV_NETMAP */
3332
3333	/* Clear the old descriptor contents */
3334	EM_TX_LOCK(txr);
3335#ifdef DEV_NETMAP
3336	slot = netmap_reset(na, NR_TX, txr->me, 0);
3337#endif /* DEV_NETMAP */
3338
3339	bzero((void *)txr->tx_base,
3340	      (sizeof(struct e1000_tx_desc)) * adapter->num_tx_desc);
3341	/* Reset indices */
3342	txr->next_avail_desc = 0;
3343	txr->next_to_clean = 0;
3344
3345	/* Free any existing tx buffers. */
3346        txbuf = txr->tx_buffers;
3347	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3348		if (txbuf->m_head != NULL) {
3349			bus_dmamap_sync(txr->txtag, txbuf->map,
3350			    BUS_DMASYNC_POSTWRITE);
3351			bus_dmamap_unload(txr->txtag, txbuf->map);
3352			m_freem(txbuf->m_head);
3353			txbuf->m_head = NULL;
3354		}
3355#ifdef DEV_NETMAP
3356		if (slot) {
3357			int si = netmap_idx_n2k(&na->tx_rings[txr->me], i);
3358			uint64_t paddr;
3359			void *addr;
3360
3361			addr = PNMB(slot + si, &paddr);
3362			txr->tx_base[i].buffer_addr = htole64(paddr);
3363			/* reload the map for netmap mode */
3364			netmap_load_map(txr->txtag, txbuf->map, addr);
3365		}
3366#endif /* DEV_NETMAP */
3367
3368		/* clear the watch index */
3369		txbuf->next_eop = -1;
3370        }
3371
3372	/* Set number of descriptors available */
3373	txr->tx_avail = adapter->num_tx_desc;
3374	txr->queue_status = EM_QUEUE_IDLE;
3375
3376	/* Clear checksum offload context. */
3377	txr->last_hw_offload = 0;
3378	txr->last_hw_ipcss = 0;
3379	txr->last_hw_ipcso = 0;
3380	txr->last_hw_tucss = 0;
3381	txr->last_hw_tucso = 0;
3382
3383	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3384	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3385	EM_TX_UNLOCK(txr);
3386}
3387
3388/*********************************************************************
3389 *
3390 *  Initialize all transmit rings.
3391 *
3392 **********************************************************************/
3393static void
3394em_setup_transmit_structures(struct adapter *adapter)
3395{
3396	struct tx_ring *txr = adapter->tx_rings;
3397
3398	for (int i = 0; i < adapter->num_queues; i++, txr++)
3399		em_setup_transmit_ring(txr);
3400
3401	return;
3402}
3403
3404/*********************************************************************
3405 *
3406 *  Enable transmit unit.
3407 *
3408 **********************************************************************/
3409static void
3410em_initialize_transmit_unit(struct adapter *adapter)
3411{
3412	struct tx_ring	*txr = adapter->tx_rings;
3413	struct e1000_hw	*hw = &adapter->hw;
3414	u32	tctl, tarc, tipg = 0;
3415
3416	 INIT_DEBUGOUT("em_initialize_transmit_unit: begin");
3417
3418	for (int i = 0; i < adapter->num_queues; i++, txr++) {
3419		u64 bus_addr = txr->txdma.dma_paddr;
3420		/* Base and Len of TX Ring */
3421		E1000_WRITE_REG(hw, E1000_TDLEN(i),
3422	    	    adapter->num_tx_desc * sizeof(struct e1000_tx_desc));
3423		E1000_WRITE_REG(hw, E1000_TDBAH(i),
3424	    	    (u32)(bus_addr >> 32));
3425		E1000_WRITE_REG(hw, E1000_TDBAL(i),
3426	    	    (u32)bus_addr);
3427		/* Init the HEAD/TAIL indices */
3428		E1000_WRITE_REG(hw, E1000_TDT(i), 0);
3429		E1000_WRITE_REG(hw, E1000_TDH(i), 0);
3430
3431		HW_DEBUGOUT2("Base = %x, Length = %x\n",
3432		    E1000_READ_REG(&adapter->hw, E1000_TDBAL(i)),
3433		    E1000_READ_REG(&adapter->hw, E1000_TDLEN(i)));
3434
3435		txr->queue_status = EM_QUEUE_IDLE;
3436	}
3437
3438	/* Set the default values for the Tx Inter Packet Gap timer */
3439	switch (adapter->hw.mac.type) {
3440	case e1000_80003es2lan:
3441		tipg = DEFAULT_82543_TIPG_IPGR1;
3442		tipg |= DEFAULT_80003ES2LAN_TIPG_IPGR2 <<
3443		    E1000_TIPG_IPGR2_SHIFT;
3444		break;
3445	default:
3446		if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
3447		    (adapter->hw.phy.media_type ==
3448		    e1000_media_type_internal_serdes))
3449			tipg = DEFAULT_82543_TIPG_IPGT_FIBER;
3450		else
3451			tipg = DEFAULT_82543_TIPG_IPGT_COPPER;
3452		tipg |= DEFAULT_82543_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT;
3453		tipg |= DEFAULT_82543_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT;
3454	}
3455
3456	E1000_WRITE_REG(&adapter->hw, E1000_TIPG, tipg);
3457	E1000_WRITE_REG(&adapter->hw, E1000_TIDV, adapter->tx_int_delay.value);
3458
3459	if(adapter->hw.mac.type >= e1000_82540)
3460		E1000_WRITE_REG(&adapter->hw, E1000_TADV,
3461		    adapter->tx_abs_int_delay.value);
3462
3463	if ((adapter->hw.mac.type == e1000_82571) ||
3464	    (adapter->hw.mac.type == e1000_82572)) {
3465		tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
3466		tarc |= SPEED_MODE_BIT;
3467		E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3468	} else if (adapter->hw.mac.type == e1000_80003es2lan) {
3469		tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
3470		tarc |= 1;
3471		E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3472		tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(1));
3473		tarc |= 1;
3474		E1000_WRITE_REG(&adapter->hw, E1000_TARC(1), tarc);
3475	}
3476
3477	adapter->txd_cmd = E1000_TXD_CMD_IFCS;
3478	if (adapter->tx_int_delay.value > 0)
3479		adapter->txd_cmd |= E1000_TXD_CMD_IDE;
3480
3481	/* Program the Transmit Control Register */
3482	tctl = E1000_READ_REG(&adapter->hw, E1000_TCTL);
3483	tctl &= ~E1000_TCTL_CT;
3484	tctl |= (E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN |
3485		   (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT));
3486
3487	if (adapter->hw.mac.type >= e1000_82571)
3488		tctl |= E1000_TCTL_MULR;
3489
3490	/* This write will effectively turn on the transmit unit. */
3491	E1000_WRITE_REG(&adapter->hw, E1000_TCTL, tctl);
3492
3493}
3494
3495
3496/*********************************************************************
3497 *
3498 *  Free all transmit rings.
3499 *
3500 **********************************************************************/
3501static void
3502em_free_transmit_structures(struct adapter *adapter)
3503{
3504	struct tx_ring *txr = adapter->tx_rings;
3505
3506	for (int i = 0; i < adapter->num_queues; i++, txr++) {
3507		EM_TX_LOCK(txr);
3508		em_free_transmit_buffers(txr);
3509		em_dma_free(adapter, &txr->txdma);
3510		EM_TX_UNLOCK(txr);
3511		EM_TX_LOCK_DESTROY(txr);
3512	}
3513
3514	free(adapter->tx_rings, M_DEVBUF);
3515}
3516
3517/*********************************************************************
3518 *
3519 *  Free transmit ring related data structures.
3520 *
3521 **********************************************************************/
3522static void
3523em_free_transmit_buffers(struct tx_ring *txr)
3524{
3525	struct adapter		*adapter = txr->adapter;
3526	struct em_buffer	*txbuf;
3527
3528	INIT_DEBUGOUT("free_transmit_ring: begin");
3529
3530	if (txr->tx_buffers == NULL)
3531		return;
3532
3533	for (int i = 0; i < adapter->num_tx_desc; i++) {
3534		txbuf = &txr->tx_buffers[i];
3535		if (txbuf->m_head != NULL) {
3536			bus_dmamap_sync(txr->txtag, txbuf->map,
3537			    BUS_DMASYNC_POSTWRITE);
3538			bus_dmamap_unload(txr->txtag,
3539			    txbuf->map);
3540			m_freem(txbuf->m_head);
3541			txbuf->m_head = NULL;
3542			if (txbuf->map != NULL) {
3543				bus_dmamap_destroy(txr->txtag,
3544				    txbuf->map);
3545				txbuf->map = NULL;
3546			}
3547		} else if (txbuf->map != NULL) {
3548			bus_dmamap_unload(txr->txtag,
3549			    txbuf->map);
3550			bus_dmamap_destroy(txr->txtag,
3551			    txbuf->map);
3552			txbuf->map = NULL;
3553		}
3554	}
3555#if __FreeBSD_version >= 800000
3556	if (txr->br != NULL)
3557		buf_ring_free(txr->br, M_DEVBUF);
3558#endif
3559	if (txr->tx_buffers != NULL) {
3560		free(txr->tx_buffers, M_DEVBUF);
3561		txr->tx_buffers = NULL;
3562	}
3563	if (txr->txtag != NULL) {
3564		bus_dma_tag_destroy(txr->txtag);
3565		txr->txtag = NULL;
3566	}
3567	return;
3568}
3569
3570
3571/*********************************************************************
3572 *  The offload context is protocol specific (TCP/UDP) and thus
3573 *  only needs to be set when the protocol changes. The occasion
3574 *  of a context change can be a performance detriment, and
3575 *  might be better just disabled. The reason arises in the way
3576 *  in which the controller supports pipelined requests from the
3577 *  Tx data DMA. Up to four requests can be pipelined, and they may
3578 *  belong to the same packet or to multiple packets. However all
3579 *  requests for one packet are issued before a request is issued
3580 *  for a subsequent packet and if a request for the next packet
3581 *  requires a context change, that request will be stalled
3582 *  until the previous request completes. This means setting up
3583 *  a new context effectively disables pipelined Tx data DMA which
3584 *  in turn greatly slow down performance to send small sized
3585 *  frames.
3586 **********************************************************************/
3587static void
3588em_transmit_checksum_setup(struct tx_ring *txr, struct mbuf *mp, int ip_off,
3589    struct ip *ip, u32 *txd_upper, u32 *txd_lower)
3590{
3591	struct adapter			*adapter = txr->adapter;
3592	struct e1000_context_desc	*TXD = NULL;
3593	struct em_buffer		*tx_buffer;
3594	int				cur, hdr_len;
3595	u32				cmd = 0;
3596	u16				offload = 0;
3597	u8				ipcso, ipcss, tucso, tucss;
3598
3599	ipcss = ipcso = tucss = tucso = 0;
3600	hdr_len = ip_off + (ip->ip_hl << 2);
3601	cur = txr->next_avail_desc;
3602
3603	/* Setup of IP header checksum. */
3604	if (mp->m_pkthdr.csum_flags & CSUM_IP) {
3605		*txd_upper |= E1000_TXD_POPTS_IXSM << 8;
3606		offload |= CSUM_IP;
3607		ipcss = ip_off;
3608		ipcso = ip_off + offsetof(struct ip, ip_sum);
3609		/*
3610		 * Start offset for header checksum calculation.
3611		 * End offset for header checksum calculation.
3612		 * Offset of place to put the checksum.
3613		 */
3614		TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
3615		TXD->lower_setup.ip_fields.ipcss = ipcss;
3616		TXD->lower_setup.ip_fields.ipcse = htole16(hdr_len);
3617		TXD->lower_setup.ip_fields.ipcso = ipcso;
3618		cmd |= E1000_TXD_CMD_IP;
3619	}
3620
3621	if (mp->m_pkthdr.csum_flags & CSUM_TCP) {
3622 		*txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
3623 		*txd_upper |= E1000_TXD_POPTS_TXSM << 8;
3624 		offload |= CSUM_TCP;
3625 		tucss = hdr_len;
3626 		tucso = hdr_len + offsetof(struct tcphdr, th_sum);
3627 		/*
3628 		 * Setting up new checksum offload context for every frames
3629 		 * takes a lot of processing time for hardware. This also
3630 		 * reduces performance a lot for small sized frames so avoid
3631 		 * it if driver can use previously configured checksum
3632 		 * offload context.
3633 		 */
3634 		if (txr->last_hw_offload == offload) {
3635 			if (offload & CSUM_IP) {
3636 				if (txr->last_hw_ipcss == ipcss &&
3637 				    txr->last_hw_ipcso == ipcso &&
3638 				    txr->last_hw_tucss == tucss &&
3639 				    txr->last_hw_tucso == tucso)
3640 					return;
3641 			} else {
3642 				if (txr->last_hw_tucss == tucss &&
3643 				    txr->last_hw_tucso == tucso)
3644 					return;
3645 			}
3646  		}
3647 		txr->last_hw_offload = offload;
3648 		txr->last_hw_tucss = tucss;
3649 		txr->last_hw_tucso = tucso;
3650 		/*
3651 		 * Start offset for payload checksum calculation.
3652 		 * End offset for payload checksum calculation.
3653 		 * Offset of place to put the checksum.
3654 		 */
3655		TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
3656 		TXD->upper_setup.tcp_fields.tucss = hdr_len;
3657 		TXD->upper_setup.tcp_fields.tucse = htole16(0);
3658 		TXD->upper_setup.tcp_fields.tucso = tucso;
3659 		cmd |= E1000_TXD_CMD_TCP;
3660 	} else if (mp->m_pkthdr.csum_flags & CSUM_UDP) {
3661 		*txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
3662 		*txd_upper |= E1000_TXD_POPTS_TXSM << 8;
3663 		tucss = hdr_len;
3664 		tucso = hdr_len + offsetof(struct udphdr, uh_sum);
3665 		/*
3666 		 * Setting up new checksum offload context for every frames
3667 		 * takes a lot of processing time for hardware. This also
3668 		 * reduces performance a lot for small sized frames so avoid
3669 		 * it if driver can use previously configured checksum
3670 		 * offload context.
3671 		 */
3672 		if (txr->last_hw_offload == offload) {
3673 			if (offload & CSUM_IP) {
3674 				if (txr->last_hw_ipcss == ipcss &&
3675 				    txr->last_hw_ipcso == ipcso &&
3676 				    txr->last_hw_tucss == tucss &&
3677 				    txr->last_hw_tucso == tucso)
3678 					return;
3679 			} else {
3680 				if (txr->last_hw_tucss == tucss &&
3681 				    txr->last_hw_tucso == tucso)
3682 					return;
3683 			}
3684 		}
3685 		txr->last_hw_offload = offload;
3686 		txr->last_hw_tucss = tucss;
3687 		txr->last_hw_tucso = tucso;
3688 		/*
3689 		 * Start offset for header checksum calculation.
3690 		 * End offset for header checksum calculation.
3691 		 * Offset of place to put the checksum.
3692 		 */
3693		TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
3694 		TXD->upper_setup.tcp_fields.tucss = tucss;
3695 		TXD->upper_setup.tcp_fields.tucse = htole16(0);
3696 		TXD->upper_setup.tcp_fields.tucso = tucso;
3697  	}
3698
3699 	if (offload & CSUM_IP) {
3700 		txr->last_hw_ipcss = ipcss;
3701 		txr->last_hw_ipcso = ipcso;
3702  	}
3703
3704	TXD->tcp_seg_setup.data = htole32(0);
3705	TXD->cmd_and_length =
3706	    htole32(adapter->txd_cmd | E1000_TXD_CMD_DEXT | cmd);
3707	tx_buffer = &txr->tx_buffers[cur];
3708	tx_buffer->m_head = NULL;
3709	tx_buffer->next_eop = -1;
3710
3711	if (++cur == adapter->num_tx_desc)
3712		cur = 0;
3713
3714	txr->tx_avail--;
3715	txr->next_avail_desc = cur;
3716}
3717
3718
3719/**********************************************************************
3720 *
3721 *  Setup work for hardware segmentation offload (TSO)
3722 *
3723 **********************************************************************/
3724static void
3725em_tso_setup(struct tx_ring *txr, struct mbuf *mp, int ip_off,
3726    struct ip *ip, struct tcphdr *tp, u32 *txd_upper, u32 *txd_lower)
3727{
3728	struct adapter			*adapter = txr->adapter;
3729	struct e1000_context_desc	*TXD;
3730	struct em_buffer		*tx_buffer;
3731	int cur, hdr_len;
3732
3733	/*
3734	 * In theory we can use the same TSO context if and only if
3735	 * frame is the same type(IP/TCP) and the same MSS. However
3736	 * checking whether a frame has the same IP/TCP structure is
3737	 * hard thing so just ignore that and always restablish a
3738	 * new TSO context.
3739	 */
3740	hdr_len = ip_off + (ip->ip_hl << 2) + (tp->th_off << 2);
3741	*txd_lower = (E1000_TXD_CMD_DEXT |	/* Extended descr type */
3742		      E1000_TXD_DTYP_D |	/* Data descr type */
3743		      E1000_TXD_CMD_TSE);	/* Do TSE on this packet */
3744
3745	/* IP and/or TCP header checksum calculation and insertion. */
3746	*txd_upper = (E1000_TXD_POPTS_IXSM | E1000_TXD_POPTS_TXSM) << 8;
3747
3748	cur = txr->next_avail_desc;
3749	tx_buffer = &txr->tx_buffers[cur];
3750	TXD = (struct e1000_context_desc *) &txr->tx_base[cur];
3751
3752	/*
3753	 * Start offset for header checksum calculation.
3754	 * End offset for header checksum calculation.
3755	 * Offset of place put the checksum.
3756	 */
3757	TXD->lower_setup.ip_fields.ipcss = ip_off;
3758	TXD->lower_setup.ip_fields.ipcse =
3759	    htole16(ip_off + (ip->ip_hl << 2) - 1);
3760	TXD->lower_setup.ip_fields.ipcso = ip_off + offsetof(struct ip, ip_sum);
3761	/*
3762	 * Start offset for payload checksum calculation.
3763	 * End offset for payload checksum calculation.
3764	 * Offset of place to put the checksum.
3765	 */
3766	TXD->upper_setup.tcp_fields.tucss = ip_off + (ip->ip_hl << 2);
3767	TXD->upper_setup.tcp_fields.tucse = 0;
3768	TXD->upper_setup.tcp_fields.tucso =
3769	    ip_off + (ip->ip_hl << 2) + offsetof(struct tcphdr, th_sum);
3770	/*
3771	 * Payload size per packet w/o any headers.
3772	 * Length of all headers up to payload.
3773	 */
3774	TXD->tcp_seg_setup.fields.mss = htole16(mp->m_pkthdr.tso_segsz);
3775	TXD->tcp_seg_setup.fields.hdr_len = hdr_len;
3776
3777	TXD->cmd_and_length = htole32(adapter->txd_cmd |
3778				E1000_TXD_CMD_DEXT |	/* Extended descr */
3779				E1000_TXD_CMD_TSE |	/* TSE context */
3780				E1000_TXD_CMD_IP |	/* Do IP csum */
3781				E1000_TXD_CMD_TCP |	/* Do TCP checksum */
3782				(mp->m_pkthdr.len - (hdr_len))); /* Total len */
3783
3784	tx_buffer->m_head = NULL;
3785	tx_buffer->next_eop = -1;
3786
3787	if (++cur == adapter->num_tx_desc)
3788		cur = 0;
3789
3790	txr->tx_avail--;
3791	txr->next_avail_desc = cur;
3792	txr->tx_tso = TRUE;
3793}
3794
3795
3796/**********************************************************************
3797 *
3798 *  Examine each tx_buffer in the used queue. If the hardware is done
3799 *  processing the packet then free associated resources. The
3800 *  tx_buffer is put back on the free queue.
3801 *
3802 **********************************************************************/
3803static void
3804em_txeof(struct tx_ring *txr)
3805{
3806	struct adapter	*adapter = txr->adapter;
3807        int first, last, done, processed;
3808        struct em_buffer *tx_buffer;
3809        struct e1000_tx_desc   *tx_desc, *eop_desc;
3810	struct ifnet   *ifp = adapter->ifp;
3811
3812	EM_TX_LOCK_ASSERT(txr);
3813#ifdef DEV_NETMAP
3814	if (netmap_tx_irq(ifp, txr->me |
3815	    (NETMAP_LOCKED_ENTER | NETMAP_LOCKED_EXIT)))
3816		return;
3817#endif /* DEV_NETMAP */
3818
3819	/* No work, make sure watchdog is off */
3820        if (txr->tx_avail == adapter->num_tx_desc) {
3821		txr->queue_status = EM_QUEUE_IDLE;
3822                return;
3823	}
3824
3825	processed = 0;
3826        first = txr->next_to_clean;
3827        tx_desc = &txr->tx_base[first];
3828        tx_buffer = &txr->tx_buffers[first];
3829	last = tx_buffer->next_eop;
3830        eop_desc = &txr->tx_base[last];
3831
3832	/*
3833	 * What this does is get the index of the
3834	 * first descriptor AFTER the EOP of the
3835	 * first packet, that way we can do the
3836	 * simple comparison on the inner while loop.
3837	 */
3838	if (++last == adapter->num_tx_desc)
3839 		last = 0;
3840	done = last;
3841
3842        bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3843            BUS_DMASYNC_POSTREAD);
3844
3845        while (eop_desc->upper.fields.status & E1000_TXD_STAT_DD) {
3846		/* We clean the range of the packet */
3847		while (first != done) {
3848                	tx_desc->upper.data = 0;
3849                	tx_desc->lower.data = 0;
3850                	tx_desc->buffer_addr = 0;
3851                	++txr->tx_avail;
3852			++processed;
3853
3854			if (tx_buffer->m_head) {
3855				bus_dmamap_sync(txr->txtag,
3856				    tx_buffer->map,
3857				    BUS_DMASYNC_POSTWRITE);
3858				bus_dmamap_unload(txr->txtag,
3859				    tx_buffer->map);
3860                        	m_freem(tx_buffer->m_head);
3861                        	tx_buffer->m_head = NULL;
3862                	}
3863			tx_buffer->next_eop = -1;
3864			txr->watchdog_time = ticks;
3865
3866	                if (++first == adapter->num_tx_desc)
3867				first = 0;
3868
3869	                tx_buffer = &txr->tx_buffers[first];
3870			tx_desc = &txr->tx_base[first];
3871		}
3872		++ifp->if_opackets;
3873		/* See if we can continue to the next packet */
3874		last = tx_buffer->next_eop;
3875		if (last != -1) {
3876        		eop_desc = &txr->tx_base[last];
3877			/* Get new done point */
3878			if (++last == adapter->num_tx_desc) last = 0;
3879			done = last;
3880		} else
3881			break;
3882        }
3883        bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3884            BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3885
3886        txr->next_to_clean = first;
3887
3888	/*
3889	** Watchdog calculation, we know there's
3890	** work outstanding or the first return
3891	** would have been taken, so none processed
3892	** for too long indicates a hang. local timer
3893	** will examine this and do a reset if needed.
3894	*/
3895	if ((!processed) && ((ticks - txr->watchdog_time) > EM_WATCHDOG))
3896		txr->queue_status = EM_QUEUE_HUNG;
3897
3898        /*
3899         * If we have a minimum free, clear IFF_DRV_OACTIVE
3900         * to tell the stack that it is OK to send packets.
3901	 * Notice that all writes of OACTIVE happen under the
3902	 * TX lock which, with a single queue, guarantees
3903	 * sanity.
3904         */
3905        if (txr->tx_avail >= EM_MAX_SCATTER)
3906		ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
3907
3908	/* Disable watchdog if all clean */
3909	if (txr->tx_avail == adapter->num_tx_desc) {
3910		txr->queue_status = EM_QUEUE_IDLE;
3911	}
3912}
3913
3914
3915/*********************************************************************
3916 *
3917 *  Refresh RX descriptor mbufs from system mbuf buffer pool.
3918 *
3919 **********************************************************************/
3920static void
3921em_refresh_mbufs(struct rx_ring *rxr, int limit)
3922{
3923	struct adapter		*adapter = rxr->adapter;
3924	struct mbuf		*m;
3925	bus_dma_segment_t	segs[1];
3926	struct em_buffer	*rxbuf;
3927	int			i, j, error, nsegs;
3928	bool			cleaned = FALSE;
3929
3930	i = j = rxr->next_to_refresh;
3931	/*
3932	** Get one descriptor beyond
3933	** our work mark to control
3934	** the loop.
3935	*/
3936	if (++j == adapter->num_rx_desc)
3937		j = 0;
3938
3939	while (j != limit) {
3940		rxbuf = &rxr->rx_buffers[i];
3941		if (rxbuf->m_head == NULL) {
3942			m = m_getjcl(M_NOWAIT, MT_DATA,
3943			    M_PKTHDR, adapter->rx_mbuf_sz);
3944			/*
3945			** If we have a temporary resource shortage
3946			** that causes a failure, just abort refresh
3947			** for now, we will return to this point when
3948			** reinvoked from em_rxeof.
3949			*/
3950			if (m == NULL)
3951				goto update;
3952		} else
3953			m = rxbuf->m_head;
3954
3955		m->m_len = m->m_pkthdr.len = adapter->rx_mbuf_sz;
3956		m->m_flags |= M_PKTHDR;
3957		m->m_data = m->m_ext.ext_buf;
3958
3959		/* Use bus_dma machinery to setup the memory mapping  */
3960		error = bus_dmamap_load_mbuf_sg(rxr->rxtag, rxbuf->map,
3961		    m, segs, &nsegs, BUS_DMA_NOWAIT);
3962		if (error != 0) {
3963			printf("Refresh mbufs: hdr dmamap load"
3964			    " failure - %d\n", error);
3965			m_free(m);
3966			rxbuf->m_head = NULL;
3967			goto update;
3968		}
3969		rxbuf->m_head = m;
3970		bus_dmamap_sync(rxr->rxtag,
3971		    rxbuf->map, BUS_DMASYNC_PREREAD);
3972		rxr->rx_base[i].buffer_addr = htole64(segs[0].ds_addr);
3973		cleaned = TRUE;
3974
3975		i = j; /* Next is precalulated for us */
3976		rxr->next_to_refresh = i;
3977		/* Calculate next controlling index */
3978		if (++j == adapter->num_rx_desc)
3979			j = 0;
3980	}
3981update:
3982	/*
3983	** Update the tail pointer only if,
3984	** and as far as we have refreshed.
3985	*/
3986	if (cleaned)
3987		E1000_WRITE_REG(&adapter->hw,
3988		    E1000_RDT(rxr->me), rxr->next_to_refresh);
3989
3990	return;
3991}
3992
3993
3994/*********************************************************************
3995 *
3996 *  Allocate memory for rx_buffer structures. Since we use one
3997 *  rx_buffer per received packet, the maximum number of rx_buffer's
3998 *  that we'll need is equal to the number of receive descriptors
3999 *  that we've allocated.
4000 *
4001 **********************************************************************/
4002static int
4003em_allocate_receive_buffers(struct rx_ring *rxr)
4004{
4005	struct adapter		*adapter = rxr->adapter;
4006	device_t		dev = adapter->dev;
4007	struct em_buffer	*rxbuf;
4008	int			error;
4009
4010	rxr->rx_buffers = malloc(sizeof(struct em_buffer) *
4011	    adapter->num_rx_desc, M_DEVBUF, M_NOWAIT | M_ZERO);
4012	if (rxr->rx_buffers == NULL) {
4013		device_printf(dev, "Unable to allocate rx_buffer memory\n");
4014		return (ENOMEM);
4015	}
4016
4017	error = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */
4018				1, 0,			/* alignment, bounds */
4019				BUS_SPACE_MAXADDR,	/* lowaddr */
4020				BUS_SPACE_MAXADDR,	/* highaddr */
4021				NULL, NULL,		/* filter, filterarg */
4022				MJUM9BYTES,		/* maxsize */
4023				1,			/* nsegments */
4024				MJUM9BYTES,		/* maxsegsize */
4025				0,			/* flags */
4026				NULL,			/* lockfunc */
4027				NULL,			/* lockarg */
4028				&rxr->rxtag);
4029	if (error) {
4030		device_printf(dev, "%s: bus_dma_tag_create failed %d\n",
4031		    __func__, error);
4032		goto fail;
4033	}
4034
4035	rxbuf = rxr->rx_buffers;
4036	for (int i = 0; i < adapter->num_rx_desc; i++, rxbuf++) {
4037		rxbuf = &rxr->rx_buffers[i];
4038		error = bus_dmamap_create(rxr->rxtag, BUS_DMA_NOWAIT,
4039		    &rxbuf->map);
4040		if (error) {
4041			device_printf(dev, "%s: bus_dmamap_create failed: %d\n",
4042			    __func__, error);
4043			goto fail;
4044		}
4045	}
4046
4047	return (0);
4048
4049fail:
4050	em_free_receive_structures(adapter);
4051	return (error);
4052}
4053
4054
4055/*********************************************************************
4056 *
4057 *  Initialize a receive ring and its buffers.
4058 *
4059 **********************************************************************/
4060static int
4061em_setup_receive_ring(struct rx_ring *rxr)
4062{
4063	struct	adapter 	*adapter = rxr->adapter;
4064	struct em_buffer	*rxbuf;
4065	bus_dma_segment_t	seg[1];
4066	int			rsize, nsegs, error = 0;
4067#ifdef DEV_NETMAP
4068	struct netmap_adapter *na = NA(adapter->ifp);
4069	struct netmap_slot *slot;
4070#endif
4071
4072
4073	/* Clear the ring contents */
4074	EM_RX_LOCK(rxr);
4075	rsize = roundup2(adapter->num_rx_desc *
4076	    sizeof(struct e1000_rx_desc), EM_DBA_ALIGN);
4077	bzero((void *)rxr->rx_base, rsize);
4078#ifdef DEV_NETMAP
4079	slot = netmap_reset(na, NR_RX, 0, 0);
4080#endif
4081
4082	/*
4083	** Free current RX buffer structs and their mbufs
4084	*/
4085	for (int i = 0; i < adapter->num_rx_desc; i++) {
4086		rxbuf = &rxr->rx_buffers[i];
4087		if (rxbuf->m_head != NULL) {
4088			bus_dmamap_sync(rxr->rxtag, rxbuf->map,
4089			    BUS_DMASYNC_POSTREAD);
4090			bus_dmamap_unload(rxr->rxtag, rxbuf->map);
4091			m_freem(rxbuf->m_head);
4092			rxbuf->m_head = NULL; /* mark as freed */
4093		}
4094	}
4095
4096	/* Now replenish the mbufs */
4097        for (int j = 0; j != adapter->num_rx_desc; ++j) {
4098		rxbuf = &rxr->rx_buffers[j];
4099#ifdef DEV_NETMAP
4100		if (slot) {
4101			int si = netmap_idx_n2k(&na->rx_rings[rxr->me], j);
4102			uint64_t paddr;
4103			void *addr;
4104
4105			addr = PNMB(slot + si, &paddr);
4106			netmap_load_map(rxr->rxtag, rxbuf->map, addr);
4107			/* Update descriptor */
4108			rxr->rx_base[j].buffer_addr = htole64(paddr);
4109			continue;
4110		}
4111#endif /* DEV_NETMAP */
4112		rxbuf->m_head = m_getjcl(M_NOWAIT, MT_DATA,
4113		    M_PKTHDR, adapter->rx_mbuf_sz);
4114		if (rxbuf->m_head == NULL) {
4115			error = ENOBUFS;
4116			goto fail;
4117		}
4118		rxbuf->m_head->m_len = adapter->rx_mbuf_sz;
4119		rxbuf->m_head->m_flags &= ~M_HASFCS; /* we strip it */
4120		rxbuf->m_head->m_pkthdr.len = adapter->rx_mbuf_sz;
4121
4122		/* Get the memory mapping */
4123		error = bus_dmamap_load_mbuf_sg(rxr->rxtag,
4124		    rxbuf->map, rxbuf->m_head, seg,
4125		    &nsegs, BUS_DMA_NOWAIT);
4126		if (error != 0) {
4127			m_freem(rxbuf->m_head);
4128			rxbuf->m_head = NULL;
4129			goto fail;
4130		}
4131		bus_dmamap_sync(rxr->rxtag,
4132		    rxbuf->map, BUS_DMASYNC_PREREAD);
4133
4134		/* Update descriptor */
4135		rxr->rx_base[j].buffer_addr = htole64(seg[0].ds_addr);
4136	}
4137	rxr->next_to_check = 0;
4138	rxr->next_to_refresh = 0;
4139	bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4140	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4141
4142fail:
4143	EM_RX_UNLOCK(rxr);
4144	return (error);
4145}
4146
4147/*********************************************************************
4148 *
4149 *  Initialize all receive rings.
4150 *
4151 **********************************************************************/
4152static int
4153em_setup_receive_structures(struct adapter *adapter)
4154{
4155	struct rx_ring *rxr = adapter->rx_rings;
4156	int q;
4157
4158	for (q = 0; q < adapter->num_queues; q++, rxr++)
4159		if (em_setup_receive_ring(rxr))
4160			goto fail;
4161
4162	return (0);
4163fail:
4164	/*
4165	 * Free RX buffers allocated so far, we will only handle
4166	 * the rings that completed, the failing case will have
4167	 * cleaned up for itself. 'q' failed, so its the terminus.
4168	 */
4169	for (int i = 0; i < q; ++i) {
4170		rxr = &adapter->rx_rings[i];
4171		for (int n = 0; n < adapter->num_rx_desc; n++) {
4172			struct em_buffer *rxbuf;
4173			rxbuf = &rxr->rx_buffers[n];
4174			if (rxbuf->m_head != NULL) {
4175				bus_dmamap_sync(rxr->rxtag, rxbuf->map,
4176			  	  BUS_DMASYNC_POSTREAD);
4177				bus_dmamap_unload(rxr->rxtag, rxbuf->map);
4178				m_freem(rxbuf->m_head);
4179				rxbuf->m_head = NULL;
4180			}
4181		}
4182		rxr->next_to_check = 0;
4183		rxr->next_to_refresh = 0;
4184	}
4185
4186	return (ENOBUFS);
4187}
4188
4189/*********************************************************************
4190 *
4191 *  Free all receive rings.
4192 *
4193 **********************************************************************/
4194static void
4195em_free_receive_structures(struct adapter *adapter)
4196{
4197	struct rx_ring *rxr = adapter->rx_rings;
4198
4199	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4200		em_free_receive_buffers(rxr);
4201		/* Free the ring memory as well */
4202		em_dma_free(adapter, &rxr->rxdma);
4203		EM_RX_LOCK_DESTROY(rxr);
4204	}
4205
4206	free(adapter->rx_rings, M_DEVBUF);
4207}
4208
4209
4210/*********************************************************************
4211 *
4212 *  Free receive ring data structures
4213 *
4214 **********************************************************************/
4215static void
4216em_free_receive_buffers(struct rx_ring *rxr)
4217{
4218	struct adapter		*adapter = rxr->adapter;
4219	struct em_buffer	*rxbuf = NULL;
4220
4221	INIT_DEBUGOUT("free_receive_buffers: begin");
4222
4223	if (rxr->rx_buffers != NULL) {
4224		for (int i = 0; i < adapter->num_rx_desc; i++) {
4225			rxbuf = &rxr->rx_buffers[i];
4226			if (rxbuf->map != NULL) {
4227				bus_dmamap_sync(rxr->rxtag, rxbuf->map,
4228				    BUS_DMASYNC_POSTREAD);
4229				bus_dmamap_unload(rxr->rxtag, rxbuf->map);
4230				bus_dmamap_destroy(rxr->rxtag, rxbuf->map);
4231			}
4232			if (rxbuf->m_head != NULL) {
4233				m_freem(rxbuf->m_head);
4234				rxbuf->m_head = NULL;
4235			}
4236		}
4237		free(rxr->rx_buffers, M_DEVBUF);
4238		rxr->rx_buffers = NULL;
4239		rxr->next_to_check = 0;
4240		rxr->next_to_refresh = 0;
4241	}
4242
4243	if (rxr->rxtag != NULL) {
4244		bus_dma_tag_destroy(rxr->rxtag);
4245		rxr->rxtag = NULL;
4246	}
4247
4248	return;
4249}
4250
4251
4252/*********************************************************************
4253 *
4254 *  Enable receive unit.
4255 *
4256 **********************************************************************/
4257
4258static void
4259em_initialize_receive_unit(struct adapter *adapter)
4260{
4261	struct rx_ring	*rxr = adapter->rx_rings;
4262	struct ifnet	*ifp = adapter->ifp;
4263	struct e1000_hw	*hw = &adapter->hw;
4264	u64	bus_addr;
4265	u32	rctl, rxcsum;
4266
4267	INIT_DEBUGOUT("em_initialize_receive_units: begin");
4268
4269	/*
4270	 * Make sure receives are disabled while setting
4271	 * up the descriptor ring
4272	 */
4273	rctl = E1000_READ_REG(hw, E1000_RCTL);
4274	/* Do not disable if ever enabled on this hardware */
4275	if ((hw->mac.type != e1000_82574) && (hw->mac.type != e1000_82583))
4276		E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
4277
4278	E1000_WRITE_REG(&adapter->hw, E1000_RADV,
4279	    adapter->rx_abs_int_delay.value);
4280	/*
4281	 * Set the interrupt throttling rate. Value is calculated
4282	 * as DEFAULT_ITR = 1/(MAX_INTS_PER_SEC * 256ns)
4283	 */
4284	E1000_WRITE_REG(hw, E1000_ITR, DEFAULT_ITR);
4285
4286	/*
4287	** When using MSIX interrupts we need to throttle
4288	** using the EITR register (82574 only)
4289	*/
4290	if (hw->mac.type == e1000_82574) {
4291		for (int i = 0; i < 4; i++)
4292			E1000_WRITE_REG(hw, E1000_EITR_82574(i),
4293			    DEFAULT_ITR);
4294		/* Disable accelerated acknowledge */
4295		E1000_WRITE_REG(hw, E1000_RFCTL, E1000_RFCTL_ACK_DIS);
4296	}
4297
4298	if (ifp->if_capenable & IFCAP_RXCSUM) {
4299		rxcsum = E1000_READ_REG(hw, E1000_RXCSUM);
4300		rxcsum |= (E1000_RXCSUM_IPOFL | E1000_RXCSUM_TUOFL);
4301		E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum);
4302	}
4303
4304	/*
4305	** XXX TEMPORARY WORKAROUND: on some systems with 82573
4306	** long latencies are observed, like Lenovo X60. This
4307	** change eliminates the problem, but since having positive
4308	** values in RDTR is a known source of problems on other
4309	** platforms another solution is being sought.
4310	*/
4311	if (hw->mac.type == e1000_82573)
4312		E1000_WRITE_REG(hw, E1000_RDTR, 0x20);
4313
4314	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4315		/* Setup the Base and Length of the Rx Descriptor Ring */
4316		u32 rdt = adapter->num_rx_desc - 1; /* default */
4317
4318		bus_addr = rxr->rxdma.dma_paddr;
4319		E1000_WRITE_REG(hw, E1000_RDLEN(i),
4320		    adapter->num_rx_desc * sizeof(struct e1000_rx_desc));
4321		E1000_WRITE_REG(hw, E1000_RDBAH(i), (u32)(bus_addr >> 32));
4322		E1000_WRITE_REG(hw, E1000_RDBAL(i), (u32)bus_addr);
4323		/* Setup the Head and Tail Descriptor Pointers */
4324		E1000_WRITE_REG(hw, E1000_RDH(i), 0);
4325#ifdef DEV_NETMAP
4326		/*
4327		 * an init() while a netmap client is active must
4328		 * preserve the rx buffers passed to userspace.
4329		 */
4330		if (ifp->if_capenable & IFCAP_NETMAP)
4331			rdt -= NA(adapter->ifp)->rx_rings[i].nr_hwavail;
4332#endif /* DEV_NETMAP */
4333		E1000_WRITE_REG(hw, E1000_RDT(i), rdt);
4334	}
4335
4336	/* Set PTHRESH for improved jumbo performance */
4337	if (((adapter->hw.mac.type == e1000_ich9lan) ||
4338	    (adapter->hw.mac.type == e1000_pch2lan) ||
4339	    (adapter->hw.mac.type == e1000_ich10lan)) &&
4340	    (ifp->if_mtu > ETHERMTU)) {
4341		u32 rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(0));
4342		E1000_WRITE_REG(hw, E1000_RXDCTL(0), rxdctl | 3);
4343	}
4344
4345	if (adapter->hw.mac.type >= e1000_pch2lan) {
4346		if (ifp->if_mtu > ETHERMTU)
4347			e1000_lv_jumbo_workaround_ich8lan(hw, TRUE);
4348		else
4349			e1000_lv_jumbo_workaround_ich8lan(hw, FALSE);
4350	}
4351
4352	/* Setup the Receive Control Register */
4353	rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
4354	rctl |= E1000_RCTL_EN | E1000_RCTL_BAM |
4355	    E1000_RCTL_LBM_NO | E1000_RCTL_RDMTS_HALF |
4356	    (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
4357
4358        /* Strip the CRC */
4359        rctl |= E1000_RCTL_SECRC;
4360
4361        /* Make sure VLAN Filters are off */
4362        rctl &= ~E1000_RCTL_VFE;
4363	rctl &= ~E1000_RCTL_SBP;
4364
4365	if (adapter->rx_mbuf_sz == MCLBYTES)
4366		rctl |= E1000_RCTL_SZ_2048;
4367	else if (adapter->rx_mbuf_sz == MJUMPAGESIZE)
4368		rctl |= E1000_RCTL_SZ_4096 | E1000_RCTL_BSEX;
4369	else if (adapter->rx_mbuf_sz > MJUMPAGESIZE)
4370		rctl |= E1000_RCTL_SZ_8192 | E1000_RCTL_BSEX;
4371
4372	if (ifp->if_mtu > ETHERMTU)
4373		rctl |= E1000_RCTL_LPE;
4374	else
4375		rctl &= ~E1000_RCTL_LPE;
4376
4377	/* Write out the settings */
4378	E1000_WRITE_REG(hw, E1000_RCTL, rctl);
4379
4380	return;
4381}
4382
4383
4384/*********************************************************************
4385 *
4386 *  This routine executes in interrupt context. It replenishes
4387 *  the mbufs in the descriptor and sends data which has been
4388 *  dma'ed into host memory to upper layer.
4389 *
4390 *  We loop at most count times if count is > 0, or until done if
4391 *  count < 0.
4392 *
4393 *  For polling we also now return the number of cleaned packets
4394 *********************************************************************/
4395static bool
4396em_rxeof(struct rx_ring *rxr, int count, int *done)
4397{
4398	struct adapter		*adapter = rxr->adapter;
4399	struct ifnet		*ifp = adapter->ifp;
4400	struct mbuf		*mp, *sendmp;
4401	u8			status = 0;
4402	u16 			len;
4403	int			i, processed, rxdone = 0;
4404	bool			eop;
4405	struct e1000_rx_desc	*cur;
4406
4407	EM_RX_LOCK(rxr);
4408
4409#ifdef DEV_NETMAP
4410	if (netmap_rx_irq(ifp, rxr->me | NETMAP_LOCKED_ENTER, &processed))
4411		return (FALSE);
4412#endif /* DEV_NETMAP */
4413
4414	for (i = rxr->next_to_check, processed = 0; count != 0;) {
4415
4416		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
4417			break;
4418
4419		bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4420		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
4421
4422		cur = &rxr->rx_base[i];
4423		status = cur->status;
4424		mp = sendmp = NULL;
4425
4426		if ((status & E1000_RXD_STAT_DD) == 0)
4427			break;
4428
4429		len = le16toh(cur->length);
4430		eop = (status & E1000_RXD_STAT_EOP) != 0;
4431
4432		if ((cur->errors & E1000_RXD_ERR_FRAME_ERR_MASK) ||
4433		    (rxr->discard == TRUE)) {
4434			adapter->dropped_pkts++;
4435			++rxr->rx_discarded;
4436			if (!eop) /* Catch subsequent segs */
4437				rxr->discard = TRUE;
4438			else
4439				rxr->discard = FALSE;
4440			em_rx_discard(rxr, i);
4441			goto next_desc;
4442		}
4443
4444		/* Assign correct length to the current fragment */
4445		mp = rxr->rx_buffers[i].m_head;
4446		mp->m_len = len;
4447
4448		/* Trigger for refresh */
4449		rxr->rx_buffers[i].m_head = NULL;
4450
4451		/* First segment? */
4452		if (rxr->fmp == NULL) {
4453			mp->m_pkthdr.len = len;
4454			rxr->fmp = rxr->lmp = mp;
4455		} else {
4456			/* Chain mbuf's together */
4457			mp->m_flags &= ~M_PKTHDR;
4458			rxr->lmp->m_next = mp;
4459			rxr->lmp = mp;
4460			rxr->fmp->m_pkthdr.len += len;
4461		}
4462
4463		if (eop) {
4464			--count;
4465			sendmp = rxr->fmp;
4466			sendmp->m_pkthdr.rcvif = ifp;
4467			ifp->if_ipackets++;
4468			em_receive_checksum(cur, sendmp);
4469#ifndef __NO_STRICT_ALIGNMENT
4470			if (adapter->hw.mac.max_frame_size >
4471			    (MCLBYTES - ETHER_ALIGN) &&
4472			    em_fixup_rx(rxr) != 0)
4473				goto skip;
4474#endif
4475			if (status & E1000_RXD_STAT_VP) {
4476				sendmp->m_pkthdr.ether_vtag =
4477				    le16toh(cur->special);
4478				sendmp->m_flags |= M_VLANTAG;
4479			}
4480#ifndef __NO_STRICT_ALIGNMENT
4481skip:
4482#endif
4483			rxr->fmp = rxr->lmp = NULL;
4484		}
4485next_desc:
4486		/* Zero out the receive descriptors status. */
4487		cur->status = 0;
4488		++rxdone;	/* cumulative for POLL */
4489		++processed;
4490
4491		/* Advance our pointers to the next descriptor. */
4492		if (++i == adapter->num_rx_desc)
4493			i = 0;
4494
4495		/* Send to the stack */
4496		if (sendmp != NULL) {
4497			rxr->next_to_check = i;
4498			EM_RX_UNLOCK(rxr);
4499			(*ifp->if_input)(ifp, sendmp);
4500			EM_RX_LOCK(rxr);
4501			i = rxr->next_to_check;
4502		}
4503
4504		/* Only refresh mbufs every 8 descriptors */
4505		if (processed == 8) {
4506			em_refresh_mbufs(rxr, i);
4507			processed = 0;
4508		}
4509	}
4510
4511	/* Catch any remaining refresh work */
4512	if (e1000_rx_unrefreshed(rxr))
4513		em_refresh_mbufs(rxr, i);
4514
4515	rxr->next_to_check = i;
4516	if (done != NULL)
4517		*done = rxdone;
4518	EM_RX_UNLOCK(rxr);
4519
4520	return ((status & E1000_RXD_STAT_DD) ? TRUE : FALSE);
4521}
4522
4523static __inline void
4524em_rx_discard(struct rx_ring *rxr, int i)
4525{
4526	struct em_buffer	*rbuf;
4527
4528	rbuf = &rxr->rx_buffers[i];
4529	/* Free any previous pieces */
4530	if (rxr->fmp != NULL) {
4531		rxr->fmp->m_flags |= M_PKTHDR;
4532		m_freem(rxr->fmp);
4533		rxr->fmp = NULL;
4534		rxr->lmp = NULL;
4535	}
4536	/*
4537	** Free buffer and allow em_refresh_mbufs()
4538	** to clean up and recharge buffer.
4539	*/
4540	if (rbuf->m_head) {
4541		m_free(rbuf->m_head);
4542		rbuf->m_head = NULL;
4543	}
4544	return;
4545}
4546
4547#ifndef __NO_STRICT_ALIGNMENT
4548/*
4549 * When jumbo frames are enabled we should realign entire payload on
4550 * architecures with strict alignment. This is serious design mistake of 8254x
4551 * as it nullifies DMA operations. 8254x just allows RX buffer size to be
4552 * 2048/4096/8192/16384. What we really want is 2048 - ETHER_ALIGN to align its
4553 * payload. On architecures without strict alignment restrictions 8254x still
4554 * performs unaligned memory access which would reduce the performance too.
4555 * To avoid copying over an entire frame to align, we allocate a new mbuf and
4556 * copy ethernet header to the new mbuf. The new mbuf is prepended into the
4557 * existing mbuf chain.
4558 *
4559 * Be aware, best performance of the 8254x is achived only when jumbo frame is
4560 * not used at all on architectures with strict alignment.
4561 */
4562static int
4563em_fixup_rx(struct rx_ring *rxr)
4564{
4565	struct adapter *adapter = rxr->adapter;
4566	struct mbuf *m, *n;
4567	int error;
4568
4569	error = 0;
4570	m = rxr->fmp;
4571	if (m->m_len <= (MCLBYTES - ETHER_HDR_LEN)) {
4572		bcopy(m->m_data, m->m_data + ETHER_HDR_LEN, m->m_len);
4573		m->m_data += ETHER_HDR_LEN;
4574	} else {
4575		MGETHDR(n, M_NOWAIT, MT_DATA);
4576		if (n != NULL) {
4577			bcopy(m->m_data, n->m_data, ETHER_HDR_LEN);
4578			m->m_data += ETHER_HDR_LEN;
4579			m->m_len -= ETHER_HDR_LEN;
4580			n->m_len = ETHER_HDR_LEN;
4581			M_MOVE_PKTHDR(n, m);
4582			n->m_next = m;
4583			rxr->fmp = n;
4584		} else {
4585			adapter->dropped_pkts++;
4586			m_freem(rxr->fmp);
4587			rxr->fmp = NULL;
4588			error = ENOMEM;
4589		}
4590	}
4591
4592	return (error);
4593}
4594#endif
4595
4596/*********************************************************************
4597 *
4598 *  Verify that the hardware indicated that the checksum is valid.
4599 *  Inform the stack about the status of checksum so that stack
4600 *  doesn't spend time verifying the checksum.
4601 *
4602 *********************************************************************/
4603static void
4604em_receive_checksum(struct e1000_rx_desc *rx_desc, struct mbuf *mp)
4605{
4606	/* Ignore Checksum bit is set */
4607	if (rx_desc->status & E1000_RXD_STAT_IXSM) {
4608		mp->m_pkthdr.csum_flags = 0;
4609		return;
4610	}
4611
4612	if (rx_desc->status & E1000_RXD_STAT_IPCS) {
4613		/* Did it pass? */
4614		if (!(rx_desc->errors & E1000_RXD_ERR_IPE)) {
4615			/* IP Checksum Good */
4616			mp->m_pkthdr.csum_flags = CSUM_IP_CHECKED;
4617			mp->m_pkthdr.csum_flags |= CSUM_IP_VALID;
4618
4619		} else {
4620			mp->m_pkthdr.csum_flags = 0;
4621		}
4622	}
4623
4624	if (rx_desc->status & E1000_RXD_STAT_TCPCS) {
4625		/* Did it pass? */
4626		if (!(rx_desc->errors & E1000_RXD_ERR_TCPE)) {
4627			mp->m_pkthdr.csum_flags |=
4628			(CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
4629			mp->m_pkthdr.csum_data = htons(0xffff);
4630		}
4631	}
4632}
4633
4634/*
4635 * This routine is run via an vlan
4636 * config EVENT
4637 */
4638static void
4639em_register_vlan(void *arg, struct ifnet *ifp, u16 vtag)
4640{
4641	struct adapter	*adapter = ifp->if_softc;
4642	u32		index, bit;
4643
4644	if (ifp->if_softc !=  arg)   /* Not our event */
4645		return;
4646
4647	if ((vtag == 0) || (vtag > 4095))       /* Invalid ID */
4648                return;
4649
4650	EM_CORE_LOCK(adapter);
4651	index = (vtag >> 5) & 0x7F;
4652	bit = vtag & 0x1F;
4653	adapter->shadow_vfta[index] |= (1 << bit);
4654	++adapter->num_vlans;
4655	/* Re-init to load the changes */
4656	if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
4657		em_init_locked(adapter);
4658	EM_CORE_UNLOCK(adapter);
4659}
4660
4661/*
4662 * This routine is run via an vlan
4663 * unconfig EVENT
4664 */
4665static void
4666em_unregister_vlan(void *arg, struct ifnet *ifp, u16 vtag)
4667{
4668	struct adapter	*adapter = ifp->if_softc;
4669	u32		index, bit;
4670
4671	if (ifp->if_softc !=  arg)
4672		return;
4673
4674	if ((vtag == 0) || (vtag > 4095))       /* Invalid */
4675                return;
4676
4677	EM_CORE_LOCK(adapter);
4678	index = (vtag >> 5) & 0x7F;
4679	bit = vtag & 0x1F;
4680	adapter->shadow_vfta[index] &= ~(1 << bit);
4681	--adapter->num_vlans;
4682	/* Re-init to load the changes */
4683	if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
4684		em_init_locked(adapter);
4685	EM_CORE_UNLOCK(adapter);
4686}
4687
4688static void
4689em_setup_vlan_hw_support(struct adapter *adapter)
4690{
4691	struct e1000_hw *hw = &adapter->hw;
4692	u32             reg;
4693
4694	/*
4695	** We get here thru init_locked, meaning
4696	** a soft reset, this has already cleared
4697	** the VFTA and other state, so if there
4698	** have been no vlan's registered do nothing.
4699	*/
4700	if (adapter->num_vlans == 0)
4701                return;
4702
4703	/*
4704	** A soft reset zero's out the VFTA, so
4705	** we need to repopulate it now.
4706	*/
4707	for (int i = 0; i < EM_VFTA_SIZE; i++)
4708                if (adapter->shadow_vfta[i] != 0)
4709			E1000_WRITE_REG_ARRAY(hw, E1000_VFTA,
4710                            i, adapter->shadow_vfta[i]);
4711
4712	reg = E1000_READ_REG(hw, E1000_CTRL);
4713	reg |= E1000_CTRL_VME;
4714	E1000_WRITE_REG(hw, E1000_CTRL, reg);
4715
4716	/* Enable the Filter Table */
4717	reg = E1000_READ_REG(hw, E1000_RCTL);
4718	reg &= ~E1000_RCTL_CFIEN;
4719	reg |= E1000_RCTL_VFE;
4720	E1000_WRITE_REG(hw, E1000_RCTL, reg);
4721}
4722
4723static void
4724em_enable_intr(struct adapter *adapter)
4725{
4726	struct e1000_hw *hw = &adapter->hw;
4727	u32 ims_mask = IMS_ENABLE_MASK;
4728
4729	if (hw->mac.type == e1000_82574) {
4730		E1000_WRITE_REG(hw, EM_EIAC, EM_MSIX_MASK);
4731		ims_mask |= EM_MSIX_MASK;
4732	}
4733	E1000_WRITE_REG(hw, E1000_IMS, ims_mask);
4734}
4735
4736static void
4737em_disable_intr(struct adapter *adapter)
4738{
4739	struct e1000_hw *hw = &adapter->hw;
4740
4741	if (hw->mac.type == e1000_82574)
4742		E1000_WRITE_REG(hw, EM_EIAC, 0);
4743	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
4744}
4745
4746/*
4747 * Bit of a misnomer, what this really means is
4748 * to enable OS management of the system... aka
4749 * to disable special hardware management features
4750 */
4751static void
4752em_init_manageability(struct adapter *adapter)
4753{
4754	/* A shared code workaround */
4755#define E1000_82542_MANC2H E1000_MANC2H
4756	if (adapter->has_manage) {
4757		int manc2h = E1000_READ_REG(&adapter->hw, E1000_MANC2H);
4758		int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
4759
4760		/* disable hardware interception of ARP */
4761		manc &= ~(E1000_MANC_ARP_EN);
4762
4763                /* enable receiving management packets to the host */
4764		manc |= E1000_MANC_EN_MNG2HOST;
4765#define E1000_MNG2HOST_PORT_623 (1 << 5)
4766#define E1000_MNG2HOST_PORT_664 (1 << 6)
4767		manc2h |= E1000_MNG2HOST_PORT_623;
4768		manc2h |= E1000_MNG2HOST_PORT_664;
4769		E1000_WRITE_REG(&adapter->hw, E1000_MANC2H, manc2h);
4770		E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
4771	}
4772}
4773
4774/*
4775 * Give control back to hardware management
4776 * controller if there is one.
4777 */
4778static void
4779em_release_manageability(struct adapter *adapter)
4780{
4781	if (adapter->has_manage) {
4782		int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
4783
4784		/* re-enable hardware interception of ARP */
4785		manc |= E1000_MANC_ARP_EN;
4786		manc &= ~E1000_MANC_EN_MNG2HOST;
4787
4788		E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
4789	}
4790}
4791
4792/*
4793 * em_get_hw_control sets the {CTRL_EXT|FWSM}:DRV_LOAD bit.
4794 * For ASF and Pass Through versions of f/w this means
4795 * that the driver is loaded. For AMT version type f/w
4796 * this means that the network i/f is open.
4797 */
4798static void
4799em_get_hw_control(struct adapter *adapter)
4800{
4801	u32 ctrl_ext, swsm;
4802
4803	if (adapter->hw.mac.type == e1000_82573) {
4804		swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM);
4805		E1000_WRITE_REG(&adapter->hw, E1000_SWSM,
4806		    swsm | E1000_SWSM_DRV_LOAD);
4807		return;
4808	}
4809	/* else */
4810	ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4811	E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
4812	    ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
4813	return;
4814}
4815
4816/*
4817 * em_release_hw_control resets {CTRL_EXT|FWSM}:DRV_LOAD bit.
4818 * For ASF and Pass Through versions of f/w this means that
4819 * the driver is no longer loaded. For AMT versions of the
4820 * f/w this means that the network i/f is closed.
4821 */
4822static void
4823em_release_hw_control(struct adapter *adapter)
4824{
4825	u32 ctrl_ext, swsm;
4826
4827	if (!adapter->has_manage)
4828		return;
4829
4830	if (adapter->hw.mac.type == e1000_82573) {
4831		swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM);
4832		E1000_WRITE_REG(&adapter->hw, E1000_SWSM,
4833		    swsm & ~E1000_SWSM_DRV_LOAD);
4834		return;
4835	}
4836	/* else */
4837	ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4838	E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
4839	    ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
4840	return;
4841}
4842
4843static int
4844em_is_valid_ether_addr(u8 *addr)
4845{
4846	char zero_addr[6] = { 0, 0, 0, 0, 0, 0 };
4847
4848	if ((addr[0] & 1) || (!bcmp(addr, zero_addr, ETHER_ADDR_LEN))) {
4849		return (FALSE);
4850	}
4851
4852	return (TRUE);
4853}
4854
4855/*
4856** Parse the interface capabilities with regard
4857** to both system management and wake-on-lan for
4858** later use.
4859*/
4860static void
4861em_get_wakeup(device_t dev)
4862{
4863	struct adapter	*adapter = device_get_softc(dev);
4864	u16		eeprom_data = 0, device_id, apme_mask;
4865
4866	adapter->has_manage = e1000_enable_mng_pass_thru(&adapter->hw);
4867	apme_mask = EM_EEPROM_APME;
4868
4869	switch (adapter->hw.mac.type) {
4870	case e1000_82573:
4871	case e1000_82583:
4872		adapter->has_amt = TRUE;
4873		/* Falls thru */
4874	case e1000_82571:
4875	case e1000_82572:
4876	case e1000_80003es2lan:
4877		if (adapter->hw.bus.func == 1) {
4878			e1000_read_nvm(&adapter->hw,
4879			    NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data);
4880			break;
4881		} else
4882			e1000_read_nvm(&adapter->hw,
4883			    NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
4884		break;
4885	case e1000_ich8lan:
4886	case e1000_ich9lan:
4887	case e1000_ich10lan:
4888	case e1000_pchlan:
4889	case e1000_pch2lan:
4890		apme_mask = E1000_WUC_APME;
4891		adapter->has_amt = TRUE;
4892		eeprom_data = E1000_READ_REG(&adapter->hw, E1000_WUC);
4893		break;
4894	default:
4895		e1000_read_nvm(&adapter->hw,
4896		    NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
4897		break;
4898	}
4899	if (eeprom_data & apme_mask)
4900		adapter->wol = (E1000_WUFC_MAG | E1000_WUFC_MC);
4901	/*
4902         * We have the eeprom settings, now apply the special cases
4903         * where the eeprom may be wrong or the board won't support
4904         * wake on lan on a particular port
4905	 */
4906	device_id = pci_get_device(dev);
4907        switch (device_id) {
4908	case E1000_DEV_ID_82571EB_FIBER:
4909		/* Wake events only supported on port A for dual fiber
4910		 * regardless of eeprom setting */
4911		if (E1000_READ_REG(&adapter->hw, E1000_STATUS) &
4912		    E1000_STATUS_FUNC_1)
4913			adapter->wol = 0;
4914		break;
4915	case E1000_DEV_ID_82571EB_QUAD_COPPER:
4916	case E1000_DEV_ID_82571EB_QUAD_FIBER:
4917	case E1000_DEV_ID_82571EB_QUAD_COPPER_LP:
4918                /* if quad port adapter, disable WoL on all but port A */
4919		if (global_quad_port_a != 0)
4920			adapter->wol = 0;
4921		/* Reset for multiple quad port adapters */
4922		if (++global_quad_port_a == 4)
4923			global_quad_port_a = 0;
4924                break;
4925	}
4926	return;
4927}
4928
4929
4930/*
4931 * Enable PCI Wake On Lan capability
4932 */
4933static void
4934em_enable_wakeup(device_t dev)
4935{
4936	struct adapter	*adapter = device_get_softc(dev);
4937	struct ifnet	*ifp = adapter->ifp;
4938	u32		pmc, ctrl, ctrl_ext, rctl;
4939	u16     	status;
4940
4941	if ((pci_find_cap(dev, PCIY_PMG, &pmc) != 0))
4942		return;
4943
4944	/* Advertise the wakeup capability */
4945	ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
4946	ctrl |= (E1000_CTRL_SWDPIN2 | E1000_CTRL_SWDPIN3);
4947	E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
4948	E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
4949
4950	if ((adapter->hw.mac.type == e1000_ich8lan) ||
4951	    (adapter->hw.mac.type == e1000_pchlan) ||
4952	    (adapter->hw.mac.type == e1000_ich9lan) ||
4953	    (adapter->hw.mac.type == e1000_ich10lan))
4954		e1000_suspend_workarounds_ich8lan(&adapter->hw);
4955
4956	/* Keep the laser running on Fiber adapters */
4957	if (adapter->hw.phy.media_type == e1000_media_type_fiber ||
4958	    adapter->hw.phy.media_type == e1000_media_type_internal_serdes) {
4959		ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4960		ctrl_ext |= E1000_CTRL_EXT_SDP3_DATA;
4961		E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, ctrl_ext);
4962	}
4963
4964	/*
4965	** Determine type of Wakeup: note that wol
4966	** is set with all bits on by default.
4967	*/
4968	if ((ifp->if_capenable & IFCAP_WOL_MAGIC) == 0)
4969		adapter->wol &= ~E1000_WUFC_MAG;
4970
4971	if ((ifp->if_capenable & IFCAP_WOL_MCAST) == 0)
4972		adapter->wol &= ~E1000_WUFC_MC;
4973	else {
4974		rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
4975		rctl |= E1000_RCTL_MPE;
4976		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, rctl);
4977	}
4978
4979	if ((adapter->hw.mac.type == e1000_pchlan) ||
4980	    (adapter->hw.mac.type == e1000_pch2lan)) {
4981		if (em_enable_phy_wakeup(adapter))
4982			return;
4983	} else {
4984		E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
4985		E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
4986	}
4987
4988	if (adapter->hw.phy.type == e1000_phy_igp_3)
4989		e1000_igp3_phy_powerdown_workaround_ich8lan(&adapter->hw);
4990
4991        /* Request PME */
4992        status = pci_read_config(dev, pmc + PCIR_POWER_STATUS, 2);
4993	status &= ~(PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE);
4994	if (ifp->if_capenable & IFCAP_WOL)
4995		status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE;
4996        pci_write_config(dev, pmc + PCIR_POWER_STATUS, status, 2);
4997
4998	return;
4999}
5000
5001/*
5002** WOL in the newer chipset interfaces (pchlan)
5003** require thing to be copied into the phy
5004*/
5005static int
5006em_enable_phy_wakeup(struct adapter *adapter)
5007{
5008	struct e1000_hw *hw = &adapter->hw;
5009	u32 mreg, ret = 0;
5010	u16 preg;
5011
5012	/* copy MAC RARs to PHY RARs */
5013	e1000_copy_rx_addrs_to_phy_ich8lan(hw);
5014
5015	/* copy MAC MTA to PHY MTA */
5016	for (int i = 0; i < adapter->hw.mac.mta_reg_count; i++) {
5017		mreg = E1000_READ_REG_ARRAY(hw, E1000_MTA, i);
5018		e1000_write_phy_reg(hw, BM_MTA(i), (u16)(mreg & 0xFFFF));
5019		e1000_write_phy_reg(hw, BM_MTA(i) + 1,
5020		    (u16)((mreg >> 16) & 0xFFFF));
5021	}
5022
5023	/* configure PHY Rx Control register */
5024	e1000_read_phy_reg(&adapter->hw, BM_RCTL, &preg);
5025	mreg = E1000_READ_REG(hw, E1000_RCTL);
5026	if (mreg & E1000_RCTL_UPE)
5027		preg |= BM_RCTL_UPE;
5028	if (mreg & E1000_RCTL_MPE)
5029		preg |= BM_RCTL_MPE;
5030	preg &= ~(BM_RCTL_MO_MASK);
5031	if (mreg & E1000_RCTL_MO_3)
5032		preg |= (((mreg & E1000_RCTL_MO_3) >> E1000_RCTL_MO_SHIFT)
5033				<< BM_RCTL_MO_SHIFT);
5034	if (mreg & E1000_RCTL_BAM)
5035		preg |= BM_RCTL_BAM;
5036	if (mreg & E1000_RCTL_PMCF)
5037		preg |= BM_RCTL_PMCF;
5038	mreg = E1000_READ_REG(hw, E1000_CTRL);
5039	if (mreg & E1000_CTRL_RFCE)
5040		preg |= BM_RCTL_RFCE;
5041	e1000_write_phy_reg(&adapter->hw, BM_RCTL, preg);
5042
5043	/* enable PHY wakeup in MAC register */
5044	E1000_WRITE_REG(hw, E1000_WUC,
5045	    E1000_WUC_PHY_WAKE | E1000_WUC_PME_EN);
5046	E1000_WRITE_REG(hw, E1000_WUFC, adapter->wol);
5047
5048	/* configure and enable PHY wakeup in PHY registers */
5049	e1000_write_phy_reg(&adapter->hw, BM_WUFC, adapter->wol);
5050	e1000_write_phy_reg(&adapter->hw, BM_WUC, E1000_WUC_PME_EN);
5051
5052	/* activate PHY wakeup */
5053	ret = hw->phy.ops.acquire(hw);
5054	if (ret) {
5055		printf("Could not acquire PHY\n");
5056		return ret;
5057	}
5058	e1000_write_phy_reg_mdic(hw, IGP01E1000_PHY_PAGE_SELECT,
5059	                         (BM_WUC_ENABLE_PAGE << IGP_PAGE_SHIFT));
5060	ret = e1000_read_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, &preg);
5061	if (ret) {
5062		printf("Could not read PHY page 769\n");
5063		goto out;
5064	}
5065	preg |= BM_WUC_ENABLE_BIT | BM_WUC_HOST_WU_BIT;
5066	ret = e1000_write_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, preg);
5067	if (ret)
5068		printf("Could not set PHY Host Wakeup bit\n");
5069out:
5070	hw->phy.ops.release(hw);
5071
5072	return ret;
5073}
5074
5075static void
5076em_led_func(void *arg, int onoff)
5077{
5078	struct adapter	*adapter = arg;
5079
5080	EM_CORE_LOCK(adapter);
5081	if (onoff) {
5082		e1000_setup_led(&adapter->hw);
5083		e1000_led_on(&adapter->hw);
5084	} else {
5085		e1000_led_off(&adapter->hw);
5086		e1000_cleanup_led(&adapter->hw);
5087	}
5088	EM_CORE_UNLOCK(adapter);
5089}
5090
5091/*
5092** Disable the L0S and L1 LINK states
5093*/
5094static void
5095em_disable_aspm(struct adapter *adapter)
5096{
5097	int		base, reg;
5098	u16		link_cap,link_ctrl;
5099	device_t	dev = adapter->dev;
5100
5101	switch (adapter->hw.mac.type) {
5102		case e1000_82573:
5103		case e1000_82574:
5104		case e1000_82583:
5105			break;
5106		default:
5107			return;
5108	}
5109	if (pci_find_cap(dev, PCIY_EXPRESS, &base) != 0)
5110		return;
5111	reg = base + PCIER_LINK_CAP;
5112	link_cap = pci_read_config(dev, reg, 2);
5113	if ((link_cap & PCIEM_LINK_CAP_ASPM) == 0)
5114		return;
5115	reg = base + PCIER_LINK_CTL;
5116	link_ctrl = pci_read_config(dev, reg, 2);
5117	link_ctrl &= ~PCIEM_LINK_CTL_ASPMC;
5118	pci_write_config(dev, reg, link_ctrl, 2);
5119	return;
5120}
5121
5122/**********************************************************************
5123 *
5124 *  Update the board statistics counters.
5125 *
5126 **********************************************************************/
5127static void
5128em_update_stats_counters(struct adapter *adapter)
5129{
5130	struct ifnet   *ifp;
5131
5132	if(adapter->hw.phy.media_type == e1000_media_type_copper ||
5133	   (E1000_READ_REG(&adapter->hw, E1000_STATUS) & E1000_STATUS_LU)) {
5134		adapter->stats.symerrs += E1000_READ_REG(&adapter->hw, E1000_SYMERRS);
5135		adapter->stats.sec += E1000_READ_REG(&adapter->hw, E1000_SEC);
5136	}
5137	adapter->stats.crcerrs += E1000_READ_REG(&adapter->hw, E1000_CRCERRS);
5138	adapter->stats.mpc += E1000_READ_REG(&adapter->hw, E1000_MPC);
5139	adapter->stats.scc += E1000_READ_REG(&adapter->hw, E1000_SCC);
5140	adapter->stats.ecol += E1000_READ_REG(&adapter->hw, E1000_ECOL);
5141
5142	adapter->stats.mcc += E1000_READ_REG(&adapter->hw, E1000_MCC);
5143	adapter->stats.latecol += E1000_READ_REG(&adapter->hw, E1000_LATECOL);
5144	adapter->stats.colc += E1000_READ_REG(&adapter->hw, E1000_COLC);
5145	adapter->stats.dc += E1000_READ_REG(&adapter->hw, E1000_DC);
5146	adapter->stats.rlec += E1000_READ_REG(&adapter->hw, E1000_RLEC);
5147	adapter->stats.xonrxc += E1000_READ_REG(&adapter->hw, E1000_XONRXC);
5148	adapter->stats.xontxc += E1000_READ_REG(&adapter->hw, E1000_XONTXC);
5149	/*
5150	** For watchdog management we need to know if we have been
5151	** paused during the last interval, so capture that here.
5152	*/
5153	adapter->pause_frames = E1000_READ_REG(&adapter->hw, E1000_XOFFRXC);
5154	adapter->stats.xoffrxc += adapter->pause_frames;
5155	adapter->stats.xofftxc += E1000_READ_REG(&adapter->hw, E1000_XOFFTXC);
5156	adapter->stats.fcruc += E1000_READ_REG(&adapter->hw, E1000_FCRUC);
5157	adapter->stats.prc64 += E1000_READ_REG(&adapter->hw, E1000_PRC64);
5158	adapter->stats.prc127 += E1000_READ_REG(&adapter->hw, E1000_PRC127);
5159	adapter->stats.prc255 += E1000_READ_REG(&adapter->hw, E1000_PRC255);
5160	adapter->stats.prc511 += E1000_READ_REG(&adapter->hw, E1000_PRC511);
5161	adapter->stats.prc1023 += E1000_READ_REG(&adapter->hw, E1000_PRC1023);
5162	adapter->stats.prc1522 += E1000_READ_REG(&adapter->hw, E1000_PRC1522);
5163	adapter->stats.gprc += E1000_READ_REG(&adapter->hw, E1000_GPRC);
5164	adapter->stats.bprc += E1000_READ_REG(&adapter->hw, E1000_BPRC);
5165	adapter->stats.mprc += E1000_READ_REG(&adapter->hw, E1000_MPRC);
5166	adapter->stats.gptc += E1000_READ_REG(&adapter->hw, E1000_GPTC);
5167
5168	/* For the 64-bit byte counters the low dword must be read first. */
5169	/* Both registers clear on the read of the high dword */
5170
5171	adapter->stats.gorc += E1000_READ_REG(&adapter->hw, E1000_GORCL) +
5172	    ((u64)E1000_READ_REG(&adapter->hw, E1000_GORCH) << 32);
5173	adapter->stats.gotc += E1000_READ_REG(&adapter->hw, E1000_GOTCL) +
5174	    ((u64)E1000_READ_REG(&adapter->hw, E1000_GOTCH) << 32);
5175
5176	adapter->stats.rnbc += E1000_READ_REG(&adapter->hw, E1000_RNBC);
5177	adapter->stats.ruc += E1000_READ_REG(&adapter->hw, E1000_RUC);
5178	adapter->stats.rfc += E1000_READ_REG(&adapter->hw, E1000_RFC);
5179	adapter->stats.roc += E1000_READ_REG(&adapter->hw, E1000_ROC);
5180	adapter->stats.rjc += E1000_READ_REG(&adapter->hw, E1000_RJC);
5181
5182	adapter->stats.tor += E1000_READ_REG(&adapter->hw, E1000_TORH);
5183	adapter->stats.tot += E1000_READ_REG(&adapter->hw, E1000_TOTH);
5184
5185	adapter->stats.tpr += E1000_READ_REG(&adapter->hw, E1000_TPR);
5186	adapter->stats.tpt += E1000_READ_REG(&adapter->hw, E1000_TPT);
5187	adapter->stats.ptc64 += E1000_READ_REG(&adapter->hw, E1000_PTC64);
5188	adapter->stats.ptc127 += E1000_READ_REG(&adapter->hw, E1000_PTC127);
5189	adapter->stats.ptc255 += E1000_READ_REG(&adapter->hw, E1000_PTC255);
5190	adapter->stats.ptc511 += E1000_READ_REG(&adapter->hw, E1000_PTC511);
5191	adapter->stats.ptc1023 += E1000_READ_REG(&adapter->hw, E1000_PTC1023);
5192	adapter->stats.ptc1522 += E1000_READ_REG(&adapter->hw, E1000_PTC1522);
5193	adapter->stats.mptc += E1000_READ_REG(&adapter->hw, E1000_MPTC);
5194	adapter->stats.bptc += E1000_READ_REG(&adapter->hw, E1000_BPTC);
5195
5196	/* Interrupt Counts */
5197
5198	adapter->stats.iac += E1000_READ_REG(&adapter->hw, E1000_IAC);
5199	adapter->stats.icrxptc += E1000_READ_REG(&adapter->hw, E1000_ICRXPTC);
5200	adapter->stats.icrxatc += E1000_READ_REG(&adapter->hw, E1000_ICRXATC);
5201	adapter->stats.ictxptc += E1000_READ_REG(&adapter->hw, E1000_ICTXPTC);
5202	adapter->stats.ictxatc += E1000_READ_REG(&adapter->hw, E1000_ICTXATC);
5203	adapter->stats.ictxqec += E1000_READ_REG(&adapter->hw, E1000_ICTXQEC);
5204	adapter->stats.ictxqmtc += E1000_READ_REG(&adapter->hw, E1000_ICTXQMTC);
5205	adapter->stats.icrxdmtc += E1000_READ_REG(&adapter->hw, E1000_ICRXDMTC);
5206	adapter->stats.icrxoc += E1000_READ_REG(&adapter->hw, E1000_ICRXOC);
5207
5208	if (adapter->hw.mac.type >= e1000_82543) {
5209		adapter->stats.algnerrc +=
5210		E1000_READ_REG(&adapter->hw, E1000_ALGNERRC);
5211		adapter->stats.rxerrc +=
5212		E1000_READ_REG(&adapter->hw, E1000_RXERRC);
5213		adapter->stats.tncrs +=
5214		E1000_READ_REG(&adapter->hw, E1000_TNCRS);
5215		adapter->stats.cexterr +=
5216		E1000_READ_REG(&adapter->hw, E1000_CEXTERR);
5217		adapter->stats.tsctc +=
5218		E1000_READ_REG(&adapter->hw, E1000_TSCTC);
5219		adapter->stats.tsctfc +=
5220		E1000_READ_REG(&adapter->hw, E1000_TSCTFC);
5221	}
5222	ifp = adapter->ifp;
5223
5224	ifp->if_collisions = adapter->stats.colc;
5225
5226	/* Rx Errors */
5227	ifp->if_ierrors = adapter->dropped_pkts + adapter->stats.rxerrc +
5228	    adapter->stats.crcerrs + adapter->stats.algnerrc +
5229	    adapter->stats.ruc + adapter->stats.roc +
5230	    adapter->stats.mpc + adapter->stats.cexterr;
5231
5232	/* Tx Errors */
5233	ifp->if_oerrors = adapter->stats.ecol +
5234	    adapter->stats.latecol + adapter->watchdog_events;
5235}
5236
5237/* Export a single 32-bit register via a read-only sysctl. */
5238static int
5239em_sysctl_reg_handler(SYSCTL_HANDLER_ARGS)
5240{
5241	struct adapter *adapter;
5242	u_int val;
5243
5244	adapter = oidp->oid_arg1;
5245	val = E1000_READ_REG(&adapter->hw, oidp->oid_arg2);
5246	return (sysctl_handle_int(oidp, &val, 0, req));
5247}
5248
5249/*
5250 * Add sysctl variables, one per statistic, to the system.
5251 */
5252static void
5253em_add_hw_stats(struct adapter *adapter)
5254{
5255	device_t dev = adapter->dev;
5256
5257	struct tx_ring *txr = adapter->tx_rings;
5258	struct rx_ring *rxr = adapter->rx_rings;
5259
5260	struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev);
5261	struct sysctl_oid *tree = device_get_sysctl_tree(dev);
5262	struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree);
5263	struct e1000_hw_stats *stats = &adapter->stats;
5264
5265	struct sysctl_oid *stat_node, *queue_node, *int_node;
5266	struct sysctl_oid_list *stat_list, *queue_list, *int_list;
5267
5268#define QUEUE_NAME_LEN 32
5269	char namebuf[QUEUE_NAME_LEN];
5270
5271	/* Driver Statistics */
5272	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "link_irq",
5273			CTLFLAG_RD, &adapter->link_irq,
5274			"Link MSIX IRQ Handled");
5275	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "mbuf_alloc_fail",
5276			 CTLFLAG_RD, &adapter->mbuf_alloc_failed,
5277			 "Std mbuf failed");
5278	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "cluster_alloc_fail",
5279			 CTLFLAG_RD, &adapter->mbuf_cluster_failed,
5280			 "Std mbuf cluster failed");
5281	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "dropped",
5282			CTLFLAG_RD, &adapter->dropped_pkts,
5283			"Driver dropped packets");
5284	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_dma_fail",
5285			CTLFLAG_RD, &adapter->no_tx_dma_setup,
5286			"Driver tx dma failure in xmit");
5287	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_overruns",
5288			CTLFLAG_RD, &adapter->rx_overruns,
5289			"RX overruns");
5290	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "watchdog_timeouts",
5291			CTLFLAG_RD, &adapter->watchdog_events,
5292			"Watchdog timeouts");
5293
5294	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "device_control",
5295			CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_CTRL,
5296			em_sysctl_reg_handler, "IU",
5297			"Device Control Register");
5298	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rx_control",
5299			CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_RCTL,
5300			em_sysctl_reg_handler, "IU",
5301			"Receiver Control Register");
5302	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_high_water",
5303			CTLFLAG_RD, &adapter->hw.fc.high_water, 0,
5304			"Flow Control High Watermark");
5305	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_low_water",
5306			CTLFLAG_RD, &adapter->hw.fc.low_water, 0,
5307			"Flow Control Low Watermark");
5308
5309	for (int i = 0; i < adapter->num_queues; i++, rxr++, txr++) {
5310		snprintf(namebuf, QUEUE_NAME_LEN, "queue%d", i);
5311		queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
5312					    CTLFLAG_RD, NULL, "Queue Name");
5313		queue_list = SYSCTL_CHILDREN(queue_node);
5314
5315		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_head",
5316				CTLTYPE_UINT | CTLFLAG_RD, adapter,
5317				E1000_TDH(txr->me),
5318				em_sysctl_reg_handler, "IU",
5319 				"Transmit Descriptor Head");
5320		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_tail",
5321				CTLTYPE_UINT | CTLFLAG_RD, adapter,
5322				E1000_TDT(txr->me),
5323				em_sysctl_reg_handler, "IU",
5324 				"Transmit Descriptor Tail");
5325		SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "tx_irq",
5326				CTLFLAG_RD, &txr->tx_irq,
5327				"Queue MSI-X Transmit Interrupts");
5328		SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "no_desc_avail",
5329				CTLFLAG_RD, &txr->no_desc_avail,
5330				"Queue No Descriptor Available");
5331
5332		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_head",
5333				CTLTYPE_UINT | CTLFLAG_RD, adapter,
5334				E1000_RDH(rxr->me),
5335				em_sysctl_reg_handler, "IU",
5336				"Receive Descriptor Head");
5337		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_tail",
5338				CTLTYPE_UINT | CTLFLAG_RD, adapter,
5339				E1000_RDT(rxr->me),
5340				em_sysctl_reg_handler, "IU",
5341				"Receive Descriptor Tail");
5342		SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "rx_irq",
5343				CTLFLAG_RD, &rxr->rx_irq,
5344				"Queue MSI-X Receive Interrupts");
5345	}
5346
5347	/* MAC stats get their own sub node */
5348
5349	stat_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "mac_stats",
5350				    CTLFLAG_RD, NULL, "Statistics");
5351	stat_list = SYSCTL_CHILDREN(stat_node);
5352
5353	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "excess_coll",
5354			CTLFLAG_RD, &stats->ecol,
5355			"Excessive collisions");
5356	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "single_coll",
5357			CTLFLAG_RD, &stats->scc,
5358			"Single collisions");
5359	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "multiple_coll",
5360			CTLFLAG_RD, &stats->mcc,
5361			"Multiple collisions");
5362	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "late_coll",
5363			CTLFLAG_RD, &stats->latecol,
5364			"Late collisions");
5365	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "collision_count",
5366			CTLFLAG_RD, &stats->colc,
5367			"Collision Count");
5368	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "symbol_errors",
5369			CTLFLAG_RD, &adapter->stats.symerrs,
5370			"Symbol Errors");
5371	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "sequence_errors",
5372			CTLFLAG_RD, &adapter->stats.sec,
5373			"Sequence Errors");
5374	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "defer_count",
5375			CTLFLAG_RD, &adapter->stats.dc,
5376			"Defer Count");
5377	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "missed_packets",
5378			CTLFLAG_RD, &adapter->stats.mpc,
5379			"Missed Packets");
5380	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_no_buff",
5381			CTLFLAG_RD, &adapter->stats.rnbc,
5382			"Receive No Buffers");
5383	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_undersize",
5384			CTLFLAG_RD, &adapter->stats.ruc,
5385			"Receive Undersize");
5386	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_fragmented",
5387			CTLFLAG_RD, &adapter->stats.rfc,
5388			"Fragmented Packets Received ");
5389	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_oversize",
5390			CTLFLAG_RD, &adapter->stats.roc,
5391			"Oversized Packets Received");
5392	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_jabber",
5393			CTLFLAG_RD, &adapter->stats.rjc,
5394			"Recevied Jabber");
5395	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_errs",
5396			CTLFLAG_RD, &adapter->stats.rxerrc,
5397			"Receive Errors");
5398	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "crc_errs",
5399			CTLFLAG_RD, &adapter->stats.crcerrs,
5400			"CRC errors");
5401	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "alignment_errs",
5402			CTLFLAG_RD, &adapter->stats.algnerrc,
5403			"Alignment Errors");
5404	/* On 82575 these are collision counts */
5405	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "coll_ext_errs",
5406			CTLFLAG_RD, &adapter->stats.cexterr,
5407			"Collision/Carrier extension errors");
5408	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_recvd",
5409			CTLFLAG_RD, &adapter->stats.xonrxc,
5410			"XON Received");
5411	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_txd",
5412			CTLFLAG_RD, &adapter->stats.xontxc,
5413			"XON Transmitted");
5414	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_recvd",
5415			CTLFLAG_RD, &adapter->stats.xoffrxc,
5416			"XOFF Received");
5417	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_txd",
5418			CTLFLAG_RD, &adapter->stats.xofftxc,
5419			"XOFF Transmitted");
5420
5421	/* Packet Reception Stats */
5422	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_recvd",
5423			CTLFLAG_RD, &adapter->stats.tpr,
5424			"Total Packets Received ");
5425	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd",
5426			CTLFLAG_RD, &adapter->stats.gprc,
5427			"Good Packets Received");
5428	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_recvd",
5429			CTLFLAG_RD, &adapter->stats.bprc,
5430			"Broadcast Packets Received");
5431	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd",
5432			CTLFLAG_RD, &adapter->stats.mprc,
5433			"Multicast Packets Received");
5434	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_64",
5435			CTLFLAG_RD, &adapter->stats.prc64,
5436			"64 byte frames received ");
5437	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_65_127",
5438			CTLFLAG_RD, &adapter->stats.prc127,
5439			"65-127 byte frames received");
5440	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_128_255",
5441			CTLFLAG_RD, &adapter->stats.prc255,
5442			"128-255 byte frames received");
5443	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_256_511",
5444			CTLFLAG_RD, &adapter->stats.prc511,
5445			"256-511 byte frames received");
5446	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_512_1023",
5447			CTLFLAG_RD, &adapter->stats.prc1023,
5448			"512-1023 byte frames received");
5449	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_1024_1522",
5450			CTLFLAG_RD, &adapter->stats.prc1522,
5451			"1023-1522 byte frames received");
5452 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd",
5453 			CTLFLAG_RD, &adapter->stats.gorc,
5454 			"Good Octets Received");
5455
5456	/* Packet Transmission Stats */
5457 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_txd",
5458 			CTLFLAG_RD, &adapter->stats.gotc,
5459 			"Good Octets Transmitted");
5460	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_txd",
5461			CTLFLAG_RD, &adapter->stats.tpt,
5462			"Total Packets Transmitted");
5463	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
5464			CTLFLAG_RD, &adapter->stats.gptc,
5465			"Good Packets Transmitted");
5466	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_txd",
5467			CTLFLAG_RD, &adapter->stats.bptc,
5468			"Broadcast Packets Transmitted");
5469	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_txd",
5470			CTLFLAG_RD, &adapter->stats.mptc,
5471			"Multicast Packets Transmitted");
5472	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_64",
5473			CTLFLAG_RD, &adapter->stats.ptc64,
5474			"64 byte frames transmitted ");
5475	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_65_127",
5476			CTLFLAG_RD, &adapter->stats.ptc127,
5477			"65-127 byte frames transmitted");
5478	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_128_255",
5479			CTLFLAG_RD, &adapter->stats.ptc255,
5480			"128-255 byte frames transmitted");
5481	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_256_511",
5482			CTLFLAG_RD, &adapter->stats.ptc511,
5483			"256-511 byte frames transmitted");
5484	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_512_1023",
5485			CTLFLAG_RD, &adapter->stats.ptc1023,
5486			"512-1023 byte frames transmitted");
5487	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_1024_1522",
5488			CTLFLAG_RD, &adapter->stats.ptc1522,
5489			"1024-1522 byte frames transmitted");
5490	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tso_txd",
5491			CTLFLAG_RD, &adapter->stats.tsctc,
5492			"TSO Contexts Transmitted");
5493	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tso_ctx_fail",
5494			CTLFLAG_RD, &adapter->stats.tsctfc,
5495			"TSO Contexts Failed");
5496
5497
5498	/* Interrupt Stats */
5499
5500	int_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "interrupts",
5501				    CTLFLAG_RD, NULL, "Interrupt Statistics");
5502	int_list = SYSCTL_CHILDREN(int_node);
5503
5504	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "asserts",
5505			CTLFLAG_RD, &adapter->stats.iac,
5506			"Interrupt Assertion Count");
5507
5508	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_pkt_timer",
5509			CTLFLAG_RD, &adapter->stats.icrxptc,
5510			"Interrupt Cause Rx Pkt Timer Expire Count");
5511
5512	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_abs_timer",
5513			CTLFLAG_RD, &adapter->stats.icrxatc,
5514			"Interrupt Cause Rx Abs Timer Expire Count");
5515
5516	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_pkt_timer",
5517			CTLFLAG_RD, &adapter->stats.ictxptc,
5518			"Interrupt Cause Tx Pkt Timer Expire Count");
5519
5520	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_abs_timer",
5521			CTLFLAG_RD, &adapter->stats.ictxatc,
5522			"Interrupt Cause Tx Abs Timer Expire Count");
5523
5524	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_queue_empty",
5525			CTLFLAG_RD, &adapter->stats.ictxqec,
5526			"Interrupt Cause Tx Queue Empty Count");
5527
5528	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_queue_min_thresh",
5529			CTLFLAG_RD, &adapter->stats.ictxqmtc,
5530			"Interrupt Cause Tx Queue Min Thresh Count");
5531
5532	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_desc_min_thresh",
5533			CTLFLAG_RD, &adapter->stats.icrxdmtc,
5534			"Interrupt Cause Rx Desc Min Thresh Count");
5535
5536	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_overrun",
5537			CTLFLAG_RD, &adapter->stats.icrxoc,
5538			"Interrupt Cause Receiver Overrun Count");
5539}
5540
5541/**********************************************************************
5542 *
5543 *  This routine provides a way to dump out the adapter eeprom,
5544 *  often a useful debug/service tool. This only dumps the first
5545 *  32 words, stuff that matters is in that extent.
5546 *
5547 **********************************************************************/
5548static int
5549em_sysctl_nvm_info(SYSCTL_HANDLER_ARGS)
5550{
5551	struct adapter *adapter = (struct adapter *)arg1;
5552	int error;
5553	int result;
5554
5555	result = -1;
5556	error = sysctl_handle_int(oidp, &result, 0, req);
5557
5558	if (error || !req->newptr)
5559		return (error);
5560
5561	/*
5562	 * This value will cause a hex dump of the
5563	 * first 32 16-bit words of the EEPROM to
5564	 * the screen.
5565	 */
5566	if (result == 1)
5567		em_print_nvm_info(adapter);
5568
5569	return (error);
5570}
5571
5572static void
5573em_print_nvm_info(struct adapter *adapter)
5574{
5575	u16	eeprom_data;
5576	int	i, j, row = 0;
5577
5578	/* Its a bit crude, but it gets the job done */
5579	printf("\nInterface EEPROM Dump:\n");
5580	printf("Offset\n0x0000  ");
5581	for (i = 0, j = 0; i < 32; i++, j++) {
5582		if (j == 8) { /* Make the offset block */
5583			j = 0; ++row;
5584			printf("\n0x00%x0  ",row);
5585		}
5586		e1000_read_nvm(&adapter->hw, i, 1, &eeprom_data);
5587		printf("%04x ", eeprom_data);
5588	}
5589	printf("\n");
5590}
5591
5592static int
5593em_sysctl_int_delay(SYSCTL_HANDLER_ARGS)
5594{
5595	struct em_int_delay_info *info;
5596	struct adapter *adapter;
5597	u32 regval;
5598	int error, usecs, ticks;
5599
5600	info = (struct em_int_delay_info *)arg1;
5601	usecs = info->value;
5602	error = sysctl_handle_int(oidp, &usecs, 0, req);
5603	if (error != 0 || req->newptr == NULL)
5604		return (error);
5605	if (usecs < 0 || usecs > EM_TICKS_TO_USECS(65535))
5606		return (EINVAL);
5607	info->value = usecs;
5608	ticks = EM_USECS_TO_TICKS(usecs);
5609	if (info->offset == E1000_ITR)	/* units are 256ns here */
5610		ticks *= 4;
5611
5612	adapter = info->adapter;
5613
5614	EM_CORE_LOCK(adapter);
5615	regval = E1000_READ_OFFSET(&adapter->hw, info->offset);
5616	regval = (regval & ~0xffff) | (ticks & 0xffff);
5617	/* Handle a few special cases. */
5618	switch (info->offset) {
5619	case E1000_RDTR:
5620		break;
5621	case E1000_TIDV:
5622		if (ticks == 0) {
5623			adapter->txd_cmd &= ~E1000_TXD_CMD_IDE;
5624			/* Don't write 0 into the TIDV register. */
5625			regval++;
5626		} else
5627			adapter->txd_cmd |= E1000_TXD_CMD_IDE;
5628		break;
5629	}
5630	E1000_WRITE_OFFSET(&adapter->hw, info->offset, regval);
5631	EM_CORE_UNLOCK(adapter);
5632	return (0);
5633}
5634
5635static void
5636em_add_int_delay_sysctl(struct adapter *adapter, const char *name,
5637	const char *description, struct em_int_delay_info *info,
5638	int offset, int value)
5639{
5640	info->adapter = adapter;
5641	info->offset = offset;
5642	info->value = value;
5643	SYSCTL_ADD_PROC(device_get_sysctl_ctx(adapter->dev),
5644	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
5645	    OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW,
5646	    info, 0, em_sysctl_int_delay, "I", description);
5647}
5648
5649static void
5650em_set_sysctl_value(struct adapter *adapter, const char *name,
5651	const char *description, int *limit, int value)
5652{
5653	*limit = value;
5654	SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
5655	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
5656	    OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW, limit, value, description);
5657}
5658
5659
5660/*
5661** Set flow control using sysctl:
5662** Flow control values:
5663**      0 - off
5664**      1 - rx pause
5665**      2 - tx pause
5666**      3 - full
5667*/
5668static int
5669em_set_flowcntl(SYSCTL_HANDLER_ARGS)
5670{
5671        int		error;
5672	static int	input = 3; /* default is full */
5673        struct adapter	*adapter = (struct adapter *) arg1;
5674
5675        error = sysctl_handle_int(oidp, &input, 0, req);
5676
5677        if ((error) || (req->newptr == NULL))
5678                return (error);
5679
5680	if (input == adapter->fc) /* no change? */
5681		return (error);
5682
5683        switch (input) {
5684                case e1000_fc_rx_pause:
5685                case e1000_fc_tx_pause:
5686                case e1000_fc_full:
5687                case e1000_fc_none:
5688                        adapter->hw.fc.requested_mode = input;
5689			adapter->fc = input;
5690                        break;
5691                default:
5692			/* Do nothing */
5693			return (error);
5694        }
5695
5696        adapter->hw.fc.current_mode = adapter->hw.fc.requested_mode;
5697        e1000_force_mac_fc(&adapter->hw);
5698        return (error);
5699}
5700
5701/*
5702** Manage Energy Efficient Ethernet:
5703** Control values:
5704**     0/1 - enabled/disabled
5705*/
5706static int
5707em_sysctl_eee(SYSCTL_HANDLER_ARGS)
5708{
5709       struct adapter *adapter = (struct adapter *) arg1;
5710       int             error, value;
5711
5712       value = adapter->hw.dev_spec.ich8lan.eee_disable;
5713       error = sysctl_handle_int(oidp, &value, 0, req);
5714       if (error || req->newptr == NULL)
5715               return (error);
5716       EM_CORE_LOCK(adapter);
5717       adapter->hw.dev_spec.ich8lan.eee_disable = (value != 0);
5718       em_init_locked(adapter);
5719       EM_CORE_UNLOCK(adapter);
5720       return (0);
5721}
5722
5723static int
5724em_sysctl_debug_info(SYSCTL_HANDLER_ARGS)
5725{
5726	struct adapter *adapter;
5727	int error;
5728	int result;
5729
5730	result = -1;
5731	error = sysctl_handle_int(oidp, &result, 0, req);
5732
5733	if (error || !req->newptr)
5734		return (error);
5735
5736	if (result == 1) {
5737		adapter = (struct adapter *)arg1;
5738		em_print_debug_info(adapter);
5739        }
5740
5741	return (error);
5742}
5743
5744/*
5745** This routine is meant to be fluid, add whatever is
5746** needed for debugging a problem.  -jfv
5747*/
5748static void
5749em_print_debug_info(struct adapter *adapter)
5750{
5751	device_t dev = adapter->dev;
5752	struct tx_ring *txr = adapter->tx_rings;
5753	struct rx_ring *rxr = adapter->rx_rings;
5754
5755	if (adapter->ifp->if_drv_flags & IFF_DRV_RUNNING)
5756		printf("Interface is RUNNING ");
5757	else
5758		printf("Interface is NOT RUNNING\n");
5759
5760	if (adapter->ifp->if_drv_flags & IFF_DRV_OACTIVE)
5761		printf("and INACTIVE\n");
5762	else
5763		printf("and ACTIVE\n");
5764
5765	device_printf(dev, "hw tdh = %d, hw tdt = %d\n",
5766	    E1000_READ_REG(&adapter->hw, E1000_TDH(0)),
5767	    E1000_READ_REG(&adapter->hw, E1000_TDT(0)));
5768	device_printf(dev, "hw rdh = %d, hw rdt = %d\n",
5769	    E1000_READ_REG(&adapter->hw, E1000_RDH(0)),
5770	    E1000_READ_REG(&adapter->hw, E1000_RDT(0)));
5771	device_printf(dev, "Tx Queue Status = %d\n", txr->queue_status);
5772	device_printf(dev, "TX descriptors avail = %d\n",
5773	    txr->tx_avail);
5774	device_printf(dev, "Tx Descriptors avail failure = %ld\n",
5775	    txr->no_desc_avail);
5776	device_printf(dev, "RX discarded packets = %ld\n",
5777	    rxr->rx_discarded);
5778	device_printf(dev, "RX Next to Check = %d\n", rxr->next_to_check);
5779	device_printf(dev, "RX Next to Refresh = %d\n", rxr->next_to_refresh);
5780}
5781