if_em.c revision 262151
1/******************************************************************************
2
3  Copyright (c) 2001-2013, Intel Corporation
4  All rights reserved.
5
6  Redistribution and use in source and binary forms, with or without
7  modification, are permitted provided that the following conditions are met:
8
9   1. Redistributions of source code must retain the above copyright notice,
10      this list of conditions and the following disclaimer.
11
12   2. Redistributions in binary form must reproduce the above copyright
13      notice, this list of conditions and the following disclaimer in the
14      documentation and/or other materials provided with the distribution.
15
16   3. Neither the name of the Intel Corporation nor the names of its
17      contributors may be used to endorse or promote products derived from
18      this software without specific prior written permission.
19
20  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
24  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30  POSSIBILITY OF SUCH DAMAGE.
31
32******************************************************************************/
33/*$FreeBSD: stable/10/sys/dev/e1000/if_em.c 262151 2014-02-18 05:01:04Z luigi $*/
34
35#include "opt_inet.h"
36#include "opt_inet6.h"
37
38#ifdef HAVE_KERNEL_OPTION_HEADERS
39#include "opt_device_polling.h"
40#endif
41
42#include <sys/param.h>
43#include <sys/systm.h>
44#if __FreeBSD_version >= 800000
45#include <sys/buf_ring.h>
46#endif
47#include <sys/bus.h>
48#include <sys/endian.h>
49#include <sys/kernel.h>
50#include <sys/kthread.h>
51#include <sys/malloc.h>
52#include <sys/mbuf.h>
53#include <sys/module.h>
54#include <sys/rman.h>
55#include <sys/socket.h>
56#include <sys/sockio.h>
57#include <sys/sysctl.h>
58#include <sys/taskqueue.h>
59#include <sys/eventhandler.h>
60#include <machine/bus.h>
61#include <machine/resource.h>
62
63#include <net/bpf.h>
64#include <net/ethernet.h>
65#include <net/if.h>
66#include <net/if_arp.h>
67#include <net/if_dl.h>
68#include <net/if_media.h>
69
70#include <net/if_types.h>
71#include <net/if_vlan_var.h>
72
73#include <netinet/in_systm.h>
74#include <netinet/in.h>
75#include <netinet/if_ether.h>
76#include <netinet/ip.h>
77#include <netinet/ip6.h>
78#include <netinet/tcp.h>
79#include <netinet/udp.h>
80
81#include <machine/in_cksum.h>
82#include <dev/led/led.h>
83#include <dev/pci/pcivar.h>
84#include <dev/pci/pcireg.h>
85
86#include "e1000_api.h"
87#include "e1000_82571.h"
88#include "if_em.h"
89
90/*********************************************************************
91 *  Set this to one to display debug statistics
92 *********************************************************************/
93int	em_display_debug_stats = 0;
94
95/*********************************************************************
96 *  Driver version:
97 *********************************************************************/
98char em_driver_version[] = "7.3.8";
99
100/*********************************************************************
101 *  PCI Device ID Table
102 *
103 *  Used by probe to select devices to load on
104 *  Last field stores an index into e1000_strings
105 *  Last entry must be all 0s
106 *
107 *  { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
108 *********************************************************************/
109
110static em_vendor_info_t em_vendor_info_array[] =
111{
112	/* Intel(R) PRO/1000 Network Connection */
113	{ 0x8086, E1000_DEV_ID_82571EB_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
114	{ 0x8086, E1000_DEV_ID_82571EB_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
115	{ 0x8086, E1000_DEV_ID_82571EB_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
116	{ 0x8086, E1000_DEV_ID_82571EB_SERDES_DUAL,
117						PCI_ANY_ID, PCI_ANY_ID, 0},
118	{ 0x8086, E1000_DEV_ID_82571EB_SERDES_QUAD,
119						PCI_ANY_ID, PCI_ANY_ID, 0},
120	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER,
121						PCI_ANY_ID, PCI_ANY_ID, 0},
122	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER_LP,
123						PCI_ANY_ID, PCI_ANY_ID, 0},
124	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_FIBER,
125						PCI_ANY_ID, PCI_ANY_ID, 0},
126	{ 0x8086, E1000_DEV_ID_82571PT_QUAD_COPPER,
127						PCI_ANY_ID, PCI_ANY_ID, 0},
128	{ 0x8086, E1000_DEV_ID_82572EI_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
129	{ 0x8086, E1000_DEV_ID_82572EI_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
130	{ 0x8086, E1000_DEV_ID_82572EI_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
131	{ 0x8086, E1000_DEV_ID_82572EI,		PCI_ANY_ID, PCI_ANY_ID, 0},
132
133	{ 0x8086, E1000_DEV_ID_82573E,		PCI_ANY_ID, PCI_ANY_ID, 0},
134	{ 0x8086, E1000_DEV_ID_82573E_IAMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
135	{ 0x8086, E1000_DEV_ID_82573L,		PCI_ANY_ID, PCI_ANY_ID, 0},
136	{ 0x8086, E1000_DEV_ID_82583V,		PCI_ANY_ID, PCI_ANY_ID, 0},
137	{ 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_SPT,
138						PCI_ANY_ID, PCI_ANY_ID, 0},
139	{ 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_SPT,
140						PCI_ANY_ID, PCI_ANY_ID, 0},
141	{ 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_DPT,
142						PCI_ANY_ID, PCI_ANY_ID, 0},
143	{ 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_DPT,
144						PCI_ANY_ID, PCI_ANY_ID, 0},
145	{ 0x8086, E1000_DEV_ID_ICH8_IGP_M_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
146	{ 0x8086, E1000_DEV_ID_ICH8_IGP_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
147	{ 0x8086, E1000_DEV_ID_ICH8_IGP_C,	PCI_ANY_ID, PCI_ANY_ID, 0},
148	{ 0x8086, E1000_DEV_ID_ICH8_IFE,	PCI_ANY_ID, PCI_ANY_ID, 0},
149	{ 0x8086, E1000_DEV_ID_ICH8_IFE_GT,	PCI_ANY_ID, PCI_ANY_ID, 0},
150	{ 0x8086, E1000_DEV_ID_ICH8_IFE_G,	PCI_ANY_ID, PCI_ANY_ID, 0},
151	{ 0x8086, E1000_DEV_ID_ICH8_IGP_M,	PCI_ANY_ID, PCI_ANY_ID, 0},
152	{ 0x8086, E1000_DEV_ID_ICH8_82567V_3,	PCI_ANY_ID, PCI_ANY_ID, 0},
153	{ 0x8086, E1000_DEV_ID_ICH9_IGP_M_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
154	{ 0x8086, E1000_DEV_ID_ICH9_IGP_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
155	{ 0x8086, E1000_DEV_ID_ICH9_IGP_C,	PCI_ANY_ID, PCI_ANY_ID, 0},
156	{ 0x8086, E1000_DEV_ID_ICH9_IGP_M,	PCI_ANY_ID, PCI_ANY_ID, 0},
157	{ 0x8086, E1000_DEV_ID_ICH9_IGP_M_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
158	{ 0x8086, E1000_DEV_ID_ICH9_IFE,	PCI_ANY_ID, PCI_ANY_ID, 0},
159	{ 0x8086, E1000_DEV_ID_ICH9_IFE_GT,	PCI_ANY_ID, PCI_ANY_ID, 0},
160	{ 0x8086, E1000_DEV_ID_ICH9_IFE_G,	PCI_ANY_ID, PCI_ANY_ID, 0},
161	{ 0x8086, E1000_DEV_ID_ICH9_BM,		PCI_ANY_ID, PCI_ANY_ID, 0},
162	{ 0x8086, E1000_DEV_ID_82574L,		PCI_ANY_ID, PCI_ANY_ID, 0},
163	{ 0x8086, E1000_DEV_ID_82574LA,		PCI_ANY_ID, PCI_ANY_ID, 0},
164	{ 0x8086, E1000_DEV_ID_ICH10_R_BM_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
165	{ 0x8086, E1000_DEV_ID_ICH10_R_BM_LF,	PCI_ANY_ID, PCI_ANY_ID, 0},
166	{ 0x8086, E1000_DEV_ID_ICH10_R_BM_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
167	{ 0x8086, E1000_DEV_ID_ICH10_D_BM_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
168	{ 0x8086, E1000_DEV_ID_ICH10_D_BM_LF,	PCI_ANY_ID, PCI_ANY_ID, 0},
169	{ 0x8086, E1000_DEV_ID_ICH10_D_BM_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
170	{ 0x8086, E1000_DEV_ID_PCH_M_HV_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
171	{ 0x8086, E1000_DEV_ID_PCH_M_HV_LC,	PCI_ANY_ID, PCI_ANY_ID, 0},
172	{ 0x8086, E1000_DEV_ID_PCH_D_HV_DM,	PCI_ANY_ID, PCI_ANY_ID, 0},
173	{ 0x8086, E1000_DEV_ID_PCH_D_HV_DC,	PCI_ANY_ID, PCI_ANY_ID, 0},
174	{ 0x8086, E1000_DEV_ID_PCH2_LV_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
175	{ 0x8086, E1000_DEV_ID_PCH2_LV_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
176	{ 0x8086, E1000_DEV_ID_PCH_LPT_I217_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
177	{ 0x8086, E1000_DEV_ID_PCH_LPT_I217_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
178	{ 0x8086, E1000_DEV_ID_PCH_LPTLP_I218_LM,
179						PCI_ANY_ID, PCI_ANY_ID, 0},
180	{ 0x8086, E1000_DEV_ID_PCH_LPTLP_I218_V,
181						PCI_ANY_ID, PCI_ANY_ID, 0},
182	/* required last entry */
183	{ 0, 0, 0, 0, 0}
184};
185
186/*********************************************************************
187 *  Table of branding strings for all supported NICs.
188 *********************************************************************/
189
190static char *em_strings[] = {
191	"Intel(R) PRO/1000 Network Connection"
192};
193
194/*********************************************************************
195 *  Function prototypes
196 *********************************************************************/
197static int	em_probe(device_t);
198static int	em_attach(device_t);
199static int	em_detach(device_t);
200static int	em_shutdown(device_t);
201static int	em_suspend(device_t);
202static int	em_resume(device_t);
203#ifdef EM_MULTIQUEUE
204static int	em_mq_start(struct ifnet *, struct mbuf *);
205static int	em_mq_start_locked(struct ifnet *,
206		    struct tx_ring *, struct mbuf *);
207static void	em_qflush(struct ifnet *);
208#else
209static void	em_start(struct ifnet *);
210static void	em_start_locked(struct ifnet *, struct tx_ring *);
211#endif
212static int	em_ioctl(struct ifnet *, u_long, caddr_t);
213static void	em_init(void *);
214static void	em_init_locked(struct adapter *);
215static void	em_stop(void *);
216static void	em_media_status(struct ifnet *, struct ifmediareq *);
217static int	em_media_change(struct ifnet *);
218static void	em_identify_hardware(struct adapter *);
219static int	em_allocate_pci_resources(struct adapter *);
220static int	em_allocate_legacy(struct adapter *);
221static int	em_allocate_msix(struct adapter *);
222static int	em_allocate_queues(struct adapter *);
223static int	em_setup_msix(struct adapter *);
224static void	em_free_pci_resources(struct adapter *);
225static void	em_local_timer(void *);
226static void	em_reset(struct adapter *);
227static int	em_setup_interface(device_t, struct adapter *);
228
229static void	em_setup_transmit_structures(struct adapter *);
230static void	em_initialize_transmit_unit(struct adapter *);
231static int	em_allocate_transmit_buffers(struct tx_ring *);
232static void	em_free_transmit_structures(struct adapter *);
233static void	em_free_transmit_buffers(struct tx_ring *);
234
235static int	em_setup_receive_structures(struct adapter *);
236static int	em_allocate_receive_buffers(struct rx_ring *);
237static void	em_initialize_receive_unit(struct adapter *);
238static void	em_free_receive_structures(struct adapter *);
239static void	em_free_receive_buffers(struct rx_ring *);
240
241static void	em_enable_intr(struct adapter *);
242static void	em_disable_intr(struct adapter *);
243static void	em_update_stats_counters(struct adapter *);
244static void	em_add_hw_stats(struct adapter *adapter);
245static void	em_txeof(struct tx_ring *);
246static bool	em_rxeof(struct rx_ring *, int, int *);
247#ifndef __NO_STRICT_ALIGNMENT
248static int	em_fixup_rx(struct rx_ring *);
249#endif
250static void	em_receive_checksum(struct e1000_rx_desc *, struct mbuf *);
251static void	em_transmit_checksum_setup(struct tx_ring *, struct mbuf *, int,
252		    struct ip *, u32 *, u32 *);
253static void	em_tso_setup(struct tx_ring *, struct mbuf *, int, struct ip *,
254		    struct tcphdr *, u32 *, u32 *);
255static void	em_set_promisc(struct adapter *);
256static void	em_disable_promisc(struct adapter *);
257static void	em_set_multi(struct adapter *);
258static void	em_update_link_status(struct adapter *);
259static void	em_refresh_mbufs(struct rx_ring *, int);
260static void	em_register_vlan(void *, struct ifnet *, u16);
261static void	em_unregister_vlan(void *, struct ifnet *, u16);
262static void	em_setup_vlan_hw_support(struct adapter *);
263static int	em_xmit(struct tx_ring *, struct mbuf **);
264static int	em_dma_malloc(struct adapter *, bus_size_t,
265		    struct em_dma_alloc *, int);
266static void	em_dma_free(struct adapter *, struct em_dma_alloc *);
267static int	em_sysctl_nvm_info(SYSCTL_HANDLER_ARGS);
268static void	em_print_nvm_info(struct adapter *);
269static int	em_sysctl_debug_info(SYSCTL_HANDLER_ARGS);
270static void	em_print_debug_info(struct adapter *);
271static int 	em_is_valid_ether_addr(u8 *);
272static int	em_sysctl_int_delay(SYSCTL_HANDLER_ARGS);
273static void	em_add_int_delay_sysctl(struct adapter *, const char *,
274		    const char *, struct em_int_delay_info *, int, int);
275/* Management and WOL Support */
276static void	em_init_manageability(struct adapter *);
277static void	em_release_manageability(struct adapter *);
278static void     em_get_hw_control(struct adapter *);
279static void     em_release_hw_control(struct adapter *);
280static void	em_get_wakeup(device_t);
281static void     em_enable_wakeup(device_t);
282static int	em_enable_phy_wakeup(struct adapter *);
283static void	em_led_func(void *, int);
284static void	em_disable_aspm(struct adapter *);
285
286static int	em_irq_fast(void *);
287
288/* MSIX handlers */
289static void	em_msix_tx(void *);
290static void	em_msix_rx(void *);
291static void	em_msix_link(void *);
292static void	em_handle_tx(void *context, int pending);
293static void	em_handle_rx(void *context, int pending);
294static void	em_handle_link(void *context, int pending);
295
296static void	em_set_sysctl_value(struct adapter *, const char *,
297		    const char *, int *, int);
298static int	em_set_flowcntl(SYSCTL_HANDLER_ARGS);
299static int	em_sysctl_eee(SYSCTL_HANDLER_ARGS);
300
301static __inline void em_rx_discard(struct rx_ring *, int);
302
303#ifdef DEVICE_POLLING
304static poll_handler_t em_poll;
305#endif /* POLLING */
306
307/*********************************************************************
308 *  FreeBSD Device Interface Entry Points
309 *********************************************************************/
310
311static device_method_t em_methods[] = {
312	/* Device interface */
313	DEVMETHOD(device_probe, em_probe),
314	DEVMETHOD(device_attach, em_attach),
315	DEVMETHOD(device_detach, em_detach),
316	DEVMETHOD(device_shutdown, em_shutdown),
317	DEVMETHOD(device_suspend, em_suspend),
318	DEVMETHOD(device_resume, em_resume),
319	DEVMETHOD_END
320};
321
322static driver_t em_driver = {
323	"em", em_methods, sizeof(struct adapter),
324};
325
326devclass_t em_devclass;
327DRIVER_MODULE(em, pci, em_driver, em_devclass, 0, 0);
328MODULE_DEPEND(em, pci, 1, 1, 1);
329MODULE_DEPEND(em, ether, 1, 1, 1);
330
331/*********************************************************************
332 *  Tunable default values.
333 *********************************************************************/
334
335#define EM_TICKS_TO_USECS(ticks)	((1024 * (ticks) + 500) / 1000)
336#define EM_USECS_TO_TICKS(usecs)	((1000 * (usecs) + 512) / 1024)
337#define M_TSO_LEN			66
338
339#define MAX_INTS_PER_SEC	8000
340#define DEFAULT_ITR		(1000000000/(MAX_INTS_PER_SEC * 256))
341
342/* Allow common code without TSO */
343#ifndef CSUM_TSO
344#define CSUM_TSO	0
345#endif
346
347static SYSCTL_NODE(_hw, OID_AUTO, em, CTLFLAG_RD, 0, "EM driver parameters");
348
349static int em_tx_int_delay_dflt = EM_TICKS_TO_USECS(EM_TIDV);
350static int em_rx_int_delay_dflt = EM_TICKS_TO_USECS(EM_RDTR);
351TUNABLE_INT("hw.em.tx_int_delay", &em_tx_int_delay_dflt);
352TUNABLE_INT("hw.em.rx_int_delay", &em_rx_int_delay_dflt);
353SYSCTL_INT(_hw_em, OID_AUTO, tx_int_delay, CTLFLAG_RDTUN, &em_tx_int_delay_dflt,
354    0, "Default transmit interrupt delay in usecs");
355SYSCTL_INT(_hw_em, OID_AUTO, rx_int_delay, CTLFLAG_RDTUN, &em_rx_int_delay_dflt,
356    0, "Default receive interrupt delay in usecs");
357
358static int em_tx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_TADV);
359static int em_rx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_RADV);
360TUNABLE_INT("hw.em.tx_abs_int_delay", &em_tx_abs_int_delay_dflt);
361TUNABLE_INT("hw.em.rx_abs_int_delay", &em_rx_abs_int_delay_dflt);
362SYSCTL_INT(_hw_em, OID_AUTO, tx_abs_int_delay, CTLFLAG_RDTUN,
363    &em_tx_abs_int_delay_dflt, 0,
364    "Default transmit interrupt delay limit in usecs");
365SYSCTL_INT(_hw_em, OID_AUTO, rx_abs_int_delay, CTLFLAG_RDTUN,
366    &em_rx_abs_int_delay_dflt, 0,
367    "Default receive interrupt delay limit in usecs");
368
369static int em_rxd = EM_DEFAULT_RXD;
370static int em_txd = EM_DEFAULT_TXD;
371TUNABLE_INT("hw.em.rxd", &em_rxd);
372TUNABLE_INT("hw.em.txd", &em_txd);
373SYSCTL_INT(_hw_em, OID_AUTO, rxd, CTLFLAG_RDTUN, &em_rxd, 0,
374    "Number of receive descriptors per queue");
375SYSCTL_INT(_hw_em, OID_AUTO, txd, CTLFLAG_RDTUN, &em_txd, 0,
376    "Number of transmit descriptors per queue");
377
378static int em_smart_pwr_down = FALSE;
379TUNABLE_INT("hw.em.smart_pwr_down", &em_smart_pwr_down);
380SYSCTL_INT(_hw_em, OID_AUTO, smart_pwr_down, CTLFLAG_RDTUN, &em_smart_pwr_down,
381    0, "Set to true to leave smart power down enabled on newer adapters");
382
383/* Controls whether promiscuous also shows bad packets */
384static int em_debug_sbp = FALSE;
385TUNABLE_INT("hw.em.sbp", &em_debug_sbp);
386SYSCTL_INT(_hw_em, OID_AUTO, sbp, CTLFLAG_RDTUN, &em_debug_sbp, 0,
387    "Show bad packets in promiscuous mode");
388
389static int em_enable_msix = TRUE;
390TUNABLE_INT("hw.em.enable_msix", &em_enable_msix);
391SYSCTL_INT(_hw_em, OID_AUTO, enable_msix, CTLFLAG_RDTUN, &em_enable_msix, 0,
392    "Enable MSI-X interrupts");
393
394/* How many packets rxeof tries to clean at a time */
395static int em_rx_process_limit = 100;
396TUNABLE_INT("hw.em.rx_process_limit", &em_rx_process_limit);
397SYSCTL_INT(_hw_em, OID_AUTO, rx_process_limit, CTLFLAG_RDTUN,
398    &em_rx_process_limit, 0,
399    "Maximum number of received packets to process "
400    "at a time, -1 means unlimited");
401
402/* Energy efficient ethernet - default to OFF */
403static int eee_setting = 1;
404TUNABLE_INT("hw.em.eee_setting", &eee_setting);
405SYSCTL_INT(_hw_em, OID_AUTO, eee_setting, CTLFLAG_RDTUN, &eee_setting, 0,
406    "Enable Energy Efficient Ethernet");
407
408/* Global used in WOL setup with multiport cards */
409static int global_quad_port_a = 0;
410
411#ifdef DEV_NETMAP	/* see ixgbe.c for details */
412#include <dev/netmap/if_em_netmap.h>
413#endif /* DEV_NETMAP */
414
415/*********************************************************************
416 *  Device identification routine
417 *
418 *  em_probe determines if the driver should be loaded on
419 *  adapter based on PCI vendor/device id of the adapter.
420 *
421 *  return BUS_PROBE_DEFAULT on success, positive on failure
422 *********************************************************************/
423
424static int
425em_probe(device_t dev)
426{
427	char		adapter_name[60];
428	u16		pci_vendor_id = 0;
429	u16		pci_device_id = 0;
430	u16		pci_subvendor_id = 0;
431	u16		pci_subdevice_id = 0;
432	em_vendor_info_t *ent;
433
434	INIT_DEBUGOUT("em_probe: begin");
435
436	pci_vendor_id = pci_get_vendor(dev);
437	if (pci_vendor_id != EM_VENDOR_ID)
438		return (ENXIO);
439
440	pci_device_id = pci_get_device(dev);
441	pci_subvendor_id = pci_get_subvendor(dev);
442	pci_subdevice_id = pci_get_subdevice(dev);
443
444	ent = em_vendor_info_array;
445	while (ent->vendor_id != 0) {
446		if ((pci_vendor_id == ent->vendor_id) &&
447		    (pci_device_id == ent->device_id) &&
448
449		    ((pci_subvendor_id == ent->subvendor_id) ||
450		    (ent->subvendor_id == PCI_ANY_ID)) &&
451
452		    ((pci_subdevice_id == ent->subdevice_id) ||
453		    (ent->subdevice_id == PCI_ANY_ID))) {
454			sprintf(adapter_name, "%s %s",
455				em_strings[ent->index],
456				em_driver_version);
457			device_set_desc_copy(dev, adapter_name);
458			return (BUS_PROBE_DEFAULT);
459		}
460		ent++;
461	}
462
463	return (ENXIO);
464}
465
466/*********************************************************************
467 *  Device initialization routine
468 *
469 *  The attach entry point is called when the driver is being loaded.
470 *  This routine identifies the type of hardware, allocates all resources
471 *  and initializes the hardware.
472 *
473 *  return 0 on success, positive on failure
474 *********************************************************************/
475
476static int
477em_attach(device_t dev)
478{
479	struct adapter	*adapter;
480	struct e1000_hw	*hw;
481	int		error = 0;
482
483	INIT_DEBUGOUT("em_attach: begin");
484
485	if (resource_disabled("em", device_get_unit(dev))) {
486		device_printf(dev, "Disabled by device hint\n");
487		return (ENXIO);
488	}
489
490	adapter = device_get_softc(dev);
491	adapter->dev = adapter->osdep.dev = dev;
492	hw = &adapter->hw;
493	EM_CORE_LOCK_INIT(adapter, device_get_nameunit(dev));
494
495	/* SYSCTL stuff */
496	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
497	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
498	    OID_AUTO, "nvm", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
499	    em_sysctl_nvm_info, "I", "NVM Information");
500
501	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
502	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
503	    OID_AUTO, "debug", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
504	    em_sysctl_debug_info, "I", "Debug Information");
505
506	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
507	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
508	    OID_AUTO, "fc", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
509	    em_set_flowcntl, "I", "Flow Control");
510
511	callout_init_mtx(&adapter->timer, &adapter->core_mtx, 0);
512
513	/* Determine hardware and mac info */
514	em_identify_hardware(adapter);
515
516	/* Setup PCI resources */
517	if (em_allocate_pci_resources(adapter)) {
518		device_printf(dev, "Allocation of PCI resources failed\n");
519		error = ENXIO;
520		goto err_pci;
521	}
522
523	/*
524	** For ICH8 and family we need to
525	** map the flash memory, and this
526	** must happen after the MAC is
527	** identified
528	*/
529	if ((hw->mac.type == e1000_ich8lan) ||
530	    (hw->mac.type == e1000_ich9lan) ||
531	    (hw->mac.type == e1000_ich10lan) ||
532	    (hw->mac.type == e1000_pchlan) ||
533	    (hw->mac.type == e1000_pch2lan) ||
534	    (hw->mac.type == e1000_pch_lpt)) {
535		int rid = EM_BAR_TYPE_FLASH;
536		adapter->flash = bus_alloc_resource_any(dev,
537		    SYS_RES_MEMORY, &rid, RF_ACTIVE);
538		if (adapter->flash == NULL) {
539			device_printf(dev, "Mapping of Flash failed\n");
540			error = ENXIO;
541			goto err_pci;
542		}
543		/* This is used in the shared code */
544		hw->flash_address = (u8 *)adapter->flash;
545		adapter->osdep.flash_bus_space_tag =
546		    rman_get_bustag(adapter->flash);
547		adapter->osdep.flash_bus_space_handle =
548		    rman_get_bushandle(adapter->flash);
549	}
550
551	/* Do Shared Code initialization */
552	if (e1000_setup_init_funcs(hw, TRUE)) {
553		device_printf(dev, "Setup of Shared code failed\n");
554		error = ENXIO;
555		goto err_pci;
556	}
557
558	e1000_get_bus_info(hw);
559
560	/* Set up some sysctls for the tunable interrupt delays */
561	em_add_int_delay_sysctl(adapter, "rx_int_delay",
562	    "receive interrupt delay in usecs", &adapter->rx_int_delay,
563	    E1000_REGISTER(hw, E1000_RDTR), em_rx_int_delay_dflt);
564	em_add_int_delay_sysctl(adapter, "tx_int_delay",
565	    "transmit interrupt delay in usecs", &adapter->tx_int_delay,
566	    E1000_REGISTER(hw, E1000_TIDV), em_tx_int_delay_dflt);
567	em_add_int_delay_sysctl(adapter, "rx_abs_int_delay",
568	    "receive interrupt delay limit in usecs",
569	    &adapter->rx_abs_int_delay,
570	    E1000_REGISTER(hw, E1000_RADV),
571	    em_rx_abs_int_delay_dflt);
572	em_add_int_delay_sysctl(adapter, "tx_abs_int_delay",
573	    "transmit interrupt delay limit in usecs",
574	    &adapter->tx_abs_int_delay,
575	    E1000_REGISTER(hw, E1000_TADV),
576	    em_tx_abs_int_delay_dflt);
577	em_add_int_delay_sysctl(adapter, "itr",
578	    "interrupt delay limit in usecs/4",
579	    &adapter->tx_itr,
580	    E1000_REGISTER(hw, E1000_ITR),
581	    DEFAULT_ITR);
582
583	/* Sysctl for limiting the amount of work done in the taskqueue */
584	em_set_sysctl_value(adapter, "rx_processing_limit",
585	    "max number of rx packets to process", &adapter->rx_process_limit,
586	    em_rx_process_limit);
587
588	/*
589	 * Validate number of transmit and receive descriptors. It
590	 * must not exceed hardware maximum, and must be multiple
591	 * of E1000_DBA_ALIGN.
592	 */
593	if (((em_txd * sizeof(struct e1000_tx_desc)) % EM_DBA_ALIGN) != 0 ||
594	    (em_txd > EM_MAX_TXD) || (em_txd < EM_MIN_TXD)) {
595		device_printf(dev, "Using %d TX descriptors instead of %d!\n",
596		    EM_DEFAULT_TXD, em_txd);
597		adapter->num_tx_desc = EM_DEFAULT_TXD;
598	} else
599		adapter->num_tx_desc = em_txd;
600
601	if (((em_rxd * sizeof(struct e1000_rx_desc)) % EM_DBA_ALIGN) != 0 ||
602	    (em_rxd > EM_MAX_RXD) || (em_rxd < EM_MIN_RXD)) {
603		device_printf(dev, "Using %d RX descriptors instead of %d!\n",
604		    EM_DEFAULT_RXD, em_rxd);
605		adapter->num_rx_desc = EM_DEFAULT_RXD;
606	} else
607		adapter->num_rx_desc = em_rxd;
608
609	hw->mac.autoneg = DO_AUTO_NEG;
610	hw->phy.autoneg_wait_to_complete = FALSE;
611	hw->phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
612
613	/* Copper options */
614	if (hw->phy.media_type == e1000_media_type_copper) {
615		hw->phy.mdix = AUTO_ALL_MODES;
616		hw->phy.disable_polarity_correction = FALSE;
617		hw->phy.ms_type = EM_MASTER_SLAVE;
618	}
619
620	/*
621	 * Set the frame limits assuming
622	 * standard ethernet sized frames.
623	 */
624	adapter->hw.mac.max_frame_size =
625	    ETHERMTU + ETHER_HDR_LEN + ETHERNET_FCS_SIZE;
626
627	/*
628	 * This controls when hardware reports transmit completion
629	 * status.
630	 */
631	hw->mac.report_tx_early = 1;
632
633	/*
634	** Get queue/ring memory
635	*/
636	if (em_allocate_queues(adapter)) {
637		error = ENOMEM;
638		goto err_pci;
639	}
640
641	/* Allocate multicast array memory. */
642	adapter->mta = malloc(sizeof(u8) * ETH_ADDR_LEN *
643	    MAX_NUM_MULTICAST_ADDRESSES, M_DEVBUF, M_NOWAIT);
644	if (adapter->mta == NULL) {
645		device_printf(dev, "Can not allocate multicast setup array\n");
646		error = ENOMEM;
647		goto err_late;
648	}
649
650	/* Check SOL/IDER usage */
651	if (e1000_check_reset_block(hw))
652		device_printf(dev, "PHY reset is blocked"
653		    " due to SOL/IDER session.\n");
654
655	/* Sysctl for setting Energy Efficient Ethernet */
656	hw->dev_spec.ich8lan.eee_disable = eee_setting;
657	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
658	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
659	    OID_AUTO, "eee_control", CTLTYPE_INT|CTLFLAG_RW,
660	    adapter, 0, em_sysctl_eee, "I",
661	    "Disable Energy Efficient Ethernet");
662
663	/*
664	** Start from a known state, this is
665	** important in reading the nvm and
666	** mac from that.
667	*/
668	e1000_reset_hw(hw);
669
670
671	/* Make sure we have a good EEPROM before we read from it */
672	if (e1000_validate_nvm_checksum(hw) < 0) {
673		/*
674		** Some PCI-E parts fail the first check due to
675		** the link being in sleep state, call it again,
676		** if it fails a second time its a real issue.
677		*/
678		if (e1000_validate_nvm_checksum(hw) < 0) {
679			device_printf(dev,
680			    "The EEPROM Checksum Is Not Valid\n");
681			error = EIO;
682			goto err_late;
683		}
684	}
685
686	/* Copy the permanent MAC address out of the EEPROM */
687	if (e1000_read_mac_addr(hw) < 0) {
688		device_printf(dev, "EEPROM read error while reading MAC"
689		    " address\n");
690		error = EIO;
691		goto err_late;
692	}
693
694	if (!em_is_valid_ether_addr(hw->mac.addr)) {
695		device_printf(dev, "Invalid MAC address\n");
696		error = EIO;
697		goto err_late;
698	}
699
700	/*
701	**  Do interrupt configuration
702	*/
703	if (adapter->msix > 1) /* Do MSIX */
704		error = em_allocate_msix(adapter);
705	else  /* MSI or Legacy */
706		error = em_allocate_legacy(adapter);
707	if (error)
708		goto err_late;
709
710	/*
711	 * Get Wake-on-Lan and Management info for later use
712	 */
713	em_get_wakeup(dev);
714
715	/* Setup OS specific network interface */
716	if (em_setup_interface(dev, adapter) != 0)
717		goto err_late;
718
719	em_reset(adapter);
720
721	/* Initialize statistics */
722	em_update_stats_counters(adapter);
723
724	hw->mac.get_link_status = 1;
725	em_update_link_status(adapter);
726
727	/* Register for VLAN events */
728	adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
729	    em_register_vlan, adapter, EVENTHANDLER_PRI_FIRST);
730	adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
731	    em_unregister_vlan, adapter, EVENTHANDLER_PRI_FIRST);
732
733	em_add_hw_stats(adapter);
734
735	/* Non-AMT based hardware can now take control from firmware */
736	if (adapter->has_manage && !adapter->has_amt)
737		em_get_hw_control(adapter);
738
739	/* Tell the stack that the interface is not active */
740	adapter->ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
741	adapter->ifp->if_drv_flags |= IFF_DRV_OACTIVE;
742
743	adapter->led_dev = led_create(em_led_func, adapter,
744	    device_get_nameunit(dev));
745#ifdef DEV_NETMAP
746	em_netmap_attach(adapter);
747#endif /* DEV_NETMAP */
748
749	INIT_DEBUGOUT("em_attach: end");
750
751	return (0);
752
753err_late:
754	em_free_transmit_structures(adapter);
755	em_free_receive_structures(adapter);
756	em_release_hw_control(adapter);
757	if (adapter->ifp != NULL)
758		if_free(adapter->ifp);
759err_pci:
760	em_free_pci_resources(adapter);
761	free(adapter->mta, M_DEVBUF);
762	EM_CORE_LOCK_DESTROY(adapter);
763
764	return (error);
765}
766
767/*********************************************************************
768 *  Device removal routine
769 *
770 *  The detach entry point is called when the driver is being removed.
771 *  This routine stops the adapter and deallocates all the resources
772 *  that were allocated for driver operation.
773 *
774 *  return 0 on success, positive on failure
775 *********************************************************************/
776
777static int
778em_detach(device_t dev)
779{
780	struct adapter	*adapter = device_get_softc(dev);
781	struct ifnet	*ifp = adapter->ifp;
782
783	INIT_DEBUGOUT("em_detach: begin");
784
785	/* Make sure VLANS are not using driver */
786	if (adapter->ifp->if_vlantrunk != NULL) {
787		device_printf(dev,"Vlan in use, detach first\n");
788		return (EBUSY);
789	}
790
791#ifdef DEVICE_POLLING
792	if (ifp->if_capenable & IFCAP_POLLING)
793		ether_poll_deregister(ifp);
794#endif
795
796	if (adapter->led_dev != NULL)
797		led_destroy(adapter->led_dev);
798
799	EM_CORE_LOCK(adapter);
800	adapter->in_detach = 1;
801	em_stop(adapter);
802	EM_CORE_UNLOCK(adapter);
803	EM_CORE_LOCK_DESTROY(adapter);
804
805	e1000_phy_hw_reset(&adapter->hw);
806
807	em_release_manageability(adapter);
808	em_release_hw_control(adapter);
809
810	/* Unregister VLAN events */
811	if (adapter->vlan_attach != NULL)
812		EVENTHANDLER_DEREGISTER(vlan_config, adapter->vlan_attach);
813	if (adapter->vlan_detach != NULL)
814		EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach);
815
816	ether_ifdetach(adapter->ifp);
817	callout_drain(&adapter->timer);
818
819#ifdef DEV_NETMAP
820	netmap_detach(ifp);
821#endif /* DEV_NETMAP */
822
823	em_free_pci_resources(adapter);
824	bus_generic_detach(dev);
825	if_free(ifp);
826
827	em_free_transmit_structures(adapter);
828	em_free_receive_structures(adapter);
829
830	em_release_hw_control(adapter);
831	free(adapter->mta, M_DEVBUF);
832
833	return (0);
834}
835
836/*********************************************************************
837 *
838 *  Shutdown entry point
839 *
840 **********************************************************************/
841
842static int
843em_shutdown(device_t dev)
844{
845	return em_suspend(dev);
846}
847
848/*
849 * Suspend/resume device methods.
850 */
851static int
852em_suspend(device_t dev)
853{
854	struct adapter *adapter = device_get_softc(dev);
855
856	EM_CORE_LOCK(adapter);
857
858        em_release_manageability(adapter);
859	em_release_hw_control(adapter);
860	em_enable_wakeup(dev);
861
862	EM_CORE_UNLOCK(adapter);
863
864	return bus_generic_suspend(dev);
865}
866
867static int
868em_resume(device_t dev)
869{
870	struct adapter *adapter = device_get_softc(dev);
871	struct tx_ring	*txr = adapter->tx_rings;
872	struct ifnet *ifp = adapter->ifp;
873
874	EM_CORE_LOCK(adapter);
875	if (adapter->hw.mac.type == e1000_pch2lan)
876		e1000_resume_workarounds_pchlan(&adapter->hw);
877	em_init_locked(adapter);
878	em_init_manageability(adapter);
879
880	if ((ifp->if_flags & IFF_UP) &&
881	    (ifp->if_drv_flags & IFF_DRV_RUNNING) && adapter->link_active) {
882		for (int i = 0; i < adapter->num_queues; i++, txr++) {
883			EM_TX_LOCK(txr);
884#ifdef EM_MULTIQUEUE
885			if (!drbr_empty(ifp, txr->br))
886				em_mq_start_locked(ifp, txr, NULL);
887#else
888			if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
889				em_start_locked(ifp, txr);
890#endif
891			EM_TX_UNLOCK(txr);
892		}
893	}
894	EM_CORE_UNLOCK(adapter);
895
896	return bus_generic_resume(dev);
897}
898
899
900#ifdef EM_MULTIQUEUE
901/*********************************************************************
902 *  Multiqueue Transmit routines
903 *
904 *  em_mq_start is called by the stack to initiate a transmit.
905 *  however, if busy the driver can queue the request rather
906 *  than do an immediate send. It is this that is an advantage
907 *  in this driver, rather than also having multiple tx queues.
908 **********************************************************************/
909static int
910em_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr, struct mbuf *m)
911{
912	struct adapter  *adapter = txr->adapter;
913        struct mbuf     *next;
914        int             err = 0, enq = 0;
915
916	if ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) !=
917	    IFF_DRV_RUNNING || adapter->link_active == 0) {
918		if (m != NULL)
919			err = drbr_enqueue(ifp, txr->br, m);
920		return (err);
921	}
922
923	enq = 0;
924	if (m != NULL) {
925		err = drbr_enqueue(ifp, txr->br, m);
926		if (err)
927			return (err);
928	}
929
930	/* Process the queue */
931	while ((next = drbr_peek(ifp, txr->br)) != NULL) {
932		if ((err = em_xmit(txr, &next)) != 0) {
933			if (next == NULL)
934				drbr_advance(ifp, txr->br);
935			else
936				drbr_putback(ifp, txr->br, next);
937			break;
938		}
939		drbr_advance(ifp, txr->br);
940		enq++;
941		ifp->if_obytes += next->m_pkthdr.len;
942		if (next->m_flags & M_MCAST)
943			ifp->if_omcasts++;
944		ETHER_BPF_MTAP(ifp, next);
945		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
946                        break;
947	}
948
949	if (enq > 0) {
950                /* Set the watchdog */
951                txr->queue_status = EM_QUEUE_WORKING;
952		txr->watchdog_time = ticks;
953	}
954
955	if (txr->tx_avail < EM_MAX_SCATTER)
956		em_txeof(txr);
957	if (txr->tx_avail < EM_MAX_SCATTER)
958		ifp->if_drv_flags |= IFF_DRV_OACTIVE;
959	return (err);
960}
961
962/*
963** Multiqueue capable stack interface
964*/
965static int
966em_mq_start(struct ifnet *ifp, struct mbuf *m)
967{
968	struct adapter	*adapter = ifp->if_softc;
969	struct tx_ring	*txr = adapter->tx_rings;
970	int 		error;
971
972	if (EM_TX_TRYLOCK(txr)) {
973		error = em_mq_start_locked(ifp, txr, m);
974		EM_TX_UNLOCK(txr);
975	} else
976		error = drbr_enqueue(ifp, txr->br, m);
977
978	return (error);
979}
980
981/*
982** Flush all ring buffers
983*/
984static void
985em_qflush(struct ifnet *ifp)
986{
987	struct adapter  *adapter = ifp->if_softc;
988	struct tx_ring  *txr = adapter->tx_rings;
989	struct mbuf     *m;
990
991	for (int i = 0; i < adapter->num_queues; i++, txr++) {
992		EM_TX_LOCK(txr);
993		while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
994			m_freem(m);
995		EM_TX_UNLOCK(txr);
996	}
997	if_qflush(ifp);
998}
999#else  /* !EM_MULTIQUEUE */
1000
1001static void
1002em_start_locked(struct ifnet *ifp, struct tx_ring *txr)
1003{
1004	struct adapter	*adapter = ifp->if_softc;
1005	struct mbuf	*m_head;
1006
1007	EM_TX_LOCK_ASSERT(txr);
1008
1009	if ((ifp->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
1010	    IFF_DRV_RUNNING)
1011		return;
1012
1013	if (!adapter->link_active)
1014		return;
1015
1016	while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
1017        	/* Call cleanup if number of TX descriptors low */
1018		if (txr->tx_avail <= EM_TX_CLEANUP_THRESHOLD)
1019			em_txeof(txr);
1020		if (txr->tx_avail < EM_MAX_SCATTER) {
1021			ifp->if_drv_flags |= IFF_DRV_OACTIVE;
1022			break;
1023		}
1024                IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
1025		if (m_head == NULL)
1026			break;
1027		/*
1028		 *  Encapsulation can modify our pointer, and or make it
1029		 *  NULL on failure.  In that event, we can't requeue.
1030		 */
1031		if (em_xmit(txr, &m_head)) {
1032			if (m_head == NULL)
1033				break;
1034			IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
1035			break;
1036		}
1037
1038		/* Send a copy of the frame to the BPF listener */
1039		ETHER_BPF_MTAP(ifp, m_head);
1040
1041		/* Set timeout in case hardware has problems transmitting. */
1042		txr->watchdog_time = ticks;
1043                txr->queue_status = EM_QUEUE_WORKING;
1044	}
1045
1046	return;
1047}
1048
1049static void
1050em_start(struct ifnet *ifp)
1051{
1052	struct adapter	*adapter = ifp->if_softc;
1053	struct tx_ring	*txr = adapter->tx_rings;
1054
1055	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1056		EM_TX_LOCK(txr);
1057		em_start_locked(ifp, txr);
1058		EM_TX_UNLOCK(txr);
1059	}
1060	return;
1061}
1062#endif /* EM_MULTIQUEUE */
1063
1064/*********************************************************************
1065 *  Ioctl entry point
1066 *
1067 *  em_ioctl is called when the user wants to configure the
1068 *  interface.
1069 *
1070 *  return 0 on success, positive on failure
1071 **********************************************************************/
1072
1073static int
1074em_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
1075{
1076	struct adapter	*adapter = ifp->if_softc;
1077	struct ifreq	*ifr = (struct ifreq *)data;
1078#if defined(INET) || defined(INET6)
1079	struct ifaddr	*ifa = (struct ifaddr *)data;
1080#endif
1081	bool		avoid_reset = FALSE;
1082	int		error = 0;
1083
1084	if (adapter->in_detach)
1085		return (error);
1086
1087	switch (command) {
1088	case SIOCSIFADDR:
1089#ifdef INET
1090		if (ifa->ifa_addr->sa_family == AF_INET)
1091			avoid_reset = TRUE;
1092#endif
1093#ifdef INET6
1094		if (ifa->ifa_addr->sa_family == AF_INET6)
1095			avoid_reset = TRUE;
1096#endif
1097		/*
1098		** Calling init results in link renegotiation,
1099		** so we avoid doing it when possible.
1100		*/
1101		if (avoid_reset) {
1102			ifp->if_flags |= IFF_UP;
1103			if (!(ifp->if_drv_flags & IFF_DRV_RUNNING))
1104				em_init(adapter);
1105#ifdef INET
1106			if (!(ifp->if_flags & IFF_NOARP))
1107				arp_ifinit(ifp, ifa);
1108#endif
1109		} else
1110			error = ether_ioctl(ifp, command, data);
1111		break;
1112	case SIOCSIFMTU:
1113	    {
1114		int max_frame_size;
1115
1116		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)");
1117
1118		EM_CORE_LOCK(adapter);
1119		switch (adapter->hw.mac.type) {
1120		case e1000_82571:
1121		case e1000_82572:
1122		case e1000_ich9lan:
1123		case e1000_ich10lan:
1124		case e1000_pch2lan:
1125		case e1000_pch_lpt:
1126		case e1000_82574:
1127		case e1000_82583:
1128		case e1000_80003es2lan:	/* 9K Jumbo Frame size */
1129			max_frame_size = 9234;
1130			break;
1131		case e1000_pchlan:
1132			max_frame_size = 4096;
1133			break;
1134			/* Adapters that do not support jumbo frames */
1135		case e1000_ich8lan:
1136			max_frame_size = ETHER_MAX_LEN;
1137			break;
1138		default:
1139			max_frame_size = MAX_JUMBO_FRAME_SIZE;
1140		}
1141		if (ifr->ifr_mtu > max_frame_size - ETHER_HDR_LEN -
1142		    ETHER_CRC_LEN) {
1143			EM_CORE_UNLOCK(adapter);
1144			error = EINVAL;
1145			break;
1146		}
1147
1148		ifp->if_mtu = ifr->ifr_mtu;
1149		adapter->hw.mac.max_frame_size =
1150		    ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
1151		em_init_locked(adapter);
1152		EM_CORE_UNLOCK(adapter);
1153		break;
1154	    }
1155	case SIOCSIFFLAGS:
1156		IOCTL_DEBUGOUT("ioctl rcv'd:\
1157		    SIOCSIFFLAGS (Set Interface Flags)");
1158		EM_CORE_LOCK(adapter);
1159		if (ifp->if_flags & IFF_UP) {
1160			if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) {
1161				if ((ifp->if_flags ^ adapter->if_flags) &
1162				    (IFF_PROMISC | IFF_ALLMULTI)) {
1163					em_disable_promisc(adapter);
1164					em_set_promisc(adapter);
1165				}
1166			} else
1167				em_init_locked(adapter);
1168		} else
1169			if (ifp->if_drv_flags & IFF_DRV_RUNNING)
1170				em_stop(adapter);
1171		adapter->if_flags = ifp->if_flags;
1172		EM_CORE_UNLOCK(adapter);
1173		break;
1174	case SIOCADDMULTI:
1175	case SIOCDELMULTI:
1176		IOCTL_DEBUGOUT("ioctl rcv'd: SIOC(ADD|DEL)MULTI");
1177		if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1178			EM_CORE_LOCK(adapter);
1179			em_disable_intr(adapter);
1180			em_set_multi(adapter);
1181#ifdef DEVICE_POLLING
1182			if (!(ifp->if_capenable & IFCAP_POLLING))
1183#endif
1184				em_enable_intr(adapter);
1185			EM_CORE_UNLOCK(adapter);
1186		}
1187		break;
1188	case SIOCSIFMEDIA:
1189		/* Check SOL/IDER usage */
1190		EM_CORE_LOCK(adapter);
1191		if (e1000_check_reset_block(&adapter->hw)) {
1192			EM_CORE_UNLOCK(adapter);
1193			device_printf(adapter->dev, "Media change is"
1194			    " blocked due to SOL/IDER session.\n");
1195			break;
1196		}
1197		EM_CORE_UNLOCK(adapter);
1198		/* falls thru */
1199	case SIOCGIFMEDIA:
1200		IOCTL_DEBUGOUT("ioctl rcv'd: \
1201		    SIOCxIFMEDIA (Get/Set Interface Media)");
1202		error = ifmedia_ioctl(ifp, ifr, &adapter->media, command);
1203		break;
1204	case SIOCSIFCAP:
1205	    {
1206		int mask, reinit;
1207
1208		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFCAP (Set Capabilities)");
1209		reinit = 0;
1210		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
1211#ifdef DEVICE_POLLING
1212		if (mask & IFCAP_POLLING) {
1213			if (ifr->ifr_reqcap & IFCAP_POLLING) {
1214				error = ether_poll_register(em_poll, ifp);
1215				if (error)
1216					return (error);
1217				EM_CORE_LOCK(adapter);
1218				em_disable_intr(adapter);
1219				ifp->if_capenable |= IFCAP_POLLING;
1220				EM_CORE_UNLOCK(adapter);
1221			} else {
1222				error = ether_poll_deregister(ifp);
1223				/* Enable interrupt even in error case */
1224				EM_CORE_LOCK(adapter);
1225				em_enable_intr(adapter);
1226				ifp->if_capenable &= ~IFCAP_POLLING;
1227				EM_CORE_UNLOCK(adapter);
1228			}
1229		}
1230#endif
1231		if (mask & IFCAP_HWCSUM) {
1232			ifp->if_capenable ^= IFCAP_HWCSUM;
1233			reinit = 1;
1234		}
1235		if (mask & IFCAP_TSO4) {
1236			ifp->if_capenable ^= IFCAP_TSO4;
1237			reinit = 1;
1238		}
1239		if (mask & IFCAP_VLAN_HWTAGGING) {
1240			ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
1241			reinit = 1;
1242		}
1243		if (mask & IFCAP_VLAN_HWFILTER) {
1244			ifp->if_capenable ^= IFCAP_VLAN_HWFILTER;
1245			reinit = 1;
1246		}
1247		if (mask & IFCAP_VLAN_HWTSO) {
1248			ifp->if_capenable ^= IFCAP_VLAN_HWTSO;
1249			reinit = 1;
1250		}
1251		if ((mask & IFCAP_WOL) &&
1252		    (ifp->if_capabilities & IFCAP_WOL) != 0) {
1253			if (mask & IFCAP_WOL_MCAST)
1254				ifp->if_capenable ^= IFCAP_WOL_MCAST;
1255			if (mask & IFCAP_WOL_MAGIC)
1256				ifp->if_capenable ^= IFCAP_WOL_MAGIC;
1257		}
1258		if (reinit && (ifp->if_drv_flags & IFF_DRV_RUNNING))
1259			em_init(adapter);
1260		VLAN_CAPABILITIES(ifp);
1261		break;
1262	    }
1263
1264	default:
1265		error = ether_ioctl(ifp, command, data);
1266		break;
1267	}
1268
1269	return (error);
1270}
1271
1272
1273/*********************************************************************
1274 *  Init entry point
1275 *
1276 *  This routine is used in two ways. It is used by the stack as
1277 *  init entry point in network interface structure. It is also used
1278 *  by the driver as a hw/sw initialization routine to get to a
1279 *  consistent state.
1280 *
1281 *  return 0 on success, positive on failure
1282 **********************************************************************/
1283
1284static void
1285em_init_locked(struct adapter *adapter)
1286{
1287	struct ifnet	*ifp = adapter->ifp;
1288	device_t	dev = adapter->dev;
1289
1290	INIT_DEBUGOUT("em_init: begin");
1291
1292	EM_CORE_LOCK_ASSERT(adapter);
1293
1294	em_disable_intr(adapter);
1295	callout_stop(&adapter->timer);
1296
1297	/* Get the latest mac address, User can use a LAA */
1298        bcopy(IF_LLADDR(adapter->ifp), adapter->hw.mac.addr,
1299              ETHER_ADDR_LEN);
1300
1301	/* Put the address into the Receive Address Array */
1302	e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);
1303
1304	/*
1305	 * With the 82571 adapter, RAR[0] may be overwritten
1306	 * when the other port is reset, we make a duplicate
1307	 * in RAR[14] for that eventuality, this assures
1308	 * the interface continues to function.
1309	 */
1310	if (adapter->hw.mac.type == e1000_82571) {
1311		e1000_set_laa_state_82571(&adapter->hw, TRUE);
1312		e1000_rar_set(&adapter->hw, adapter->hw.mac.addr,
1313		    E1000_RAR_ENTRIES - 1);
1314	}
1315
1316	/* Initialize the hardware */
1317	em_reset(adapter);
1318	em_update_link_status(adapter);
1319
1320	/* Setup VLAN support, basic and offload if available */
1321	E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);
1322
1323	/* Set hardware offload abilities */
1324	ifp->if_hwassist = 0;
1325	if (ifp->if_capenable & IFCAP_TXCSUM)
1326		ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
1327	if (ifp->if_capenable & IFCAP_TSO4)
1328		ifp->if_hwassist |= CSUM_TSO;
1329
1330	/* Configure for OS presence */
1331	em_init_manageability(adapter);
1332
1333	/* Prepare transmit descriptors and buffers */
1334	em_setup_transmit_structures(adapter);
1335	em_initialize_transmit_unit(adapter);
1336
1337	/* Setup Multicast table */
1338	em_set_multi(adapter);
1339
1340	/*
1341	** Figure out the desired mbuf
1342	** pool for doing jumbos
1343	*/
1344	if (adapter->hw.mac.max_frame_size <= 2048)
1345		adapter->rx_mbuf_sz = MCLBYTES;
1346	else if (adapter->hw.mac.max_frame_size <= 4096)
1347		adapter->rx_mbuf_sz = MJUMPAGESIZE;
1348	else
1349		adapter->rx_mbuf_sz = MJUM9BYTES;
1350
1351	/* Prepare receive descriptors and buffers */
1352	if (em_setup_receive_structures(adapter)) {
1353		device_printf(dev, "Could not setup receive structures\n");
1354		em_stop(adapter);
1355		return;
1356	}
1357	em_initialize_receive_unit(adapter);
1358
1359	/* Use real VLAN Filter support? */
1360	if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) {
1361		if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
1362			/* Use real VLAN Filter support */
1363			em_setup_vlan_hw_support(adapter);
1364		else {
1365			u32 ctrl;
1366			ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
1367			ctrl |= E1000_CTRL_VME;
1368			E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
1369		}
1370	}
1371
1372	/* Don't lose promiscuous settings */
1373	em_set_promisc(adapter);
1374
1375	/* Set the interface as ACTIVE */
1376	ifp->if_drv_flags |= IFF_DRV_RUNNING;
1377	ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
1378
1379	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
1380	e1000_clear_hw_cntrs_base_generic(&adapter->hw);
1381
1382	/* MSI/X configuration for 82574 */
1383	if (adapter->hw.mac.type == e1000_82574) {
1384		int tmp;
1385		tmp = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
1386		tmp |= E1000_CTRL_EXT_PBA_CLR;
1387		E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, tmp);
1388		/* Set the IVAR - interrupt vector routing. */
1389		E1000_WRITE_REG(&adapter->hw, E1000_IVAR, adapter->ivars);
1390	}
1391
1392#ifdef DEVICE_POLLING
1393	/*
1394	 * Only enable interrupts if we are not polling, make sure
1395	 * they are off otherwise.
1396	 */
1397	if (ifp->if_capenable & IFCAP_POLLING)
1398		em_disable_intr(adapter);
1399	else
1400#endif /* DEVICE_POLLING */
1401		em_enable_intr(adapter);
1402
1403	/* AMT based hardware can now take control from firmware */
1404	if (adapter->has_manage && adapter->has_amt)
1405		em_get_hw_control(adapter);
1406}
1407
1408static void
1409em_init(void *arg)
1410{
1411	struct adapter *adapter = arg;
1412
1413	EM_CORE_LOCK(adapter);
1414	em_init_locked(adapter);
1415	EM_CORE_UNLOCK(adapter);
1416}
1417
1418
1419#ifdef DEVICE_POLLING
1420/*********************************************************************
1421 *
1422 *  Legacy polling routine: note this only works with single queue
1423 *
1424 *********************************************************************/
1425static int
1426em_poll(struct ifnet *ifp, enum poll_cmd cmd, int count)
1427{
1428	struct adapter *adapter = ifp->if_softc;
1429	struct tx_ring	*txr = adapter->tx_rings;
1430	struct rx_ring	*rxr = adapter->rx_rings;
1431	u32		reg_icr;
1432	int		rx_done;
1433
1434	EM_CORE_LOCK(adapter);
1435	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
1436		EM_CORE_UNLOCK(adapter);
1437		return (0);
1438	}
1439
1440	if (cmd == POLL_AND_CHECK_STATUS) {
1441		reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1442		if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
1443			callout_stop(&adapter->timer);
1444			adapter->hw.mac.get_link_status = 1;
1445			em_update_link_status(adapter);
1446			callout_reset(&adapter->timer, hz,
1447			    em_local_timer, adapter);
1448		}
1449	}
1450	EM_CORE_UNLOCK(adapter);
1451
1452	em_rxeof(rxr, count, &rx_done);
1453
1454	EM_TX_LOCK(txr);
1455	em_txeof(txr);
1456#ifdef EM_MULTIQUEUE
1457	if (!drbr_empty(ifp, txr->br))
1458		em_mq_start_locked(ifp, txr, NULL);
1459#else
1460	if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1461		em_start_locked(ifp, txr);
1462#endif
1463	EM_TX_UNLOCK(txr);
1464
1465	return (rx_done);
1466}
1467#endif /* DEVICE_POLLING */
1468
1469
1470/*********************************************************************
1471 *
1472 *  Fast Legacy/MSI Combined Interrupt Service routine
1473 *
1474 *********************************************************************/
1475static int
1476em_irq_fast(void *arg)
1477{
1478	struct adapter	*adapter = arg;
1479	struct ifnet	*ifp;
1480	u32		reg_icr;
1481
1482	ifp = adapter->ifp;
1483
1484	reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1485
1486	/* Hot eject?  */
1487	if (reg_icr == 0xffffffff)
1488		return FILTER_STRAY;
1489
1490	/* Definitely not our interrupt.  */
1491	if (reg_icr == 0x0)
1492		return FILTER_STRAY;
1493
1494	/*
1495	 * Starting with the 82571 chip, bit 31 should be used to
1496	 * determine whether the interrupt belongs to us.
1497	 */
1498	if (adapter->hw.mac.type >= e1000_82571 &&
1499	    (reg_icr & E1000_ICR_INT_ASSERTED) == 0)
1500		return FILTER_STRAY;
1501
1502	em_disable_intr(adapter);
1503	taskqueue_enqueue(adapter->tq, &adapter->que_task);
1504
1505	/* Link status change */
1506	if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
1507		adapter->hw.mac.get_link_status = 1;
1508		taskqueue_enqueue(taskqueue_fast, &adapter->link_task);
1509	}
1510
1511	if (reg_icr & E1000_ICR_RXO)
1512		adapter->rx_overruns++;
1513	return FILTER_HANDLED;
1514}
1515
1516/* Combined RX/TX handler, used by Legacy and MSI */
1517static void
1518em_handle_que(void *context, int pending)
1519{
1520	struct adapter	*adapter = context;
1521	struct ifnet	*ifp = adapter->ifp;
1522	struct tx_ring	*txr = adapter->tx_rings;
1523	struct rx_ring	*rxr = adapter->rx_rings;
1524
1525
1526	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1527		bool more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
1528		EM_TX_LOCK(txr);
1529		em_txeof(txr);
1530#ifdef EM_MULTIQUEUE
1531		if (!drbr_empty(ifp, txr->br))
1532			em_mq_start_locked(ifp, txr, NULL);
1533#else
1534		if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1535			em_start_locked(ifp, txr);
1536#endif
1537		EM_TX_UNLOCK(txr);
1538		if (more) {
1539			taskqueue_enqueue(adapter->tq, &adapter->que_task);
1540			return;
1541		}
1542	}
1543
1544	em_enable_intr(adapter);
1545	return;
1546}
1547
1548
1549/*********************************************************************
1550 *
1551 *  MSIX Interrupt Service Routines
1552 *
1553 **********************************************************************/
1554static void
1555em_msix_tx(void *arg)
1556{
1557	struct tx_ring *txr = arg;
1558	struct adapter *adapter = txr->adapter;
1559	struct ifnet	*ifp = adapter->ifp;
1560
1561	++txr->tx_irq;
1562	EM_TX_LOCK(txr);
1563	em_txeof(txr);
1564#ifdef EM_MULTIQUEUE
1565	if (!drbr_empty(ifp, txr->br))
1566		em_mq_start_locked(ifp, txr, NULL);
1567#else
1568	if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1569		em_start_locked(ifp, txr);
1570#endif
1571	/* Reenable this interrupt */
1572	E1000_WRITE_REG(&adapter->hw, E1000_IMS, txr->ims);
1573	EM_TX_UNLOCK(txr);
1574	return;
1575}
1576
1577/*********************************************************************
1578 *
1579 *  MSIX RX Interrupt Service routine
1580 *
1581 **********************************************************************/
1582
1583static void
1584em_msix_rx(void *arg)
1585{
1586	struct rx_ring	*rxr = arg;
1587	struct adapter	*adapter = rxr->adapter;
1588	bool		more;
1589
1590	++rxr->rx_irq;
1591	if (!(adapter->ifp->if_drv_flags & IFF_DRV_RUNNING))
1592		return;
1593	more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
1594	if (more)
1595		taskqueue_enqueue(rxr->tq, &rxr->rx_task);
1596	else
1597		/* Reenable this interrupt */
1598		E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxr->ims);
1599	return;
1600}
1601
1602/*********************************************************************
1603 *
1604 *  MSIX Link Fast Interrupt Service routine
1605 *
1606 **********************************************************************/
1607static void
1608em_msix_link(void *arg)
1609{
1610	struct adapter	*adapter = arg;
1611	u32		reg_icr;
1612
1613	++adapter->link_irq;
1614	reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1615
1616	if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
1617		adapter->hw.mac.get_link_status = 1;
1618		em_handle_link(adapter, 0);
1619	} else
1620		E1000_WRITE_REG(&adapter->hw, E1000_IMS,
1621		    EM_MSIX_LINK | E1000_IMS_LSC);
1622	return;
1623}
1624
1625static void
1626em_handle_rx(void *context, int pending)
1627{
1628	struct rx_ring	*rxr = context;
1629	struct adapter	*adapter = rxr->adapter;
1630        bool            more;
1631
1632	more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
1633	if (more)
1634		taskqueue_enqueue(rxr->tq, &rxr->rx_task);
1635	else
1636		/* Reenable this interrupt */
1637		E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxr->ims);
1638}
1639
1640static void
1641em_handle_tx(void *context, int pending)
1642{
1643	struct tx_ring	*txr = context;
1644	struct adapter	*adapter = txr->adapter;
1645	struct ifnet	*ifp = adapter->ifp;
1646
1647	EM_TX_LOCK(txr);
1648	em_txeof(txr);
1649#ifdef EM_MULTIQUEUE
1650	if (!drbr_empty(ifp, txr->br))
1651		em_mq_start_locked(ifp, txr, NULL);
1652#else
1653	if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1654		em_start_locked(ifp, txr);
1655#endif
1656	E1000_WRITE_REG(&adapter->hw, E1000_IMS, txr->ims);
1657	EM_TX_UNLOCK(txr);
1658}
1659
1660static void
1661em_handle_link(void *context, int pending)
1662{
1663	struct adapter	*adapter = context;
1664	struct tx_ring	*txr = adapter->tx_rings;
1665	struct ifnet *ifp = adapter->ifp;
1666
1667	if (!(ifp->if_drv_flags & IFF_DRV_RUNNING))
1668		return;
1669
1670	EM_CORE_LOCK(adapter);
1671	callout_stop(&adapter->timer);
1672	em_update_link_status(adapter);
1673	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
1674	E1000_WRITE_REG(&adapter->hw, E1000_IMS,
1675	    EM_MSIX_LINK | E1000_IMS_LSC);
1676	if (adapter->link_active) {
1677		for (int i = 0; i < adapter->num_queues; i++, txr++) {
1678			EM_TX_LOCK(txr);
1679#ifdef EM_MULTIQUEUE
1680			if (!drbr_empty(ifp, txr->br))
1681				em_mq_start_locked(ifp, txr, NULL);
1682#else
1683			if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1684				em_start_locked(ifp, txr);
1685#endif
1686			EM_TX_UNLOCK(txr);
1687		}
1688	}
1689	EM_CORE_UNLOCK(adapter);
1690}
1691
1692
1693/*********************************************************************
1694 *
1695 *  Media Ioctl callback
1696 *
1697 *  This routine is called whenever the user queries the status of
1698 *  the interface using ifconfig.
1699 *
1700 **********************************************************************/
1701static void
1702em_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
1703{
1704	struct adapter *adapter = ifp->if_softc;
1705	u_char fiber_type = IFM_1000_SX;
1706
1707	INIT_DEBUGOUT("em_media_status: begin");
1708
1709	EM_CORE_LOCK(adapter);
1710	em_update_link_status(adapter);
1711
1712	ifmr->ifm_status = IFM_AVALID;
1713	ifmr->ifm_active = IFM_ETHER;
1714
1715	if (!adapter->link_active) {
1716		EM_CORE_UNLOCK(adapter);
1717		return;
1718	}
1719
1720	ifmr->ifm_status |= IFM_ACTIVE;
1721
1722	if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
1723	    (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
1724		ifmr->ifm_active |= fiber_type | IFM_FDX;
1725	} else {
1726		switch (adapter->link_speed) {
1727		case 10:
1728			ifmr->ifm_active |= IFM_10_T;
1729			break;
1730		case 100:
1731			ifmr->ifm_active |= IFM_100_TX;
1732			break;
1733		case 1000:
1734			ifmr->ifm_active |= IFM_1000_T;
1735			break;
1736		}
1737		if (adapter->link_duplex == FULL_DUPLEX)
1738			ifmr->ifm_active |= IFM_FDX;
1739		else
1740			ifmr->ifm_active |= IFM_HDX;
1741	}
1742	EM_CORE_UNLOCK(adapter);
1743}
1744
1745/*********************************************************************
1746 *
1747 *  Media Ioctl callback
1748 *
1749 *  This routine is called when the user changes speed/duplex using
1750 *  media/mediopt option with ifconfig.
1751 *
1752 **********************************************************************/
1753static int
1754em_media_change(struct ifnet *ifp)
1755{
1756	struct adapter *adapter = ifp->if_softc;
1757	struct ifmedia  *ifm = &adapter->media;
1758
1759	INIT_DEBUGOUT("em_media_change: begin");
1760
1761	if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
1762		return (EINVAL);
1763
1764	EM_CORE_LOCK(adapter);
1765	switch (IFM_SUBTYPE(ifm->ifm_media)) {
1766	case IFM_AUTO:
1767		adapter->hw.mac.autoneg = DO_AUTO_NEG;
1768		adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
1769		break;
1770	case IFM_1000_LX:
1771	case IFM_1000_SX:
1772	case IFM_1000_T:
1773		adapter->hw.mac.autoneg = DO_AUTO_NEG;
1774		adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
1775		break;
1776	case IFM_100_TX:
1777		adapter->hw.mac.autoneg = FALSE;
1778		adapter->hw.phy.autoneg_advertised = 0;
1779		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1780			adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_FULL;
1781		else
1782			adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_HALF;
1783		break;
1784	case IFM_10_T:
1785		adapter->hw.mac.autoneg = FALSE;
1786		adapter->hw.phy.autoneg_advertised = 0;
1787		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1788			adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_FULL;
1789		else
1790			adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_HALF;
1791		break;
1792	default:
1793		device_printf(adapter->dev, "Unsupported media type\n");
1794	}
1795
1796	em_init_locked(adapter);
1797	EM_CORE_UNLOCK(adapter);
1798
1799	return (0);
1800}
1801
1802/*********************************************************************
1803 *
1804 *  This routine maps the mbufs to tx descriptors.
1805 *
1806 *  return 0 on success, positive on failure
1807 **********************************************************************/
1808
1809static int
1810em_xmit(struct tx_ring *txr, struct mbuf **m_headp)
1811{
1812	struct adapter		*adapter = txr->adapter;
1813	bus_dma_segment_t	segs[EM_MAX_SCATTER];
1814	bus_dmamap_t		map;
1815	struct em_buffer	*tx_buffer, *tx_buffer_mapped;
1816	struct e1000_tx_desc	*ctxd = NULL;
1817	struct mbuf		*m_head;
1818	struct ether_header	*eh;
1819	struct ip		*ip = NULL;
1820	struct tcphdr		*tp = NULL;
1821	u32			txd_upper, txd_lower, txd_used, txd_saved;
1822	int			ip_off, poff;
1823	int			nsegs, i, j, first, last = 0;
1824	int			error, do_tso, tso_desc = 0, remap = 1;
1825
1826retry:
1827	m_head = *m_headp;
1828	txd_upper = txd_lower = txd_used = txd_saved = 0;
1829	do_tso = ((m_head->m_pkthdr.csum_flags & CSUM_TSO) != 0);
1830	ip_off = poff = 0;
1831
1832	/*
1833	 * Intel recommends entire IP/TCP header length reside in a single
1834	 * buffer. If multiple descriptors are used to describe the IP and
1835	 * TCP header, each descriptor should describe one or more
1836	 * complete headers; descriptors referencing only parts of headers
1837	 * are not supported. If all layer headers are not coalesced into
1838	 * a single buffer, each buffer should not cross a 4KB boundary,
1839	 * or be larger than the maximum read request size.
1840	 * Controller also requires modifing IP/TCP header to make TSO work
1841	 * so we firstly get a writable mbuf chain then coalesce ethernet/
1842	 * IP/TCP header into a single buffer to meet the requirement of
1843	 * controller. This also simplifies IP/TCP/UDP checksum offloading
1844	 * which also has similiar restrictions.
1845	 */
1846	if (do_tso || m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD) {
1847		if (do_tso || (m_head->m_next != NULL &&
1848		    m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD)) {
1849			if (M_WRITABLE(*m_headp) == 0) {
1850				m_head = m_dup(*m_headp, M_NOWAIT);
1851				m_freem(*m_headp);
1852				if (m_head == NULL) {
1853					*m_headp = NULL;
1854					return (ENOBUFS);
1855				}
1856				*m_headp = m_head;
1857			}
1858		}
1859		/*
1860		 * XXX
1861		 * Assume IPv4, we don't have TSO/checksum offload support
1862		 * for IPv6 yet.
1863		 */
1864		ip_off = sizeof(struct ether_header);
1865		m_head = m_pullup(m_head, ip_off);
1866		if (m_head == NULL) {
1867			*m_headp = NULL;
1868			return (ENOBUFS);
1869		}
1870		eh = mtod(m_head, struct ether_header *);
1871		if (eh->ether_type == htons(ETHERTYPE_VLAN)) {
1872			ip_off = sizeof(struct ether_vlan_header);
1873			m_head = m_pullup(m_head, ip_off);
1874			if (m_head == NULL) {
1875				*m_headp = NULL;
1876				return (ENOBUFS);
1877			}
1878		}
1879		m_head = m_pullup(m_head, ip_off + sizeof(struct ip));
1880		if (m_head == NULL) {
1881			*m_headp = NULL;
1882			return (ENOBUFS);
1883		}
1884		ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1885		poff = ip_off + (ip->ip_hl << 2);
1886		if (do_tso) {
1887			m_head = m_pullup(m_head, poff + sizeof(struct tcphdr));
1888			if (m_head == NULL) {
1889				*m_headp = NULL;
1890				return (ENOBUFS);
1891			}
1892			tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1893			/*
1894			 * TSO workaround:
1895			 *   pull 4 more bytes of data into it.
1896			 */
1897			m_head = m_pullup(m_head, poff + (tp->th_off << 2) + 4);
1898			if (m_head == NULL) {
1899				*m_headp = NULL;
1900				return (ENOBUFS);
1901			}
1902			ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1903			ip->ip_len = 0;
1904			ip->ip_sum = 0;
1905			/*
1906			 * The pseudo TCP checksum does not include TCP payload
1907			 * length so driver should recompute the checksum here
1908			 * what hardware expect to see. This is adherence of
1909			 * Microsoft's Large Send specification.
1910			 */
1911			tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1912			tp->th_sum = in_pseudo(ip->ip_src.s_addr,
1913			    ip->ip_dst.s_addr, htons(IPPROTO_TCP));
1914		} else if (m_head->m_pkthdr.csum_flags & CSUM_TCP) {
1915			m_head = m_pullup(m_head, poff + sizeof(struct tcphdr));
1916			if (m_head == NULL) {
1917				*m_headp = NULL;
1918				return (ENOBUFS);
1919			}
1920			tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1921			m_head = m_pullup(m_head, poff + (tp->th_off << 2));
1922			if (m_head == NULL) {
1923				*m_headp = NULL;
1924				return (ENOBUFS);
1925			}
1926			ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1927			tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1928		} else if (m_head->m_pkthdr.csum_flags & CSUM_UDP) {
1929			m_head = m_pullup(m_head, poff + sizeof(struct udphdr));
1930			if (m_head == NULL) {
1931				*m_headp = NULL;
1932				return (ENOBUFS);
1933			}
1934			ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1935		}
1936		*m_headp = m_head;
1937	}
1938
1939	/*
1940	 * Map the packet for DMA
1941	 *
1942	 * Capture the first descriptor index,
1943	 * this descriptor will have the index
1944	 * of the EOP which is the only one that
1945	 * now gets a DONE bit writeback.
1946	 */
1947	first = txr->next_avail_desc;
1948	tx_buffer = &txr->tx_buffers[first];
1949	tx_buffer_mapped = tx_buffer;
1950	map = tx_buffer->map;
1951
1952	error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
1953	    *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
1954
1955	/*
1956	 * There are two types of errors we can (try) to handle:
1957	 * - EFBIG means the mbuf chain was too long and bus_dma ran
1958	 *   out of segments.  Defragment the mbuf chain and try again.
1959	 * - ENOMEM means bus_dma could not obtain enough bounce buffers
1960	 *   at this point in time.  Defer sending and try again later.
1961	 * All other errors, in particular EINVAL, are fatal and prevent the
1962	 * mbuf chain from ever going through.  Drop it and report error.
1963	 */
1964	if (error == EFBIG && remap) {
1965		struct mbuf *m;
1966
1967		m = m_defrag(*m_headp, M_NOWAIT);
1968		if (m == NULL) {
1969			adapter->mbuf_alloc_failed++;
1970			m_freem(*m_headp);
1971			*m_headp = NULL;
1972			return (ENOBUFS);
1973		}
1974		*m_headp = m;
1975
1976		/* Try it again, but only once */
1977		remap = 0;
1978		goto retry;
1979	} else if (error == ENOMEM) {
1980		adapter->no_tx_dma_setup++;
1981		return (error);
1982	} else if (error != 0) {
1983		adapter->no_tx_dma_setup++;
1984		m_freem(*m_headp);
1985		*m_headp = NULL;
1986		return (error);
1987	}
1988
1989	/*
1990	 * TSO Hardware workaround, if this packet is not
1991	 * TSO, and is only a single descriptor long, and
1992	 * it follows a TSO burst, then we need to add a
1993	 * sentinel descriptor to prevent premature writeback.
1994	 */
1995	if ((do_tso == 0) && (txr->tx_tso == TRUE)) {
1996		if (nsegs == 1)
1997			tso_desc = TRUE;
1998		txr->tx_tso = FALSE;
1999	}
2000
2001        if (nsegs > (txr->tx_avail - 2)) {
2002                txr->no_desc_avail++;
2003		bus_dmamap_unload(txr->txtag, map);
2004		return (ENOBUFS);
2005        }
2006	m_head = *m_headp;
2007
2008	/* Do hardware assists */
2009	if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
2010		em_tso_setup(txr, m_head, ip_off, ip, tp,
2011		    &txd_upper, &txd_lower);
2012		/* we need to make a final sentinel transmit desc */
2013		tso_desc = TRUE;
2014	} else if (m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD)
2015		em_transmit_checksum_setup(txr, m_head,
2016		    ip_off, ip, &txd_upper, &txd_lower);
2017
2018	if (m_head->m_flags & M_VLANTAG) {
2019		/* Set the vlan id. */
2020		txd_upper |=
2021		    (htole16(m_head->m_pkthdr.ether_vtag) << 16);
2022                /* Tell hardware to add tag */
2023                txd_lower |= htole32(E1000_TXD_CMD_VLE);
2024        }
2025
2026	i = txr->next_avail_desc;
2027
2028	/* Set up our transmit descriptors */
2029	for (j = 0; j < nsegs; j++) {
2030		bus_size_t seg_len;
2031		bus_addr_t seg_addr;
2032
2033		tx_buffer = &txr->tx_buffers[i];
2034		ctxd = &txr->tx_base[i];
2035		seg_addr = segs[j].ds_addr;
2036		seg_len  = segs[j].ds_len;
2037		/*
2038		** TSO Workaround:
2039		** If this is the last descriptor, we want to
2040		** split it so we have a small final sentinel
2041		*/
2042		if (tso_desc && (j == (nsegs -1)) && (seg_len > 8)) {
2043			seg_len -= 4;
2044			ctxd->buffer_addr = htole64(seg_addr);
2045			ctxd->lower.data = htole32(
2046			adapter->txd_cmd | txd_lower | seg_len);
2047			ctxd->upper.data =
2048			    htole32(txd_upper);
2049			if (++i == adapter->num_tx_desc)
2050				i = 0;
2051			/* Now make the sentinel */
2052			++txd_used; /* using an extra txd */
2053			ctxd = &txr->tx_base[i];
2054			tx_buffer = &txr->tx_buffers[i];
2055			ctxd->buffer_addr =
2056			    htole64(seg_addr + seg_len);
2057			ctxd->lower.data = htole32(
2058			adapter->txd_cmd | txd_lower | 4);
2059			ctxd->upper.data =
2060			    htole32(txd_upper);
2061			last = i;
2062			if (++i == adapter->num_tx_desc)
2063				i = 0;
2064		} else {
2065			ctxd->buffer_addr = htole64(seg_addr);
2066			ctxd->lower.data = htole32(
2067			adapter->txd_cmd | txd_lower | seg_len);
2068			ctxd->upper.data =
2069			    htole32(txd_upper);
2070			last = i;
2071			if (++i == adapter->num_tx_desc)
2072				i = 0;
2073		}
2074		tx_buffer->m_head = NULL;
2075		tx_buffer->next_eop = -1;
2076	}
2077
2078	txr->next_avail_desc = i;
2079	txr->tx_avail -= nsegs;
2080	if (tso_desc) /* TSO used an extra for sentinel */
2081		txr->tx_avail -= txd_used;
2082
2083        tx_buffer->m_head = m_head;
2084	/*
2085	** Here we swap the map so the last descriptor,
2086	** which gets the completion interrupt has the
2087	** real map, and the first descriptor gets the
2088	** unused map from this descriptor.
2089	*/
2090	tx_buffer_mapped->map = tx_buffer->map;
2091	tx_buffer->map = map;
2092        bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);
2093
2094        /*
2095         * Last Descriptor of Packet
2096	 * needs End Of Packet (EOP)
2097	 * and Report Status (RS)
2098         */
2099        ctxd->lower.data |=
2100	    htole32(E1000_TXD_CMD_EOP | E1000_TXD_CMD_RS);
2101	/*
2102	 * Keep track in the first buffer which
2103	 * descriptor will be written back
2104	 */
2105	tx_buffer = &txr->tx_buffers[first];
2106	tx_buffer->next_eop = last;
2107	/* Update the watchdog time early and often */
2108	txr->watchdog_time = ticks;
2109
2110	/*
2111	 * Advance the Transmit Descriptor Tail (TDT), this tells the E1000
2112	 * that this frame is available to transmit.
2113	 */
2114	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
2115	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
2116	E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), i);
2117
2118	return (0);
2119}
2120
2121static void
2122em_set_promisc(struct adapter *adapter)
2123{
2124	struct ifnet	*ifp = adapter->ifp;
2125	u32		reg_rctl;
2126
2127	reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2128
2129	if (ifp->if_flags & IFF_PROMISC) {
2130		reg_rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
2131		/* Turn this on if you want to see bad packets */
2132		if (em_debug_sbp)
2133			reg_rctl |= E1000_RCTL_SBP;
2134		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2135	} else if (ifp->if_flags & IFF_ALLMULTI) {
2136		reg_rctl |= E1000_RCTL_MPE;
2137		reg_rctl &= ~E1000_RCTL_UPE;
2138		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2139	}
2140}
2141
2142static void
2143em_disable_promisc(struct adapter *adapter)
2144{
2145	struct ifnet	*ifp = adapter->ifp;
2146	u32		reg_rctl;
2147	int		mcnt = 0;
2148
2149	reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2150	reg_rctl &=  (~E1000_RCTL_UPE);
2151	if (ifp->if_flags & IFF_ALLMULTI)
2152		mcnt = MAX_NUM_MULTICAST_ADDRESSES;
2153	else {
2154		struct  ifmultiaddr *ifma;
2155#if __FreeBSD_version < 800000
2156		IF_ADDR_LOCK(ifp);
2157#else
2158		if_maddr_rlock(ifp);
2159#endif
2160		TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
2161			if (ifma->ifma_addr->sa_family != AF_LINK)
2162				continue;
2163			if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
2164				break;
2165			mcnt++;
2166		}
2167#if __FreeBSD_version < 800000
2168		IF_ADDR_UNLOCK(ifp);
2169#else
2170		if_maddr_runlock(ifp);
2171#endif
2172	}
2173	/* Don't disable if in MAX groups */
2174	if (mcnt < MAX_NUM_MULTICAST_ADDRESSES)
2175		reg_rctl &=  (~E1000_RCTL_MPE);
2176	reg_rctl &=  (~E1000_RCTL_SBP);
2177	E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2178}
2179
2180
2181/*********************************************************************
2182 *  Multicast Update
2183 *
2184 *  This routine is called whenever multicast address list is updated.
2185 *
2186 **********************************************************************/
2187
2188static void
2189em_set_multi(struct adapter *adapter)
2190{
2191	struct ifnet	*ifp = adapter->ifp;
2192	struct ifmultiaddr *ifma;
2193	u32 reg_rctl = 0;
2194	u8  *mta; /* Multicast array memory */
2195	int mcnt = 0;
2196
2197	IOCTL_DEBUGOUT("em_set_multi: begin");
2198
2199	mta = adapter->mta;
2200	bzero(mta, sizeof(u8) * ETH_ADDR_LEN * MAX_NUM_MULTICAST_ADDRESSES);
2201
2202	if (adapter->hw.mac.type == e1000_82542 &&
2203	    adapter->hw.revision_id == E1000_REVISION_2) {
2204		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2205		if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
2206			e1000_pci_clear_mwi(&adapter->hw);
2207		reg_rctl |= E1000_RCTL_RST;
2208		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2209		msec_delay(5);
2210	}
2211
2212#if __FreeBSD_version < 800000
2213	IF_ADDR_LOCK(ifp);
2214#else
2215	if_maddr_rlock(ifp);
2216#endif
2217	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
2218		if (ifma->ifma_addr->sa_family != AF_LINK)
2219			continue;
2220
2221		if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
2222			break;
2223
2224		bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
2225		    &mta[mcnt * ETH_ADDR_LEN], ETH_ADDR_LEN);
2226		mcnt++;
2227	}
2228#if __FreeBSD_version < 800000
2229	IF_ADDR_UNLOCK(ifp);
2230#else
2231	if_maddr_runlock(ifp);
2232#endif
2233	if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) {
2234		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2235		reg_rctl |= E1000_RCTL_MPE;
2236		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2237	} else
2238		e1000_update_mc_addr_list(&adapter->hw, mta, mcnt);
2239
2240	if (adapter->hw.mac.type == e1000_82542 &&
2241	    adapter->hw.revision_id == E1000_REVISION_2) {
2242		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2243		reg_rctl &= ~E1000_RCTL_RST;
2244		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2245		msec_delay(5);
2246		if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
2247			e1000_pci_set_mwi(&adapter->hw);
2248	}
2249}
2250
2251
2252/*********************************************************************
2253 *  Timer routine
2254 *
2255 *  This routine checks for link status and updates statistics.
2256 *
2257 **********************************************************************/
2258
2259static void
2260em_local_timer(void *arg)
2261{
2262	struct adapter	*adapter = arg;
2263	struct ifnet	*ifp = adapter->ifp;
2264	struct tx_ring	*txr = adapter->tx_rings;
2265	struct rx_ring	*rxr = adapter->rx_rings;
2266	u32		trigger;
2267
2268	EM_CORE_LOCK_ASSERT(adapter);
2269
2270	em_update_link_status(adapter);
2271	em_update_stats_counters(adapter);
2272
2273	/* Reset LAA into RAR[0] on 82571 */
2274	if ((adapter->hw.mac.type == e1000_82571) &&
2275	    e1000_get_laa_state_82571(&adapter->hw))
2276		e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);
2277
2278	/* Mask to use in the irq trigger */
2279	if (adapter->msix_mem)
2280		trigger = rxr->ims;
2281	else
2282		trigger = E1000_ICS_RXDMT0;
2283
2284	/*
2285	** Check on the state of the TX queue(s), this
2286	** can be done without the lock because its RO
2287	** and the HUNG state will be static if set.
2288	*/
2289	for (int i = 0; i < adapter->num_queues; i++, txr++) {
2290		if ((txr->queue_status == EM_QUEUE_HUNG) &&
2291		    (adapter->pause_frames == 0))
2292			goto hung;
2293		/* Schedule a TX tasklet if needed */
2294		if (txr->tx_avail <= EM_MAX_SCATTER)
2295			taskqueue_enqueue(txr->tq, &txr->tx_task);
2296	}
2297
2298	adapter->pause_frames = 0;
2299	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
2300#ifndef DEVICE_POLLING
2301	/* Trigger an RX interrupt to guarantee mbuf refresh */
2302	E1000_WRITE_REG(&adapter->hw, E1000_ICS, trigger);
2303#endif
2304	return;
2305hung:
2306	/* Looks like we're hung */
2307	device_printf(adapter->dev, "Watchdog timeout -- resetting\n");
2308	device_printf(adapter->dev,
2309	    "Queue(%d) tdh = %d, hw tdt = %d\n", txr->me,
2310	    E1000_READ_REG(&adapter->hw, E1000_TDH(txr->me)),
2311	    E1000_READ_REG(&adapter->hw, E1000_TDT(txr->me)));
2312	device_printf(adapter->dev,"TX(%d) desc avail = %d,"
2313	    "Next TX to Clean = %d\n",
2314	    txr->me, txr->tx_avail, txr->next_to_clean);
2315	ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2316	adapter->watchdog_events++;
2317	adapter->pause_frames = 0;
2318	em_init_locked(adapter);
2319}
2320
2321
2322static void
2323em_update_link_status(struct adapter *adapter)
2324{
2325	struct e1000_hw *hw = &adapter->hw;
2326	struct ifnet *ifp = adapter->ifp;
2327	device_t dev = adapter->dev;
2328	struct tx_ring *txr = adapter->tx_rings;
2329	u32 link_check = 0;
2330
2331	/* Get the cached link value or read phy for real */
2332	switch (hw->phy.media_type) {
2333	case e1000_media_type_copper:
2334		if (hw->mac.get_link_status) {
2335			/* Do the work to read phy */
2336			e1000_check_for_link(hw);
2337			link_check = !hw->mac.get_link_status;
2338			if (link_check) /* ESB2 fix */
2339				e1000_cfg_on_link_up(hw);
2340		} else
2341			link_check = TRUE;
2342		break;
2343	case e1000_media_type_fiber:
2344		e1000_check_for_link(hw);
2345		link_check = (E1000_READ_REG(hw, E1000_STATUS) &
2346                                 E1000_STATUS_LU);
2347		break;
2348	case e1000_media_type_internal_serdes:
2349		e1000_check_for_link(hw);
2350		link_check = adapter->hw.mac.serdes_has_link;
2351		break;
2352	default:
2353	case e1000_media_type_unknown:
2354		break;
2355	}
2356
2357	/* Now check for a transition */
2358	if (link_check && (adapter->link_active == 0)) {
2359		e1000_get_speed_and_duplex(hw, &adapter->link_speed,
2360		    &adapter->link_duplex);
2361		/* Check if we must disable SPEED_MODE bit on PCI-E */
2362		if ((adapter->link_speed != SPEED_1000) &&
2363		    ((hw->mac.type == e1000_82571) ||
2364		    (hw->mac.type == e1000_82572))) {
2365			int tarc0;
2366			tarc0 = E1000_READ_REG(hw, E1000_TARC(0));
2367			tarc0 &= ~SPEED_MODE_BIT;
2368			E1000_WRITE_REG(hw, E1000_TARC(0), tarc0);
2369		}
2370		if (bootverbose)
2371			device_printf(dev, "Link is up %d Mbps %s\n",
2372			    adapter->link_speed,
2373			    ((adapter->link_duplex == FULL_DUPLEX) ?
2374			    "Full Duplex" : "Half Duplex"));
2375		adapter->link_active = 1;
2376		adapter->smartspeed = 0;
2377		ifp->if_baudrate = adapter->link_speed * 1000000;
2378		if_link_state_change(ifp, LINK_STATE_UP);
2379	} else if (!link_check && (adapter->link_active == 1)) {
2380		ifp->if_baudrate = adapter->link_speed = 0;
2381		adapter->link_duplex = 0;
2382		if (bootverbose)
2383			device_printf(dev, "Link is Down\n");
2384		adapter->link_active = 0;
2385		/* Link down, disable watchdog */
2386		for (int i = 0; i < adapter->num_queues; i++, txr++)
2387			txr->queue_status = EM_QUEUE_IDLE;
2388		if_link_state_change(ifp, LINK_STATE_DOWN);
2389	}
2390}
2391
2392/*********************************************************************
2393 *
2394 *  This routine disables all traffic on the adapter by issuing a
2395 *  global reset on the MAC and deallocates TX/RX buffers.
2396 *
2397 *  This routine should always be called with BOTH the CORE
2398 *  and TX locks.
2399 **********************************************************************/
2400
2401static void
2402em_stop(void *arg)
2403{
2404	struct adapter	*adapter = arg;
2405	struct ifnet	*ifp = adapter->ifp;
2406	struct tx_ring	*txr = adapter->tx_rings;
2407
2408	EM_CORE_LOCK_ASSERT(adapter);
2409
2410	INIT_DEBUGOUT("em_stop: begin");
2411
2412	em_disable_intr(adapter);
2413	callout_stop(&adapter->timer);
2414
2415	/* Tell the stack that the interface is no longer active */
2416	ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2417	ifp->if_drv_flags |= IFF_DRV_OACTIVE;
2418
2419        /* Unarm watchdog timer. */
2420	for (int i = 0; i < adapter->num_queues; i++, txr++) {
2421		EM_TX_LOCK(txr);
2422		txr->queue_status = EM_QUEUE_IDLE;
2423		EM_TX_UNLOCK(txr);
2424	}
2425
2426	e1000_reset_hw(&adapter->hw);
2427	E1000_WRITE_REG(&adapter->hw, E1000_WUC, 0);
2428
2429	e1000_led_off(&adapter->hw);
2430	e1000_cleanup_led(&adapter->hw);
2431}
2432
2433
2434/*********************************************************************
2435 *
2436 *  Determine hardware revision.
2437 *
2438 **********************************************************************/
2439static void
2440em_identify_hardware(struct adapter *adapter)
2441{
2442	device_t dev = adapter->dev;
2443
2444	/* Make sure our PCI config space has the necessary stuff set */
2445	pci_enable_busmaster(dev);
2446	adapter->hw.bus.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2);
2447
2448	/* Save off the information about this board */
2449	adapter->hw.vendor_id = pci_get_vendor(dev);
2450	adapter->hw.device_id = pci_get_device(dev);
2451	adapter->hw.revision_id = pci_read_config(dev, PCIR_REVID, 1);
2452	adapter->hw.subsystem_vendor_id =
2453	    pci_read_config(dev, PCIR_SUBVEND_0, 2);
2454	adapter->hw.subsystem_device_id =
2455	    pci_read_config(dev, PCIR_SUBDEV_0, 2);
2456
2457	/* Do Shared Code Init and Setup */
2458	if (e1000_set_mac_type(&adapter->hw)) {
2459		device_printf(dev, "Setup init failure\n");
2460		return;
2461	}
2462}
2463
2464static int
2465em_allocate_pci_resources(struct adapter *adapter)
2466{
2467	device_t	dev = adapter->dev;
2468	int		rid;
2469
2470	rid = PCIR_BAR(0);
2471	adapter->memory = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
2472	    &rid, RF_ACTIVE);
2473	if (adapter->memory == NULL) {
2474		device_printf(dev, "Unable to allocate bus resource: memory\n");
2475		return (ENXIO);
2476	}
2477	adapter->osdep.mem_bus_space_tag =
2478	    rman_get_bustag(adapter->memory);
2479	adapter->osdep.mem_bus_space_handle =
2480	    rman_get_bushandle(adapter->memory);
2481	adapter->hw.hw_addr = (u8 *)&adapter->osdep.mem_bus_space_handle;
2482
2483	/* Default to a single queue */
2484	adapter->num_queues = 1;
2485
2486	/*
2487	 * Setup MSI/X or MSI if PCI Express
2488	 */
2489	adapter->msix = em_setup_msix(adapter);
2490
2491	adapter->hw.back = &adapter->osdep;
2492
2493	return (0);
2494}
2495
2496/*********************************************************************
2497 *
2498 *  Setup the Legacy or MSI Interrupt handler
2499 *
2500 **********************************************************************/
2501int
2502em_allocate_legacy(struct adapter *adapter)
2503{
2504	device_t dev = adapter->dev;
2505	struct tx_ring	*txr = adapter->tx_rings;
2506	int error, rid = 0;
2507
2508	/* Manually turn off all interrupts */
2509	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2510
2511	if (adapter->msix == 1) /* using MSI */
2512		rid = 1;
2513	/* We allocate a single interrupt resource */
2514	adapter->res = bus_alloc_resource_any(dev,
2515	    SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2516	if (adapter->res == NULL) {
2517		device_printf(dev, "Unable to allocate bus resource: "
2518		    "interrupt\n");
2519		return (ENXIO);
2520	}
2521
2522	/*
2523	 * Allocate a fast interrupt and the associated
2524	 * deferred processing contexts.
2525	 */
2526	TASK_INIT(&adapter->que_task, 0, em_handle_que, adapter);
2527	adapter->tq = taskqueue_create_fast("em_taskq", M_NOWAIT,
2528	    taskqueue_thread_enqueue, &adapter->tq);
2529	taskqueue_start_threads(&adapter->tq, 1, PI_NET, "%s que",
2530	    device_get_nameunit(adapter->dev));
2531	/* Use a TX only tasklet for local timer */
2532	TASK_INIT(&txr->tx_task, 0, em_handle_tx, txr);
2533	txr->tq = taskqueue_create_fast("em_txq", M_NOWAIT,
2534	    taskqueue_thread_enqueue, &txr->tq);
2535	taskqueue_start_threads(&txr->tq, 1, PI_NET, "%s txq",
2536	    device_get_nameunit(adapter->dev));
2537	TASK_INIT(&adapter->link_task, 0, em_handle_link, adapter);
2538	if ((error = bus_setup_intr(dev, adapter->res, INTR_TYPE_NET,
2539	    em_irq_fast, NULL, adapter, &adapter->tag)) != 0) {
2540		device_printf(dev, "Failed to register fast interrupt "
2541			    "handler: %d\n", error);
2542		taskqueue_free(adapter->tq);
2543		adapter->tq = NULL;
2544		return (error);
2545	}
2546
2547	return (0);
2548}
2549
2550/*********************************************************************
2551 *
2552 *  Setup the MSIX Interrupt handlers
2553 *   This is not really Multiqueue, rather
2554 *   its just seperate interrupt vectors
2555 *   for TX, RX, and Link.
2556 *
2557 **********************************************************************/
2558int
2559em_allocate_msix(struct adapter *adapter)
2560{
2561	device_t	dev = adapter->dev;
2562	struct		tx_ring *txr = adapter->tx_rings;
2563	struct		rx_ring *rxr = adapter->rx_rings;
2564	int		error, rid, vector = 0;
2565
2566
2567	/* Make sure all interrupts are disabled */
2568	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2569
2570	/* First set up ring resources */
2571	for (int i = 0; i < adapter->num_queues; i++, txr++, rxr++) {
2572
2573		/* RX ring */
2574		rid = vector + 1;
2575
2576		rxr->res = bus_alloc_resource_any(dev,
2577		    SYS_RES_IRQ, &rid, RF_ACTIVE);
2578		if (rxr->res == NULL) {
2579			device_printf(dev,
2580			    "Unable to allocate bus resource: "
2581			    "RX MSIX Interrupt %d\n", i);
2582			return (ENXIO);
2583		}
2584		if ((error = bus_setup_intr(dev, rxr->res,
2585		    INTR_TYPE_NET | INTR_MPSAFE, NULL, em_msix_rx,
2586		    rxr, &rxr->tag)) != 0) {
2587			device_printf(dev, "Failed to register RX handler");
2588			return (error);
2589		}
2590#if __FreeBSD_version >= 800504
2591		bus_describe_intr(dev, rxr->res, rxr->tag, "rx %d", i);
2592#endif
2593		rxr->msix = vector++; /* NOTE increment vector for TX */
2594		TASK_INIT(&rxr->rx_task, 0, em_handle_rx, rxr);
2595		rxr->tq = taskqueue_create_fast("em_rxq", M_NOWAIT,
2596		    taskqueue_thread_enqueue, &rxr->tq);
2597		taskqueue_start_threads(&rxr->tq, 1, PI_NET, "%s rxq",
2598		    device_get_nameunit(adapter->dev));
2599		/*
2600		** Set the bit to enable interrupt
2601		** in E1000_IMS -- bits 20 and 21
2602		** are for RX0 and RX1, note this has
2603		** NOTHING to do with the MSIX vector
2604		*/
2605		rxr->ims = 1 << (20 + i);
2606		adapter->ivars |= (8 | rxr->msix) << (i * 4);
2607
2608		/* TX ring */
2609		rid = vector + 1;
2610		txr->res = bus_alloc_resource_any(dev,
2611		    SYS_RES_IRQ, &rid, RF_ACTIVE);
2612		if (txr->res == NULL) {
2613			device_printf(dev,
2614			    "Unable to allocate bus resource: "
2615			    "TX MSIX Interrupt %d\n", i);
2616			return (ENXIO);
2617		}
2618		if ((error = bus_setup_intr(dev, txr->res,
2619		    INTR_TYPE_NET | INTR_MPSAFE, NULL, em_msix_tx,
2620		    txr, &txr->tag)) != 0) {
2621			device_printf(dev, "Failed to register TX handler");
2622			return (error);
2623		}
2624#if __FreeBSD_version >= 800504
2625		bus_describe_intr(dev, txr->res, txr->tag, "tx %d", i);
2626#endif
2627		txr->msix = vector++; /* Increment vector for next pass */
2628		TASK_INIT(&txr->tx_task, 0, em_handle_tx, txr);
2629		txr->tq = taskqueue_create_fast("em_txq", M_NOWAIT,
2630		    taskqueue_thread_enqueue, &txr->tq);
2631		taskqueue_start_threads(&txr->tq, 1, PI_NET, "%s txq",
2632		    device_get_nameunit(adapter->dev));
2633		/*
2634		** Set the bit to enable interrupt
2635		** in E1000_IMS -- bits 22 and 23
2636		** are for TX0 and TX1, note this has
2637		** NOTHING to do with the MSIX vector
2638		*/
2639		txr->ims = 1 << (22 + i);
2640		adapter->ivars |= (8 | txr->msix) << (8 + (i * 4));
2641	}
2642
2643	/* Link interrupt */
2644	++rid;
2645	adapter->res = bus_alloc_resource_any(dev,
2646	    SYS_RES_IRQ, &rid, RF_ACTIVE);
2647	if (!adapter->res) {
2648		device_printf(dev,"Unable to allocate "
2649		    "bus resource: Link interrupt [%d]\n", rid);
2650		return (ENXIO);
2651        }
2652	/* Set the link handler function */
2653	error = bus_setup_intr(dev, adapter->res,
2654	    INTR_TYPE_NET | INTR_MPSAFE, NULL,
2655	    em_msix_link, adapter, &adapter->tag);
2656	if (error) {
2657		adapter->res = NULL;
2658		device_printf(dev, "Failed to register LINK handler");
2659		return (error);
2660	}
2661#if __FreeBSD_version >= 800504
2662		bus_describe_intr(dev, adapter->res, adapter->tag, "link");
2663#endif
2664	adapter->linkvec = vector;
2665	adapter->ivars |=  (8 | vector) << 16;
2666	adapter->ivars |= 0x80000000;
2667
2668	return (0);
2669}
2670
2671
2672static void
2673em_free_pci_resources(struct adapter *adapter)
2674{
2675	device_t	dev = adapter->dev;
2676	struct tx_ring	*txr;
2677	struct rx_ring	*rxr;
2678	int		rid;
2679
2680
2681	/*
2682	** Release all the queue interrupt resources:
2683	*/
2684	for (int i = 0; i < adapter->num_queues; i++) {
2685		txr = &adapter->tx_rings[i];
2686		rxr = &adapter->rx_rings[i];
2687		/* an early abort? */
2688		if ((txr == NULL) || (rxr == NULL))
2689			break;
2690		rid = txr->msix +1;
2691		if (txr->tag != NULL) {
2692			bus_teardown_intr(dev, txr->res, txr->tag);
2693			txr->tag = NULL;
2694		}
2695		if (txr->res != NULL)
2696			bus_release_resource(dev, SYS_RES_IRQ,
2697			    rid, txr->res);
2698		rid = rxr->msix +1;
2699		if (rxr->tag != NULL) {
2700			bus_teardown_intr(dev, rxr->res, rxr->tag);
2701			rxr->tag = NULL;
2702		}
2703		if (rxr->res != NULL)
2704			bus_release_resource(dev, SYS_RES_IRQ,
2705			    rid, rxr->res);
2706	}
2707
2708        if (adapter->linkvec) /* we are doing MSIX */
2709                rid = adapter->linkvec + 1;
2710        else
2711                (adapter->msix != 0) ? (rid = 1):(rid = 0);
2712
2713	if (adapter->tag != NULL) {
2714		bus_teardown_intr(dev, adapter->res, adapter->tag);
2715		adapter->tag = NULL;
2716	}
2717
2718	if (adapter->res != NULL)
2719		bus_release_resource(dev, SYS_RES_IRQ, rid, adapter->res);
2720
2721
2722	if (adapter->msix)
2723		pci_release_msi(dev);
2724
2725	if (adapter->msix_mem != NULL)
2726		bus_release_resource(dev, SYS_RES_MEMORY,
2727		    PCIR_BAR(EM_MSIX_BAR), adapter->msix_mem);
2728
2729	if (adapter->memory != NULL)
2730		bus_release_resource(dev, SYS_RES_MEMORY,
2731		    PCIR_BAR(0), adapter->memory);
2732
2733	if (adapter->flash != NULL)
2734		bus_release_resource(dev, SYS_RES_MEMORY,
2735		    EM_FLASH, adapter->flash);
2736}
2737
2738/*
2739 * Setup MSI or MSI/X
2740 */
2741static int
2742em_setup_msix(struct adapter *adapter)
2743{
2744	device_t dev = adapter->dev;
2745	int val;
2746
2747	/*
2748	** Setup MSI/X for Hartwell: tests have shown
2749	** use of two queues to be unstable, and to
2750	** provide no great gain anyway, so we simply
2751	** seperate the interrupts and use a single queue.
2752	*/
2753	if ((adapter->hw.mac.type == e1000_82574) &&
2754	    (em_enable_msix == TRUE)) {
2755		/* Map the MSIX BAR */
2756		int rid = PCIR_BAR(EM_MSIX_BAR);
2757		adapter->msix_mem = bus_alloc_resource_any(dev,
2758		    SYS_RES_MEMORY, &rid, RF_ACTIVE);
2759       		if (adapter->msix_mem == NULL) {
2760			/* May not be enabled */
2761               		device_printf(adapter->dev,
2762			    "Unable to map MSIX table \n");
2763			goto msi;
2764       		}
2765		val = pci_msix_count(dev);
2766		/* We only need/want 3 vectors */
2767		if (val >= 3)
2768			val = 3;
2769		else {
2770               		device_printf(adapter->dev,
2771			    "MSIX: insufficient vectors, using MSI\n");
2772			goto msi;
2773		}
2774
2775		if ((pci_alloc_msix(dev, &val) == 0) && (val == 3)) {
2776			device_printf(adapter->dev,
2777			    "Using MSIX interrupts "
2778			    "with %d vectors\n", val);
2779			return (val);
2780		}
2781
2782		/*
2783		** If MSIX alloc failed or provided us with
2784		** less than needed, free and fall through to MSI
2785		*/
2786		pci_release_msi(dev);
2787	}
2788msi:
2789	if (adapter->msix_mem != NULL) {
2790		bus_release_resource(dev, SYS_RES_MEMORY,
2791		    PCIR_BAR(EM_MSIX_BAR), adapter->msix_mem);
2792		adapter->msix_mem = NULL;
2793	}
2794       	val = 1;
2795       	if (pci_alloc_msi(dev, &val) == 0) {
2796               	device_printf(adapter->dev,"Using an MSI interrupt\n");
2797		return (val);
2798	}
2799	/* Should only happen due to manual configuration */
2800	device_printf(adapter->dev,"No MSI/MSIX using a Legacy IRQ\n");
2801	return (0);
2802}
2803
2804
2805/*********************************************************************
2806 *
2807 *  Initialize the hardware to a configuration
2808 *  as specified by the adapter structure.
2809 *
2810 **********************************************************************/
2811static void
2812em_reset(struct adapter *adapter)
2813{
2814	device_t	dev = adapter->dev;
2815	struct ifnet	*ifp = adapter->ifp;
2816	struct e1000_hw	*hw = &adapter->hw;
2817	u16		rx_buffer_size;
2818	u32		pba;
2819
2820	INIT_DEBUGOUT("em_reset: begin");
2821
2822	/* Set up smart power down as default off on newer adapters. */
2823	if (!em_smart_pwr_down && (hw->mac.type == e1000_82571 ||
2824	    hw->mac.type == e1000_82572)) {
2825		u16 phy_tmp = 0;
2826
2827		/* Speed up time to link by disabling smart power down. */
2828		e1000_read_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, &phy_tmp);
2829		phy_tmp &= ~IGP02E1000_PM_SPD;
2830		e1000_write_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, phy_tmp);
2831	}
2832
2833	/*
2834	 * Packet Buffer Allocation (PBA)
2835	 * Writing PBA sets the receive portion of the buffer
2836	 * the remainder is used for the transmit buffer.
2837	 */
2838	switch (hw->mac.type) {
2839	/* Total Packet Buffer on these is 48K */
2840	case e1000_82571:
2841	case e1000_82572:
2842	case e1000_80003es2lan:
2843			pba = E1000_PBA_32K; /* 32K for Rx, 16K for Tx */
2844		break;
2845	case e1000_82573: /* 82573: Total Packet Buffer is 32K */
2846			pba = E1000_PBA_12K; /* 12K for Rx, 20K for Tx */
2847		break;
2848	case e1000_82574:
2849	case e1000_82583:
2850			pba = E1000_PBA_20K; /* 20K for Rx, 20K for Tx */
2851		break;
2852	case e1000_ich8lan:
2853		pba = E1000_PBA_8K;
2854		break;
2855	case e1000_ich9lan:
2856	case e1000_ich10lan:
2857		/* Boost Receive side for jumbo frames */
2858		if (adapter->hw.mac.max_frame_size > 4096)
2859			pba = E1000_PBA_14K;
2860		else
2861			pba = E1000_PBA_10K;
2862		break;
2863	case e1000_pchlan:
2864	case e1000_pch2lan:
2865	case e1000_pch_lpt:
2866		pba = E1000_PBA_26K;
2867		break;
2868	default:
2869		if (adapter->hw.mac.max_frame_size > 8192)
2870			pba = E1000_PBA_40K; /* 40K for Rx, 24K for Tx */
2871		else
2872			pba = E1000_PBA_48K; /* 48K for Rx, 16K for Tx */
2873	}
2874	E1000_WRITE_REG(&adapter->hw, E1000_PBA, pba);
2875
2876	/*
2877	 * These parameters control the automatic generation (Tx) and
2878	 * response (Rx) to Ethernet PAUSE frames.
2879	 * - High water mark should allow for at least two frames to be
2880	 *   received after sending an XOFF.
2881	 * - Low water mark works best when it is very near the high water mark.
2882	 *   This allows the receiver to restart by sending XON when it has
2883	 *   drained a bit. Here we use an arbitary value of 1500 which will
2884	 *   restart after one full frame is pulled from the buffer. There
2885	 *   could be several smaller frames in the buffer and if so they will
2886	 *   not trigger the XON until their total number reduces the buffer
2887	 *   by 1500.
2888	 * - The pause time is fairly large at 1000 x 512ns = 512 usec.
2889	 */
2890	rx_buffer_size = ((E1000_READ_REG(hw, E1000_PBA) & 0xffff) << 10 );
2891	hw->fc.high_water = rx_buffer_size -
2892	    roundup2(adapter->hw.mac.max_frame_size, 1024);
2893	hw->fc.low_water = hw->fc.high_water - 1500;
2894
2895	if (adapter->fc) /* locally set flow control value? */
2896		hw->fc.requested_mode = adapter->fc;
2897	else
2898		hw->fc.requested_mode = e1000_fc_full;
2899
2900	if (hw->mac.type == e1000_80003es2lan)
2901		hw->fc.pause_time = 0xFFFF;
2902	else
2903		hw->fc.pause_time = EM_FC_PAUSE_TIME;
2904
2905	hw->fc.send_xon = TRUE;
2906
2907	/* Device specific overrides/settings */
2908	switch (hw->mac.type) {
2909	case e1000_pchlan:
2910		/* Workaround: no TX flow ctrl for PCH */
2911                hw->fc.requested_mode = e1000_fc_rx_pause;
2912		hw->fc.pause_time = 0xFFFF; /* override */
2913		if (ifp->if_mtu > ETHERMTU) {
2914			hw->fc.high_water = 0x3500;
2915			hw->fc.low_water = 0x1500;
2916		} else {
2917			hw->fc.high_water = 0x5000;
2918			hw->fc.low_water = 0x3000;
2919		}
2920		hw->fc.refresh_time = 0x1000;
2921		break;
2922	case e1000_pch2lan:
2923	case e1000_pch_lpt:
2924		hw->fc.high_water = 0x5C20;
2925		hw->fc.low_water = 0x5048;
2926		hw->fc.pause_time = 0x0650;
2927		hw->fc.refresh_time = 0x0400;
2928		/* Jumbos need adjusted PBA */
2929		if (ifp->if_mtu > ETHERMTU)
2930			E1000_WRITE_REG(hw, E1000_PBA, 12);
2931		else
2932			E1000_WRITE_REG(hw, E1000_PBA, 26);
2933		break;
2934        case e1000_ich9lan:
2935        case e1000_ich10lan:
2936		if (ifp->if_mtu > ETHERMTU) {
2937			hw->fc.high_water = 0x2800;
2938			hw->fc.low_water = hw->fc.high_water - 8;
2939			break;
2940		}
2941		/* else fall thru */
2942	default:
2943		if (hw->mac.type == e1000_80003es2lan)
2944			hw->fc.pause_time = 0xFFFF;
2945		break;
2946	}
2947
2948	/* Issue a global reset */
2949	e1000_reset_hw(hw);
2950	E1000_WRITE_REG(hw, E1000_WUC, 0);
2951	em_disable_aspm(adapter);
2952	/* and a re-init */
2953	if (e1000_init_hw(hw) < 0) {
2954		device_printf(dev, "Hardware Initialization Failed\n");
2955		return;
2956	}
2957
2958	E1000_WRITE_REG(hw, E1000_VET, ETHERTYPE_VLAN);
2959	e1000_get_phy_info(hw);
2960	e1000_check_for_link(hw);
2961	return;
2962}
2963
2964/*********************************************************************
2965 *
2966 *  Setup networking device structure and register an interface.
2967 *
2968 **********************************************************************/
2969static int
2970em_setup_interface(device_t dev, struct adapter *adapter)
2971{
2972	struct ifnet   *ifp;
2973
2974	INIT_DEBUGOUT("em_setup_interface: begin");
2975
2976	ifp = adapter->ifp = if_alloc(IFT_ETHER);
2977	if (ifp == NULL) {
2978		device_printf(dev, "can not allocate ifnet structure\n");
2979		return (-1);
2980	}
2981	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
2982	ifp->if_init =  em_init;
2983	ifp->if_softc = adapter;
2984	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
2985	ifp->if_ioctl = em_ioctl;
2986#ifdef EM_MULTIQUEUE
2987	/* Multiqueue stack interface */
2988	ifp->if_transmit = em_mq_start;
2989	ifp->if_qflush = em_qflush;
2990#else
2991	ifp->if_start = em_start;
2992	IFQ_SET_MAXLEN(&ifp->if_snd, adapter->num_tx_desc - 1);
2993	ifp->if_snd.ifq_drv_maxlen = adapter->num_tx_desc - 1;
2994	IFQ_SET_READY(&ifp->if_snd);
2995#endif
2996
2997	ether_ifattach(ifp, adapter->hw.mac.addr);
2998
2999	ifp->if_capabilities = ifp->if_capenable = 0;
3000
3001
3002	ifp->if_capabilities |= IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM;
3003	ifp->if_capabilities |= IFCAP_TSO4;
3004	/*
3005	 * Tell the upper layer(s) we
3006	 * support full VLAN capability
3007	 */
3008	ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);
3009	ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING
3010			     |  IFCAP_VLAN_HWTSO
3011			     |  IFCAP_VLAN_MTU;
3012	ifp->if_capenable = ifp->if_capabilities;
3013
3014	/*
3015	** Don't turn this on by default, if vlans are
3016	** created on another pseudo device (eg. lagg)
3017	** then vlan events are not passed thru, breaking
3018	** operation, but with HW FILTER off it works. If
3019	** using vlans directly on the em driver you can
3020	** enable this and get full hardware tag filtering.
3021	*/
3022	ifp->if_capabilities |= IFCAP_VLAN_HWFILTER;
3023
3024#ifdef DEVICE_POLLING
3025	ifp->if_capabilities |= IFCAP_POLLING;
3026#endif
3027
3028	/* Enable only WOL MAGIC by default */
3029	if (adapter->wol) {
3030		ifp->if_capabilities |= IFCAP_WOL;
3031		ifp->if_capenable |= IFCAP_WOL_MAGIC;
3032	}
3033
3034	/*
3035	 * Specify the media types supported by this adapter and register
3036	 * callbacks to update media and link information
3037	 */
3038	ifmedia_init(&adapter->media, IFM_IMASK,
3039	    em_media_change, em_media_status);
3040	if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
3041	    (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
3042		u_char fiber_type = IFM_1000_SX;	/* default type */
3043
3044		ifmedia_add(&adapter->media, IFM_ETHER | fiber_type | IFM_FDX,
3045			    0, NULL);
3046		ifmedia_add(&adapter->media, IFM_ETHER | fiber_type, 0, NULL);
3047	} else {
3048		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T, 0, NULL);
3049		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T | IFM_FDX,
3050			    0, NULL);
3051		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX,
3052			    0, NULL);
3053		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
3054			    0, NULL);
3055		if (adapter->hw.phy.type != e1000_phy_ife) {
3056			ifmedia_add(&adapter->media,
3057				IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
3058			ifmedia_add(&adapter->media,
3059				IFM_ETHER | IFM_1000_T, 0, NULL);
3060		}
3061	}
3062	ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
3063	ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
3064	return (0);
3065}
3066
3067
3068/*
3069 * Manage DMA'able memory.
3070 */
3071static void
3072em_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
3073{
3074	if (error)
3075		return;
3076	*(bus_addr_t *) arg = segs[0].ds_addr;
3077}
3078
3079static int
3080em_dma_malloc(struct adapter *adapter, bus_size_t size,
3081        struct em_dma_alloc *dma, int mapflags)
3082{
3083	int error;
3084
3085	error = bus_dma_tag_create(bus_get_dma_tag(adapter->dev), /* parent */
3086				EM_DBA_ALIGN, 0,	/* alignment, bounds */
3087				BUS_SPACE_MAXADDR,	/* lowaddr */
3088				BUS_SPACE_MAXADDR,	/* highaddr */
3089				NULL, NULL,		/* filter, filterarg */
3090				size,			/* maxsize */
3091				1,			/* nsegments */
3092				size,			/* maxsegsize */
3093				0,			/* flags */
3094				NULL,			/* lockfunc */
3095				NULL,			/* lockarg */
3096				&dma->dma_tag);
3097	if (error) {
3098		device_printf(adapter->dev,
3099		    "%s: bus_dma_tag_create failed: %d\n",
3100		    __func__, error);
3101		goto fail_0;
3102	}
3103
3104	error = bus_dmamem_alloc(dma->dma_tag, (void**) &dma->dma_vaddr,
3105	    BUS_DMA_NOWAIT | BUS_DMA_COHERENT, &dma->dma_map);
3106	if (error) {
3107		device_printf(adapter->dev,
3108		    "%s: bus_dmamem_alloc(%ju) failed: %d\n",
3109		    __func__, (uintmax_t)size, error);
3110		goto fail_2;
3111	}
3112
3113	dma->dma_paddr = 0;
3114	error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
3115	    size, em_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT);
3116	if (error || dma->dma_paddr == 0) {
3117		device_printf(adapter->dev,
3118		    "%s: bus_dmamap_load failed: %d\n",
3119		    __func__, error);
3120		goto fail_3;
3121	}
3122
3123	return (0);
3124
3125fail_3:
3126	bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3127fail_2:
3128	bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
3129	bus_dma_tag_destroy(dma->dma_tag);
3130fail_0:
3131	dma->dma_map = NULL;
3132	dma->dma_tag = NULL;
3133
3134	return (error);
3135}
3136
3137static void
3138em_dma_free(struct adapter *adapter, struct em_dma_alloc *dma)
3139{
3140	if (dma->dma_tag == NULL)
3141		return;
3142	if (dma->dma_map != NULL) {
3143		bus_dmamap_sync(dma->dma_tag, dma->dma_map,
3144		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
3145		bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3146		bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
3147		dma->dma_map = NULL;
3148	}
3149	bus_dma_tag_destroy(dma->dma_tag);
3150	dma->dma_tag = NULL;
3151}
3152
3153
3154/*********************************************************************
3155 *
3156 *  Allocate memory for the transmit and receive rings, and then
3157 *  the descriptors associated with each, called only once at attach.
3158 *
3159 **********************************************************************/
3160static int
3161em_allocate_queues(struct adapter *adapter)
3162{
3163	device_t		dev = adapter->dev;
3164	struct tx_ring		*txr = NULL;
3165	struct rx_ring		*rxr = NULL;
3166	int rsize, tsize, error = E1000_SUCCESS;
3167	int txconf = 0, rxconf = 0;
3168
3169
3170	/* Allocate the TX ring struct memory */
3171	if (!(adapter->tx_rings =
3172	    (struct tx_ring *) malloc(sizeof(struct tx_ring) *
3173	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3174		device_printf(dev, "Unable to allocate TX ring memory\n");
3175		error = ENOMEM;
3176		goto fail;
3177	}
3178
3179	/* Now allocate the RX */
3180	if (!(adapter->rx_rings =
3181	    (struct rx_ring *) malloc(sizeof(struct rx_ring) *
3182	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3183		device_printf(dev, "Unable to allocate RX ring memory\n");
3184		error = ENOMEM;
3185		goto rx_fail;
3186	}
3187
3188	tsize = roundup2(adapter->num_tx_desc *
3189	    sizeof(struct e1000_tx_desc), EM_DBA_ALIGN);
3190	/*
3191	 * Now set up the TX queues, txconf is needed to handle the
3192	 * possibility that things fail midcourse and we need to
3193	 * undo memory gracefully
3194	 */
3195	for (int i = 0; i < adapter->num_queues; i++, txconf++) {
3196		/* Set up some basics */
3197		txr = &adapter->tx_rings[i];
3198		txr->adapter = adapter;
3199		txr->me = i;
3200
3201		/* Initialize the TX lock */
3202		snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
3203		    device_get_nameunit(dev), txr->me);
3204		mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF);
3205
3206		if (em_dma_malloc(adapter, tsize,
3207			&txr->txdma, BUS_DMA_NOWAIT)) {
3208			device_printf(dev,
3209			    "Unable to allocate TX Descriptor memory\n");
3210			error = ENOMEM;
3211			goto err_tx_desc;
3212		}
3213		txr->tx_base = (struct e1000_tx_desc *)txr->txdma.dma_vaddr;
3214		bzero((void *)txr->tx_base, tsize);
3215
3216        	if (em_allocate_transmit_buffers(txr)) {
3217			device_printf(dev,
3218			    "Critical Failure setting up transmit buffers\n");
3219			error = ENOMEM;
3220			goto err_tx_desc;
3221        	}
3222#if __FreeBSD_version >= 800000
3223		/* Allocate a buf ring */
3224		txr->br = buf_ring_alloc(4096, M_DEVBUF,
3225		    M_WAITOK, &txr->tx_mtx);
3226#endif
3227	}
3228
3229	/*
3230	 * Next the RX queues...
3231	 */
3232	rsize = roundup2(adapter->num_rx_desc *
3233	    sizeof(struct e1000_rx_desc), EM_DBA_ALIGN);
3234	for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
3235		rxr = &adapter->rx_rings[i];
3236		rxr->adapter = adapter;
3237		rxr->me = i;
3238
3239		/* Initialize the RX lock */
3240		snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
3241		    device_get_nameunit(dev), txr->me);
3242		mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF);
3243
3244		if (em_dma_malloc(adapter, rsize,
3245			&rxr->rxdma, BUS_DMA_NOWAIT)) {
3246			device_printf(dev,
3247			    "Unable to allocate RxDescriptor memory\n");
3248			error = ENOMEM;
3249			goto err_rx_desc;
3250		}
3251		rxr->rx_base = (struct e1000_rx_desc *)rxr->rxdma.dma_vaddr;
3252		bzero((void *)rxr->rx_base, rsize);
3253
3254        	/* Allocate receive buffers for the ring*/
3255		if (em_allocate_receive_buffers(rxr)) {
3256			device_printf(dev,
3257			    "Critical Failure setting up receive buffers\n");
3258			error = ENOMEM;
3259			goto err_rx_desc;
3260		}
3261	}
3262
3263	return (0);
3264
3265err_rx_desc:
3266	for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
3267		em_dma_free(adapter, &rxr->rxdma);
3268err_tx_desc:
3269	for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
3270		em_dma_free(adapter, &txr->txdma);
3271	free(adapter->rx_rings, M_DEVBUF);
3272rx_fail:
3273#if __FreeBSD_version >= 800000
3274	buf_ring_free(txr->br, M_DEVBUF);
3275#endif
3276	free(adapter->tx_rings, M_DEVBUF);
3277fail:
3278	return (error);
3279}
3280
3281
3282/*********************************************************************
3283 *
3284 *  Allocate memory for tx_buffer structures. The tx_buffer stores all
3285 *  the information needed to transmit a packet on the wire. This is
3286 *  called only once at attach, setup is done every reset.
3287 *
3288 **********************************************************************/
3289static int
3290em_allocate_transmit_buffers(struct tx_ring *txr)
3291{
3292	struct adapter *adapter = txr->adapter;
3293	device_t dev = adapter->dev;
3294	struct em_buffer *txbuf;
3295	int error, i;
3296
3297	/*
3298	 * Setup DMA descriptor areas.
3299	 */
3300	if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
3301			       1, 0,			/* alignment, bounds */
3302			       BUS_SPACE_MAXADDR,	/* lowaddr */
3303			       BUS_SPACE_MAXADDR,	/* highaddr */
3304			       NULL, NULL,		/* filter, filterarg */
3305			       EM_TSO_SIZE,		/* maxsize */
3306			       EM_MAX_SCATTER,		/* nsegments */
3307			       PAGE_SIZE,		/* maxsegsize */
3308			       0,			/* flags */
3309			       NULL,			/* lockfunc */
3310			       NULL,			/* lockfuncarg */
3311			       &txr->txtag))) {
3312		device_printf(dev,"Unable to allocate TX DMA tag\n");
3313		goto fail;
3314	}
3315
3316	if (!(txr->tx_buffers =
3317	    (struct em_buffer *) malloc(sizeof(struct em_buffer) *
3318	    adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3319		device_printf(dev, "Unable to allocate tx_buffer memory\n");
3320		error = ENOMEM;
3321		goto fail;
3322	}
3323
3324        /* Create the descriptor buffer dma maps */
3325	txbuf = txr->tx_buffers;
3326	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3327		error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
3328		if (error != 0) {
3329			device_printf(dev, "Unable to create TX DMA map\n");
3330			goto fail;
3331		}
3332	}
3333
3334	return 0;
3335fail:
3336	/* We free all, it handles case where we are in the middle */
3337	em_free_transmit_structures(adapter);
3338	return (error);
3339}
3340
3341/*********************************************************************
3342 *
3343 *  Initialize a transmit ring.
3344 *
3345 **********************************************************************/
3346static void
3347em_setup_transmit_ring(struct tx_ring *txr)
3348{
3349	struct adapter *adapter = txr->adapter;
3350	struct em_buffer *txbuf;
3351	int i;
3352#ifdef DEV_NETMAP
3353	struct netmap_adapter *na = NA(adapter->ifp);
3354	struct netmap_slot *slot;
3355#endif /* DEV_NETMAP */
3356
3357	/* Clear the old descriptor contents */
3358	EM_TX_LOCK(txr);
3359#ifdef DEV_NETMAP
3360	slot = netmap_reset(na, NR_TX, txr->me, 0);
3361#endif /* DEV_NETMAP */
3362
3363	bzero((void *)txr->tx_base,
3364	      (sizeof(struct e1000_tx_desc)) * adapter->num_tx_desc);
3365	/* Reset indices */
3366	txr->next_avail_desc = 0;
3367	txr->next_to_clean = 0;
3368
3369	/* Free any existing tx buffers. */
3370        txbuf = txr->tx_buffers;
3371	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3372		if (txbuf->m_head != NULL) {
3373			bus_dmamap_sync(txr->txtag, txbuf->map,
3374			    BUS_DMASYNC_POSTWRITE);
3375			bus_dmamap_unload(txr->txtag, txbuf->map);
3376			m_freem(txbuf->m_head);
3377			txbuf->m_head = NULL;
3378		}
3379#ifdef DEV_NETMAP
3380		if (slot) {
3381			int si = netmap_idx_n2k(&na->tx_rings[txr->me], i);
3382			uint64_t paddr;
3383			void *addr;
3384
3385			addr = PNMB(slot + si, &paddr);
3386			txr->tx_base[i].buffer_addr = htole64(paddr);
3387			/* reload the map for netmap mode */
3388			netmap_load_map(txr->txtag, txbuf->map, addr);
3389		}
3390#endif /* DEV_NETMAP */
3391
3392		/* clear the watch index */
3393		txbuf->next_eop = -1;
3394        }
3395
3396	/* Set number of descriptors available */
3397	txr->tx_avail = adapter->num_tx_desc;
3398	txr->queue_status = EM_QUEUE_IDLE;
3399
3400	/* Clear checksum offload context. */
3401	txr->last_hw_offload = 0;
3402	txr->last_hw_ipcss = 0;
3403	txr->last_hw_ipcso = 0;
3404	txr->last_hw_tucss = 0;
3405	txr->last_hw_tucso = 0;
3406
3407	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3408	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3409	EM_TX_UNLOCK(txr);
3410}
3411
3412/*********************************************************************
3413 *
3414 *  Initialize all transmit rings.
3415 *
3416 **********************************************************************/
3417static void
3418em_setup_transmit_structures(struct adapter *adapter)
3419{
3420	struct tx_ring *txr = adapter->tx_rings;
3421
3422	for (int i = 0; i < adapter->num_queues; i++, txr++)
3423		em_setup_transmit_ring(txr);
3424
3425	return;
3426}
3427
3428/*********************************************************************
3429 *
3430 *  Enable transmit unit.
3431 *
3432 **********************************************************************/
3433static void
3434em_initialize_transmit_unit(struct adapter *adapter)
3435{
3436	struct tx_ring	*txr = adapter->tx_rings;
3437	struct e1000_hw	*hw = &adapter->hw;
3438	u32	tctl, tarc, tipg = 0;
3439
3440	 INIT_DEBUGOUT("em_initialize_transmit_unit: begin");
3441
3442	for (int i = 0; i < adapter->num_queues; i++, txr++) {
3443		u64 bus_addr = txr->txdma.dma_paddr;
3444		/* Base and Len of TX Ring */
3445		E1000_WRITE_REG(hw, E1000_TDLEN(i),
3446	    	    adapter->num_tx_desc * sizeof(struct e1000_tx_desc));
3447		E1000_WRITE_REG(hw, E1000_TDBAH(i),
3448	    	    (u32)(bus_addr >> 32));
3449		E1000_WRITE_REG(hw, E1000_TDBAL(i),
3450	    	    (u32)bus_addr);
3451		/* Init the HEAD/TAIL indices */
3452		E1000_WRITE_REG(hw, E1000_TDT(i), 0);
3453		E1000_WRITE_REG(hw, E1000_TDH(i), 0);
3454
3455		HW_DEBUGOUT2("Base = %x, Length = %x\n",
3456		    E1000_READ_REG(&adapter->hw, E1000_TDBAL(i)),
3457		    E1000_READ_REG(&adapter->hw, E1000_TDLEN(i)));
3458
3459		txr->queue_status = EM_QUEUE_IDLE;
3460	}
3461
3462	/* Set the default values for the Tx Inter Packet Gap timer */
3463	switch (adapter->hw.mac.type) {
3464	case e1000_80003es2lan:
3465		tipg = DEFAULT_82543_TIPG_IPGR1;
3466		tipg |= DEFAULT_80003ES2LAN_TIPG_IPGR2 <<
3467		    E1000_TIPG_IPGR2_SHIFT;
3468		break;
3469	default:
3470		if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
3471		    (adapter->hw.phy.media_type ==
3472		    e1000_media_type_internal_serdes))
3473			tipg = DEFAULT_82543_TIPG_IPGT_FIBER;
3474		else
3475			tipg = DEFAULT_82543_TIPG_IPGT_COPPER;
3476		tipg |= DEFAULT_82543_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT;
3477		tipg |= DEFAULT_82543_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT;
3478	}
3479
3480	E1000_WRITE_REG(&adapter->hw, E1000_TIPG, tipg);
3481	E1000_WRITE_REG(&adapter->hw, E1000_TIDV, adapter->tx_int_delay.value);
3482
3483	if(adapter->hw.mac.type >= e1000_82540)
3484		E1000_WRITE_REG(&adapter->hw, E1000_TADV,
3485		    adapter->tx_abs_int_delay.value);
3486
3487	if ((adapter->hw.mac.type == e1000_82571) ||
3488	    (adapter->hw.mac.type == e1000_82572)) {
3489		tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
3490		tarc |= SPEED_MODE_BIT;
3491		E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3492	} else if (adapter->hw.mac.type == e1000_80003es2lan) {
3493		tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
3494		tarc |= 1;
3495		E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3496		tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(1));
3497		tarc |= 1;
3498		E1000_WRITE_REG(&adapter->hw, E1000_TARC(1), tarc);
3499	}
3500
3501	adapter->txd_cmd = E1000_TXD_CMD_IFCS;
3502	if (adapter->tx_int_delay.value > 0)
3503		adapter->txd_cmd |= E1000_TXD_CMD_IDE;
3504
3505	/* Program the Transmit Control Register */
3506	tctl = E1000_READ_REG(&adapter->hw, E1000_TCTL);
3507	tctl &= ~E1000_TCTL_CT;
3508	tctl |= (E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN |
3509		   (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT));
3510
3511	if (adapter->hw.mac.type >= e1000_82571)
3512		tctl |= E1000_TCTL_MULR;
3513
3514	/* This write will effectively turn on the transmit unit. */
3515	E1000_WRITE_REG(&adapter->hw, E1000_TCTL, tctl);
3516
3517}
3518
3519
3520/*********************************************************************
3521 *
3522 *  Free all transmit rings.
3523 *
3524 **********************************************************************/
3525static void
3526em_free_transmit_structures(struct adapter *adapter)
3527{
3528	struct tx_ring *txr = adapter->tx_rings;
3529
3530	for (int i = 0; i < adapter->num_queues; i++, txr++) {
3531		EM_TX_LOCK(txr);
3532		em_free_transmit_buffers(txr);
3533		em_dma_free(adapter, &txr->txdma);
3534		EM_TX_UNLOCK(txr);
3535		EM_TX_LOCK_DESTROY(txr);
3536	}
3537
3538	free(adapter->tx_rings, M_DEVBUF);
3539}
3540
3541/*********************************************************************
3542 *
3543 *  Free transmit ring related data structures.
3544 *
3545 **********************************************************************/
3546static void
3547em_free_transmit_buffers(struct tx_ring *txr)
3548{
3549	struct adapter		*adapter = txr->adapter;
3550	struct em_buffer	*txbuf;
3551
3552	INIT_DEBUGOUT("free_transmit_ring: begin");
3553
3554	if (txr->tx_buffers == NULL)
3555		return;
3556
3557	for (int i = 0; i < adapter->num_tx_desc; i++) {
3558		txbuf = &txr->tx_buffers[i];
3559		if (txbuf->m_head != NULL) {
3560			bus_dmamap_sync(txr->txtag, txbuf->map,
3561			    BUS_DMASYNC_POSTWRITE);
3562			bus_dmamap_unload(txr->txtag,
3563			    txbuf->map);
3564			m_freem(txbuf->m_head);
3565			txbuf->m_head = NULL;
3566			if (txbuf->map != NULL) {
3567				bus_dmamap_destroy(txr->txtag,
3568				    txbuf->map);
3569				txbuf->map = NULL;
3570			}
3571		} else if (txbuf->map != NULL) {
3572			bus_dmamap_unload(txr->txtag,
3573			    txbuf->map);
3574			bus_dmamap_destroy(txr->txtag,
3575			    txbuf->map);
3576			txbuf->map = NULL;
3577		}
3578	}
3579#if __FreeBSD_version >= 800000
3580	if (txr->br != NULL)
3581		buf_ring_free(txr->br, M_DEVBUF);
3582#endif
3583	if (txr->tx_buffers != NULL) {
3584		free(txr->tx_buffers, M_DEVBUF);
3585		txr->tx_buffers = NULL;
3586	}
3587	if (txr->txtag != NULL) {
3588		bus_dma_tag_destroy(txr->txtag);
3589		txr->txtag = NULL;
3590	}
3591	return;
3592}
3593
3594
3595/*********************************************************************
3596 *  The offload context is protocol specific (TCP/UDP) and thus
3597 *  only needs to be set when the protocol changes. The occasion
3598 *  of a context change can be a performance detriment, and
3599 *  might be better just disabled. The reason arises in the way
3600 *  in which the controller supports pipelined requests from the
3601 *  Tx data DMA. Up to four requests can be pipelined, and they may
3602 *  belong to the same packet or to multiple packets. However all
3603 *  requests for one packet are issued before a request is issued
3604 *  for a subsequent packet and if a request for the next packet
3605 *  requires a context change, that request will be stalled
3606 *  until the previous request completes. This means setting up
3607 *  a new context effectively disables pipelined Tx data DMA which
3608 *  in turn greatly slow down performance to send small sized
3609 *  frames.
3610 **********************************************************************/
3611static void
3612em_transmit_checksum_setup(struct tx_ring *txr, struct mbuf *mp, int ip_off,
3613    struct ip *ip, u32 *txd_upper, u32 *txd_lower)
3614{
3615	struct adapter			*adapter = txr->adapter;
3616	struct e1000_context_desc	*TXD = NULL;
3617	struct em_buffer		*tx_buffer;
3618	int				cur, hdr_len;
3619	u32				cmd = 0;
3620	u16				offload = 0;
3621	u8				ipcso, ipcss, tucso, tucss;
3622
3623	ipcss = ipcso = tucss = tucso = 0;
3624	hdr_len = ip_off + (ip->ip_hl << 2);
3625	cur = txr->next_avail_desc;
3626
3627	/* Setup of IP header checksum. */
3628	if (mp->m_pkthdr.csum_flags & CSUM_IP) {
3629		*txd_upper |= E1000_TXD_POPTS_IXSM << 8;
3630		offload |= CSUM_IP;
3631		ipcss = ip_off;
3632		ipcso = ip_off + offsetof(struct ip, ip_sum);
3633		/*
3634		 * Start offset for header checksum calculation.
3635		 * End offset for header checksum calculation.
3636		 * Offset of place to put the checksum.
3637		 */
3638		TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
3639		TXD->lower_setup.ip_fields.ipcss = ipcss;
3640		TXD->lower_setup.ip_fields.ipcse = htole16(hdr_len);
3641		TXD->lower_setup.ip_fields.ipcso = ipcso;
3642		cmd |= E1000_TXD_CMD_IP;
3643	}
3644
3645	if (mp->m_pkthdr.csum_flags & CSUM_TCP) {
3646 		*txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
3647 		*txd_upper |= E1000_TXD_POPTS_TXSM << 8;
3648 		offload |= CSUM_TCP;
3649 		tucss = hdr_len;
3650 		tucso = hdr_len + offsetof(struct tcphdr, th_sum);
3651 		/*
3652 		 * Setting up new checksum offload context for every frames
3653 		 * takes a lot of processing time for hardware. This also
3654 		 * reduces performance a lot for small sized frames so avoid
3655 		 * it if driver can use previously configured checksum
3656 		 * offload context.
3657 		 */
3658 		if (txr->last_hw_offload == offload) {
3659 			if (offload & CSUM_IP) {
3660 				if (txr->last_hw_ipcss == ipcss &&
3661 				    txr->last_hw_ipcso == ipcso &&
3662 				    txr->last_hw_tucss == tucss &&
3663 				    txr->last_hw_tucso == tucso)
3664 					return;
3665 			} else {
3666 				if (txr->last_hw_tucss == tucss &&
3667 				    txr->last_hw_tucso == tucso)
3668 					return;
3669 			}
3670  		}
3671 		txr->last_hw_offload = offload;
3672 		txr->last_hw_tucss = tucss;
3673 		txr->last_hw_tucso = tucso;
3674 		/*
3675 		 * Start offset for payload checksum calculation.
3676 		 * End offset for payload checksum calculation.
3677 		 * Offset of place to put the checksum.
3678 		 */
3679		TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
3680 		TXD->upper_setup.tcp_fields.tucss = hdr_len;
3681 		TXD->upper_setup.tcp_fields.tucse = htole16(0);
3682 		TXD->upper_setup.tcp_fields.tucso = tucso;
3683 		cmd |= E1000_TXD_CMD_TCP;
3684 	} else if (mp->m_pkthdr.csum_flags & CSUM_UDP) {
3685 		*txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
3686 		*txd_upper |= E1000_TXD_POPTS_TXSM << 8;
3687 		tucss = hdr_len;
3688 		tucso = hdr_len + offsetof(struct udphdr, uh_sum);
3689 		/*
3690 		 * Setting up new checksum offload context for every frames
3691 		 * takes a lot of processing time for hardware. This also
3692 		 * reduces performance a lot for small sized frames so avoid
3693 		 * it if driver can use previously configured checksum
3694 		 * offload context.
3695 		 */
3696 		if (txr->last_hw_offload == offload) {
3697 			if (offload & CSUM_IP) {
3698 				if (txr->last_hw_ipcss == ipcss &&
3699 				    txr->last_hw_ipcso == ipcso &&
3700 				    txr->last_hw_tucss == tucss &&
3701 				    txr->last_hw_tucso == tucso)
3702 					return;
3703 			} else {
3704 				if (txr->last_hw_tucss == tucss &&
3705 				    txr->last_hw_tucso == tucso)
3706 					return;
3707 			}
3708 		}
3709 		txr->last_hw_offload = offload;
3710 		txr->last_hw_tucss = tucss;
3711 		txr->last_hw_tucso = tucso;
3712 		/*
3713 		 * Start offset for header checksum calculation.
3714 		 * End offset for header checksum calculation.
3715 		 * Offset of place to put the checksum.
3716 		 */
3717		TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
3718 		TXD->upper_setup.tcp_fields.tucss = tucss;
3719 		TXD->upper_setup.tcp_fields.tucse = htole16(0);
3720 		TXD->upper_setup.tcp_fields.tucso = tucso;
3721  	}
3722
3723 	if (offload & CSUM_IP) {
3724 		txr->last_hw_ipcss = ipcss;
3725 		txr->last_hw_ipcso = ipcso;
3726  	}
3727
3728	TXD->tcp_seg_setup.data = htole32(0);
3729	TXD->cmd_and_length =
3730	    htole32(adapter->txd_cmd | E1000_TXD_CMD_DEXT | cmd);
3731	tx_buffer = &txr->tx_buffers[cur];
3732	tx_buffer->m_head = NULL;
3733	tx_buffer->next_eop = -1;
3734
3735	if (++cur == adapter->num_tx_desc)
3736		cur = 0;
3737
3738	txr->tx_avail--;
3739	txr->next_avail_desc = cur;
3740}
3741
3742
3743/**********************************************************************
3744 *
3745 *  Setup work for hardware segmentation offload (TSO)
3746 *
3747 **********************************************************************/
3748static void
3749em_tso_setup(struct tx_ring *txr, struct mbuf *mp, int ip_off,
3750    struct ip *ip, struct tcphdr *tp, u32 *txd_upper, u32 *txd_lower)
3751{
3752	struct adapter			*adapter = txr->adapter;
3753	struct e1000_context_desc	*TXD;
3754	struct em_buffer		*tx_buffer;
3755	int cur, hdr_len;
3756
3757	/*
3758	 * In theory we can use the same TSO context if and only if
3759	 * frame is the same type(IP/TCP) and the same MSS. However
3760	 * checking whether a frame has the same IP/TCP structure is
3761	 * hard thing so just ignore that and always restablish a
3762	 * new TSO context.
3763	 */
3764	hdr_len = ip_off + (ip->ip_hl << 2) + (tp->th_off << 2);
3765	*txd_lower = (E1000_TXD_CMD_DEXT |	/* Extended descr type */
3766		      E1000_TXD_DTYP_D |	/* Data descr type */
3767		      E1000_TXD_CMD_TSE);	/* Do TSE on this packet */
3768
3769	/* IP and/or TCP header checksum calculation and insertion. */
3770	*txd_upper = (E1000_TXD_POPTS_IXSM | E1000_TXD_POPTS_TXSM) << 8;
3771
3772	cur = txr->next_avail_desc;
3773	tx_buffer = &txr->tx_buffers[cur];
3774	TXD = (struct e1000_context_desc *) &txr->tx_base[cur];
3775
3776	/*
3777	 * Start offset for header checksum calculation.
3778	 * End offset for header checksum calculation.
3779	 * Offset of place put the checksum.
3780	 */
3781	TXD->lower_setup.ip_fields.ipcss = ip_off;
3782	TXD->lower_setup.ip_fields.ipcse =
3783	    htole16(ip_off + (ip->ip_hl << 2) - 1);
3784	TXD->lower_setup.ip_fields.ipcso = ip_off + offsetof(struct ip, ip_sum);
3785	/*
3786	 * Start offset for payload checksum calculation.
3787	 * End offset for payload checksum calculation.
3788	 * Offset of place to put the checksum.
3789	 */
3790	TXD->upper_setup.tcp_fields.tucss = ip_off + (ip->ip_hl << 2);
3791	TXD->upper_setup.tcp_fields.tucse = 0;
3792	TXD->upper_setup.tcp_fields.tucso =
3793	    ip_off + (ip->ip_hl << 2) + offsetof(struct tcphdr, th_sum);
3794	/*
3795	 * Payload size per packet w/o any headers.
3796	 * Length of all headers up to payload.
3797	 */
3798	TXD->tcp_seg_setup.fields.mss = htole16(mp->m_pkthdr.tso_segsz);
3799	TXD->tcp_seg_setup.fields.hdr_len = hdr_len;
3800
3801	TXD->cmd_and_length = htole32(adapter->txd_cmd |
3802				E1000_TXD_CMD_DEXT |	/* Extended descr */
3803				E1000_TXD_CMD_TSE |	/* TSE context */
3804				E1000_TXD_CMD_IP |	/* Do IP csum */
3805				E1000_TXD_CMD_TCP |	/* Do TCP checksum */
3806				(mp->m_pkthdr.len - (hdr_len))); /* Total len */
3807
3808	tx_buffer->m_head = NULL;
3809	tx_buffer->next_eop = -1;
3810
3811	if (++cur == adapter->num_tx_desc)
3812		cur = 0;
3813
3814	txr->tx_avail--;
3815	txr->next_avail_desc = cur;
3816	txr->tx_tso = TRUE;
3817}
3818
3819
3820/**********************************************************************
3821 *
3822 *  Examine each tx_buffer in the used queue. If the hardware is done
3823 *  processing the packet then free associated resources. The
3824 *  tx_buffer is put back on the free queue.
3825 *
3826 **********************************************************************/
3827static void
3828em_txeof(struct tx_ring *txr)
3829{
3830	struct adapter	*adapter = txr->adapter;
3831        int first, last, done, processed;
3832        struct em_buffer *tx_buffer;
3833        struct e1000_tx_desc   *tx_desc, *eop_desc;
3834	struct ifnet   *ifp = adapter->ifp;
3835
3836	EM_TX_LOCK_ASSERT(txr);
3837#ifdef DEV_NETMAP
3838	if (netmap_tx_irq(ifp, txr->me))
3839		return;
3840#endif /* DEV_NETMAP */
3841
3842	/* No work, make sure watchdog is off */
3843        if (txr->tx_avail == adapter->num_tx_desc) {
3844		txr->queue_status = EM_QUEUE_IDLE;
3845                return;
3846	}
3847
3848	processed = 0;
3849        first = txr->next_to_clean;
3850        tx_desc = &txr->tx_base[first];
3851        tx_buffer = &txr->tx_buffers[first];
3852	last = tx_buffer->next_eop;
3853        eop_desc = &txr->tx_base[last];
3854
3855	/*
3856	 * What this does is get the index of the
3857	 * first descriptor AFTER the EOP of the
3858	 * first packet, that way we can do the
3859	 * simple comparison on the inner while loop.
3860	 */
3861	if (++last == adapter->num_tx_desc)
3862 		last = 0;
3863	done = last;
3864
3865        bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3866            BUS_DMASYNC_POSTREAD);
3867
3868        while (eop_desc->upper.fields.status & E1000_TXD_STAT_DD) {
3869		/* We clean the range of the packet */
3870		while (first != done) {
3871                	tx_desc->upper.data = 0;
3872                	tx_desc->lower.data = 0;
3873                	tx_desc->buffer_addr = 0;
3874                	++txr->tx_avail;
3875			++processed;
3876
3877			if (tx_buffer->m_head) {
3878				bus_dmamap_sync(txr->txtag,
3879				    tx_buffer->map,
3880				    BUS_DMASYNC_POSTWRITE);
3881				bus_dmamap_unload(txr->txtag,
3882				    tx_buffer->map);
3883                        	m_freem(tx_buffer->m_head);
3884                        	tx_buffer->m_head = NULL;
3885                	}
3886			tx_buffer->next_eop = -1;
3887			txr->watchdog_time = ticks;
3888
3889	                if (++first == adapter->num_tx_desc)
3890				first = 0;
3891
3892	                tx_buffer = &txr->tx_buffers[first];
3893			tx_desc = &txr->tx_base[first];
3894		}
3895		++ifp->if_opackets;
3896		/* See if we can continue to the next packet */
3897		last = tx_buffer->next_eop;
3898		if (last != -1) {
3899        		eop_desc = &txr->tx_base[last];
3900			/* Get new done point */
3901			if (++last == adapter->num_tx_desc) last = 0;
3902			done = last;
3903		} else
3904			break;
3905        }
3906        bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3907            BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3908
3909        txr->next_to_clean = first;
3910
3911	/*
3912	** Watchdog calculation, we know there's
3913	** work outstanding or the first return
3914	** would have been taken, so none processed
3915	** for too long indicates a hang. local timer
3916	** will examine this and do a reset if needed.
3917	*/
3918	if ((!processed) && ((ticks - txr->watchdog_time) > EM_WATCHDOG))
3919		txr->queue_status = EM_QUEUE_HUNG;
3920
3921        /*
3922         * If we have a minimum free, clear IFF_DRV_OACTIVE
3923         * to tell the stack that it is OK to send packets.
3924	 * Notice that all writes of OACTIVE happen under the
3925	 * TX lock which, with a single queue, guarantees
3926	 * sanity.
3927         */
3928        if (txr->tx_avail >= EM_MAX_SCATTER)
3929		ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
3930
3931	/* Disable watchdog if all clean */
3932	if (txr->tx_avail == adapter->num_tx_desc) {
3933		txr->queue_status = EM_QUEUE_IDLE;
3934	}
3935}
3936
3937
3938/*********************************************************************
3939 *
3940 *  Refresh RX descriptor mbufs from system mbuf buffer pool.
3941 *
3942 **********************************************************************/
3943static void
3944em_refresh_mbufs(struct rx_ring *rxr, int limit)
3945{
3946	struct adapter		*adapter = rxr->adapter;
3947	struct mbuf		*m;
3948	bus_dma_segment_t	segs[1];
3949	struct em_buffer	*rxbuf;
3950	int			i, j, error, nsegs;
3951	bool			cleaned = FALSE;
3952
3953	i = j = rxr->next_to_refresh;
3954	/*
3955	** Get one descriptor beyond
3956	** our work mark to control
3957	** the loop.
3958	*/
3959	if (++j == adapter->num_rx_desc)
3960		j = 0;
3961
3962	while (j != limit) {
3963		rxbuf = &rxr->rx_buffers[i];
3964		if (rxbuf->m_head == NULL) {
3965			m = m_getjcl(M_NOWAIT, MT_DATA,
3966			    M_PKTHDR, adapter->rx_mbuf_sz);
3967			/*
3968			** If we have a temporary resource shortage
3969			** that causes a failure, just abort refresh
3970			** for now, we will return to this point when
3971			** reinvoked from em_rxeof.
3972			*/
3973			if (m == NULL)
3974				goto update;
3975		} else
3976			m = rxbuf->m_head;
3977
3978		m->m_len = m->m_pkthdr.len = adapter->rx_mbuf_sz;
3979		m->m_flags |= M_PKTHDR;
3980		m->m_data = m->m_ext.ext_buf;
3981
3982		/* Use bus_dma machinery to setup the memory mapping  */
3983		error = bus_dmamap_load_mbuf_sg(rxr->rxtag, rxbuf->map,
3984		    m, segs, &nsegs, BUS_DMA_NOWAIT);
3985		if (error != 0) {
3986			printf("Refresh mbufs: hdr dmamap load"
3987			    " failure - %d\n", error);
3988			m_free(m);
3989			rxbuf->m_head = NULL;
3990			goto update;
3991		}
3992		rxbuf->m_head = m;
3993		bus_dmamap_sync(rxr->rxtag,
3994		    rxbuf->map, BUS_DMASYNC_PREREAD);
3995		rxr->rx_base[i].buffer_addr = htole64(segs[0].ds_addr);
3996		cleaned = TRUE;
3997
3998		i = j; /* Next is precalulated for us */
3999		rxr->next_to_refresh = i;
4000		/* Calculate next controlling index */
4001		if (++j == adapter->num_rx_desc)
4002			j = 0;
4003	}
4004update:
4005	/*
4006	** Update the tail pointer only if,
4007	** and as far as we have refreshed.
4008	*/
4009	if (cleaned)
4010		E1000_WRITE_REG(&adapter->hw,
4011		    E1000_RDT(rxr->me), rxr->next_to_refresh);
4012
4013	return;
4014}
4015
4016
4017/*********************************************************************
4018 *
4019 *  Allocate memory for rx_buffer structures. Since we use one
4020 *  rx_buffer per received packet, the maximum number of rx_buffer's
4021 *  that we'll need is equal to the number of receive descriptors
4022 *  that we've allocated.
4023 *
4024 **********************************************************************/
4025static int
4026em_allocate_receive_buffers(struct rx_ring *rxr)
4027{
4028	struct adapter		*adapter = rxr->adapter;
4029	device_t		dev = adapter->dev;
4030	struct em_buffer	*rxbuf;
4031	int			error;
4032
4033	rxr->rx_buffers = malloc(sizeof(struct em_buffer) *
4034	    adapter->num_rx_desc, M_DEVBUF, M_NOWAIT | M_ZERO);
4035	if (rxr->rx_buffers == NULL) {
4036		device_printf(dev, "Unable to allocate rx_buffer memory\n");
4037		return (ENOMEM);
4038	}
4039
4040	error = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */
4041				1, 0,			/* alignment, bounds */
4042				BUS_SPACE_MAXADDR,	/* lowaddr */
4043				BUS_SPACE_MAXADDR,	/* highaddr */
4044				NULL, NULL,		/* filter, filterarg */
4045				MJUM9BYTES,		/* maxsize */
4046				1,			/* nsegments */
4047				MJUM9BYTES,		/* maxsegsize */
4048				0,			/* flags */
4049				NULL,			/* lockfunc */
4050				NULL,			/* lockarg */
4051				&rxr->rxtag);
4052	if (error) {
4053		device_printf(dev, "%s: bus_dma_tag_create failed %d\n",
4054		    __func__, error);
4055		goto fail;
4056	}
4057
4058	rxbuf = rxr->rx_buffers;
4059	for (int i = 0; i < adapter->num_rx_desc; i++, rxbuf++) {
4060		rxbuf = &rxr->rx_buffers[i];
4061		error = bus_dmamap_create(rxr->rxtag, 0, &rxbuf->map);
4062		if (error) {
4063			device_printf(dev, "%s: bus_dmamap_create failed: %d\n",
4064			    __func__, error);
4065			goto fail;
4066		}
4067	}
4068
4069	return (0);
4070
4071fail:
4072	em_free_receive_structures(adapter);
4073	return (error);
4074}
4075
4076
4077/*********************************************************************
4078 *
4079 *  Initialize a receive ring and its buffers.
4080 *
4081 **********************************************************************/
4082static int
4083em_setup_receive_ring(struct rx_ring *rxr)
4084{
4085	struct	adapter 	*adapter = rxr->adapter;
4086	struct em_buffer	*rxbuf;
4087	bus_dma_segment_t	seg[1];
4088	int			rsize, nsegs, error = 0;
4089#ifdef DEV_NETMAP
4090	struct netmap_adapter *na = NA(adapter->ifp);
4091	struct netmap_slot *slot;
4092#endif
4093
4094
4095	/* Clear the ring contents */
4096	EM_RX_LOCK(rxr);
4097	rsize = roundup2(adapter->num_rx_desc *
4098	    sizeof(struct e1000_rx_desc), EM_DBA_ALIGN);
4099	bzero((void *)rxr->rx_base, rsize);
4100#ifdef DEV_NETMAP
4101	slot = netmap_reset(na, NR_RX, 0, 0);
4102#endif
4103
4104	/*
4105	** Free current RX buffer structs and their mbufs
4106	*/
4107	for (int i = 0; i < adapter->num_rx_desc; i++) {
4108		rxbuf = &rxr->rx_buffers[i];
4109		if (rxbuf->m_head != NULL) {
4110			bus_dmamap_sync(rxr->rxtag, rxbuf->map,
4111			    BUS_DMASYNC_POSTREAD);
4112			bus_dmamap_unload(rxr->rxtag, rxbuf->map);
4113			m_freem(rxbuf->m_head);
4114			rxbuf->m_head = NULL; /* mark as freed */
4115		}
4116	}
4117
4118	/* Now replenish the mbufs */
4119        for (int j = 0; j != adapter->num_rx_desc; ++j) {
4120		rxbuf = &rxr->rx_buffers[j];
4121#ifdef DEV_NETMAP
4122		if (slot) {
4123			int si = netmap_idx_n2k(&na->rx_rings[rxr->me], j);
4124			uint64_t paddr;
4125			void *addr;
4126
4127			addr = PNMB(slot + si, &paddr);
4128			netmap_load_map(rxr->rxtag, rxbuf->map, addr);
4129			/* Update descriptor */
4130			rxr->rx_base[j].buffer_addr = htole64(paddr);
4131			continue;
4132		}
4133#endif /* DEV_NETMAP */
4134		rxbuf->m_head = m_getjcl(M_NOWAIT, MT_DATA,
4135		    M_PKTHDR, adapter->rx_mbuf_sz);
4136		if (rxbuf->m_head == NULL) {
4137			error = ENOBUFS;
4138			goto fail;
4139		}
4140		rxbuf->m_head->m_len = adapter->rx_mbuf_sz;
4141		rxbuf->m_head->m_flags &= ~M_HASFCS; /* we strip it */
4142		rxbuf->m_head->m_pkthdr.len = adapter->rx_mbuf_sz;
4143
4144		/* Get the memory mapping */
4145		error = bus_dmamap_load_mbuf_sg(rxr->rxtag,
4146		    rxbuf->map, rxbuf->m_head, seg,
4147		    &nsegs, BUS_DMA_NOWAIT);
4148		if (error != 0) {
4149			m_freem(rxbuf->m_head);
4150			rxbuf->m_head = NULL;
4151			goto fail;
4152		}
4153		bus_dmamap_sync(rxr->rxtag,
4154		    rxbuf->map, BUS_DMASYNC_PREREAD);
4155
4156		/* Update descriptor */
4157		rxr->rx_base[j].buffer_addr = htole64(seg[0].ds_addr);
4158	}
4159	rxr->next_to_check = 0;
4160	rxr->next_to_refresh = 0;
4161	bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4162	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4163
4164fail:
4165	EM_RX_UNLOCK(rxr);
4166	return (error);
4167}
4168
4169/*********************************************************************
4170 *
4171 *  Initialize all receive rings.
4172 *
4173 **********************************************************************/
4174static int
4175em_setup_receive_structures(struct adapter *adapter)
4176{
4177	struct rx_ring *rxr = adapter->rx_rings;
4178	int q;
4179
4180	for (q = 0; q < adapter->num_queues; q++, rxr++)
4181		if (em_setup_receive_ring(rxr))
4182			goto fail;
4183
4184	return (0);
4185fail:
4186	/*
4187	 * Free RX buffers allocated so far, we will only handle
4188	 * the rings that completed, the failing case will have
4189	 * cleaned up for itself. 'q' failed, so its the terminus.
4190	 */
4191	for (int i = 0; i < q; ++i) {
4192		rxr = &adapter->rx_rings[i];
4193		for (int n = 0; n < adapter->num_rx_desc; n++) {
4194			struct em_buffer *rxbuf;
4195			rxbuf = &rxr->rx_buffers[n];
4196			if (rxbuf->m_head != NULL) {
4197				bus_dmamap_sync(rxr->rxtag, rxbuf->map,
4198			  	  BUS_DMASYNC_POSTREAD);
4199				bus_dmamap_unload(rxr->rxtag, rxbuf->map);
4200				m_freem(rxbuf->m_head);
4201				rxbuf->m_head = NULL;
4202			}
4203		}
4204		rxr->next_to_check = 0;
4205		rxr->next_to_refresh = 0;
4206	}
4207
4208	return (ENOBUFS);
4209}
4210
4211/*********************************************************************
4212 *
4213 *  Free all receive rings.
4214 *
4215 **********************************************************************/
4216static void
4217em_free_receive_structures(struct adapter *adapter)
4218{
4219	struct rx_ring *rxr = adapter->rx_rings;
4220
4221	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4222		em_free_receive_buffers(rxr);
4223		/* Free the ring memory as well */
4224		em_dma_free(adapter, &rxr->rxdma);
4225		EM_RX_LOCK_DESTROY(rxr);
4226	}
4227
4228	free(adapter->rx_rings, M_DEVBUF);
4229}
4230
4231
4232/*********************************************************************
4233 *
4234 *  Free receive ring data structures
4235 *
4236 **********************************************************************/
4237static void
4238em_free_receive_buffers(struct rx_ring *rxr)
4239{
4240	struct adapter		*adapter = rxr->adapter;
4241	struct em_buffer	*rxbuf = NULL;
4242
4243	INIT_DEBUGOUT("free_receive_buffers: begin");
4244
4245	if (rxr->rx_buffers != NULL) {
4246		for (int i = 0; i < adapter->num_rx_desc; i++) {
4247			rxbuf = &rxr->rx_buffers[i];
4248			if (rxbuf->map != NULL) {
4249				bus_dmamap_sync(rxr->rxtag, rxbuf->map,
4250				    BUS_DMASYNC_POSTREAD);
4251				bus_dmamap_unload(rxr->rxtag, rxbuf->map);
4252				bus_dmamap_destroy(rxr->rxtag, rxbuf->map);
4253			}
4254			if (rxbuf->m_head != NULL) {
4255				m_freem(rxbuf->m_head);
4256				rxbuf->m_head = NULL;
4257			}
4258		}
4259		free(rxr->rx_buffers, M_DEVBUF);
4260		rxr->rx_buffers = NULL;
4261		rxr->next_to_check = 0;
4262		rxr->next_to_refresh = 0;
4263	}
4264
4265	if (rxr->rxtag != NULL) {
4266		bus_dma_tag_destroy(rxr->rxtag);
4267		rxr->rxtag = NULL;
4268	}
4269
4270	return;
4271}
4272
4273
4274/*********************************************************************
4275 *
4276 *  Enable receive unit.
4277 *
4278 **********************************************************************/
4279
4280static void
4281em_initialize_receive_unit(struct adapter *adapter)
4282{
4283	struct rx_ring	*rxr = adapter->rx_rings;
4284	struct ifnet	*ifp = adapter->ifp;
4285	struct e1000_hw	*hw = &adapter->hw;
4286	u64	bus_addr;
4287	u32	rctl, rxcsum;
4288
4289	INIT_DEBUGOUT("em_initialize_receive_units: begin");
4290
4291	/*
4292	 * Make sure receives are disabled while setting
4293	 * up the descriptor ring
4294	 */
4295	rctl = E1000_READ_REG(hw, E1000_RCTL);
4296	/* Do not disable if ever enabled on this hardware */
4297	if ((hw->mac.type != e1000_82574) && (hw->mac.type != e1000_82583))
4298		E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
4299
4300	E1000_WRITE_REG(&adapter->hw, E1000_RADV,
4301	    adapter->rx_abs_int_delay.value);
4302	/*
4303	 * Set the interrupt throttling rate. Value is calculated
4304	 * as DEFAULT_ITR = 1/(MAX_INTS_PER_SEC * 256ns)
4305	 */
4306	E1000_WRITE_REG(hw, E1000_ITR, DEFAULT_ITR);
4307
4308	/*
4309	** When using MSIX interrupts we need to throttle
4310	** using the EITR register (82574 only)
4311	*/
4312	if (hw->mac.type == e1000_82574) {
4313		for (int i = 0; i < 4; i++)
4314			E1000_WRITE_REG(hw, E1000_EITR_82574(i),
4315			    DEFAULT_ITR);
4316		/* Disable accelerated acknowledge */
4317		E1000_WRITE_REG(hw, E1000_RFCTL, E1000_RFCTL_ACK_DIS);
4318	}
4319
4320	rxcsum = E1000_READ_REG(hw, E1000_RXCSUM);
4321	if (ifp->if_capenable & IFCAP_RXCSUM)
4322		rxcsum |= E1000_RXCSUM_TUOFL;
4323	else
4324		rxcsum &= ~E1000_RXCSUM_TUOFL;
4325	E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum);
4326
4327	/*
4328	** XXX TEMPORARY WORKAROUND: on some systems with 82573
4329	** long latencies are observed, like Lenovo X60. This
4330	** change eliminates the problem, but since having positive
4331	** values in RDTR is a known source of problems on other
4332	** platforms another solution is being sought.
4333	*/
4334	if (hw->mac.type == e1000_82573)
4335		E1000_WRITE_REG(hw, E1000_RDTR, 0x20);
4336
4337	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4338		/* Setup the Base and Length of the Rx Descriptor Ring */
4339		u32 rdt = adapter->num_rx_desc - 1; /* default */
4340
4341		bus_addr = rxr->rxdma.dma_paddr;
4342		E1000_WRITE_REG(hw, E1000_RDLEN(i),
4343		    adapter->num_rx_desc * sizeof(struct e1000_rx_desc));
4344		E1000_WRITE_REG(hw, E1000_RDBAH(i), (u32)(bus_addr >> 32));
4345		E1000_WRITE_REG(hw, E1000_RDBAL(i), (u32)bus_addr);
4346		/* Setup the Head and Tail Descriptor Pointers */
4347		E1000_WRITE_REG(hw, E1000_RDH(i), 0);
4348#ifdef DEV_NETMAP
4349		/*
4350		 * an init() while a netmap client is active must
4351		 * preserve the rx buffers passed to userspace.
4352		 */
4353		if (ifp->if_capenable & IFCAP_NETMAP)
4354			rdt -= nm_kr_rxspace(&NA(adapter->ifp)->rx_rings[i]);
4355#endif /* DEV_NETMAP */
4356		E1000_WRITE_REG(hw, E1000_RDT(i), rdt);
4357	}
4358
4359	/* Set PTHRESH for improved jumbo performance */
4360	if (((adapter->hw.mac.type == e1000_ich9lan) ||
4361	    (adapter->hw.mac.type == e1000_pch2lan) ||
4362	    (adapter->hw.mac.type == e1000_ich10lan)) &&
4363	    (ifp->if_mtu > ETHERMTU)) {
4364		u32 rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(0));
4365		E1000_WRITE_REG(hw, E1000_RXDCTL(0), rxdctl | 3);
4366	}
4367
4368	if (adapter->hw.mac.type >= e1000_pch2lan) {
4369		if (ifp->if_mtu > ETHERMTU)
4370			e1000_lv_jumbo_workaround_ich8lan(hw, TRUE);
4371		else
4372			e1000_lv_jumbo_workaround_ich8lan(hw, FALSE);
4373	}
4374
4375	/* Setup the Receive Control Register */
4376	rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
4377	rctl |= E1000_RCTL_EN | E1000_RCTL_BAM |
4378	    E1000_RCTL_LBM_NO | E1000_RCTL_RDMTS_HALF |
4379	    (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
4380
4381        /* Strip the CRC */
4382        rctl |= E1000_RCTL_SECRC;
4383
4384        /* Make sure VLAN Filters are off */
4385        rctl &= ~E1000_RCTL_VFE;
4386	rctl &= ~E1000_RCTL_SBP;
4387
4388	if (adapter->rx_mbuf_sz == MCLBYTES)
4389		rctl |= E1000_RCTL_SZ_2048;
4390	else if (adapter->rx_mbuf_sz == MJUMPAGESIZE)
4391		rctl |= E1000_RCTL_SZ_4096 | E1000_RCTL_BSEX;
4392	else if (adapter->rx_mbuf_sz > MJUMPAGESIZE)
4393		rctl |= E1000_RCTL_SZ_8192 | E1000_RCTL_BSEX;
4394
4395	if (ifp->if_mtu > ETHERMTU)
4396		rctl |= E1000_RCTL_LPE;
4397	else
4398		rctl &= ~E1000_RCTL_LPE;
4399
4400	/* Write out the settings */
4401	E1000_WRITE_REG(hw, E1000_RCTL, rctl);
4402
4403	return;
4404}
4405
4406
4407/*********************************************************************
4408 *
4409 *  This routine executes in interrupt context. It replenishes
4410 *  the mbufs in the descriptor and sends data which has been
4411 *  dma'ed into host memory to upper layer.
4412 *
4413 *  We loop at most count times if count is > 0, or until done if
4414 *  count < 0.
4415 *
4416 *  For polling we also now return the number of cleaned packets
4417 *********************************************************************/
4418static bool
4419em_rxeof(struct rx_ring *rxr, int count, int *done)
4420{
4421	struct adapter		*adapter = rxr->adapter;
4422	struct ifnet		*ifp = adapter->ifp;
4423	struct mbuf		*mp, *sendmp;
4424	u8			status = 0;
4425	u16 			len;
4426	int			i, processed, rxdone = 0;
4427	bool			eop;
4428	struct e1000_rx_desc	*cur;
4429
4430	EM_RX_LOCK(rxr);
4431
4432#ifdef DEV_NETMAP
4433	if (netmap_rx_irq(ifp, rxr->me, &processed)) {
4434		EM_RX_UNLOCK(rxr);
4435		return (FALSE);
4436	}
4437#endif /* DEV_NETMAP */
4438
4439	for (i = rxr->next_to_check, processed = 0; count != 0;) {
4440
4441		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
4442			break;
4443
4444		bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4445		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
4446
4447		cur = &rxr->rx_base[i];
4448		status = cur->status;
4449		mp = sendmp = NULL;
4450
4451		if ((status & E1000_RXD_STAT_DD) == 0)
4452			break;
4453
4454		len = le16toh(cur->length);
4455		eop = (status & E1000_RXD_STAT_EOP) != 0;
4456
4457		if ((cur->errors & E1000_RXD_ERR_FRAME_ERR_MASK) ||
4458		    (rxr->discard == TRUE)) {
4459			adapter->dropped_pkts++;
4460			++rxr->rx_discarded;
4461			if (!eop) /* Catch subsequent segs */
4462				rxr->discard = TRUE;
4463			else
4464				rxr->discard = FALSE;
4465			em_rx_discard(rxr, i);
4466			goto next_desc;
4467		}
4468		bus_dmamap_unload(rxr->rxtag, rxr->rx_buffers[i].map);
4469
4470		/* Assign correct length to the current fragment */
4471		mp = rxr->rx_buffers[i].m_head;
4472		mp->m_len = len;
4473
4474		/* Trigger for refresh */
4475		rxr->rx_buffers[i].m_head = NULL;
4476
4477		/* First segment? */
4478		if (rxr->fmp == NULL) {
4479			mp->m_pkthdr.len = len;
4480			rxr->fmp = rxr->lmp = mp;
4481		} else {
4482			/* Chain mbuf's together */
4483			mp->m_flags &= ~M_PKTHDR;
4484			rxr->lmp->m_next = mp;
4485			rxr->lmp = mp;
4486			rxr->fmp->m_pkthdr.len += len;
4487		}
4488
4489		if (eop) {
4490			--count;
4491			sendmp = rxr->fmp;
4492			sendmp->m_pkthdr.rcvif = ifp;
4493			ifp->if_ipackets++;
4494			em_receive_checksum(cur, sendmp);
4495#ifndef __NO_STRICT_ALIGNMENT
4496			if (adapter->hw.mac.max_frame_size >
4497			    (MCLBYTES - ETHER_ALIGN) &&
4498			    em_fixup_rx(rxr) != 0)
4499				goto skip;
4500#endif
4501			if (status & E1000_RXD_STAT_VP) {
4502				sendmp->m_pkthdr.ether_vtag =
4503				    le16toh(cur->special);
4504				sendmp->m_flags |= M_VLANTAG;
4505			}
4506#ifndef __NO_STRICT_ALIGNMENT
4507skip:
4508#endif
4509			rxr->fmp = rxr->lmp = NULL;
4510		}
4511next_desc:
4512		/* Zero out the receive descriptors status. */
4513		cur->status = 0;
4514		++rxdone;	/* cumulative for POLL */
4515		++processed;
4516
4517		/* Advance our pointers to the next descriptor. */
4518		if (++i == adapter->num_rx_desc)
4519			i = 0;
4520
4521		/* Send to the stack */
4522		if (sendmp != NULL) {
4523			rxr->next_to_check = i;
4524			EM_RX_UNLOCK(rxr);
4525			(*ifp->if_input)(ifp, sendmp);
4526			EM_RX_LOCK(rxr);
4527			i = rxr->next_to_check;
4528		}
4529
4530		/* Only refresh mbufs every 8 descriptors */
4531		if (processed == 8) {
4532			em_refresh_mbufs(rxr, i);
4533			processed = 0;
4534		}
4535	}
4536
4537	/* Catch any remaining refresh work */
4538	if (e1000_rx_unrefreshed(rxr))
4539		em_refresh_mbufs(rxr, i);
4540
4541	rxr->next_to_check = i;
4542	if (done != NULL)
4543		*done = rxdone;
4544	EM_RX_UNLOCK(rxr);
4545
4546	return ((status & E1000_RXD_STAT_DD) ? TRUE : FALSE);
4547}
4548
4549static __inline void
4550em_rx_discard(struct rx_ring *rxr, int i)
4551{
4552	struct em_buffer	*rbuf;
4553
4554	rbuf = &rxr->rx_buffers[i];
4555	bus_dmamap_unload(rxr->rxtag, rbuf->map);
4556
4557	/* Free any previous pieces */
4558	if (rxr->fmp != NULL) {
4559		rxr->fmp->m_flags |= M_PKTHDR;
4560		m_freem(rxr->fmp);
4561		rxr->fmp = NULL;
4562		rxr->lmp = NULL;
4563	}
4564	/*
4565	** Free buffer and allow em_refresh_mbufs()
4566	** to clean up and recharge buffer.
4567	*/
4568	if (rbuf->m_head) {
4569		m_free(rbuf->m_head);
4570		rbuf->m_head = NULL;
4571	}
4572	return;
4573}
4574
4575#ifndef __NO_STRICT_ALIGNMENT
4576/*
4577 * When jumbo frames are enabled we should realign entire payload on
4578 * architecures with strict alignment. This is serious design mistake of 8254x
4579 * as it nullifies DMA operations. 8254x just allows RX buffer size to be
4580 * 2048/4096/8192/16384. What we really want is 2048 - ETHER_ALIGN to align its
4581 * payload. On architecures without strict alignment restrictions 8254x still
4582 * performs unaligned memory access which would reduce the performance too.
4583 * To avoid copying over an entire frame to align, we allocate a new mbuf and
4584 * copy ethernet header to the new mbuf. The new mbuf is prepended into the
4585 * existing mbuf chain.
4586 *
4587 * Be aware, best performance of the 8254x is achived only when jumbo frame is
4588 * not used at all on architectures with strict alignment.
4589 */
4590static int
4591em_fixup_rx(struct rx_ring *rxr)
4592{
4593	struct adapter *adapter = rxr->adapter;
4594	struct mbuf *m, *n;
4595	int error;
4596
4597	error = 0;
4598	m = rxr->fmp;
4599	if (m->m_len <= (MCLBYTES - ETHER_HDR_LEN)) {
4600		bcopy(m->m_data, m->m_data + ETHER_HDR_LEN, m->m_len);
4601		m->m_data += ETHER_HDR_LEN;
4602	} else {
4603		MGETHDR(n, M_NOWAIT, MT_DATA);
4604		if (n != NULL) {
4605			bcopy(m->m_data, n->m_data, ETHER_HDR_LEN);
4606			m->m_data += ETHER_HDR_LEN;
4607			m->m_len -= ETHER_HDR_LEN;
4608			n->m_len = ETHER_HDR_LEN;
4609			M_MOVE_PKTHDR(n, m);
4610			n->m_next = m;
4611			rxr->fmp = n;
4612		} else {
4613			adapter->dropped_pkts++;
4614			m_freem(rxr->fmp);
4615			rxr->fmp = NULL;
4616			error = ENOMEM;
4617		}
4618	}
4619
4620	return (error);
4621}
4622#endif
4623
4624/*********************************************************************
4625 *
4626 *  Verify that the hardware indicated that the checksum is valid.
4627 *  Inform the stack about the status of checksum so that stack
4628 *  doesn't spend time verifying the checksum.
4629 *
4630 *********************************************************************/
4631static void
4632em_receive_checksum(struct e1000_rx_desc *rx_desc, struct mbuf *mp)
4633{
4634	mp->m_pkthdr.csum_flags = 0;
4635
4636	/* Ignore Checksum bit is set */
4637	if (rx_desc->status & E1000_RXD_STAT_IXSM)
4638		return;
4639
4640	if (rx_desc->errors & (E1000_RXD_ERR_TCPE | E1000_RXD_ERR_IPE))
4641		return;
4642
4643	/* IP Checksum Good? */
4644	if (rx_desc->status & E1000_RXD_STAT_IPCS)
4645		mp->m_pkthdr.csum_flags = (CSUM_IP_CHECKED | CSUM_IP_VALID);
4646
4647	/* TCP or UDP checksum */
4648	if (rx_desc->status & (E1000_RXD_STAT_TCPCS | E1000_RXD_STAT_UDPCS)) {
4649		mp->m_pkthdr.csum_flags |= (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
4650		mp->m_pkthdr.csum_data = htons(0xffff);
4651	}
4652}
4653
4654/*
4655 * This routine is run via an vlan
4656 * config EVENT
4657 */
4658static void
4659em_register_vlan(void *arg, struct ifnet *ifp, u16 vtag)
4660{
4661	struct adapter	*adapter = ifp->if_softc;
4662	u32		index, bit;
4663
4664	if (ifp->if_softc !=  arg)   /* Not our event */
4665		return;
4666
4667	if ((vtag == 0) || (vtag > 4095))       /* Invalid ID */
4668                return;
4669
4670	EM_CORE_LOCK(adapter);
4671	index = (vtag >> 5) & 0x7F;
4672	bit = vtag & 0x1F;
4673	adapter->shadow_vfta[index] |= (1 << bit);
4674	++adapter->num_vlans;
4675	/* Re-init to load the changes */
4676	if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
4677		em_init_locked(adapter);
4678	EM_CORE_UNLOCK(adapter);
4679}
4680
4681/*
4682 * This routine is run via an vlan
4683 * unconfig EVENT
4684 */
4685static void
4686em_unregister_vlan(void *arg, struct ifnet *ifp, u16 vtag)
4687{
4688	struct adapter	*adapter = ifp->if_softc;
4689	u32		index, bit;
4690
4691	if (ifp->if_softc !=  arg)
4692		return;
4693
4694	if ((vtag == 0) || (vtag > 4095))       /* Invalid */
4695                return;
4696
4697	EM_CORE_LOCK(adapter);
4698	index = (vtag >> 5) & 0x7F;
4699	bit = vtag & 0x1F;
4700	adapter->shadow_vfta[index] &= ~(1 << bit);
4701	--adapter->num_vlans;
4702	/* Re-init to load the changes */
4703	if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
4704		em_init_locked(adapter);
4705	EM_CORE_UNLOCK(adapter);
4706}
4707
4708static void
4709em_setup_vlan_hw_support(struct adapter *adapter)
4710{
4711	struct e1000_hw *hw = &adapter->hw;
4712	u32             reg;
4713
4714	/*
4715	** We get here thru init_locked, meaning
4716	** a soft reset, this has already cleared
4717	** the VFTA and other state, so if there
4718	** have been no vlan's registered do nothing.
4719	*/
4720	if (adapter->num_vlans == 0)
4721                return;
4722
4723	/*
4724	** A soft reset zero's out the VFTA, so
4725	** we need to repopulate it now.
4726	*/
4727	for (int i = 0; i < EM_VFTA_SIZE; i++)
4728                if (adapter->shadow_vfta[i] != 0)
4729			E1000_WRITE_REG_ARRAY(hw, E1000_VFTA,
4730                            i, adapter->shadow_vfta[i]);
4731
4732	reg = E1000_READ_REG(hw, E1000_CTRL);
4733	reg |= E1000_CTRL_VME;
4734	E1000_WRITE_REG(hw, E1000_CTRL, reg);
4735
4736	/* Enable the Filter Table */
4737	reg = E1000_READ_REG(hw, E1000_RCTL);
4738	reg &= ~E1000_RCTL_CFIEN;
4739	reg |= E1000_RCTL_VFE;
4740	E1000_WRITE_REG(hw, E1000_RCTL, reg);
4741}
4742
4743static void
4744em_enable_intr(struct adapter *adapter)
4745{
4746	struct e1000_hw *hw = &adapter->hw;
4747	u32 ims_mask = IMS_ENABLE_MASK;
4748
4749	if (hw->mac.type == e1000_82574) {
4750		E1000_WRITE_REG(hw, EM_EIAC, EM_MSIX_MASK);
4751		ims_mask |= EM_MSIX_MASK;
4752	}
4753	E1000_WRITE_REG(hw, E1000_IMS, ims_mask);
4754}
4755
4756static void
4757em_disable_intr(struct adapter *adapter)
4758{
4759	struct e1000_hw *hw = &adapter->hw;
4760
4761	if (hw->mac.type == e1000_82574)
4762		E1000_WRITE_REG(hw, EM_EIAC, 0);
4763	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
4764}
4765
4766/*
4767 * Bit of a misnomer, what this really means is
4768 * to enable OS management of the system... aka
4769 * to disable special hardware management features
4770 */
4771static void
4772em_init_manageability(struct adapter *adapter)
4773{
4774	/* A shared code workaround */
4775#define E1000_82542_MANC2H E1000_MANC2H
4776	if (adapter->has_manage) {
4777		int manc2h = E1000_READ_REG(&adapter->hw, E1000_MANC2H);
4778		int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
4779
4780		/* disable hardware interception of ARP */
4781		manc &= ~(E1000_MANC_ARP_EN);
4782
4783                /* enable receiving management packets to the host */
4784		manc |= E1000_MANC_EN_MNG2HOST;
4785#define E1000_MNG2HOST_PORT_623 (1 << 5)
4786#define E1000_MNG2HOST_PORT_664 (1 << 6)
4787		manc2h |= E1000_MNG2HOST_PORT_623;
4788		manc2h |= E1000_MNG2HOST_PORT_664;
4789		E1000_WRITE_REG(&adapter->hw, E1000_MANC2H, manc2h);
4790		E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
4791	}
4792}
4793
4794/*
4795 * Give control back to hardware management
4796 * controller if there is one.
4797 */
4798static void
4799em_release_manageability(struct adapter *adapter)
4800{
4801	if (adapter->has_manage) {
4802		int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
4803
4804		/* re-enable hardware interception of ARP */
4805		manc |= E1000_MANC_ARP_EN;
4806		manc &= ~E1000_MANC_EN_MNG2HOST;
4807
4808		E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
4809	}
4810}
4811
4812/*
4813 * em_get_hw_control sets the {CTRL_EXT|FWSM}:DRV_LOAD bit.
4814 * For ASF and Pass Through versions of f/w this means
4815 * that the driver is loaded. For AMT version type f/w
4816 * this means that the network i/f is open.
4817 */
4818static void
4819em_get_hw_control(struct adapter *adapter)
4820{
4821	u32 ctrl_ext, swsm;
4822
4823	if (adapter->hw.mac.type == e1000_82573) {
4824		swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM);
4825		E1000_WRITE_REG(&adapter->hw, E1000_SWSM,
4826		    swsm | E1000_SWSM_DRV_LOAD);
4827		return;
4828	}
4829	/* else */
4830	ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4831	E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
4832	    ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
4833	return;
4834}
4835
4836/*
4837 * em_release_hw_control resets {CTRL_EXT|FWSM}:DRV_LOAD bit.
4838 * For ASF and Pass Through versions of f/w this means that
4839 * the driver is no longer loaded. For AMT versions of the
4840 * f/w this means that the network i/f is closed.
4841 */
4842static void
4843em_release_hw_control(struct adapter *adapter)
4844{
4845	u32 ctrl_ext, swsm;
4846
4847	if (!adapter->has_manage)
4848		return;
4849
4850	if (adapter->hw.mac.type == e1000_82573) {
4851		swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM);
4852		E1000_WRITE_REG(&adapter->hw, E1000_SWSM,
4853		    swsm & ~E1000_SWSM_DRV_LOAD);
4854		return;
4855	}
4856	/* else */
4857	ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4858	E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
4859	    ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
4860	return;
4861}
4862
4863static int
4864em_is_valid_ether_addr(u8 *addr)
4865{
4866	char zero_addr[6] = { 0, 0, 0, 0, 0, 0 };
4867
4868	if ((addr[0] & 1) || (!bcmp(addr, zero_addr, ETHER_ADDR_LEN))) {
4869		return (FALSE);
4870	}
4871
4872	return (TRUE);
4873}
4874
4875/*
4876** Parse the interface capabilities with regard
4877** to both system management and wake-on-lan for
4878** later use.
4879*/
4880static void
4881em_get_wakeup(device_t dev)
4882{
4883	struct adapter	*adapter = device_get_softc(dev);
4884	u16		eeprom_data = 0, device_id, apme_mask;
4885
4886	adapter->has_manage = e1000_enable_mng_pass_thru(&adapter->hw);
4887	apme_mask = EM_EEPROM_APME;
4888
4889	switch (adapter->hw.mac.type) {
4890	case e1000_82573:
4891	case e1000_82583:
4892		adapter->has_amt = TRUE;
4893		/* Falls thru */
4894	case e1000_82571:
4895	case e1000_82572:
4896	case e1000_80003es2lan:
4897		if (adapter->hw.bus.func == 1) {
4898			e1000_read_nvm(&adapter->hw,
4899			    NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data);
4900			break;
4901		} else
4902			e1000_read_nvm(&adapter->hw,
4903			    NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
4904		break;
4905	case e1000_ich8lan:
4906	case e1000_ich9lan:
4907	case e1000_ich10lan:
4908	case e1000_pchlan:
4909	case e1000_pch2lan:
4910		apme_mask = E1000_WUC_APME;
4911		adapter->has_amt = TRUE;
4912		eeprom_data = E1000_READ_REG(&adapter->hw, E1000_WUC);
4913		break;
4914	default:
4915		e1000_read_nvm(&adapter->hw,
4916		    NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
4917		break;
4918	}
4919	if (eeprom_data & apme_mask)
4920		adapter->wol = (E1000_WUFC_MAG | E1000_WUFC_MC);
4921	/*
4922         * We have the eeprom settings, now apply the special cases
4923         * where the eeprom may be wrong or the board won't support
4924         * wake on lan on a particular port
4925	 */
4926	device_id = pci_get_device(dev);
4927        switch (device_id) {
4928	case E1000_DEV_ID_82571EB_FIBER:
4929		/* Wake events only supported on port A for dual fiber
4930		 * regardless of eeprom setting */
4931		if (E1000_READ_REG(&adapter->hw, E1000_STATUS) &
4932		    E1000_STATUS_FUNC_1)
4933			adapter->wol = 0;
4934		break;
4935	case E1000_DEV_ID_82571EB_QUAD_COPPER:
4936	case E1000_DEV_ID_82571EB_QUAD_FIBER:
4937	case E1000_DEV_ID_82571EB_QUAD_COPPER_LP:
4938                /* if quad port adapter, disable WoL on all but port A */
4939		if (global_quad_port_a != 0)
4940			adapter->wol = 0;
4941		/* Reset for multiple quad port adapters */
4942		if (++global_quad_port_a == 4)
4943			global_quad_port_a = 0;
4944                break;
4945	}
4946	return;
4947}
4948
4949
4950/*
4951 * Enable PCI Wake On Lan capability
4952 */
4953static void
4954em_enable_wakeup(device_t dev)
4955{
4956	struct adapter	*adapter = device_get_softc(dev);
4957	struct ifnet	*ifp = adapter->ifp;
4958	u32		pmc, ctrl, ctrl_ext, rctl;
4959	u16     	status;
4960
4961	if ((pci_find_cap(dev, PCIY_PMG, &pmc) != 0))
4962		return;
4963
4964	/* Advertise the wakeup capability */
4965	ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
4966	ctrl |= (E1000_CTRL_SWDPIN2 | E1000_CTRL_SWDPIN3);
4967	E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
4968	E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
4969
4970	if ((adapter->hw.mac.type == e1000_ich8lan) ||
4971	    (adapter->hw.mac.type == e1000_pchlan) ||
4972	    (adapter->hw.mac.type == e1000_ich9lan) ||
4973	    (adapter->hw.mac.type == e1000_ich10lan))
4974		e1000_suspend_workarounds_ich8lan(&adapter->hw);
4975
4976	/* Keep the laser running on Fiber adapters */
4977	if (adapter->hw.phy.media_type == e1000_media_type_fiber ||
4978	    adapter->hw.phy.media_type == e1000_media_type_internal_serdes) {
4979		ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4980		ctrl_ext |= E1000_CTRL_EXT_SDP3_DATA;
4981		E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, ctrl_ext);
4982	}
4983
4984	/*
4985	** Determine type of Wakeup: note that wol
4986	** is set with all bits on by default.
4987	*/
4988	if ((ifp->if_capenable & IFCAP_WOL_MAGIC) == 0)
4989		adapter->wol &= ~E1000_WUFC_MAG;
4990
4991	if ((ifp->if_capenable & IFCAP_WOL_MCAST) == 0)
4992		adapter->wol &= ~E1000_WUFC_MC;
4993	else {
4994		rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
4995		rctl |= E1000_RCTL_MPE;
4996		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, rctl);
4997	}
4998
4999	if ((adapter->hw.mac.type == e1000_pchlan) ||
5000	    (adapter->hw.mac.type == e1000_pch2lan)) {
5001		if (em_enable_phy_wakeup(adapter))
5002			return;
5003	} else {
5004		E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
5005		E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
5006	}
5007
5008	if (adapter->hw.phy.type == e1000_phy_igp_3)
5009		e1000_igp3_phy_powerdown_workaround_ich8lan(&adapter->hw);
5010
5011        /* Request PME */
5012        status = pci_read_config(dev, pmc + PCIR_POWER_STATUS, 2);
5013	status &= ~(PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE);
5014	if (ifp->if_capenable & IFCAP_WOL)
5015		status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE;
5016        pci_write_config(dev, pmc + PCIR_POWER_STATUS, status, 2);
5017
5018	return;
5019}
5020
5021/*
5022** WOL in the newer chipset interfaces (pchlan)
5023** require thing to be copied into the phy
5024*/
5025static int
5026em_enable_phy_wakeup(struct adapter *adapter)
5027{
5028	struct e1000_hw *hw = &adapter->hw;
5029	u32 mreg, ret = 0;
5030	u16 preg;
5031
5032	/* copy MAC RARs to PHY RARs */
5033	e1000_copy_rx_addrs_to_phy_ich8lan(hw);
5034
5035	/* copy MAC MTA to PHY MTA */
5036	for (int i = 0; i < adapter->hw.mac.mta_reg_count; i++) {
5037		mreg = E1000_READ_REG_ARRAY(hw, E1000_MTA, i);
5038		e1000_write_phy_reg(hw, BM_MTA(i), (u16)(mreg & 0xFFFF));
5039		e1000_write_phy_reg(hw, BM_MTA(i) + 1,
5040		    (u16)((mreg >> 16) & 0xFFFF));
5041	}
5042
5043	/* configure PHY Rx Control register */
5044	e1000_read_phy_reg(&adapter->hw, BM_RCTL, &preg);
5045	mreg = E1000_READ_REG(hw, E1000_RCTL);
5046	if (mreg & E1000_RCTL_UPE)
5047		preg |= BM_RCTL_UPE;
5048	if (mreg & E1000_RCTL_MPE)
5049		preg |= BM_RCTL_MPE;
5050	preg &= ~(BM_RCTL_MO_MASK);
5051	if (mreg & E1000_RCTL_MO_3)
5052		preg |= (((mreg & E1000_RCTL_MO_3) >> E1000_RCTL_MO_SHIFT)
5053				<< BM_RCTL_MO_SHIFT);
5054	if (mreg & E1000_RCTL_BAM)
5055		preg |= BM_RCTL_BAM;
5056	if (mreg & E1000_RCTL_PMCF)
5057		preg |= BM_RCTL_PMCF;
5058	mreg = E1000_READ_REG(hw, E1000_CTRL);
5059	if (mreg & E1000_CTRL_RFCE)
5060		preg |= BM_RCTL_RFCE;
5061	e1000_write_phy_reg(&adapter->hw, BM_RCTL, preg);
5062
5063	/* enable PHY wakeup in MAC register */
5064	E1000_WRITE_REG(hw, E1000_WUC,
5065	    E1000_WUC_PHY_WAKE | E1000_WUC_PME_EN);
5066	E1000_WRITE_REG(hw, E1000_WUFC, adapter->wol);
5067
5068	/* configure and enable PHY wakeup in PHY registers */
5069	e1000_write_phy_reg(&adapter->hw, BM_WUFC, adapter->wol);
5070	e1000_write_phy_reg(&adapter->hw, BM_WUC, E1000_WUC_PME_EN);
5071
5072	/* activate PHY wakeup */
5073	ret = hw->phy.ops.acquire(hw);
5074	if (ret) {
5075		printf("Could not acquire PHY\n");
5076		return ret;
5077	}
5078	e1000_write_phy_reg_mdic(hw, IGP01E1000_PHY_PAGE_SELECT,
5079	                         (BM_WUC_ENABLE_PAGE << IGP_PAGE_SHIFT));
5080	ret = e1000_read_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, &preg);
5081	if (ret) {
5082		printf("Could not read PHY page 769\n");
5083		goto out;
5084	}
5085	preg |= BM_WUC_ENABLE_BIT | BM_WUC_HOST_WU_BIT;
5086	ret = e1000_write_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, preg);
5087	if (ret)
5088		printf("Could not set PHY Host Wakeup bit\n");
5089out:
5090	hw->phy.ops.release(hw);
5091
5092	return ret;
5093}
5094
5095static void
5096em_led_func(void *arg, int onoff)
5097{
5098	struct adapter	*adapter = arg;
5099
5100	EM_CORE_LOCK(adapter);
5101	if (onoff) {
5102		e1000_setup_led(&adapter->hw);
5103		e1000_led_on(&adapter->hw);
5104	} else {
5105		e1000_led_off(&adapter->hw);
5106		e1000_cleanup_led(&adapter->hw);
5107	}
5108	EM_CORE_UNLOCK(adapter);
5109}
5110
5111/*
5112** Disable the L0S and L1 LINK states
5113*/
5114static void
5115em_disable_aspm(struct adapter *adapter)
5116{
5117	int		base, reg;
5118	u16		link_cap,link_ctrl;
5119	device_t	dev = adapter->dev;
5120
5121	switch (adapter->hw.mac.type) {
5122		case e1000_82573:
5123		case e1000_82574:
5124		case e1000_82583:
5125			break;
5126		default:
5127			return;
5128	}
5129	if (pci_find_cap(dev, PCIY_EXPRESS, &base) != 0)
5130		return;
5131	reg = base + PCIER_LINK_CAP;
5132	link_cap = pci_read_config(dev, reg, 2);
5133	if ((link_cap & PCIEM_LINK_CAP_ASPM) == 0)
5134		return;
5135	reg = base + PCIER_LINK_CTL;
5136	link_ctrl = pci_read_config(dev, reg, 2);
5137	link_ctrl &= ~PCIEM_LINK_CTL_ASPMC;
5138	pci_write_config(dev, reg, link_ctrl, 2);
5139	return;
5140}
5141
5142/**********************************************************************
5143 *
5144 *  Update the board statistics counters.
5145 *
5146 **********************************************************************/
5147static void
5148em_update_stats_counters(struct adapter *adapter)
5149{
5150	struct ifnet   *ifp;
5151
5152	if(adapter->hw.phy.media_type == e1000_media_type_copper ||
5153	   (E1000_READ_REG(&adapter->hw, E1000_STATUS) & E1000_STATUS_LU)) {
5154		adapter->stats.symerrs += E1000_READ_REG(&adapter->hw, E1000_SYMERRS);
5155		adapter->stats.sec += E1000_READ_REG(&adapter->hw, E1000_SEC);
5156	}
5157	adapter->stats.crcerrs += E1000_READ_REG(&adapter->hw, E1000_CRCERRS);
5158	adapter->stats.mpc += E1000_READ_REG(&adapter->hw, E1000_MPC);
5159	adapter->stats.scc += E1000_READ_REG(&adapter->hw, E1000_SCC);
5160	adapter->stats.ecol += E1000_READ_REG(&adapter->hw, E1000_ECOL);
5161
5162	adapter->stats.mcc += E1000_READ_REG(&adapter->hw, E1000_MCC);
5163	adapter->stats.latecol += E1000_READ_REG(&adapter->hw, E1000_LATECOL);
5164	adapter->stats.colc += E1000_READ_REG(&adapter->hw, E1000_COLC);
5165	adapter->stats.dc += E1000_READ_REG(&adapter->hw, E1000_DC);
5166	adapter->stats.rlec += E1000_READ_REG(&adapter->hw, E1000_RLEC);
5167	adapter->stats.xonrxc += E1000_READ_REG(&adapter->hw, E1000_XONRXC);
5168	adapter->stats.xontxc += E1000_READ_REG(&adapter->hw, E1000_XONTXC);
5169	/*
5170	** For watchdog management we need to know if we have been
5171	** paused during the last interval, so capture that here.
5172	*/
5173	adapter->pause_frames = E1000_READ_REG(&adapter->hw, E1000_XOFFRXC);
5174	adapter->stats.xoffrxc += adapter->pause_frames;
5175	adapter->stats.xofftxc += E1000_READ_REG(&adapter->hw, E1000_XOFFTXC);
5176	adapter->stats.fcruc += E1000_READ_REG(&adapter->hw, E1000_FCRUC);
5177	adapter->stats.prc64 += E1000_READ_REG(&adapter->hw, E1000_PRC64);
5178	adapter->stats.prc127 += E1000_READ_REG(&adapter->hw, E1000_PRC127);
5179	adapter->stats.prc255 += E1000_READ_REG(&adapter->hw, E1000_PRC255);
5180	adapter->stats.prc511 += E1000_READ_REG(&adapter->hw, E1000_PRC511);
5181	adapter->stats.prc1023 += E1000_READ_REG(&adapter->hw, E1000_PRC1023);
5182	adapter->stats.prc1522 += E1000_READ_REG(&adapter->hw, E1000_PRC1522);
5183	adapter->stats.gprc += E1000_READ_REG(&adapter->hw, E1000_GPRC);
5184	adapter->stats.bprc += E1000_READ_REG(&adapter->hw, E1000_BPRC);
5185	adapter->stats.mprc += E1000_READ_REG(&adapter->hw, E1000_MPRC);
5186	adapter->stats.gptc += E1000_READ_REG(&adapter->hw, E1000_GPTC);
5187
5188	/* For the 64-bit byte counters the low dword must be read first. */
5189	/* Both registers clear on the read of the high dword */
5190
5191	adapter->stats.gorc += E1000_READ_REG(&adapter->hw, E1000_GORCL) +
5192	    ((u64)E1000_READ_REG(&adapter->hw, E1000_GORCH) << 32);
5193	adapter->stats.gotc += E1000_READ_REG(&adapter->hw, E1000_GOTCL) +
5194	    ((u64)E1000_READ_REG(&adapter->hw, E1000_GOTCH) << 32);
5195
5196	adapter->stats.rnbc += E1000_READ_REG(&adapter->hw, E1000_RNBC);
5197	adapter->stats.ruc += E1000_READ_REG(&adapter->hw, E1000_RUC);
5198	adapter->stats.rfc += E1000_READ_REG(&adapter->hw, E1000_RFC);
5199	adapter->stats.roc += E1000_READ_REG(&adapter->hw, E1000_ROC);
5200	adapter->stats.rjc += E1000_READ_REG(&adapter->hw, E1000_RJC);
5201
5202	adapter->stats.tor += E1000_READ_REG(&adapter->hw, E1000_TORH);
5203	adapter->stats.tot += E1000_READ_REG(&adapter->hw, E1000_TOTH);
5204
5205	adapter->stats.tpr += E1000_READ_REG(&adapter->hw, E1000_TPR);
5206	adapter->stats.tpt += E1000_READ_REG(&adapter->hw, E1000_TPT);
5207	adapter->stats.ptc64 += E1000_READ_REG(&adapter->hw, E1000_PTC64);
5208	adapter->stats.ptc127 += E1000_READ_REG(&adapter->hw, E1000_PTC127);
5209	adapter->stats.ptc255 += E1000_READ_REG(&adapter->hw, E1000_PTC255);
5210	adapter->stats.ptc511 += E1000_READ_REG(&adapter->hw, E1000_PTC511);
5211	adapter->stats.ptc1023 += E1000_READ_REG(&adapter->hw, E1000_PTC1023);
5212	adapter->stats.ptc1522 += E1000_READ_REG(&adapter->hw, E1000_PTC1522);
5213	adapter->stats.mptc += E1000_READ_REG(&adapter->hw, E1000_MPTC);
5214	adapter->stats.bptc += E1000_READ_REG(&adapter->hw, E1000_BPTC);
5215
5216	/* Interrupt Counts */
5217
5218	adapter->stats.iac += E1000_READ_REG(&adapter->hw, E1000_IAC);
5219	adapter->stats.icrxptc += E1000_READ_REG(&adapter->hw, E1000_ICRXPTC);
5220	adapter->stats.icrxatc += E1000_READ_REG(&adapter->hw, E1000_ICRXATC);
5221	adapter->stats.ictxptc += E1000_READ_REG(&adapter->hw, E1000_ICTXPTC);
5222	adapter->stats.ictxatc += E1000_READ_REG(&adapter->hw, E1000_ICTXATC);
5223	adapter->stats.ictxqec += E1000_READ_REG(&adapter->hw, E1000_ICTXQEC);
5224	adapter->stats.ictxqmtc += E1000_READ_REG(&adapter->hw, E1000_ICTXQMTC);
5225	adapter->stats.icrxdmtc += E1000_READ_REG(&adapter->hw, E1000_ICRXDMTC);
5226	adapter->stats.icrxoc += E1000_READ_REG(&adapter->hw, E1000_ICRXOC);
5227
5228	if (adapter->hw.mac.type >= e1000_82543) {
5229		adapter->stats.algnerrc +=
5230		E1000_READ_REG(&adapter->hw, E1000_ALGNERRC);
5231		adapter->stats.rxerrc +=
5232		E1000_READ_REG(&adapter->hw, E1000_RXERRC);
5233		adapter->stats.tncrs +=
5234		E1000_READ_REG(&adapter->hw, E1000_TNCRS);
5235		adapter->stats.cexterr +=
5236		E1000_READ_REG(&adapter->hw, E1000_CEXTERR);
5237		adapter->stats.tsctc +=
5238		E1000_READ_REG(&adapter->hw, E1000_TSCTC);
5239		adapter->stats.tsctfc +=
5240		E1000_READ_REG(&adapter->hw, E1000_TSCTFC);
5241	}
5242	ifp = adapter->ifp;
5243
5244	ifp->if_collisions = adapter->stats.colc;
5245
5246	/* Rx Errors */
5247	ifp->if_ierrors = adapter->dropped_pkts + adapter->stats.rxerrc +
5248	    adapter->stats.crcerrs + adapter->stats.algnerrc +
5249	    adapter->stats.ruc + adapter->stats.roc +
5250	    adapter->stats.mpc + adapter->stats.cexterr;
5251
5252	/* Tx Errors */
5253	ifp->if_oerrors = adapter->stats.ecol +
5254	    adapter->stats.latecol + adapter->watchdog_events;
5255}
5256
5257/* Export a single 32-bit register via a read-only sysctl. */
5258static int
5259em_sysctl_reg_handler(SYSCTL_HANDLER_ARGS)
5260{
5261	struct adapter *adapter;
5262	u_int val;
5263
5264	adapter = oidp->oid_arg1;
5265	val = E1000_READ_REG(&adapter->hw, oidp->oid_arg2);
5266	return (sysctl_handle_int(oidp, &val, 0, req));
5267}
5268
5269/*
5270 * Add sysctl variables, one per statistic, to the system.
5271 */
5272static void
5273em_add_hw_stats(struct adapter *adapter)
5274{
5275	device_t dev = adapter->dev;
5276
5277	struct tx_ring *txr = adapter->tx_rings;
5278	struct rx_ring *rxr = adapter->rx_rings;
5279
5280	struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev);
5281	struct sysctl_oid *tree = device_get_sysctl_tree(dev);
5282	struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree);
5283	struct e1000_hw_stats *stats = &adapter->stats;
5284
5285	struct sysctl_oid *stat_node, *queue_node, *int_node;
5286	struct sysctl_oid_list *stat_list, *queue_list, *int_list;
5287
5288#define QUEUE_NAME_LEN 32
5289	char namebuf[QUEUE_NAME_LEN];
5290
5291	/* Driver Statistics */
5292	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "link_irq",
5293			CTLFLAG_RD, &adapter->link_irq,
5294			"Link MSIX IRQ Handled");
5295	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "mbuf_alloc_fail",
5296			 CTLFLAG_RD, &adapter->mbuf_alloc_failed,
5297			 "Std mbuf failed");
5298	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "cluster_alloc_fail",
5299			 CTLFLAG_RD, &adapter->mbuf_cluster_failed,
5300			 "Std mbuf cluster failed");
5301	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "dropped",
5302			CTLFLAG_RD, &adapter->dropped_pkts,
5303			"Driver dropped packets");
5304	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_dma_fail",
5305			CTLFLAG_RD, &adapter->no_tx_dma_setup,
5306			"Driver tx dma failure in xmit");
5307	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_overruns",
5308			CTLFLAG_RD, &adapter->rx_overruns,
5309			"RX overruns");
5310	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "watchdog_timeouts",
5311			CTLFLAG_RD, &adapter->watchdog_events,
5312			"Watchdog timeouts");
5313
5314	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "device_control",
5315			CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_CTRL,
5316			em_sysctl_reg_handler, "IU",
5317			"Device Control Register");
5318	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rx_control",
5319			CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_RCTL,
5320			em_sysctl_reg_handler, "IU",
5321			"Receiver Control Register");
5322	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_high_water",
5323			CTLFLAG_RD, &adapter->hw.fc.high_water, 0,
5324			"Flow Control High Watermark");
5325	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_low_water",
5326			CTLFLAG_RD, &adapter->hw.fc.low_water, 0,
5327			"Flow Control Low Watermark");
5328
5329	for (int i = 0; i < adapter->num_queues; i++, rxr++, txr++) {
5330		snprintf(namebuf, QUEUE_NAME_LEN, "queue%d", i);
5331		queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
5332					    CTLFLAG_RD, NULL, "Queue Name");
5333		queue_list = SYSCTL_CHILDREN(queue_node);
5334
5335		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_head",
5336				CTLTYPE_UINT | CTLFLAG_RD, adapter,
5337				E1000_TDH(txr->me),
5338				em_sysctl_reg_handler, "IU",
5339 				"Transmit Descriptor Head");
5340		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_tail",
5341				CTLTYPE_UINT | CTLFLAG_RD, adapter,
5342				E1000_TDT(txr->me),
5343				em_sysctl_reg_handler, "IU",
5344 				"Transmit Descriptor Tail");
5345		SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "tx_irq",
5346				CTLFLAG_RD, &txr->tx_irq,
5347				"Queue MSI-X Transmit Interrupts");
5348		SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "no_desc_avail",
5349				CTLFLAG_RD, &txr->no_desc_avail,
5350				"Queue No Descriptor Available");
5351
5352		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_head",
5353				CTLTYPE_UINT | CTLFLAG_RD, adapter,
5354				E1000_RDH(rxr->me),
5355				em_sysctl_reg_handler, "IU",
5356				"Receive Descriptor Head");
5357		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_tail",
5358				CTLTYPE_UINT | CTLFLAG_RD, adapter,
5359				E1000_RDT(rxr->me),
5360				em_sysctl_reg_handler, "IU",
5361				"Receive Descriptor Tail");
5362		SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "rx_irq",
5363				CTLFLAG_RD, &rxr->rx_irq,
5364				"Queue MSI-X Receive Interrupts");
5365	}
5366
5367	/* MAC stats get their own sub node */
5368
5369	stat_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "mac_stats",
5370				    CTLFLAG_RD, NULL, "Statistics");
5371	stat_list = SYSCTL_CHILDREN(stat_node);
5372
5373	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "excess_coll",
5374			CTLFLAG_RD, &stats->ecol,
5375			"Excessive collisions");
5376	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "single_coll",
5377			CTLFLAG_RD, &stats->scc,
5378			"Single collisions");
5379	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "multiple_coll",
5380			CTLFLAG_RD, &stats->mcc,
5381			"Multiple collisions");
5382	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "late_coll",
5383			CTLFLAG_RD, &stats->latecol,
5384			"Late collisions");
5385	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "collision_count",
5386			CTLFLAG_RD, &stats->colc,
5387			"Collision Count");
5388	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "symbol_errors",
5389			CTLFLAG_RD, &adapter->stats.symerrs,
5390			"Symbol Errors");
5391	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "sequence_errors",
5392			CTLFLAG_RD, &adapter->stats.sec,
5393			"Sequence Errors");
5394	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "defer_count",
5395			CTLFLAG_RD, &adapter->stats.dc,
5396			"Defer Count");
5397	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "missed_packets",
5398			CTLFLAG_RD, &adapter->stats.mpc,
5399			"Missed Packets");
5400	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_no_buff",
5401			CTLFLAG_RD, &adapter->stats.rnbc,
5402			"Receive No Buffers");
5403	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_undersize",
5404			CTLFLAG_RD, &adapter->stats.ruc,
5405			"Receive Undersize");
5406	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_fragmented",
5407			CTLFLAG_RD, &adapter->stats.rfc,
5408			"Fragmented Packets Received ");
5409	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_oversize",
5410			CTLFLAG_RD, &adapter->stats.roc,
5411			"Oversized Packets Received");
5412	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_jabber",
5413			CTLFLAG_RD, &adapter->stats.rjc,
5414			"Recevied Jabber");
5415	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_errs",
5416			CTLFLAG_RD, &adapter->stats.rxerrc,
5417			"Receive Errors");
5418	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "crc_errs",
5419			CTLFLAG_RD, &adapter->stats.crcerrs,
5420			"CRC errors");
5421	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "alignment_errs",
5422			CTLFLAG_RD, &adapter->stats.algnerrc,
5423			"Alignment Errors");
5424	/* On 82575 these are collision counts */
5425	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "coll_ext_errs",
5426			CTLFLAG_RD, &adapter->stats.cexterr,
5427			"Collision/Carrier extension errors");
5428	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_recvd",
5429			CTLFLAG_RD, &adapter->stats.xonrxc,
5430			"XON Received");
5431	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_txd",
5432			CTLFLAG_RD, &adapter->stats.xontxc,
5433			"XON Transmitted");
5434	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_recvd",
5435			CTLFLAG_RD, &adapter->stats.xoffrxc,
5436			"XOFF Received");
5437	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_txd",
5438			CTLFLAG_RD, &adapter->stats.xofftxc,
5439			"XOFF Transmitted");
5440
5441	/* Packet Reception Stats */
5442	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_recvd",
5443			CTLFLAG_RD, &adapter->stats.tpr,
5444			"Total Packets Received ");
5445	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd",
5446			CTLFLAG_RD, &adapter->stats.gprc,
5447			"Good Packets Received");
5448	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_recvd",
5449			CTLFLAG_RD, &adapter->stats.bprc,
5450			"Broadcast Packets Received");
5451	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd",
5452			CTLFLAG_RD, &adapter->stats.mprc,
5453			"Multicast Packets Received");
5454	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_64",
5455			CTLFLAG_RD, &adapter->stats.prc64,
5456			"64 byte frames received ");
5457	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_65_127",
5458			CTLFLAG_RD, &adapter->stats.prc127,
5459			"65-127 byte frames received");
5460	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_128_255",
5461			CTLFLAG_RD, &adapter->stats.prc255,
5462			"128-255 byte frames received");
5463	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_256_511",
5464			CTLFLAG_RD, &adapter->stats.prc511,
5465			"256-511 byte frames received");
5466	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_512_1023",
5467			CTLFLAG_RD, &adapter->stats.prc1023,
5468			"512-1023 byte frames received");
5469	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_1024_1522",
5470			CTLFLAG_RD, &adapter->stats.prc1522,
5471			"1023-1522 byte frames received");
5472 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd",
5473 			CTLFLAG_RD, &adapter->stats.gorc,
5474 			"Good Octets Received");
5475
5476	/* Packet Transmission Stats */
5477 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_txd",
5478 			CTLFLAG_RD, &adapter->stats.gotc,
5479 			"Good Octets Transmitted");
5480	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_txd",
5481			CTLFLAG_RD, &adapter->stats.tpt,
5482			"Total Packets Transmitted");
5483	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
5484			CTLFLAG_RD, &adapter->stats.gptc,
5485			"Good Packets Transmitted");
5486	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_txd",
5487			CTLFLAG_RD, &adapter->stats.bptc,
5488			"Broadcast Packets Transmitted");
5489	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_txd",
5490			CTLFLAG_RD, &adapter->stats.mptc,
5491			"Multicast Packets Transmitted");
5492	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_64",
5493			CTLFLAG_RD, &adapter->stats.ptc64,
5494			"64 byte frames transmitted ");
5495	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_65_127",
5496			CTLFLAG_RD, &adapter->stats.ptc127,
5497			"65-127 byte frames transmitted");
5498	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_128_255",
5499			CTLFLAG_RD, &adapter->stats.ptc255,
5500			"128-255 byte frames transmitted");
5501	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_256_511",
5502			CTLFLAG_RD, &adapter->stats.ptc511,
5503			"256-511 byte frames transmitted");
5504	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_512_1023",
5505			CTLFLAG_RD, &adapter->stats.ptc1023,
5506			"512-1023 byte frames transmitted");
5507	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_1024_1522",
5508			CTLFLAG_RD, &adapter->stats.ptc1522,
5509			"1024-1522 byte frames transmitted");
5510	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tso_txd",
5511			CTLFLAG_RD, &adapter->stats.tsctc,
5512			"TSO Contexts Transmitted");
5513	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tso_ctx_fail",
5514			CTLFLAG_RD, &adapter->stats.tsctfc,
5515			"TSO Contexts Failed");
5516
5517
5518	/* Interrupt Stats */
5519
5520	int_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "interrupts",
5521				    CTLFLAG_RD, NULL, "Interrupt Statistics");
5522	int_list = SYSCTL_CHILDREN(int_node);
5523
5524	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "asserts",
5525			CTLFLAG_RD, &adapter->stats.iac,
5526			"Interrupt Assertion Count");
5527
5528	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_pkt_timer",
5529			CTLFLAG_RD, &adapter->stats.icrxptc,
5530			"Interrupt Cause Rx Pkt Timer Expire Count");
5531
5532	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_abs_timer",
5533			CTLFLAG_RD, &adapter->stats.icrxatc,
5534			"Interrupt Cause Rx Abs Timer Expire Count");
5535
5536	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_pkt_timer",
5537			CTLFLAG_RD, &adapter->stats.ictxptc,
5538			"Interrupt Cause Tx Pkt Timer Expire Count");
5539
5540	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_abs_timer",
5541			CTLFLAG_RD, &adapter->stats.ictxatc,
5542			"Interrupt Cause Tx Abs Timer Expire Count");
5543
5544	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_queue_empty",
5545			CTLFLAG_RD, &adapter->stats.ictxqec,
5546			"Interrupt Cause Tx Queue Empty Count");
5547
5548	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_queue_min_thresh",
5549			CTLFLAG_RD, &adapter->stats.ictxqmtc,
5550			"Interrupt Cause Tx Queue Min Thresh Count");
5551
5552	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_desc_min_thresh",
5553			CTLFLAG_RD, &adapter->stats.icrxdmtc,
5554			"Interrupt Cause Rx Desc Min Thresh Count");
5555
5556	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_overrun",
5557			CTLFLAG_RD, &adapter->stats.icrxoc,
5558			"Interrupt Cause Receiver Overrun Count");
5559}
5560
5561/**********************************************************************
5562 *
5563 *  This routine provides a way to dump out the adapter eeprom,
5564 *  often a useful debug/service tool. This only dumps the first
5565 *  32 words, stuff that matters is in that extent.
5566 *
5567 **********************************************************************/
5568static int
5569em_sysctl_nvm_info(SYSCTL_HANDLER_ARGS)
5570{
5571	struct adapter *adapter = (struct adapter *)arg1;
5572	int error;
5573	int result;
5574
5575	result = -1;
5576	error = sysctl_handle_int(oidp, &result, 0, req);
5577
5578	if (error || !req->newptr)
5579		return (error);
5580
5581	/*
5582	 * This value will cause a hex dump of the
5583	 * first 32 16-bit words of the EEPROM to
5584	 * the screen.
5585	 */
5586	if (result == 1)
5587		em_print_nvm_info(adapter);
5588
5589	return (error);
5590}
5591
5592static void
5593em_print_nvm_info(struct adapter *adapter)
5594{
5595	u16	eeprom_data;
5596	int	i, j, row = 0;
5597
5598	/* Its a bit crude, but it gets the job done */
5599	printf("\nInterface EEPROM Dump:\n");
5600	printf("Offset\n0x0000  ");
5601	for (i = 0, j = 0; i < 32; i++, j++) {
5602		if (j == 8) { /* Make the offset block */
5603			j = 0; ++row;
5604			printf("\n0x00%x0  ",row);
5605		}
5606		e1000_read_nvm(&adapter->hw, i, 1, &eeprom_data);
5607		printf("%04x ", eeprom_data);
5608	}
5609	printf("\n");
5610}
5611
5612static int
5613em_sysctl_int_delay(SYSCTL_HANDLER_ARGS)
5614{
5615	struct em_int_delay_info *info;
5616	struct adapter *adapter;
5617	u32 regval;
5618	int error, usecs, ticks;
5619
5620	info = (struct em_int_delay_info *)arg1;
5621	usecs = info->value;
5622	error = sysctl_handle_int(oidp, &usecs, 0, req);
5623	if (error != 0 || req->newptr == NULL)
5624		return (error);
5625	if (usecs < 0 || usecs > EM_TICKS_TO_USECS(65535))
5626		return (EINVAL);
5627	info->value = usecs;
5628	ticks = EM_USECS_TO_TICKS(usecs);
5629	if (info->offset == E1000_ITR)	/* units are 256ns here */
5630		ticks *= 4;
5631
5632	adapter = info->adapter;
5633
5634	EM_CORE_LOCK(adapter);
5635	regval = E1000_READ_OFFSET(&adapter->hw, info->offset);
5636	regval = (regval & ~0xffff) | (ticks & 0xffff);
5637	/* Handle a few special cases. */
5638	switch (info->offset) {
5639	case E1000_RDTR:
5640		break;
5641	case E1000_TIDV:
5642		if (ticks == 0) {
5643			adapter->txd_cmd &= ~E1000_TXD_CMD_IDE;
5644			/* Don't write 0 into the TIDV register. */
5645			regval++;
5646		} else
5647			adapter->txd_cmd |= E1000_TXD_CMD_IDE;
5648		break;
5649	}
5650	E1000_WRITE_OFFSET(&adapter->hw, info->offset, regval);
5651	EM_CORE_UNLOCK(adapter);
5652	return (0);
5653}
5654
5655static void
5656em_add_int_delay_sysctl(struct adapter *adapter, const char *name,
5657	const char *description, struct em_int_delay_info *info,
5658	int offset, int value)
5659{
5660	info->adapter = adapter;
5661	info->offset = offset;
5662	info->value = value;
5663	SYSCTL_ADD_PROC(device_get_sysctl_ctx(adapter->dev),
5664	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
5665	    OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW,
5666	    info, 0, em_sysctl_int_delay, "I", description);
5667}
5668
5669static void
5670em_set_sysctl_value(struct adapter *adapter, const char *name,
5671	const char *description, int *limit, int value)
5672{
5673	*limit = value;
5674	SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
5675	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
5676	    OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW, limit, value, description);
5677}
5678
5679
5680/*
5681** Set flow control using sysctl:
5682** Flow control values:
5683**      0 - off
5684**      1 - rx pause
5685**      2 - tx pause
5686**      3 - full
5687*/
5688static int
5689em_set_flowcntl(SYSCTL_HANDLER_ARGS)
5690{
5691        int		error;
5692	static int	input = 3; /* default is full */
5693        struct adapter	*adapter = (struct adapter *) arg1;
5694
5695        error = sysctl_handle_int(oidp, &input, 0, req);
5696
5697        if ((error) || (req->newptr == NULL))
5698                return (error);
5699
5700	if (input == adapter->fc) /* no change? */
5701		return (error);
5702
5703        switch (input) {
5704                case e1000_fc_rx_pause:
5705                case e1000_fc_tx_pause:
5706                case e1000_fc_full:
5707                case e1000_fc_none:
5708                        adapter->hw.fc.requested_mode = input;
5709			adapter->fc = input;
5710                        break;
5711                default:
5712			/* Do nothing */
5713			return (error);
5714        }
5715
5716        adapter->hw.fc.current_mode = adapter->hw.fc.requested_mode;
5717        e1000_force_mac_fc(&adapter->hw);
5718        return (error);
5719}
5720
5721/*
5722** Manage Energy Efficient Ethernet:
5723** Control values:
5724**     0/1 - enabled/disabled
5725*/
5726static int
5727em_sysctl_eee(SYSCTL_HANDLER_ARGS)
5728{
5729       struct adapter *adapter = (struct adapter *) arg1;
5730       int             error, value;
5731
5732       value = adapter->hw.dev_spec.ich8lan.eee_disable;
5733       error = sysctl_handle_int(oidp, &value, 0, req);
5734       if (error || req->newptr == NULL)
5735               return (error);
5736       EM_CORE_LOCK(adapter);
5737       adapter->hw.dev_spec.ich8lan.eee_disable = (value != 0);
5738       em_init_locked(adapter);
5739       EM_CORE_UNLOCK(adapter);
5740       return (0);
5741}
5742
5743static int
5744em_sysctl_debug_info(SYSCTL_HANDLER_ARGS)
5745{
5746	struct adapter *adapter;
5747	int error;
5748	int result;
5749
5750	result = -1;
5751	error = sysctl_handle_int(oidp, &result, 0, req);
5752
5753	if (error || !req->newptr)
5754		return (error);
5755
5756	if (result == 1) {
5757		adapter = (struct adapter *)arg1;
5758		em_print_debug_info(adapter);
5759        }
5760
5761	return (error);
5762}
5763
5764/*
5765** This routine is meant to be fluid, add whatever is
5766** needed for debugging a problem.  -jfv
5767*/
5768static void
5769em_print_debug_info(struct adapter *adapter)
5770{
5771	device_t dev = adapter->dev;
5772	struct tx_ring *txr = adapter->tx_rings;
5773	struct rx_ring *rxr = adapter->rx_rings;
5774
5775	if (adapter->ifp->if_drv_flags & IFF_DRV_RUNNING)
5776		printf("Interface is RUNNING ");
5777	else
5778		printf("Interface is NOT RUNNING\n");
5779
5780	if (adapter->ifp->if_drv_flags & IFF_DRV_OACTIVE)
5781		printf("and INACTIVE\n");
5782	else
5783		printf("and ACTIVE\n");
5784
5785	device_printf(dev, "hw tdh = %d, hw tdt = %d\n",
5786	    E1000_READ_REG(&adapter->hw, E1000_TDH(0)),
5787	    E1000_READ_REG(&adapter->hw, E1000_TDT(0)));
5788	device_printf(dev, "hw rdh = %d, hw rdt = %d\n",
5789	    E1000_READ_REG(&adapter->hw, E1000_RDH(0)),
5790	    E1000_READ_REG(&adapter->hw, E1000_RDT(0)));
5791	device_printf(dev, "Tx Queue Status = %d\n", txr->queue_status);
5792	device_printf(dev, "TX descriptors avail = %d\n",
5793	    txr->tx_avail);
5794	device_printf(dev, "Tx Descriptors avail failure = %ld\n",
5795	    txr->no_desc_avail);
5796	device_printf(dev, "RX discarded packets = %ld\n",
5797	    rxr->rx_discarded);
5798	device_printf(dev, "RX Next to Check = %d\n", rxr->next_to_check);
5799	device_printf(dev, "RX Next to Refresh = %d\n", rxr->next_to_refresh);
5800}
5801