/******************************************************************************

  Copyright (c) 2001-2011, Intel Corporation
  All rights reserved.

  Redistribution and use in source and binary forms, with or without
  modification, are permitted provided that the following conditions are met:

   1. Redistributions of source code must retain the above copyright notice,
      this list of conditions and the following disclaimer.

   2. Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.

   3. Neither the name of the Intel Corporation nor the names of its
      contributors may be used to endorse or promote products derived from
      this software without specific prior written permission.

  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  POSSIBILITY OF SUCH DAMAGE.

******************************************************************************/
/*$FreeBSD: stable/9/sys/dev/e1000/if_em.c 238262 2012-07-08 20:35:56Z jfv $*/

#ifdef HAVE_KERNEL_OPTION_HEADERS
#include "opt_device_polling.h"
#include "opt_inet.h"
#include "opt_inet6.h"
#endif

#include <sys/param.h>
#include <sys/systm.h>
#if __FreeBSD_version >= 800000
#include <sys/buf_ring.h>
#endif
#include <sys/bus.h>
#include <sys/endian.h>
#include <sys/kernel.h>
#include <sys/kthread.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/module.h>
#include <sys/rman.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/sysctl.h>
#include <sys/taskqueue.h>
#include <sys/eventhandler.h>
#include <machine/bus.h>
#include <machine/resource.h>

#include <net/bpf.h>
#include <net/ethernet.h>
#include <net/if.h>
#include <net/if_arp.h>
#include <net/if_dl.h>
#include <net/if_media.h>

#include <net/if_types.h>
#include <net/if_vlan_var.h>

#include <netinet/in_systm.h>
#include <netinet/in.h>
#include <netinet/if_ether.h>
#include <netinet/ip.h>
#include <netinet/ip6.h>
#include <netinet/tcp.h>
#include <netinet/udp.h>

#include <machine/in_cksum.h>
#include <dev/led/led.h>
#include <dev/pci/pcivar.h>
#include <dev/pci/pcireg.h>

#include "e1000_api.h"
#include "e1000_82571.h"
#include "if_em.h"

/*********************************************************************
 *  Set this to one to display debug statistics
 *********************************************************************/
int	em_display_debug_stats = 0;

/*********************************************************************
 *  Driver version:
 *********************************************************************/
char em_driver_version[] = "7.3.2";

/*********************************************************************
 *  PCI Device ID Table
 *
 *  Used by probe to select devices to load on
 *  Last field stores an index into em_strings
 *  Last entry must be all 0s
 *
 *  { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
 *********************************************************************/

static em_vendor_info_t em_vendor_info_array[] =
{
	/* Intel(R) PRO/1000 Network Connection */
	{ 0x8086, E1000_DEV_ID_82571EB_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_SERDES_DUAL,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_SERDES_QUAD,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER_LP,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_FIBER,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571PT_QUAD_COPPER,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82572EI_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82572EI_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82572EI_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82572EI,		PCI_ANY_ID, PCI_ANY_ID, 0},

	{ 0x8086, E1000_DEV_ID_82573E,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82573E_IAMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82573L,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82583V,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_SPT,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_SPT,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_DPT,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_DPT,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IGP_M_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IGP_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IGP_C,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IFE,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IFE_GT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IFE_G,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IGP_M,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_82567V_3,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IGP_M_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IGP_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IGP_C,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IGP_M,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IGP_M_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IFE,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IFE_GT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IFE_G,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_BM,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82574L,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82574LA,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_R_BM_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_R_BM_LF,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_R_BM_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_D_BM_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_D_BM_LF,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_D_BM_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_M_HV_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_M_HV_LC,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_D_HV_DM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_D_HV_DC,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH2_LV_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH2_LV_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
	/* required last entry */
	{ 0, 0, 0, 0, 0}
};

/*********************************************************************
 *  Table of branding strings for all supported NICs.
 *********************************************************************/

static char *em_strings[] = {
	"Intel(R) PRO/1000 Network Connection"
};

/*********************************************************************
 *  Function prototypes
 *********************************************************************/
static int	em_probe(device_t);
static int	em_attach(device_t);
static int	em_detach(device_t);
static int	em_shutdown(device_t);
static int	em_suspend(device_t);
static int	em_resume(device_t);
#ifdef EM_MULTIQUEUE
static int	em_mq_start(struct ifnet *, struct mbuf *);
static int	em_mq_start_locked(struct ifnet *,
		    struct tx_ring *, struct mbuf *);
static void	em_qflush(struct ifnet *);
#else
static void	em_start(struct ifnet *);
static void	em_start_locked(struct ifnet *, struct tx_ring *);
#endif
static int	em_ioctl(struct ifnet *, u_long, caddr_t);
static void	em_init(void *);
static void	em_init_locked(struct adapter *);
static void	em_stop(void *);
static void	em_media_status(struct ifnet *, struct ifmediareq *);
static int	em_media_change(struct ifnet *);
static void	em_identify_hardware(struct adapter *);
static int	em_allocate_pci_resources(struct adapter *);
static int	em_allocate_legacy(struct adapter *);
static int	em_allocate_msix(struct adapter *);
static int	em_allocate_queues(struct adapter *);
static int	em_setup_msix(struct adapter *);
static void	em_free_pci_resources(struct adapter *);
static void	em_local_timer(void *);
static void	em_reset(struct adapter *);
static int	em_setup_interface(device_t, struct adapter *);

static void	em_setup_transmit_structures(struct adapter *);
static void	em_initialize_transmit_unit(struct adapter *);
static int	em_allocate_transmit_buffers(struct tx_ring *);
static void	em_free_transmit_structures(struct adapter *);
static void	em_free_transmit_buffers(struct tx_ring *);

static int	em_setup_receive_structures(struct adapter *);
static int	em_allocate_receive_buffers(struct rx_ring *);
static void	em_initialize_receive_unit(struct adapter *);
static void	em_free_receive_structures(struct adapter *);
static void	em_free_receive_buffers(struct rx_ring *);

static void	em_enable_intr(struct adapter *);
static void	em_disable_intr(struct adapter *);
static void	em_update_stats_counters(struct adapter *);
static void	em_add_hw_stats(struct adapter *adapter);
static void	em_txeof(struct tx_ring *);
static bool	em_rxeof(struct rx_ring *, int, int *);
#ifndef __NO_STRICT_ALIGNMENT
static int	em_fixup_rx(struct rx_ring *);
#endif
static void	em_receive_checksum(struct e1000_rx_desc *, struct mbuf *);
static void	em_transmit_checksum_setup(struct tx_ring *, struct mbuf *, int,
		    struct ip *, u32 *, u32 *);
static void	em_tso_setup(struct tx_ring *, struct mbuf *, int, struct ip *,
		    struct tcphdr *, u32 *, u32 *);
static void	em_set_promisc(struct adapter *);
static void	em_disable_promisc(struct adapter *);
static void	em_set_multi(struct adapter *);
static void	em_update_link_status(struct adapter *);
static void	em_refresh_mbufs(struct rx_ring *, int);
static void	em_register_vlan(void *, struct ifnet *, u16);
static void	em_unregister_vlan(void *, struct ifnet *, u16);
static void	em_setup_vlan_hw_support(struct adapter *);
static int	em_xmit(struct tx_ring *, struct mbuf **);
static int	em_dma_malloc(struct adapter *, bus_size_t,
		    struct em_dma_alloc *, int);
static void	em_dma_free(struct adapter *, struct em_dma_alloc *);
static int	em_sysctl_nvm_info(SYSCTL_HANDLER_ARGS);
static void	em_print_nvm_info(struct adapter *);
static int	em_sysctl_debug_info(SYSCTL_HANDLER_ARGS);
static void	em_print_debug_info(struct adapter *);
static int 	em_is_valid_ether_addr(u8 *);
static int	em_sysctl_int_delay(SYSCTL_HANDLER_ARGS);
static void	em_add_int_delay_sysctl(struct adapter *, const char *,
		    const char *, struct em_int_delay_info *, int, int);
/* Management and WOL Support */
static void	em_init_manageability(struct adapter *);
static void	em_release_manageability(struct adapter *);
static void     em_get_hw_control(struct adapter *);
static void     em_release_hw_control(struct adapter *);
static void	em_get_wakeup(device_t);
static void     em_enable_wakeup(device_t);
static int	em_enable_phy_wakeup(struct adapter *);
static void	em_led_func(void *, int);
static void	em_disable_aspm(struct adapter *);

static int	em_irq_fast(void *);

/* MSIX handlers */
static void	em_msix_tx(void *);
static void	em_msix_rx(void *);
static void	em_msix_link(void *);
static void	em_handle_tx(void *context, int pending);
static void	em_handle_rx(void *context, int pending);
static void	em_handle_link(void *context, int pending);

static void	em_set_sysctl_value(struct adapter *, const char *,
		    const char *, int *, int);
static int	em_set_flowcntl(SYSCTL_HANDLER_ARGS);
static int	em_sysctl_eee(SYSCTL_HANDLER_ARGS);

static __inline void em_rx_discard(struct rx_ring *, int);

#ifdef DEVICE_POLLING
static poll_handler_t em_poll;
#endif /* DEVICE_POLLING */

/*********************************************************************
 *  FreeBSD Device Interface Entry Points
 *********************************************************************/

static device_method_t em_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe, em_probe),
	DEVMETHOD(device_attach, em_attach),
	DEVMETHOD(device_detach, em_detach),
	DEVMETHOD(device_shutdown, em_shutdown),
	DEVMETHOD(device_suspend, em_suspend),
	DEVMETHOD(device_resume, em_resume),
	{0, 0}
};

static driver_t em_driver = {
	"em", em_methods, sizeof(struct adapter),
};

devclass_t em_devclass;
DRIVER_MODULE(em, pci, em_driver, em_devclass, 0, 0);
MODULE_DEPEND(em, pci, 1, 1, 1);
MODULE_DEPEND(em, ether, 1, 1, 1);

/*********************************************************************
 *  Tunable default values.
 *********************************************************************/

#define EM_TICKS_TO_USECS(ticks)	((1024 * (ticks) + 500) / 1000)
#define EM_USECS_TO_TICKS(usecs)	((1000 * (usecs) + 512) / 1024)
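/*
 * The chip's delay timers count in units of 1.024 usecs, hence the
 * scaled conversions above.  For example, a register value of 64
 * ticks converts to EM_TICKS_TO_USECS(64) = (1024 * 64 + 500) / 1000
 * = 66 usecs; the +500 and +512 terms just round to nearest.
 */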
#define M_TSO_LEN			66

/* Allow common code without TSO */
#ifndef CSUM_TSO
#define CSUM_TSO	0
#endif

static SYSCTL_NODE(_hw, OID_AUTO, em, CTLFLAG_RD, 0, "EM driver parameters");

static int em_tx_int_delay_dflt = EM_TICKS_TO_USECS(EM_TIDV);
static int em_rx_int_delay_dflt = EM_TICKS_TO_USECS(EM_RDTR);
TUNABLE_INT("hw.em.tx_int_delay", &em_tx_int_delay_dflt);
TUNABLE_INT("hw.em.rx_int_delay", &em_rx_int_delay_dflt);
SYSCTL_INT(_hw_em, OID_AUTO, tx_int_delay, CTLFLAG_RDTUN, &em_tx_int_delay_dflt,
    0, "Default transmit interrupt delay in usecs");
SYSCTL_INT(_hw_em, OID_AUTO, rx_int_delay, CTLFLAG_RDTUN, &em_rx_int_delay_dflt,
    0, "Default receive interrupt delay in usecs");

static int em_tx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_TADV);
static int em_rx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_RADV);
TUNABLE_INT("hw.em.tx_abs_int_delay", &em_tx_abs_int_delay_dflt);
TUNABLE_INT("hw.em.rx_abs_int_delay", &em_rx_abs_int_delay_dflt);
SYSCTL_INT(_hw_em, OID_AUTO, tx_abs_int_delay, CTLFLAG_RDTUN,
    &em_tx_abs_int_delay_dflt, 0,
    "Default transmit interrupt delay limit in usecs");
SYSCTL_INT(_hw_em, OID_AUTO, rx_abs_int_delay, CTLFLAG_RDTUN,
    &em_rx_abs_int_delay_dflt, 0,
    "Default receive interrupt delay limit in usecs");

static int em_rxd = EM_DEFAULT_RXD;
static int em_txd = EM_DEFAULT_TXD;
TUNABLE_INT("hw.em.rxd", &em_rxd);
TUNABLE_INT("hw.em.txd", &em_txd);
SYSCTL_INT(_hw_em, OID_AUTO, rxd, CTLFLAG_RDTUN, &em_rxd, 0,
    "Number of receive descriptors per queue");
SYSCTL_INT(_hw_em, OID_AUTO, txd, CTLFLAG_RDTUN, &em_txd, 0,
    "Number of transmit descriptors per queue");

static int em_smart_pwr_down = FALSE;
TUNABLE_INT("hw.em.smart_pwr_down", &em_smart_pwr_down);
SYSCTL_INT(_hw_em, OID_AUTO, smart_pwr_down, CTLFLAG_RDTUN, &em_smart_pwr_down,
    0, "Set to true to leave smart power down enabled on newer adapters");

/* Controls whether promiscuous also shows bad packets */
static int em_debug_sbp = FALSE;
TUNABLE_INT("hw.em.sbp", &em_debug_sbp);
SYSCTL_INT(_hw_em, OID_AUTO, sbp, CTLFLAG_RDTUN, &em_debug_sbp, 0,
    "Show bad packets in promiscuous mode");

static int em_enable_msix = TRUE;
TUNABLE_INT("hw.em.enable_msix", &em_enable_msix);
SYSCTL_INT(_hw_em, OID_AUTO, enable_msix, CTLFLAG_RDTUN, &em_enable_msix, 0,
    "Enable MSI-X interrupts");

/* How many packets rxeof tries to clean at a time */
static int em_rx_process_limit = 100;
TUNABLE_INT("hw.em.rx_process_limit", &em_rx_process_limit);
SYSCTL_INT(_hw_em, OID_AUTO, rx_process_limit, CTLFLAG_RDTUN,
    &em_rx_process_limit, 0,
    "Maximum number of received packets to process "
    "at a time, -1 means unlimited");
/*
 * Energy Efficient Ethernet - default to OFF; the value is copied
 * into eee_disable in em_attach(), so a nonzero setting disables EEE.
 */
static int eee_setting = 1;
TUNABLE_INT("hw.em.eee_setting", &eee_setting);
SYSCTL_INT(_hw_em, OID_AUTO, eee_setting, CTLFLAG_RDTUN, &eee_setting, 0,
    "Enable Energy Efficient Ethernet");

/* Global used in WOL setup with multiport cards */
static int global_quad_port_a = 0;

#ifdef DEV_NETMAP	/* see ixgbe.c for details */
#include <dev/netmap/if_em_netmap.h>
#endif /* DEV_NETMAP */

/*********************************************************************
 *  Device identification routine
 *
 *  em_probe determines if the driver should be loaded on the
 *  adapter, based on the PCI vendor/device ID of the adapter.
 *
 *  return BUS_PROBE_DEFAULT on success, positive on failure
 *********************************************************************/

static int
em_probe(device_t dev)
{
	char		adapter_name[60];
	u16		pci_vendor_id = 0;
	u16		pci_device_id = 0;
	u16		pci_subvendor_id = 0;
	u16		pci_subdevice_id = 0;
	em_vendor_info_t *ent;

	INIT_DEBUGOUT("em_probe: begin");

	pci_vendor_id = pci_get_vendor(dev);
	if (pci_vendor_id != EM_VENDOR_ID)
		return (ENXIO);

	pci_device_id = pci_get_device(dev);
	pci_subvendor_id = pci_get_subvendor(dev);
	pci_subdevice_id = pci_get_subdevice(dev);

	ent = em_vendor_info_array;
	while (ent->vendor_id != 0) {
		if ((pci_vendor_id == ent->vendor_id) &&
		    (pci_device_id == ent->device_id) &&

		    ((pci_subvendor_id == ent->subvendor_id) ||
		    (ent->subvendor_id == PCI_ANY_ID)) &&

		    ((pci_subdevice_id == ent->subdevice_id) ||
		    (ent->subdevice_id == PCI_ANY_ID))) {
			sprintf(adapter_name, "%s %s",
				em_strings[ent->index],
				em_driver_version);
			device_set_desc_copy(dev, adapter_name);
			return (BUS_PROBE_DEFAULT);
		}
		ent++;
	}

	return (ENXIO);
}

/*********************************************************************
 *  Device initialization routine
 *
 *  The attach entry point is called when the driver is being loaded.
 *  This routine identifies the type of hardware, allocates all resources
 *  and initializes the hardware.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/

static int
em_attach(device_t dev)
{
	struct adapter	*adapter;
	struct e1000_hw	*hw;
	int		error = 0;

	INIT_DEBUGOUT("em_attach: begin");

	if (resource_disabled("em", device_get_unit(dev))) {
		device_printf(dev, "Disabled by device hint\n");
		return (ENXIO);
	}

	adapter = device_get_softc(dev);
	adapter->dev = adapter->osdep.dev = dev;
	hw = &adapter->hw;
	EM_CORE_LOCK_INIT(adapter, device_get_nameunit(dev));

	/* SYSCTL stuff */
	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
	    OID_AUTO, "nvm", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
	    em_sysctl_nvm_info, "I", "NVM Information");

	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
	    OID_AUTO, "debug", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
	    em_sysctl_debug_info, "I", "Debug Information");

	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
	    OID_AUTO, "fc", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
	    em_set_flowcntl, "I", "Flow Control");

	callout_init_mtx(&adapter->timer, &adapter->core_mtx, 0);

	/* Determine hardware and mac info */
	em_identify_hardware(adapter);

	/* Setup PCI resources */
	if (em_allocate_pci_resources(adapter)) {
		device_printf(dev, "Allocation of PCI resources failed\n");
		error = ENXIO;
		goto err_pci;
	}

	/*
	** For ICH8 and family we need to
	** map the flash memory, and this
	** must happen after the MAC is
	** identified
	*/
	if ((hw->mac.type == e1000_ich8lan) ||
	    (hw->mac.type == e1000_ich9lan) ||
	    (hw->mac.type == e1000_ich10lan) ||
	    (hw->mac.type == e1000_pchlan) ||
	    (hw->mac.type == e1000_pch2lan)) {
		int rid = EM_BAR_TYPE_FLASH;
		adapter->flash = bus_alloc_resource_any(dev,
		    SYS_RES_MEMORY, &rid, RF_ACTIVE);
		if (adapter->flash == NULL) {
			device_printf(dev, "Mapping of Flash failed\n");
			error = ENXIO;
			goto err_pci;
		}
		/* This is used in the shared code */
		hw->flash_address = (u8 *)adapter->flash;
		adapter->osdep.flash_bus_space_tag =
		    rman_get_bustag(adapter->flash);
		adapter->osdep.flash_bus_space_handle =
		    rman_get_bushandle(adapter->flash);
	}

	/* Do Shared Code initialization */
	if (e1000_setup_init_funcs(hw, TRUE)) {
		device_printf(dev, "Setup of Shared code failed\n");
		error = ENXIO;
		goto err_pci;
	}

	e1000_get_bus_info(hw);

	/* Set up some sysctls for the tunable interrupt delays */
	em_add_int_delay_sysctl(adapter, "rx_int_delay",
	    "receive interrupt delay in usecs", &adapter->rx_int_delay,
	    E1000_REGISTER(hw, E1000_RDTR), em_rx_int_delay_dflt);
	em_add_int_delay_sysctl(adapter, "tx_int_delay",
	    "transmit interrupt delay in usecs", &adapter->tx_int_delay,
	    E1000_REGISTER(hw, E1000_TIDV), em_tx_int_delay_dflt);
	em_add_int_delay_sysctl(adapter, "rx_abs_int_delay",
	    "receive interrupt delay limit in usecs",
	    &adapter->rx_abs_int_delay,
	    E1000_REGISTER(hw, E1000_RADV),
	    em_rx_abs_int_delay_dflt);
	em_add_int_delay_sysctl(adapter, "tx_abs_int_delay",
	    "transmit interrupt delay limit in usecs",
	    &adapter->tx_abs_int_delay,
	    E1000_REGISTER(hw, E1000_TADV),
	    em_tx_abs_int_delay_dflt);

	/* Sysctl for limiting the amount of work done in the taskqueue */
	em_set_sysctl_value(adapter, "rx_processing_limit",
	    "max number of rx packets to process", &adapter->rx_process_limit,
	    em_rx_process_limit);

	/*
	 * Validate the number of transmit and receive descriptors.  The
	 * count must not exceed the hardware maximum, and the ring size
	 * in bytes must be a multiple of EM_DBA_ALIGN.
	 */
	if (((em_txd * sizeof(struct e1000_tx_desc)) % EM_DBA_ALIGN) != 0 ||
	    (em_txd > EM_MAX_TXD) || (em_txd < EM_MIN_TXD)) {
		device_printf(dev, "Using %d TX descriptors instead of %d!\n",
		    EM_DEFAULT_TXD, em_txd);
		adapter->num_tx_desc = EM_DEFAULT_TXD;
	} else
		adapter->num_tx_desc = em_txd;

	if (((em_rxd * sizeof(struct e1000_rx_desc)) % EM_DBA_ALIGN) != 0 ||
	    (em_rxd > EM_MAX_RXD) || (em_rxd < EM_MIN_RXD)) {
		device_printf(dev, "Using %d RX descriptors instead of %d!\n",
		    EM_DEFAULT_RXD, em_rxd);
		adapter->num_rx_desc = EM_DEFAULT_RXD;
	} else
		adapter->num_rx_desc = em_rxd;

	hw->mac.autoneg = DO_AUTO_NEG;
	hw->phy.autoneg_wait_to_complete = FALSE;
	hw->phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;

	/* Copper options */
	if (hw->phy.media_type == e1000_media_type_copper) {
		hw->phy.mdix = AUTO_ALL_MODES;
		hw->phy.disable_polarity_correction = FALSE;
		hw->phy.ms_type = EM_MASTER_SLAVE;
	}

	/*
	 * Set the frame limits assuming
	 * standard ethernet sized frames.
	 */
	adapter->max_frame_size = ETHERMTU + ETHER_HDR_LEN + ETHERNET_FCS_SIZE;
	adapter->min_frame_size = ETH_ZLEN + ETHERNET_FCS_SIZE;
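	/*
	 * For the standard 1500-byte MTU this works out to
	 * 1500 + 14 (header) + 4 (FCS) = 1518 bytes.
	 */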

	/*
	 * This controls when hardware reports transmit completion
	 * status.
	 */
	hw->mac.report_tx_early = 1;

	/*
	** Get queue/ring memory
	*/
	if (em_allocate_queues(adapter)) {
		error = ENOMEM;
		goto err_pci;
	}

	/* Allocate multicast array memory. */
	adapter->mta = malloc(sizeof(u8) * ETH_ADDR_LEN *
	    MAX_NUM_MULTICAST_ADDRESSES, M_DEVBUF, M_NOWAIT);
	if (adapter->mta == NULL) {
		device_printf(dev, "Can not allocate multicast setup array\n");
		error = ENOMEM;
		goto err_late;
	}

	/* Check SOL/IDER usage */
	if (e1000_check_reset_block(hw))
		device_printf(dev, "PHY reset is blocked"
		    " due to SOL/IDER session.\n");

	/* Sysctl for setting Energy Efficient Ethernet */
	hw->dev_spec.ich8lan.eee_disable = eee_setting;
	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
	    OID_AUTO, "eee_control", CTLTYPE_INT|CTLFLAG_RW,
	    adapter, 0, em_sysctl_eee, "I",
	    "Disable Energy Efficient Ethernet");

	/*
	** Start from a known state; this is
	** important for reading the NVM and
	** MAC address from the hardware.
	*/
	e1000_reset_hw(hw);


	/* Make sure we have a good EEPROM before we read from it */
	if (e1000_validate_nvm_checksum(hw) < 0) {
		/*
		** Some PCI-E parts fail the first check due to
		** the link being in a sleep state; call it again.
		** If it fails a second time, it's a real issue.
		*/
		if (e1000_validate_nvm_checksum(hw) < 0) {
			device_printf(dev,
			    "The EEPROM Checksum Is Not Valid\n");
			error = EIO;
			goto err_late;
		}
	}

	/* Copy the permanent MAC address out of the EEPROM */
	if (e1000_read_mac_addr(hw) < 0) {
		device_printf(dev, "EEPROM read error while reading MAC"
		    " address\n");
		error = EIO;
		goto err_late;
	}

	if (!em_is_valid_ether_addr(hw->mac.addr)) {
		device_printf(dev, "Invalid MAC address\n");
		error = EIO;
		goto err_late;
	}

	/*
	**  Do interrupt configuration
	*/
	if (adapter->msix > 1) /* Do MSIX */
		error = em_allocate_msix(adapter);
	else  /* MSI or Legacy */
		error = em_allocate_legacy(adapter);
	if (error)
		goto err_late;

	/*
	 * Get Wake-on-Lan and Management info for later use
	 */
	em_get_wakeup(dev);

	/* Setup OS specific network interface */
	if (em_setup_interface(dev, adapter) != 0)
		goto err_late;

	em_reset(adapter);

	/* Initialize statistics */
	em_update_stats_counters(adapter);

	hw->mac.get_link_status = 1;
	em_update_link_status(adapter);

	/* Register for VLAN events */
	adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
	    em_register_vlan, adapter, EVENTHANDLER_PRI_FIRST);
	adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
	    em_unregister_vlan, adapter, EVENTHANDLER_PRI_FIRST);

	em_add_hw_stats(adapter);

	/* Non-AMT based hardware can now take control from firmware */
	if (adapter->has_manage && !adapter->has_amt)
		em_get_hw_control(adapter);

	/* Tell the stack that the interface is not active */
	adapter->ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
	adapter->ifp->if_drv_flags |= IFF_DRV_OACTIVE;

	adapter->led_dev = led_create(em_led_func, adapter,
	    device_get_nameunit(dev));
#ifdef DEV_NETMAP
	em_netmap_attach(adapter);
#endif /* DEV_NETMAP */

	INIT_DEBUGOUT("em_attach: end");

	return (0);

err_late:
	em_free_transmit_structures(adapter);
	em_free_receive_structures(adapter);
	em_release_hw_control(adapter);
	if (adapter->ifp != NULL)
		if_free(adapter->ifp);
err_pci:
	em_free_pci_resources(adapter);
	free(adapter->mta, M_DEVBUF);
	EM_CORE_LOCK_DESTROY(adapter);

	return (error);
}

/*********************************************************************
 *  Device removal routine
 *
 *  The detach entry point is called when the driver is being removed.
 *  This routine stops the adapter and deallocates all the resources
 *  that were allocated for driver operation.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/

static int
em_detach(device_t dev)
{
	struct adapter	*adapter = device_get_softc(dev);
	struct ifnet	*ifp = adapter->ifp;

	INIT_DEBUGOUT("em_detach: begin");

	/* Make sure VLANs are not using the driver */
	if (adapter->ifp->if_vlantrunk != NULL) {
		device_printf(dev,"Vlan in use, detach first\n");
		return (EBUSY);
	}

#ifdef DEVICE_POLLING
	if (ifp->if_capenable & IFCAP_POLLING)
		ether_poll_deregister(ifp);
#endif

	if (adapter->led_dev != NULL)
		led_destroy(adapter->led_dev);

	EM_CORE_LOCK(adapter);
	adapter->in_detach = 1;
	em_stop(adapter);
	EM_CORE_UNLOCK(adapter);
	EM_CORE_LOCK_DESTROY(adapter);

	e1000_phy_hw_reset(&adapter->hw);

	em_release_manageability(adapter);
	em_release_hw_control(adapter);

	/* Unregister VLAN events */
	if (adapter->vlan_attach != NULL)
		EVENTHANDLER_DEREGISTER(vlan_config, adapter->vlan_attach);
	if (adapter->vlan_detach != NULL)
		EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach);

	ether_ifdetach(adapter->ifp);
	callout_drain(&adapter->timer);

#ifdef DEV_NETMAP
	netmap_detach(ifp);
#endif /* DEV_NETMAP */

	em_free_pci_resources(adapter);
	bus_generic_detach(dev);
	if_free(ifp);

	em_free_transmit_structures(adapter);
	em_free_receive_structures(adapter);

	em_release_hw_control(adapter);
	free(adapter->mta, M_DEVBUF);

	return (0);
}

/*********************************************************************
 *
 *  Shutdown entry point
 *
 **********************************************************************/

static int
em_shutdown(device_t dev)
{
	return em_suspend(dev);
}

/*
 * Suspend/resume device methods.
 */
static int
em_suspend(device_t dev)
{
	struct adapter *adapter = device_get_softc(dev);

	EM_CORE_LOCK(adapter);

	em_release_manageability(adapter);
	em_release_hw_control(adapter);
	em_enable_wakeup(dev);

	EM_CORE_UNLOCK(adapter);

	return bus_generic_suspend(dev);
}

static int
em_resume(device_t dev)
{
	struct adapter *adapter = device_get_softc(dev);
	struct tx_ring	*txr = adapter->tx_rings;
	struct ifnet *ifp = adapter->ifp;

	EM_CORE_LOCK(adapter);
	if (adapter->hw.mac.type == e1000_pch2lan)
		e1000_resume_workarounds_pchlan(&adapter->hw);
	em_init_locked(adapter);
	em_init_manageability(adapter);

	if ((ifp->if_flags & IFF_UP) &&
	    (ifp->if_drv_flags & IFF_DRV_RUNNING) && adapter->link_active) {
		for (int i = 0; i < adapter->num_queues; i++, txr++) {
			EM_TX_LOCK(txr);
#ifdef EM_MULTIQUEUE
			if (!drbr_empty(ifp, txr->br))
				em_mq_start_locked(ifp, txr, NULL);
#else
			if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
				em_start_locked(ifp, txr);
#endif
			EM_TX_UNLOCK(txr);
		}
	}
	EM_CORE_UNLOCK(adapter);

	return bus_generic_resume(dev);
}


#ifdef EM_MULTIQUEUE
/*********************************************************************
 *  Multiqueue Transmit routines
 *
 *  em_mq_start is called by the stack to initiate a transmit.
 *  However, if the ring is busy the driver can queue the request
 *  rather than do an immediate send.  That queueing, rather than
 *  multiple hardware tx queues, is where this driver gains its
 *  advantage.
 **********************************************************************/
static int
em_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr, struct mbuf *m)
{
	struct adapter  *adapter = txr->adapter;
	struct mbuf     *next;
	int             err = 0, enq = 0;

	if ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) !=
	    IFF_DRV_RUNNING || adapter->link_active == 0) {
		if (m != NULL)
			err = drbr_enqueue(ifp, txr->br, m);
		return (err);
	}

	enq = 0;
	if (m == NULL) {
		next = drbr_dequeue(ifp, txr->br);
	} else if (drbr_needs_enqueue(ifp, txr->br)) {
		if ((err = drbr_enqueue(ifp, txr->br, m)) != 0)
			return (err);
		next = drbr_dequeue(ifp, txr->br);
	} else
		next = m;

	/* Process the queue */
	while (next != NULL) {
		if ((err = em_xmit(txr, &next)) != 0) {
			if (next != NULL)
				err = drbr_enqueue(ifp, txr->br, next);
			break;
		}
		enq++;
		drbr_stats_update(ifp, next->m_pkthdr.len, next->m_flags);
		ETHER_BPF_MTAP(ifp, next);
		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
			break;
		next = drbr_dequeue(ifp, txr->br);
	}

	if (enq > 0) {
		/* Set the watchdog */
		txr->queue_status = EM_QUEUE_WORKING;
		txr->watchdog_time = ticks;
	}

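	/*
	 * If descriptors are getting scarce, try to reclaim completed
	 * ones first; if the ring is still nearly full after that,
	 * tell the stack to back off.
	 */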
	if (txr->tx_avail < EM_MAX_SCATTER)
		em_txeof(txr);
	if (txr->tx_avail < EM_MAX_SCATTER)
		ifp->if_drv_flags |= IFF_DRV_OACTIVE;
	return (err);
}

/*
** Multiqueue capable stack interface
*/
static int
em_mq_start(struct ifnet *ifp, struct mbuf *m)
{
	struct adapter	*adapter = ifp->if_softc;
	struct tx_ring	*txr = adapter->tx_rings;
	int 		error;

	if (EM_TX_TRYLOCK(txr)) {
		error = em_mq_start_locked(ifp, txr, m);
		EM_TX_UNLOCK(txr);
	} else
		error = drbr_enqueue(ifp, txr->br, m);

	return (error);
}

/*
** Flush all ring buffers
*/
static void
em_qflush(struct ifnet *ifp)
{
	struct adapter  *adapter = ifp->if_softc;
	struct tx_ring  *txr = adapter->tx_rings;
	struct mbuf     *m;

	for (int i = 0; i < adapter->num_queues; i++, txr++) {
		EM_TX_LOCK(txr);
		while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
			m_freem(m);
		EM_TX_UNLOCK(txr);
	}
	if_qflush(ifp);
}
#else  /* !EM_MULTIQUEUE */

static void
em_start_locked(struct ifnet *ifp, struct tx_ring *txr)
{
	struct adapter	*adapter = ifp->if_softc;
	struct mbuf	*m_head;

	EM_TX_LOCK_ASSERT(txr);

	if ((ifp->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
	    IFF_DRV_RUNNING)
		return;

	if (!adapter->link_active)
		return;

	while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
		/* Call cleanup if number of TX descriptors low */
		if (txr->tx_avail <= EM_TX_CLEANUP_THRESHOLD)
			em_txeof(txr);
		if (txr->tx_avail < EM_MAX_SCATTER) {
			ifp->if_drv_flags |= IFF_DRV_OACTIVE;
			break;
		}
		IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
		if (m_head == NULL)
			break;
		/*
		 *  Encapsulation can modify our pointer, and/or make it
		 *  NULL on failure.  In that event, we can't requeue.
		 */
		if (em_xmit(txr, &m_head)) {
			if (m_head == NULL)
				break;
			IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
			break;
		}

		/* Send a copy of the frame to the BPF listener */
		ETHER_BPF_MTAP(ifp, m_head);

		/* Set timeout in case hardware has problems transmitting. */
		txr->watchdog_time = ticks;
		txr->queue_status = EM_QUEUE_WORKING;
	}

	return;
}

static void
em_start(struct ifnet *ifp)
{
	struct adapter	*adapter = ifp->if_softc;
	struct tx_ring	*txr = adapter->tx_rings;

	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
		EM_TX_LOCK(txr);
		em_start_locked(ifp, txr);
		EM_TX_UNLOCK(txr);
	}
	return;
}
#endif /* EM_MULTIQUEUE */

/*********************************************************************
 *  Ioctl entry point
 *
 *  em_ioctl is called when the user wants to configure the
 *  interface.
 *
 *  return 0 on success, positive on failure
 **********************************************************************/

static int
em_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
{
	struct adapter	*adapter = ifp->if_softc;
	struct ifreq	*ifr = (struct ifreq *)data;
#if defined(INET) || defined(INET6)
	struct ifaddr	*ifa = (struct ifaddr *)data;
#endif
	bool		avoid_reset = FALSE;
	int		error = 0;

	if (adapter->in_detach)
		return (error);

	switch (command) {
	case SIOCSIFADDR:
#ifdef INET
		if (ifa->ifa_addr->sa_family == AF_INET)
			avoid_reset = TRUE;
#endif
#ifdef INET6
		if (ifa->ifa_addr->sa_family == AF_INET6)
			avoid_reset = TRUE;
#endif
		/*
		** Calling init results in link renegotiation,
		** so we avoid doing it when possible.
		*/
		if (avoid_reset) {
			ifp->if_flags |= IFF_UP;
			if (!(ifp->if_drv_flags & IFF_DRV_RUNNING))
				em_init(adapter);
#ifdef INET
			if (!(ifp->if_flags & IFF_NOARP))
				arp_ifinit(ifp, ifa);
#endif
		} else
			error = ether_ioctl(ifp, command, data);
		break;
	case SIOCSIFMTU:
	    {
		int max_frame_size;

		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)");

		EM_CORE_LOCK(adapter);
		switch (adapter->hw.mac.type) {
		case e1000_82571:
		case e1000_82572:
		case e1000_ich9lan:
		case e1000_ich10lan:
		case e1000_pch2lan:
		case e1000_82574:
		case e1000_82583:
		case e1000_80003es2lan:	/* 9K Jumbo Frame size */
			max_frame_size = 9234;
			break;
		case e1000_pchlan:
			max_frame_size = 4096;
			break;
			/* Adapters that do not support jumbo frames */
		case e1000_ich8lan:
			max_frame_size = ETHER_MAX_LEN;
			break;
		default:
			max_frame_size = MAX_JUMBO_FRAME_SIZE;
		}
		if (ifr->ifr_mtu > max_frame_size - ETHER_HDR_LEN -
		    ETHER_CRC_LEN) {
			EM_CORE_UNLOCK(adapter);
			error = EINVAL;
			break;
		}
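		/*
		 * e.g. with a 9234-byte frame limit the largest MTU
		 * accepted here is 9234 - 14 - 4 = 9216 bytes.
		 */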

		ifp->if_mtu = ifr->ifr_mtu;
		adapter->max_frame_size =
		    ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
		em_init_locked(adapter);
		EM_CORE_UNLOCK(adapter);
		break;
	    }
	case SIOCSIFFLAGS:
		IOCTL_DEBUGOUT("ioctl rcv'd:\
		    SIOCSIFFLAGS (Set Interface Flags)");
		EM_CORE_LOCK(adapter);
		if (ifp->if_flags & IFF_UP) {
			if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) {
				if ((ifp->if_flags ^ adapter->if_flags) &
				    (IFF_PROMISC | IFF_ALLMULTI)) {
					em_disable_promisc(adapter);
					em_set_promisc(adapter);
				}
			} else
				em_init_locked(adapter);
		} else
			if (ifp->if_drv_flags & IFF_DRV_RUNNING)
				em_stop(adapter);
		adapter->if_flags = ifp->if_flags;
		EM_CORE_UNLOCK(adapter);
		break;
	case SIOCADDMULTI:
	case SIOCDELMULTI:
		IOCTL_DEBUGOUT("ioctl rcv'd: SIOC(ADD|DEL)MULTI");
		if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
			EM_CORE_LOCK(adapter);
			em_disable_intr(adapter);
			em_set_multi(adapter);
#ifdef DEVICE_POLLING
			if (!(ifp->if_capenable & IFCAP_POLLING))
#endif
				em_enable_intr(adapter);
			EM_CORE_UNLOCK(adapter);
		}
		break;
	case SIOCSIFMEDIA:
		/* Check SOL/IDER usage */
		EM_CORE_LOCK(adapter);
		if (e1000_check_reset_block(&adapter->hw)) {
			EM_CORE_UNLOCK(adapter);
			device_printf(adapter->dev, "Media change is"
			    " blocked due to SOL/IDER session.\n");
			break;
		}
		EM_CORE_UNLOCK(adapter);
		/* falls thru */
	case SIOCGIFMEDIA:
		IOCTL_DEBUGOUT("ioctl rcv'd: \
		    SIOCxIFMEDIA (Get/Set Interface Media)");
		error = ifmedia_ioctl(ifp, ifr, &adapter->media, command);
		break;
	case SIOCSIFCAP:
	    {
		int mask, reinit;

		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFCAP (Set Capabilities)");
		reinit = 0;
		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
#ifdef DEVICE_POLLING
		if (mask & IFCAP_POLLING) {
			if (ifr->ifr_reqcap & IFCAP_POLLING) {
				error = ether_poll_register(em_poll, ifp);
				if (error)
					return (error);
				EM_CORE_LOCK(adapter);
				em_disable_intr(adapter);
				ifp->if_capenable |= IFCAP_POLLING;
				EM_CORE_UNLOCK(adapter);
			} else {
				error = ether_poll_deregister(ifp);
				/* Enable interrupt even in error case */
				EM_CORE_LOCK(adapter);
				em_enable_intr(adapter);
				ifp->if_capenable &= ~IFCAP_POLLING;
				EM_CORE_UNLOCK(adapter);
			}
		}
#endif
		if (mask & IFCAP_HWCSUM) {
			ifp->if_capenable ^= IFCAP_HWCSUM;
			reinit = 1;
		}
		if (mask & IFCAP_TSO4) {
			ifp->if_capenable ^= IFCAP_TSO4;
			reinit = 1;
		}
		if (mask & IFCAP_VLAN_HWTAGGING) {
			ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
			reinit = 1;
		}
		if (mask & IFCAP_VLAN_HWFILTER) {
			ifp->if_capenable ^= IFCAP_VLAN_HWFILTER;
			reinit = 1;
		}
		if (mask & IFCAP_VLAN_HWTSO) {
			ifp->if_capenable ^= IFCAP_VLAN_HWTSO;
			reinit = 1;
		}
		if ((mask & IFCAP_WOL) &&
		    (ifp->if_capabilities & IFCAP_WOL) != 0) {
			if (mask & IFCAP_WOL_MCAST)
				ifp->if_capenable ^= IFCAP_WOL_MCAST;
			if (mask & IFCAP_WOL_MAGIC)
				ifp->if_capenable ^= IFCAP_WOL_MAGIC;
		}
		if (reinit && (ifp->if_drv_flags & IFF_DRV_RUNNING))
			em_init(adapter);
		VLAN_CAPABILITIES(ifp);
		break;
	    }

	default:
		error = ether_ioctl(ifp, command, data);
		break;
	}

	return (error);
}


/*********************************************************************
 *  Init entry point
 *
 *  This routine is used in two ways.  It is used by the stack as
 *  the init entry point in the network interface structure.  It is
 *  also used by the driver as a hw/sw initialization routine to
 *  get to a consistent state.
 *
 **********************************************************************/

static void
em_init_locked(struct adapter *adapter)
{
	struct ifnet	*ifp = adapter->ifp;
	device_t	dev = adapter->dev;

	INIT_DEBUGOUT("em_init: begin");

	EM_CORE_LOCK_ASSERT(adapter);

	em_disable_intr(adapter);
	callout_stop(&adapter->timer);

	/* Get the latest mac address, User can use a LAA */
	bcopy(IF_LLADDR(adapter->ifp), adapter->hw.mac.addr,
	      ETHER_ADDR_LEN);

	/* Put the address into the Receive Address Array */
	e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);

	/*
	 * With the 82571 adapter, RAR[0] may be overwritten
	 * when the other port is reset, so we keep a duplicate
	 * in the last RAR entry (RAR[14]) for that eventuality;
	 * this assures the interface continues to function.
	 */
	if (adapter->hw.mac.type == e1000_82571) {
		e1000_set_laa_state_82571(&adapter->hw, TRUE);
		e1000_rar_set(&adapter->hw, adapter->hw.mac.addr,
		    E1000_RAR_ENTRIES - 1);
	}

	/* Initialize the hardware */
	em_reset(adapter);
	em_update_link_status(adapter);

	/* Setup VLAN support, basic and offload if available */
	E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);

	/* Set hardware offload abilities */
	ifp->if_hwassist = 0;
	if (ifp->if_capenable & IFCAP_TXCSUM)
		ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
	if (ifp->if_capenable & IFCAP_TSO4)
		ifp->if_hwassist |= CSUM_TSO;

	/* Configure for OS presence */
	em_init_manageability(adapter);

	/* Prepare transmit descriptors and buffers */
	em_setup_transmit_structures(adapter);
	em_initialize_transmit_unit(adapter);

	/* Setup Multicast table */
	em_set_multi(adapter);

	/*
	** Figure out the desired mbuf
	** pool for doing jumbos
	*/
	if (adapter->max_frame_size <= 2048)
		adapter->rx_mbuf_sz = MCLBYTES;
	else if (adapter->max_frame_size <= 4096)
		adapter->rx_mbuf_sz = MJUMPAGESIZE;
	else
		adapter->rx_mbuf_sz = MJUM9BYTES;
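	/*
	 * So a standard 1518-byte frame lands in a 2K cluster,
	 * mid-size jumbos in a page-sized (typically 4K) cluster,
	 * and anything larger in a 9K cluster.
	 */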

	/* Prepare receive descriptors and buffers */
	if (em_setup_receive_structures(adapter)) {
		device_printf(dev, "Could not setup receive structures\n");
		em_stop(adapter);
		return;
	}
	em_initialize_receive_unit(adapter);

	/* Use real VLAN Filter support? */
	if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) {
		if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
			/* Use real VLAN Filter support */
			em_setup_vlan_hw_support(adapter);
		else {
			u32 ctrl;
			ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
			ctrl |= E1000_CTRL_VME;
			E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
		}
	}

	/* Don't lose promiscuous settings */
	em_set_promisc(adapter);

	/* Set the interface as ACTIVE */
	ifp->if_drv_flags |= IFF_DRV_RUNNING;
	ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;

	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
	e1000_clear_hw_cntrs_base_generic(&adapter->hw);

	/* MSI/X configuration for 82574 */
	if (adapter->hw.mac.type == e1000_82574) {
		int tmp;
		tmp = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
		tmp |= E1000_CTRL_EXT_PBA_CLR;
		E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, tmp);
		/* Set the IVAR - interrupt vector routing. */
		E1000_WRITE_REG(&adapter->hw, E1000_IVAR, adapter->ivars);
	}

#ifdef DEVICE_POLLING
	/*
	 * Only enable interrupts if we are not polling; make sure
	 * they are off otherwise.
	 */
	if (ifp->if_capenable & IFCAP_POLLING)
		em_disable_intr(adapter);
	else
#endif /* DEVICE_POLLING */
		em_enable_intr(adapter);

	/* AMT based hardware can now take control from firmware */
	if (adapter->has_manage && adapter->has_amt)
		em_get_hw_control(adapter);
}

static void
em_init(void *arg)
{
	struct adapter *adapter = arg;

	EM_CORE_LOCK(adapter);
	em_init_locked(adapter);
	EM_CORE_UNLOCK(adapter);
}


#ifdef DEVICE_POLLING
/*********************************************************************
 *
 *  Legacy polling routine: note this only works with a single queue
 *
 *********************************************************************/
static int
em_poll(struct ifnet *ifp, enum poll_cmd cmd, int count)
{
	struct adapter *adapter = ifp->if_softc;
	struct tx_ring	*txr = adapter->tx_rings;
	struct rx_ring	*rxr = adapter->rx_rings;
	u32		reg_icr;
	int		rx_done;

	EM_CORE_LOCK(adapter);
	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
		EM_CORE_UNLOCK(adapter);
		return (0);
	}

	if (cmd == POLL_AND_CHECK_STATUS) {
		reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
		if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
			callout_stop(&adapter->timer);
			adapter->hw.mac.get_link_status = 1;
			em_update_link_status(adapter);
			callout_reset(&adapter->timer, hz,
			    em_local_timer, adapter);
		}
	}
	EM_CORE_UNLOCK(adapter);

	em_rxeof(rxr, count, &rx_done);

	EM_TX_LOCK(txr);
	em_txeof(txr);
#ifdef EM_MULTIQUEUE
	if (!drbr_empty(ifp, txr->br))
		em_mq_start_locked(ifp, txr, NULL);
#else
	if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
		em_start_locked(ifp, txr);
#endif
	EM_TX_UNLOCK(txr);

	return (rx_done);
}
#endif /* DEVICE_POLLING */


/*********************************************************************
 *
 *  Fast Legacy/MSI Combined Interrupt Service routine
 *
 *********************************************************************/
static int
em_irq_fast(void *arg)
{
	struct adapter	*adapter = arg;
	struct ifnet	*ifp;
	u32		reg_icr;

	ifp = adapter->ifp;

	reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);

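	/*
	 * Note: on these MACs, reading ICR in the legacy/MSI path also
	 * acknowledges (clears) the pending interrupt causes.
	 */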
	/* Hot eject?  */
	if (reg_icr == 0xffffffff)
		return FILTER_STRAY;

	/* Definitely not our interrupt.  */
	if (reg_icr == 0x0)
		return FILTER_STRAY;

	/*
	 * Starting with the 82571 chip, bit 31 should be used to
	 * determine whether the interrupt belongs to us.
	 */
	if (adapter->hw.mac.type >= e1000_82571 &&
	    (reg_icr & E1000_ICR_INT_ASSERTED) == 0)
		return FILTER_STRAY;

	em_disable_intr(adapter);
	taskqueue_enqueue(adapter->tq, &adapter->que_task);

	/* Link status change */
	if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
		adapter->hw.mac.get_link_status = 1;
		taskqueue_enqueue(taskqueue_fast, &adapter->link_task);
	}

	if (reg_icr & E1000_ICR_RXO)
		adapter->rx_overruns++;
	return FILTER_HANDLED;
}

/* Combined RX/TX handler, used by Legacy and MSI */
static void
em_handle_que(void *context, int pending)
{
	struct adapter	*adapter = context;
	struct ifnet	*ifp = adapter->ifp;
	struct tx_ring	*txr = adapter->tx_rings;
	struct rx_ring	*rxr = adapter->rx_rings;


	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
		bool more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
		EM_TX_LOCK(txr);
		em_txeof(txr);
#ifdef EM_MULTIQUEUE
		if (!drbr_empty(ifp, txr->br))
			em_mq_start_locked(ifp, txr, NULL);
#else
		if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
			em_start_locked(ifp, txr);
#endif
		EM_TX_UNLOCK(txr);
		if (more) {
			taskqueue_enqueue(adapter->tq, &adapter->que_task);
			return;
		}
	}

	em_enable_intr(adapter);
	return;
}


/*********************************************************************
 *
 *  MSIX Interrupt Service Routines
 *
 **********************************************************************/
static void
em_msix_tx(void *arg)
{
	struct tx_ring *txr = arg;
	struct adapter *adapter = txr->adapter;
	struct ifnet	*ifp = adapter->ifp;

	++txr->tx_irq;
	EM_TX_LOCK(txr);
	em_txeof(txr);
#ifdef EM_MULTIQUEUE
	if (!drbr_empty(ifp, txr->br))
		em_mq_start_locked(ifp, txr, NULL);
#else
	if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
		em_start_locked(ifp, txr);
#endif
	/* Reenable this interrupt */
	E1000_WRITE_REG(&adapter->hw, E1000_IMS, txr->ims);
	EM_TX_UNLOCK(txr);
	return;
}

/*********************************************************************
 *
 *  MSIX RX Interrupt Service routine
 *
 **********************************************************************/

static void
em_msix_rx(void *arg)
{
	struct rx_ring	*rxr = arg;
	struct adapter	*adapter = rxr->adapter;
	bool		more;

	++rxr->rx_irq;
	more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
	if (more)
		taskqueue_enqueue(rxr->tq, &rxr->rx_task);
	else
		/* Reenable this interrupt */
		E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxr->ims);
	return;
}

/*********************************************************************
 *
 *  MSIX Link Fast Interrupt Service routine
 *
 **********************************************************************/
static void
em_msix_link(void *arg)
{
	struct adapter	*adapter = arg;
	u32		reg_icr;

	++adapter->link_irq;
	reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);

	if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
		adapter->hw.mac.get_link_status = 1;
		em_handle_link(adapter, 0);
	} else
		E1000_WRITE_REG(&adapter->hw, E1000_IMS,
		    EM_MSIX_LINK | E1000_IMS_LSC);
	return;
}

static void
em_handle_rx(void *context, int pending)
{
	struct rx_ring	*rxr = context;
	struct adapter	*adapter = rxr->adapter;
	bool		more;

	more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
	if (more)
		taskqueue_enqueue(rxr->tq, &rxr->rx_task);
	else
		/* Reenable this interrupt */
		E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxr->ims);
}

static void
em_handle_tx(void *context, int pending)
{
	struct tx_ring	*txr = context;
	struct adapter	*adapter = txr->adapter;
	struct ifnet	*ifp = adapter->ifp;

	EM_TX_LOCK(txr);
	em_txeof(txr);
#ifdef EM_MULTIQUEUE
	if (!drbr_empty(ifp, txr->br))
		em_mq_start_locked(ifp, txr, NULL);
#else
	if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
		em_start_locked(ifp, txr);
#endif
	E1000_WRITE_REG(&adapter->hw, E1000_IMS, txr->ims);
	EM_TX_UNLOCK(txr);
}

static void
em_handle_link(void *context, int pending)
{
	struct adapter	*adapter = context;
	struct tx_ring	*txr = adapter->tx_rings;
	struct ifnet *ifp = adapter->ifp;

	if (!(ifp->if_drv_flags & IFF_DRV_RUNNING))
		return;

	EM_CORE_LOCK(adapter);
	callout_stop(&adapter->timer);
	em_update_link_status(adapter);
	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
	E1000_WRITE_REG(&adapter->hw, E1000_IMS,
	    EM_MSIX_LINK | E1000_IMS_LSC);
	if (adapter->link_active) {
		for (int i = 0; i < adapter->num_queues; i++, txr++) {
			EM_TX_LOCK(txr);
#ifdef EM_MULTIQUEUE
			if (!drbr_empty(ifp, txr->br))
				em_mq_start_locked(ifp, txr, NULL);
#else
			if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
				em_start_locked(ifp, txr);
#endif
			EM_TX_UNLOCK(txr);
		}
	}
	EM_CORE_UNLOCK(adapter);
}


/*********************************************************************
 *
 *  Media Ioctl callback
 *
 *  This routine is called whenever the user queries the status of
 *  the interface using ifconfig.
 *
 **********************************************************************/
static void
em_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
{
	struct adapter *adapter = ifp->if_softc;
	u_char fiber_type = IFM_1000_SX;

	INIT_DEBUGOUT("em_media_status: begin");

	EM_CORE_LOCK(adapter);
	em_update_link_status(adapter);

	ifmr->ifm_status = IFM_AVALID;
	ifmr->ifm_active = IFM_ETHER;

	if (!adapter->link_active) {
		EM_CORE_UNLOCK(adapter);
		return;
	}

	ifmr->ifm_status |= IFM_ACTIVE;

	if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
	    (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
		ifmr->ifm_active |= fiber_type | IFM_FDX;
	} else {
		switch (adapter->link_speed) {
		case 10:
			ifmr->ifm_active |= IFM_10_T;
			break;
		case 100:
			ifmr->ifm_active |= IFM_100_TX;
			break;
		case 1000:
			ifmr->ifm_active |= IFM_1000_T;
			break;
		}
		if (adapter->link_duplex == FULL_DUPLEX)
			ifmr->ifm_active |= IFM_FDX;
		else
			ifmr->ifm_active |= IFM_HDX;
	}
	EM_CORE_UNLOCK(adapter);
}

/*********************************************************************
 *
 *  Media Ioctl callback
 *
 *  This routine is called when the user changes speed/duplex using
 *  the media/mediaopt options with ifconfig.
1731 *
1732 **********************************************************************/
1733static int
1734em_media_change(struct ifnet *ifp)
1735{
1736	struct adapter *adapter = ifp->if_softc;
1737	struct ifmedia  *ifm = &adapter->media;
1738
1739	INIT_DEBUGOUT("em_media_change: begin");
1740
1741	if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
1742		return (EINVAL);
1743
1744	EM_CORE_LOCK(adapter);
1745	switch (IFM_SUBTYPE(ifm->ifm_media)) {
1746	case IFM_AUTO:
1747		adapter->hw.mac.autoneg = DO_AUTO_NEG;
1748		adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
1749		break;
1750	case IFM_1000_LX:
1751	case IFM_1000_SX:
1752	case IFM_1000_T:
1753		adapter->hw.mac.autoneg = DO_AUTO_NEG;
1754		adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
1755		break;
1756	case IFM_100_TX:
1757		adapter->hw.mac.autoneg = FALSE;
1758		adapter->hw.phy.autoneg_advertised = 0;
1759		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1760			adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_FULL;
1761		else
1762			adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_HALF;
1763		break;
1764	case IFM_10_T:
1765		adapter->hw.mac.autoneg = FALSE;
1766		adapter->hw.phy.autoneg_advertised = 0;
1767		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1768			adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_FULL;
1769		else
1770			adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_HALF;
1771		break;
1772	default:
1773		device_printf(adapter->dev, "Unsupported media type\n");
1774	}
1775
1776	em_init_locked(adapter);
1777	EM_CORE_UNLOCK(adapter);
1778
1779	return (0);
1780}
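/*
 * Usage note (illustrative, not part of the driver): the cases above
 * correspond to ifconfig(8) media requests, e.g. forcing 100Mb
 * full-duplex and then returning to autonegotiation:
 *
 *	# ifconfig em0 media 100baseTX mediaopt full-duplex
 *	# ifconfig em0 media autoselect
 *
 * Both end in em_init_locked() reprogramming the PHY with the new
 * autoneg/forced_speed_duplex settings.
 */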
1781
1782/*********************************************************************
1783 *
1784 *  This routine maps the mbufs to tx descriptors.
1785 *
1786 *  return 0 on success, positive on failure
1787 **********************************************************************/
1788
1789static int
1790em_xmit(struct tx_ring *txr, struct mbuf **m_headp)
1791{
1792	struct adapter		*adapter = txr->adapter;
1793	bus_dma_segment_t	segs[EM_MAX_SCATTER];
1794	bus_dmamap_t		map;
1795	struct em_buffer	*tx_buffer, *tx_buffer_mapped;
1796	struct e1000_tx_desc	*ctxd = NULL;
1797	struct mbuf		*m_head;
1798	struct ether_header	*eh;
1799	struct ip		*ip = NULL;
1800	struct tcphdr		*tp = NULL;
1801	u32			txd_upper, txd_lower, txd_used, txd_saved;
1802	int			ip_off, poff;
1803	int			nsegs, i, j, first, last = 0;
1804	int			error, do_tso, tso_desc = 0, remap = 1;
1805
1806retry:
1807	m_head = *m_headp;
1808	txd_upper = txd_lower = txd_used = txd_saved = 0;
1809	do_tso = ((m_head->m_pkthdr.csum_flags & CSUM_TSO) != 0);
1810	ip_off = poff = 0;
1811
1812	/*
1813	 * Intel recommends entire IP/TCP header length reside in a single
1814	 * buffer. If multiple descriptors are used to describe the IP and
1815	 * TCP header, each descriptor should describe one or more
1816	 * complete headers; descriptors referencing only parts of headers
1817	 * are not supported. If all layer headers are not coalesced into
1818	 * a single buffer, each buffer should not cross a 4KB boundary,
1819	 * or be larger than the maximum read request size.
1820	 * The controller also requires modifying the IP/TCP header to
1821	 * make TSO work, so we first get a writable mbuf chain, then
1822	 * coalesce the ethernet/IP/TCP headers into a single buffer to
1823	 * meet the controller's requirement. This also simplifies
1824	 * IP/TCP/UDP checksum offloading, which has similar restrictions.
1825	 */
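	/*
	 * Worked example (illustrative): for an untagged TCP/IPv4 frame
	 * with no IP options, ip_off = sizeof(struct ether_header) = 14
	 * and, with ip_hl = 5, poff = 14 + (5 << 2) = 34. The TSO path
	 * below then pulls up poff + (th_off << 2) + 4 bytes, so the
	 * entire header chain plus the 4 workaround bytes of payload
	 * land in one writable buffer.
	 */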
1826	if (do_tso || m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD) {
1827		if (do_tso || (m_head->m_next != NULL &&
1828		    m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD)) {
1829			if (M_WRITABLE(*m_headp) == 0) {
1830				m_head = m_dup(*m_headp, M_DONTWAIT);
1831				m_freem(*m_headp);
1832				if (m_head == NULL) {
1833					*m_headp = NULL;
1834					return (ENOBUFS);
1835				}
1836				*m_headp = m_head;
1837			}
1838		}
1839		/*
1840		 * XXX
1841		 * Assume IPv4, we don't have TSO/checksum offload support
1842		 * for IPv6 yet.
1843		 */
1844		ip_off = sizeof(struct ether_header);
1845		m_head = m_pullup(m_head, ip_off);
1846		if (m_head == NULL) {
1847			*m_headp = NULL;
1848			return (ENOBUFS);
1849		}
1850		eh = mtod(m_head, struct ether_header *);
1851		if (eh->ether_type == htons(ETHERTYPE_VLAN)) {
1852			ip_off = sizeof(struct ether_vlan_header);
1853			m_head = m_pullup(m_head, ip_off);
1854			if (m_head == NULL) {
1855				*m_headp = NULL;
1856				return (ENOBUFS);
1857			}
1858		}
1859		m_head = m_pullup(m_head, ip_off + sizeof(struct ip));
1860		if (m_head == NULL) {
1861			*m_headp = NULL;
1862			return (ENOBUFS);
1863		}
1864		ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1865		poff = ip_off + (ip->ip_hl << 2);
1866		if (do_tso) {
1867			m_head = m_pullup(m_head, poff + sizeof(struct tcphdr));
1868			if (m_head == NULL) {
1869				*m_headp = NULL;
1870				return (ENOBUFS);
1871			}
1872			tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1873			/*
1874			 * TSO workaround:
1875	 *   pull 4 more bytes of data into the coalesced header mbuf.
1876			 */
1877			m_head = m_pullup(m_head, poff + (tp->th_off << 2) + 4);
1878			if (m_head == NULL) {
1879				*m_headp = NULL;
1880				return (ENOBUFS);
1881			}
1882			ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1883			ip->ip_len = 0;
1884			ip->ip_sum = 0;
1885			/*
1886	 * The TCP pseudo-header checksum must not include the TCP
1887	 * payload length, so the driver recomputes here the value the
1888	 * hardware expects to see, in accordance with Microsoft's
1889	 * Large Send specification.
1890			 */
1891			tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1892			tp->th_sum = in_pseudo(ip->ip_src.s_addr,
1893			    ip->ip_dst.s_addr, htons(IPPROTO_TCP));
1894		} else if (m_head->m_pkthdr.csum_flags & CSUM_TCP) {
1895			m_head = m_pullup(m_head, poff + sizeof(struct tcphdr));
1896			if (m_head == NULL) {
1897				*m_headp = NULL;
1898				return (ENOBUFS);
1899			}
1900			tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1901			m_head = m_pullup(m_head, poff + (tp->th_off << 2));
1902			if (m_head == NULL) {
1903				*m_headp = NULL;
1904				return (ENOBUFS);
1905			}
1906			ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1907			tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1908		} else if (m_head->m_pkthdr.csum_flags & CSUM_UDP) {
1909			m_head = m_pullup(m_head, poff + sizeof(struct udphdr));
1910			if (m_head == NULL) {
1911				*m_headp = NULL;
1912				return (ENOBUFS);
1913			}
1914			ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1915		}
1916		*m_headp = m_head;
1917	}
1918
1919	/*
1920	 * Map the packet for DMA
1921	 *
1922	 * Capture the first descriptor index,
1923	 * this descriptor will have the index
1924	 * of the EOP which is the only one that
1925	 * now gets a DONE bit writeback.
1926	 */
1927	first = txr->next_avail_desc;
1928	tx_buffer = &txr->tx_buffers[first];
1929	tx_buffer_mapped = tx_buffer;
1930	map = tx_buffer->map;
1931
1932	error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
1933	    *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
1934
1935	/*
1936	 * There are two types of errors we can (try) to handle:
1937	 * - EFBIG means the mbuf chain was too long and bus_dma ran
1938	 *   out of segments.  Defragment the mbuf chain and try again.
1939	 * - ENOMEM means bus_dma could not obtain enough bounce buffers
1940	 *   at this point in time.  Defer sending and try again later.
1941	 * All other errors, in particular EINVAL, are fatal and prevent the
1942	 * mbuf chain from ever going through.  Drop it and report error.
1943	 */
1944	if (error == EFBIG && remap) {
1945		struct mbuf *m;
1946
1947		m = m_defrag(*m_headp, M_DONTWAIT);
1948		if (m == NULL) {
1949			adapter->mbuf_alloc_failed++;
1950			m_freem(*m_headp);
1951			*m_headp = NULL;
1952			return (ENOBUFS);
1953		}
1954		*m_headp = m;
1955
1956		/* Try it again, but only once */
1957		remap = 0;
1958		goto retry;
1959	} else if (error == ENOMEM) {
1960		adapter->no_tx_dma_setup++;
1961		return (error);
1962	} else if (error != 0) {
1963		adapter->no_tx_dma_setup++;
1964		m_freem(*m_headp);
1965		*m_headp = NULL;
1966		return (error);
1967	}
1968
1969	/*
1970	 * TSO Hardware workaround, if this packet is not
1971	 * TSO, and is only a single descriptor long, and
1972	 * it follows a TSO burst, then we need to add a
1973	 * sentinel descriptor to prevent premature writeback.
1974	 */
1975	if ((do_tso == 0) && (txr->tx_tso == TRUE)) {
1976		if (nsegs == 1)
1977			tso_desc = TRUE;
1978		txr->tx_tso = FALSE;
1979	}
1980
1981	if (nsegs > (txr->tx_avail - 2)) {
1982		txr->no_desc_avail++;
1983		bus_dmamap_unload(txr->txtag, map);
1984		return (ENOBUFS);
1985	}
1986	m_head = *m_headp;
1987
1988	/* Do hardware assists */
1989	if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
1990		em_tso_setup(txr, m_head, ip_off, ip, tp,
1991		    &txd_upper, &txd_lower);
1992		/* we need to make a final sentinel transmit desc */
1993		tso_desc = TRUE;
1994	} else if (m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD)
1995		em_transmit_checksum_setup(txr, m_head,
1996		    ip_off, ip, &txd_upper, &txd_lower);
1997
1998	if (m_head->m_flags & M_VLANTAG) {
1999		/* Set the vlan id. */
2000		txd_upper |=
2001		    (htole16(m_head->m_pkthdr.ether_vtag) << 16);
2002		/* Tell hardware to add tag */
2003		txd_lower |= htole32(E1000_TXD_CMD_VLE);
2004	}
2005
2006	i = txr->next_avail_desc;
2007
2008	/* Set up our transmit descriptors */
2009	for (j = 0; j < nsegs; j++) {
2010		bus_size_t seg_len;
2011		bus_addr_t seg_addr;
2012
2013		tx_buffer = &txr->tx_buffers[i];
2014		ctxd = &txr->tx_base[i];
2015		seg_addr = segs[j].ds_addr;
2016		seg_len  = segs[j].ds_len;
2017		/*
2018		** TSO Workaround:
2019		** If this is the last descriptor, we want to
2020		** split it so we have a small final sentinel
2021		*/
2022		if (tso_desc && (j == (nsegs - 1)) && (seg_len > 8)) {
2023			seg_len -= 4;
2024			ctxd->buffer_addr = htole64(seg_addr);
2025			ctxd->lower.data = htole32(
2026			adapter->txd_cmd | txd_lower | seg_len);
2027			ctxd->upper.data =
2028			    htole32(txd_upper);
2029			if (++i == adapter->num_tx_desc)
2030				i = 0;
2031			/* Now make the sentinel */
2032			++txd_used; /* using an extra txd */
2033			ctxd = &txr->tx_base[i];
2034			tx_buffer = &txr->tx_buffers[i];
2035			ctxd->buffer_addr =
2036			    htole64(seg_addr + seg_len);
2037			ctxd->lower.data = htole32(
2038			adapter->txd_cmd | txd_lower | 4);
2039			ctxd->upper.data =
2040			    htole32(txd_upper);
2041			last = i;
2042			if (++i == adapter->num_tx_desc)
2043				i = 0;
2044		} else {
2045			ctxd->buffer_addr = htole64(seg_addr);
2046			ctxd->lower.data = htole32(
2047			adapter->txd_cmd | txd_lower | seg_len);
2048			ctxd->upper.data =
2049			    htole32(txd_upper);
2050			last = i;
2051			if (++i == adapter->num_tx_desc)
2052				i = 0;
2053		}
2054		tx_buffer->m_head = NULL;
2055		tx_buffer->next_eop = -1;
2056	}
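	/*
	 * Worked example (illustrative): with tso_desc set and a final
	 * segment of 1000 bytes, the split above emits a 996-byte
	 * descriptor followed by a 4-byte sentinel descriptor, keeping
	 * the premature-writeback erratum from firing on the EOP
	 * descriptor of a post-TSO packet.
	 */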
2057
2058	txr->next_avail_desc = i;
2059	txr->tx_avail -= nsegs;
2060	if (tso_desc) /* TSO used an extra for sentinel */
2061		txr->tx_avail -= txd_used;
2062
2063	tx_buffer->m_head = m_head;
2064	/*
2065	** Here we swap the map so the last descriptor,
2066	** which gets the completion interrupt has the
2067	** real map, and the first descriptor gets the
2068	** unused map from this descriptor.
2069	*/
2070	tx_buffer_mapped->map = tx_buffer->map;
2071	tx_buffer->map = map;
2072	bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);
2073
2074	/*
2075	 * Last Descriptor of Packet
2076	 * needs End Of Packet (EOP)
2077	 * and Report Status (RS)
2078	 */
2079	ctxd->lower.data |=
2080	    htole32(E1000_TXD_CMD_EOP | E1000_TXD_CMD_RS);
2081	/*
2082	 * Keep track in the first buffer which
2083	 * descriptor will be written back
2084	 */
2085	tx_buffer = &txr->tx_buffers[first];
2086	tx_buffer->next_eop = last;
2087	/* Update the watchdog time early and often */
2088	txr->watchdog_time = ticks;
2089
2090	/*
2091	 * Advance the Transmit Descriptor Tail (TDT), this tells the E1000
2092	 * that this frame is available to transmit.
2093	 */
2094	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
2095	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
2096	E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), i);
2097
2098	return (0);
2099}
2100
2101static void
2102em_set_promisc(struct adapter *adapter)
2103{
2104	struct ifnet	*ifp = adapter->ifp;
2105	u32		reg_rctl;
2106
2107	reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2108
2109	if (ifp->if_flags & IFF_PROMISC) {
2110		reg_rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
2111		/* Turn this on if you want to see bad packets */
2112		if (em_debug_sbp)
2113			reg_rctl |= E1000_RCTL_SBP;
2114		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2115	} else if (ifp->if_flags & IFF_ALLMULTI) {
2116		reg_rctl |= E1000_RCTL_MPE;
2117		reg_rctl &= ~E1000_RCTL_UPE;
2118		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2119	}
2120}
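/*
 * Usage note (illustrative): these RCTL bits follow the interface flags
 * set from userland, e.g.:
 *
 *	# ifconfig em0 promisc		sets IFF_PROMISC -> UPE|MPE here
 *	# ifconfig em0 -promisc		cleared in em_disable_promisc()
 */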
2121
2122static void
2123em_disable_promisc(struct adapter *adapter)
2124{
2125	u32	reg_rctl;
2126
2127	reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2128
2129	reg_rctl &=  (~E1000_RCTL_UPE);
2130	reg_rctl &=  (~E1000_RCTL_MPE);
2131	reg_rctl &=  (~E1000_RCTL_SBP);
2132	E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2133}
2134
2135
2136/*********************************************************************
2137 *  Multicast Update
2138 *
2139 *  This routine is called whenever multicast address list is updated.
2140 *
2141 **********************************************************************/
2142
2143static void
2144em_set_multi(struct adapter *adapter)
2145{
2146	struct ifnet	*ifp = adapter->ifp;
2147	struct ifmultiaddr *ifma;
2148	u32 reg_rctl = 0;
2149	u8  *mta; /* Multicast array memory */
2150	int mcnt = 0;
2151
2152	IOCTL_DEBUGOUT("em_set_multi: begin");
2153
2154	mta = adapter->mta;
2155	bzero(mta, sizeof(u8) * ETH_ADDR_LEN * MAX_NUM_MULTICAST_ADDRESSES);
2156
2157	if (adapter->hw.mac.type == e1000_82542 &&
2158	    adapter->hw.revision_id == E1000_REVISION_2) {
2159		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2160		if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
2161			e1000_pci_clear_mwi(&adapter->hw);
2162		reg_rctl |= E1000_RCTL_RST;
2163		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2164		msec_delay(5);
2165	}
2166
2167#if __FreeBSD_version < 800000
2168	IF_ADDR_LOCK(ifp);
2169#else
2170	if_maddr_rlock(ifp);
2171#endif
2172	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
2173		if (ifma->ifma_addr->sa_family != AF_LINK)
2174			continue;
2175
2176		if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
2177			break;
2178
2179		bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
2180		    &mta[mcnt * ETH_ADDR_LEN], ETH_ADDR_LEN);
2181		mcnt++;
2182	}
2183#if __FreeBSD_version < 800000
2184	IF_ADDR_UNLOCK(ifp);
2185#else
2186	if_maddr_runlock(ifp);
2187#endif
2188	if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) {
2189		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2190		reg_rctl |= E1000_RCTL_MPE;
2191		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2192	} else
2193		e1000_update_mc_addr_list(&adapter->hw, mta, mcnt);
2194
2195	if (adapter->hw.mac.type == e1000_82542 &&
2196	    adapter->hw.revision_id == E1000_REVISION_2) {
2197		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2198		reg_rctl &= ~E1000_RCTL_RST;
2199		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2200		msec_delay(5);
2201		if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
2202			e1000_pci_set_mwi(&adapter->hw);
2203	}
2204}
2205
2206
2207/*********************************************************************
2208 *  Timer routine
2209 *
2210 *  This routine checks for link status and updates statistics.
2211 *
2212 **********************************************************************/
2213
2214static void
2215em_local_timer(void *arg)
2216{
2217	struct adapter	*adapter = arg;
2218	struct ifnet	*ifp = adapter->ifp;
2219	struct tx_ring	*txr = adapter->tx_rings;
2220	struct rx_ring	*rxr = adapter->rx_rings;
2221	u32		trigger;
2222
2223	EM_CORE_LOCK_ASSERT(adapter);
2224
2225	em_update_link_status(adapter);
2226	em_update_stats_counters(adapter);
2227
2228	/* Reset LAA into RAR[0] on 82571 */
2229	if ((adapter->hw.mac.type == e1000_82571) &&
2230	    e1000_get_laa_state_82571(&adapter->hw))
2231		e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);
2232
2233	/* Mask to use in the irq trigger */
2234	if (adapter->msix_mem)
2235		trigger = rxr->ims; /* RX for 82574 */
2236	else
2237		trigger = E1000_ICS_RXDMT0;
2238
2239	/*
2240	** Check on the state of the TX queue(s); this
2241	** can be done without the lock because it is read-only
2242	** and the HUNG state will be static if set.
2243	*/
2244	for (int i = 0; i < adapter->num_queues; i++, txr++) {
2245		if ((txr->queue_status == EM_QUEUE_HUNG) &&
2246		    (adapter->pause_frames == 0))
2247			goto hung;
2248		/* Schedule a TX tasklet if needed */
2249		if (txr->tx_avail <= EM_MAX_SCATTER)
2250			taskqueue_enqueue(txr->tq, &txr->tx_task);
2251	}
2252
2253	adapter->pause_frames = 0;
2254	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
2255#ifndef DEVICE_POLLING
2256	/* Trigger an RX interrupt to guarantee mbuf refresh */
2257	E1000_WRITE_REG(&adapter->hw, E1000_ICS, trigger);
2258#endif
2259	return;
2260hung:
2261	/* Looks like we're hung */
2262	device_printf(adapter->dev, "Watchdog timeout -- resetting\n");
2263	device_printf(adapter->dev,
2264	    "Queue(%d) tdh = %d, hw tdt = %d\n", txr->me,
2265	    E1000_READ_REG(&adapter->hw, E1000_TDH(txr->me)),
2266	    E1000_READ_REG(&adapter->hw, E1000_TDT(txr->me)));
2267	device_printf(adapter->dev, "TX(%d) desc avail = %d, "
2268	    "Next TX to Clean = %d\n",
2269	    txr->me, txr->tx_avail, txr->next_to_clean);
2270	ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2271	adapter->watchdog_events++;
2272	adapter->pause_frames = 0;
2273	em_init_locked(adapter);
2274}
2275
2276
2277static void
2278em_update_link_status(struct adapter *adapter)
2279{
2280	struct e1000_hw *hw = &adapter->hw;
2281	struct ifnet *ifp = adapter->ifp;
2282	device_t dev = adapter->dev;
2283	struct tx_ring *txr = adapter->tx_rings;
2284	u32 link_check = 0;
2285
2286	/* Get the cached link value or read phy for real */
2287	switch (hw->phy.media_type) {
2288	case e1000_media_type_copper:
2289		if (hw->mac.get_link_status) {
2290			/* Do the work to read phy */
2291			e1000_check_for_link(hw);
2292			link_check = !hw->mac.get_link_status;
2293			if (link_check) /* ESB2 fix */
2294				e1000_cfg_on_link_up(hw);
2295		} else
2296			link_check = TRUE;
2297		break;
2298	case e1000_media_type_fiber:
2299		e1000_check_for_link(hw);
2300		link_check = (E1000_READ_REG(hw, E1000_STATUS) &
2301		    E1000_STATUS_LU);
2302		break;
2303	case e1000_media_type_internal_serdes:
2304		e1000_check_for_link(hw);
2305		link_check = adapter->hw.mac.serdes_has_link;
2306		break;
2307	default:
2308	case e1000_media_type_unknown:
2309		break;
2310	}
2311
2312	/* Now check for a transition */
2313	if (link_check && (adapter->link_active == 0)) {
2314		e1000_get_speed_and_duplex(hw, &adapter->link_speed,
2315		    &adapter->link_duplex);
2316		/* Check if we must disable SPEED_MODE bit on PCI-E */
2317		if ((adapter->link_speed != SPEED_1000) &&
2318		    ((hw->mac.type == e1000_82571) ||
2319		    (hw->mac.type == e1000_82572))) {
2320			int tarc0;
2321			tarc0 = E1000_READ_REG(hw, E1000_TARC(0));
2322			tarc0 &= ~SPEED_MODE_BIT;
2323			E1000_WRITE_REG(hw, E1000_TARC(0), tarc0);
2324		}
2325		if (bootverbose)
2326			device_printf(dev, "Link is up %d Mbps %s\n",
2327			    adapter->link_speed,
2328			    ((adapter->link_duplex == FULL_DUPLEX) ?
2329			    "Full Duplex" : "Half Duplex"));
2330		adapter->link_active = 1;
2331		adapter->smartspeed = 0;
2332		ifp->if_baudrate = adapter->link_speed * 1000000;
2333		if_link_state_change(ifp, LINK_STATE_UP);
2334	} else if (!link_check && (adapter->link_active == 1)) {
2335		ifp->if_baudrate = adapter->link_speed = 0;
2336		adapter->link_duplex = 0;
2337		if (bootverbose)
2338			device_printf(dev, "Link is Down\n");
2339		adapter->link_active = 0;
2340		/* Link down, disable watchdog */
2341		for (int i = 0; i < adapter->num_queues; i++, txr++)
2342			txr->queue_status = EM_QUEUE_IDLE;
2343		if_link_state_change(ifp, LINK_STATE_DOWN);
2344	}
2345}
2346
2347/*********************************************************************
2348 *
2349 *  This routine disables all traffic on the adapter by issuing a
2350 *  global reset on the MAC and deallocates TX/RX buffers.
2351 *
2352 *  This routine should always be called with BOTH the CORE
2353 *  and TX locks.
2354 **********************************************************************/
2355
2356static void
2357em_stop(void *arg)
2358{
2359	struct adapter	*adapter = arg;
2360	struct ifnet	*ifp = adapter->ifp;
2361	struct tx_ring	*txr = adapter->tx_rings;
2362
2363	EM_CORE_LOCK_ASSERT(adapter);
2364
2365	INIT_DEBUGOUT("em_stop: begin");
2366
2367	em_disable_intr(adapter);
2368	callout_stop(&adapter->timer);
2369
2370	/* Tell the stack that the interface is no longer active */
2371	ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2372	ifp->if_drv_flags |= IFF_DRV_OACTIVE;
2373
2374        /* Unarm watchdog timer. */
2375	for (int i = 0; i < adapter->num_queues; i++, txr++) {
2376		EM_TX_LOCK(txr);
2377		txr->queue_status = EM_QUEUE_IDLE;
2378		EM_TX_UNLOCK(txr);
2379	}
2380
2381	e1000_reset_hw(&adapter->hw);
2382	E1000_WRITE_REG(&adapter->hw, E1000_WUC, 0);
2383
2384	e1000_led_off(&adapter->hw);
2385	e1000_cleanup_led(&adapter->hw);
2386}
2387
2388
2389/*********************************************************************
2390 *
2391 *  Determine hardware revision.
2392 *
2393 **********************************************************************/
2394static void
2395em_identify_hardware(struct adapter *adapter)
2396{
2397	device_t dev = adapter->dev;
2398
2399	/* Make sure our PCI config space has the necessary stuff set */
2400	adapter->hw.bus.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2);
2401	if (!((adapter->hw.bus.pci_cmd_word & PCIM_CMD_BUSMASTEREN) &&
2402	    (adapter->hw.bus.pci_cmd_word & PCIM_CMD_MEMEN))) {
2403		device_printf(dev, "Memory Access and/or Bus Master bits "
2404		    "were not set!\n");
2405		adapter->hw.bus.pci_cmd_word |=
2406		(PCIM_CMD_BUSMASTEREN | PCIM_CMD_MEMEN);
2407		pci_write_config(dev, PCIR_COMMAND,
2408		    adapter->hw.bus.pci_cmd_word, 2);
2409	}
2410
2411	/* Save off the information about this board */
2412	adapter->hw.vendor_id = pci_get_vendor(dev);
2413	adapter->hw.device_id = pci_get_device(dev);
2414	adapter->hw.revision_id = pci_read_config(dev, PCIR_REVID, 1);
2415	adapter->hw.subsystem_vendor_id =
2416	    pci_read_config(dev, PCIR_SUBVEND_0, 2);
2417	adapter->hw.subsystem_device_id =
2418	    pci_read_config(dev, PCIR_SUBDEV_0, 2);
2419
2420	/* Do Shared Code Init and Setup */
2421	if (e1000_set_mac_type(&adapter->hw)) {
2422		device_printf(dev, "Setup init failure\n");
2423		return;
2424	}
2425}
2426
2427static int
2428em_allocate_pci_resources(struct adapter *adapter)
2429{
2430	device_t	dev = adapter->dev;
2431	int		rid;
2432
2433	rid = PCIR_BAR(0);
2434	adapter->memory = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
2435	    &rid, RF_ACTIVE);
2436	if (adapter->memory == NULL) {
2437		device_printf(dev, "Unable to allocate bus resource: memory\n");
2438		return (ENXIO);
2439	}
2440	adapter->osdep.mem_bus_space_tag =
2441	    rman_get_bustag(adapter->memory);
2442	adapter->osdep.mem_bus_space_handle =
2443	    rman_get_bushandle(adapter->memory);
2444	adapter->hw.hw_addr = (u8 *)&adapter->osdep.mem_bus_space_handle;
2445
2446	/* Default to a single queue */
2447	adapter->num_queues = 1;
2448
2449	/*
2450	 * Setup MSI/X or MSI if PCI Express
2451	 */
2452	adapter->msix = em_setup_msix(adapter);
2453
2454	adapter->hw.back = &adapter->osdep;
2455
2456	return (0);
2457}
2458
2459/*********************************************************************
2460 *
2461 *  Setup the Legacy or MSI Interrupt handler
2462 *
2463 **********************************************************************/
2464int
2465em_allocate_legacy(struct adapter *adapter)
2466{
2467	device_t dev = adapter->dev;
2468	struct tx_ring	*txr = adapter->tx_rings;
2469	int error, rid = 0;
2470
2471	/* Manually turn off all interrupts */
2472	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2473
2474	if (adapter->msix == 1) /* using MSI */
2475		rid = 1;
2476	/* We allocate a single interrupt resource */
2477	adapter->res = bus_alloc_resource_any(dev,
2478	    SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2479	if (adapter->res == NULL) {
2480		device_printf(dev, "Unable to allocate bus resource: "
2481		    "interrupt\n");
2482		return (ENXIO);
2483	}
2484
2485	/*
2486	 * Allocate a fast interrupt and the associated
2487	 * deferred processing contexts.
2488	 */
2489	TASK_INIT(&adapter->que_task, 0, em_handle_que, adapter);
2490	adapter->tq = taskqueue_create_fast("em_taskq", M_NOWAIT,
2491	    taskqueue_thread_enqueue, &adapter->tq);
2492	taskqueue_start_threads(&adapter->tq, 1, PI_NET, "%s que",
2493	    device_get_nameunit(adapter->dev));
2494	/* Use a TX only tasklet for local timer */
2495	TASK_INIT(&txr->tx_task, 0, em_handle_tx, txr);
2496	txr->tq = taskqueue_create_fast("em_txq", M_NOWAIT,
2497	    taskqueue_thread_enqueue, &txr->tq);
2498	taskqueue_start_threads(&txr->tq, 1, PI_NET, "%s txq",
2499	    device_get_nameunit(adapter->dev));
2500	TASK_INIT(&adapter->link_task, 0, em_handle_link, adapter);
2501	if ((error = bus_setup_intr(dev, adapter->res, INTR_TYPE_NET,
2502	    em_irq_fast, NULL, adapter, &adapter->tag)) != 0) {
2503		device_printf(dev, "Failed to register fast interrupt "
2504			    "handler: %d\n", error);
2505		taskqueue_free(adapter->tq);
2506		adapter->tq = NULL;
2507		return (error);
2508	}
2509
2510	return (0);
2511}
2512
2513/*********************************************************************
2514 *
2515 *  Setup the MSIX Interrupt handlers
2516 *   it's just separate interrupt vectors
2517 *   its just seperate interrupt vectors
2518 *   for TX, RX, and Link.
2519 *
2520 **********************************************************************/
2521int
2522em_allocate_msix(struct adapter *adapter)
2523{
2524	device_t	dev = adapter->dev;
2525	struct		tx_ring *txr = adapter->tx_rings;
2526	struct		rx_ring *rxr = adapter->rx_rings;
2527	int		error, rid, vector = 0;
2528
2529
2530	/* Make sure all interrupts are disabled */
2531	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2532
2533	/* First set up ring resources */
2534	for (int i = 0; i < adapter->num_queues; i++, txr++, rxr++) {
2535
2536		/* RX ring */
2537		rid = vector + 1;
2538
2539		rxr->res = bus_alloc_resource_any(dev,
2540		    SYS_RES_IRQ, &rid, RF_ACTIVE);
2541		if (rxr->res == NULL) {
2542			device_printf(dev,
2543			    "Unable to allocate bus resource: "
2544			    "RX MSIX Interrupt %d\n", i);
2545			return (ENXIO);
2546		}
2547		if ((error = bus_setup_intr(dev, rxr->res,
2548		    INTR_TYPE_NET | INTR_MPSAFE, NULL, em_msix_rx,
2549		    rxr, &rxr->tag)) != 0) {
2550			device_printf(dev, "Failed to register RX handler");
2551			return (error);
2552		}
2553#if __FreeBSD_version >= 800504
2554		bus_describe_intr(dev, rxr->res, rxr->tag, "rx %d", i);
2555#endif
2556		rxr->msix = vector++; /* NOTE increment vector for TX */
2557		TASK_INIT(&rxr->rx_task, 0, em_handle_rx, rxr);
2558		rxr->tq = taskqueue_create_fast("em_rxq", M_NOWAIT,
2559		    taskqueue_thread_enqueue, &rxr->tq);
2560		taskqueue_start_threads(&rxr->tq, 1, PI_NET, "%s rxq",
2561		    device_get_nameunit(adapter->dev));
2562		/*
2563		** Set the bit to enable interrupt
2564		** in E1000_IMS -- bits 20 and 21
2565		** are for RX0 and RX1, note this has
2566		** NOTHING to do with the MSIX vector
2567		*/
2568		rxr->ims = 1 << (20 + i);
2569		adapter->ivars |= (8 | rxr->msix) << (i * 4);
2570
2571		/* TX ring */
2572		rid = vector + 1;
2573		txr->res = bus_alloc_resource_any(dev,
2574		    SYS_RES_IRQ, &rid, RF_ACTIVE);
2575		if (txr->res == NULL) {
2576			device_printf(dev,
2577			    "Unable to allocate bus resource: "
2578			    "TX MSIX Interrupt %d\n", i);
2579			return (ENXIO);
2580		}
2581		if ((error = bus_setup_intr(dev, txr->res,
2582		    INTR_TYPE_NET | INTR_MPSAFE, NULL, em_msix_tx,
2583		    txr, &txr->tag)) != 0) {
2584			device_printf(dev, "Failed to register TX handler");
2585			return (error);
2586		}
2587#if __FreeBSD_version >= 800504
2588		bus_describe_intr(dev, txr->res, txr->tag, "tx %d", i);
2589#endif
2590		txr->msix = vector++; /* Increment vector for next pass */
2591		TASK_INIT(&txr->tx_task, 0, em_handle_tx, txr);
2592		txr->tq = taskqueue_create_fast("em_txq", M_NOWAIT,
2593		    taskqueue_thread_enqueue, &txr->tq);
2594		taskqueue_start_threads(&txr->tq, 1, PI_NET, "%s txq",
2595		    device_get_nameunit(adapter->dev));
2596		/*
2597		** Set the bit to enable interrupt
2598		** in E1000_IMS -- bits 22 and 23
2599		** are for TX0 and TX1, note this has
2600		** NOTHING to do with the MSIX vector
2601		*/
2602		txr->ims = 1 << (22 + i);
2603		adapter->ivars |= (8 | txr->msix) << (8 + (i * 4));
2604	}
2605
2606	/* Link interrupt */
2607	++rid;
2608	adapter->res = bus_alloc_resource_any(dev,
2609	    SYS_RES_IRQ, &rid, RF_ACTIVE);
2610	if (!adapter->res) {
2611		device_printf(dev, "Unable to allocate "
2612		    "bus resource: Link interrupt [%d]\n", rid);
2613		return (ENXIO);
2614	}
2615	/* Set the link handler function */
2616	error = bus_setup_intr(dev, adapter->res,
2617	    INTR_TYPE_NET | INTR_MPSAFE, NULL,
2618	    em_msix_link, adapter, &adapter->tag);
2619	if (error) {
2620		adapter->res = NULL;
2621		device_printf(dev, "Failed to register LINK handler");
2622		return (error);
2623	}
2624#if __FreeBSD_version >= 800504
2625	bus_describe_intr(dev, adapter->res, adapter->tag, "link");
2626#endif
2627	adapter->linkvec = vector;
2628	adapter->ivars |=  (8 | vector) << 16;
2629	adapter->ivars |= 0x80000000;
2630
2631	return (0);
2632}
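/*
 * Worked example (illustrative): with the default single queue the loop
 * above assigns vector 0 to RX, vector 1 to TX and vector 2 to link, so
 * the 82574 IVAR image accumulated in adapter->ivars is
 *
 *	(8 | 0) << 0	= 0x00000008	RX queue 0
 *	(8 | 1) << 8	= 0x00000900	TX queue 0
 *	(8 | 2) << 16	= 0x000a0000	link
 *	| 0x80000000	-> 0x800a0908
 *
 * where the 8 in each 4-bit field marks the entry valid.
 */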
2633
2634
2635static void
2636em_free_pci_resources(struct adapter *adapter)
2637{
2638	device_t	dev = adapter->dev;
2639	struct tx_ring	*txr;
2640	struct rx_ring	*rxr;
2641	int		rid;
2642
2643
2644	/*
2645	** Release all the queue interrupt resources:
2646	*/
2647	for (int i = 0; i < adapter->num_queues; i++) {
2648		txr = &adapter->tx_rings[i];
2649		rxr = &adapter->rx_rings[i];
2650		/* an early abort? */
2651		if ((txr == NULL) || (rxr == NULL))
2652			break;
2653		rid = txr->msix +1;
2654		if (txr->tag != NULL) {
2655			bus_teardown_intr(dev, txr->res, txr->tag);
2656			txr->tag = NULL;
2657		}
2658		if (txr->res != NULL)
2659			bus_release_resource(dev, SYS_RES_IRQ,
2660			    rid, txr->res);
2661		rid = rxr->msix +1;
2662		if (rxr->tag != NULL) {
2663			bus_teardown_intr(dev, rxr->res, rxr->tag);
2664			rxr->tag = NULL;
2665		}
2666		if (rxr->res != NULL)
2667			bus_release_resource(dev, SYS_RES_IRQ,
2668			    rid, rxr->res);
2669	}
2670
2671	if (adapter->linkvec) /* we are doing MSIX */
2672		rid = adapter->linkvec + 1;
2673	else
2674		rid = (adapter->msix != 0) ? 1 : 0;
2675
2676	if (adapter->tag != NULL) {
2677		bus_teardown_intr(dev, adapter->res, adapter->tag);
2678		adapter->tag = NULL;
2679	}
2680
2681	if (adapter->res != NULL)
2682		bus_release_resource(dev, SYS_RES_IRQ, rid, adapter->res);
2683
2684
2685	if (adapter->msix)
2686		pci_release_msi(dev);
2687
2688	if (adapter->msix_mem != NULL)
2689		bus_release_resource(dev, SYS_RES_MEMORY,
2690		    PCIR_BAR(EM_MSIX_BAR), adapter->msix_mem);
2691
2692	if (adapter->memory != NULL)
2693		bus_release_resource(dev, SYS_RES_MEMORY,
2694		    PCIR_BAR(0), adapter->memory);
2695
2696	if (adapter->flash != NULL)
2697		bus_release_resource(dev, SYS_RES_MEMORY,
2698		    EM_FLASH, adapter->flash);
2699}
2700
2701/*
2702 * Setup MSI or MSI/X
2703 */
2704static int
2705em_setup_msix(struct adapter *adapter)
2706{
2707	device_t dev = adapter->dev;
2708	int val = 0;
2709
2710	/*
2711	** Setup MSI/X for Hartwell: tests have shown
2712	** use of two queues to be unstable, and to
2713	** provide no great gain anyway, so we simply
2714	** separate the interrupts and use a single queue.
2715	*/
2716	if ((adapter->hw.mac.type == e1000_82574) &&
2717	    (em_enable_msix == TRUE)) {
2718		/* Map the MSIX BAR */
2719		int rid = PCIR_BAR(EM_MSIX_BAR);
2720		adapter->msix_mem = bus_alloc_resource_any(dev,
2721		    SYS_RES_MEMORY, &rid, RF_ACTIVE);
2722		if (!adapter->msix_mem) {
2723			/* May not be enabled */
2724			device_printf(adapter->dev,
2725			    "Unable to map MSIX table\n");
2726			goto msi;
2727		}
2728		val = pci_msix_count(dev);
2729		/* We only need 3 vectors */
2730		if (val > 3)
2731			val = 3;
2732		if (val != 3) {	/* the clamp above makes != 5 impossible */
2733			bus_release_resource(dev, SYS_RES_MEMORY,
2734			    PCIR_BAR(EM_MSIX_BAR), adapter->msix_mem);
2735			adapter->msix_mem = NULL;
2736			device_printf(adapter->dev,
2737			    "MSIX: incorrect vectors, using MSI\n");
2738			goto msi;
2739		}
2740
2741		if (pci_alloc_msix(dev, &val) == 0) {
2742			device_printf(adapter->dev,
2743			    "Using MSIX interrupts "
2744			    "with %d vectors\n", val);
2745		}
2746
2747		return (val);
2748	}
2749msi:
2750	val = pci_msi_count(dev);
2751	if (val == 1 && pci_alloc_msi(dev, &val) == 0) {
2752		adapter->msix = 1;
2753		device_printf(adapter->dev, "Using an MSI interrupt\n");
2754		return (val);
2755	}
2756	/* Should only happen due to manual configuration */
2757	device_printf(adapter->dev, "No MSI/MSIX, using a Legacy IRQ\n");
2758	return (0);
2759}
2760
2761
2762/*********************************************************************
2763 *
2764 *  Initialize the hardware to a configuration
2765 *  as specified by the adapter structure.
2766 *
2767 **********************************************************************/
2768static void
2769em_reset(struct adapter *adapter)
2770{
2771	device_t	dev = adapter->dev;
2772	struct ifnet	*ifp = adapter->ifp;
2773	struct e1000_hw	*hw = &adapter->hw;
2774	u16		rx_buffer_size;
2775	u32		pba;
2776
2777	INIT_DEBUGOUT("em_reset: begin");
2778
2779	/* Set up smart power down as default off on newer adapters. */
2780	if (!em_smart_pwr_down && (hw->mac.type == e1000_82571 ||
2781	    hw->mac.type == e1000_82572)) {
2782		u16 phy_tmp = 0;
2783
2784		/* Speed up time to link by disabling smart power down. */
2785		e1000_read_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, &phy_tmp);
2786		phy_tmp &= ~IGP02E1000_PM_SPD;
2787		e1000_write_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, phy_tmp);
2788	}
2789
2790	/*
2791	 * Packet Buffer Allocation (PBA)
2792	 * Writing PBA sets the receive portion of the buffer
2793	 * the remainder is used for the transmit buffer.
2794	 */
2795	switch (hw->mac.type) {
2796	/* Total Packet Buffer on these is 48K */
2797	case e1000_82571:
2798	case e1000_82572:
2799	case e1000_80003es2lan:
2800			pba = E1000_PBA_32K; /* 32K for Rx, 16K for Tx */
2801		break;
2802	case e1000_82573: /* 82573: Total Packet Buffer is 32K */
2803			pba = E1000_PBA_12K; /* 12K for Rx, 20K for Tx */
2804		break;
2805	case e1000_82574:
2806	case e1000_82583:
2807			pba = E1000_PBA_20K; /* 20K for Rx, 20K for Tx */
2808		break;
2809	case e1000_ich8lan:
2810		pba = E1000_PBA_8K;
2811		break;
2812	case e1000_ich9lan:
2813	case e1000_ich10lan:
2814		/* Boost Receive side for jumbo frames */
2815		if (adapter->max_frame_size > 4096)
2816			pba = E1000_PBA_14K;
2817		else
2818			pba = E1000_PBA_10K;
2819		break;
2820	case e1000_pchlan:
2821	case e1000_pch2lan:
2822		pba = E1000_PBA_26K;
2823		break;
2824	default:
2825		if (adapter->max_frame_size > 8192)
2826			pba = E1000_PBA_40K; /* 40K for Rx, 24K for Tx */
2827		else
2828			pba = E1000_PBA_48K; /* 48K for Rx, 16K for Tx */
2829	}
2830	E1000_WRITE_REG(&adapter->hw, E1000_PBA, pba);
2831
2832	/*
2833	 * These parameters control the automatic generation (Tx) and
2834	 * response (Rx) to Ethernet PAUSE frames.
2835	 * - High water mark should allow for at least two frames to be
2836	 *   received after sending an XOFF.
2837	 * - Low water mark works best when it is very near the high water mark.
2838	 *   This allows the receiver to restart by sending XON when it has
2839	 *   drained a bit. Here we use an arbitary value of 1500 which will
2840	 *   drained a bit. Here we use an arbitrary value of 1500 which will
2841	 *   could be several smaller frames in the buffer and if so they will
2842	 *   not trigger the XON until their total number reduces the buffer
2843	 *   by 1500.
2844	 * - The pause time is fairly large at 1000 x 512ns = 512 usec.
2845	 */
2846	rx_buffer_size = ((E1000_READ_REG(hw, E1000_PBA) & 0xffff) << 10 );
2847	hw->fc.high_water = rx_buffer_size -
2848	    roundup2(adapter->max_frame_size, 1024);
2849	hw->fc.low_water = hw->fc.high_water - 1500;
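	/*
	 * Worked example (illustrative): a 32K RX allocation reads back
	 * from E1000_PBA as 0x20, so rx_buffer_size = 0x20 << 10 = 32768.
	 * With a standard 1518-byte max frame, high_water = 32768 -
	 * roundup2(1518, 1024) = 30720 and low_water = 29220.
	 */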
2850
2851	if (adapter->fc) /* locally set flow control value? */
2852		hw->fc.requested_mode = adapter->fc;
2853	else
2854		hw->fc.requested_mode = e1000_fc_full;
2855
2856	if (hw->mac.type == e1000_80003es2lan)
2857		hw->fc.pause_time = 0xFFFF;
2858	else
2859		hw->fc.pause_time = EM_FC_PAUSE_TIME;
2860
2861	hw->fc.send_xon = TRUE;
2862
2863	/* Device specific overrides/settings */
2864	switch (hw->mac.type) {
2865	case e1000_pchlan:
2866		/* Workaround: no TX flow ctrl for PCH */
2867		hw->fc.requested_mode = e1000_fc_rx_pause;
2868		hw->fc.pause_time = 0xFFFF; /* override */
2869		if (ifp->if_mtu > ETHERMTU) {
2870			hw->fc.high_water = 0x3500;
2871			hw->fc.low_water = 0x1500;
2872		} else {
2873			hw->fc.high_water = 0x5000;
2874			hw->fc.low_water = 0x3000;
2875		}
2876		hw->fc.refresh_time = 0x1000;
2877		break;
2878	case e1000_pch2lan:
2879		hw->fc.high_water = 0x5C20;
2880		hw->fc.low_water = 0x5048;
2881		hw->fc.pause_time = 0x0650;
2882		hw->fc.refresh_time = 0x0400;
2883		/* Jumbos need adjusted PBA */
2884		if (ifp->if_mtu > ETHERMTU)
2885			E1000_WRITE_REG(hw, E1000_PBA, 12);
2886		else
2887			E1000_WRITE_REG(hw, E1000_PBA, 26);
2888		break;
2889	case e1000_ich9lan:
2890	case e1000_ich10lan:
2891		if (ifp->if_mtu > ETHERMTU) {
2892			hw->fc.high_water = 0x2800;
2893			hw->fc.low_water = hw->fc.high_water - 8;
2894			break;
2895		}
2896		/* else fall thru */
2897	default:
2898		if (hw->mac.type == e1000_80003es2lan)
2899			hw->fc.pause_time = 0xFFFF;
2900		break;
2901	}
2902
2903	/* Issue a global reset */
2904	e1000_reset_hw(hw);
2905	E1000_WRITE_REG(hw, E1000_WUC, 0);
2906	em_disable_aspm(adapter);
2907	/* and a re-init */
2908	if (e1000_init_hw(hw) < 0) {
2909		device_printf(dev, "Hardware Initialization Failed\n");
2910		return;
2911	}
2912
2913	E1000_WRITE_REG(hw, E1000_VET, ETHERTYPE_VLAN);
2914	e1000_get_phy_info(hw);
2915	e1000_check_for_link(hw);
2916	return;
2917}
2918
2919/*********************************************************************
2920 *
2921 *  Setup networking device structure and register an interface.
2922 *
2923 **********************************************************************/
2924static int
2925em_setup_interface(device_t dev, struct adapter *adapter)
2926{
2927	struct ifnet   *ifp;
2928
2929	INIT_DEBUGOUT("em_setup_interface: begin");
2930
2931	ifp = adapter->ifp = if_alloc(IFT_ETHER);
2932	if (ifp == NULL) {
2933		device_printf(dev, "can not allocate ifnet structure\n");
2934		return (-1);
2935	}
2936	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
2937	ifp->if_init =  em_init;
2938	ifp->if_softc = adapter;
2939	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
2940	ifp->if_ioctl = em_ioctl;
2941#ifdef EM_MULTIQUEUE
2942	/* Multiqueue stack interface */
2943	ifp->if_transmit = em_mq_start;
2944	ifp->if_qflush = em_qflush;
2945#else
2946	ifp->if_start = em_start;
2947	IFQ_SET_MAXLEN(&ifp->if_snd, adapter->num_tx_desc - 1);
2948	ifp->if_snd.ifq_drv_maxlen = adapter->num_tx_desc - 1;
2949	IFQ_SET_READY(&ifp->if_snd);
2950#endif
2951
2952	ether_ifattach(ifp, adapter->hw.mac.addr);
2953
2954	ifp->if_capabilities = ifp->if_capenable = 0;
2955
2956
2957	ifp->if_capabilities |= IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM;
2958	ifp->if_capabilities |= IFCAP_TSO4;
2959	/*
2960	 * Tell the upper layer(s) we
2961	 * support full VLAN capability
2962	 */
2963	ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);
2964	ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING
2965			     |  IFCAP_VLAN_HWTSO
2966			     |  IFCAP_VLAN_MTU;
2967	ifp->if_capenable = ifp->if_capabilities;
2968
2969	/*
2970	** Don't turn this on by default, if vlans are
2971	** created on another pseudo device (eg. lagg)
2972	** then vlan events are not passed thru, breaking
2973	** operation, but with HW FILTER off it works. If
2974	** using vlans directly on the em driver you can
2975	** enable this and get full hardware tag filtering.
2976	*/
2977	ifp->if_capabilities |= IFCAP_VLAN_HWFILTER;
2978
2979#ifdef DEVICE_POLLING
2980	ifp->if_capabilities |= IFCAP_POLLING;
2981#endif
2982
2983	/* Enable only WOL MAGIC by default */
2984	if (adapter->wol) {
2985		ifp->if_capabilities |= IFCAP_WOL;
2986		ifp->if_capenable |= IFCAP_WOL_MAGIC;
2987	}
2988
2989	/*
2990	 * Specify the media types supported by this adapter and register
2991	 * callbacks to update media and link information
2992	 */
2993	ifmedia_init(&adapter->media, IFM_IMASK,
2994	    em_media_change, em_media_status);
2995	if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
2996	    (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
2997		u_char fiber_type = IFM_1000_SX;	/* default type */
2998
2999		ifmedia_add(&adapter->media, IFM_ETHER | fiber_type | IFM_FDX,
3000			    0, NULL);
3001		ifmedia_add(&adapter->media, IFM_ETHER | fiber_type, 0, NULL);
3002	} else {
3003		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T, 0, NULL);
3004		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T | IFM_FDX,
3005			    0, NULL);
3006		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX,
3007			    0, NULL);
3008		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
3009			    0, NULL);
3010		if (adapter->hw.phy.type != e1000_phy_ife) {
3011			ifmedia_add(&adapter->media,
3012				IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
3013			ifmedia_add(&adapter->media,
3014				IFM_ETHER | IFM_1000_T, 0, NULL);
3015		}
3016	}
3017	ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
3018	ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
3019	return (0);
3020}
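/*
 * Usage note (illustrative): every capability registered above can be
 * toggled at runtime from userland, arriving as a SIOCSIFCAP ioctl
 * handled by em_ioctl(), e.g.:
 *
 *	# ifconfig em0 -txcsum		disable TX checksum offload
 *	# ifconfig em0 tso		enable TCP segmentation offload
 *	# ifconfig em0 vlanhwfilter	opt in to HW VLAN filtering
 */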
3021
3022
3023/*
3024 * Manage DMA'able memory.
3025 */
3026static void
3027em_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
3028{
3029	if (error)
3030		return;
3031	*(bus_addr_t *) arg = segs[0].ds_addr;
3032}
3033
3034static int
3035em_dma_malloc(struct adapter *adapter, bus_size_t size,
3036        struct em_dma_alloc *dma, int mapflags)
3037{
3038	int error;
3039
3040	error = bus_dma_tag_create(bus_get_dma_tag(adapter->dev), /* parent */
3041				EM_DBA_ALIGN, 0,	/* alignment, bounds */
3042				BUS_SPACE_MAXADDR,	/* lowaddr */
3043				BUS_SPACE_MAXADDR,	/* highaddr */
3044				NULL, NULL,		/* filter, filterarg */
3045				size,			/* maxsize */
3046				1,			/* nsegments */
3047				size,			/* maxsegsize */
3048				0,			/* flags */
3049				NULL,			/* lockfunc */
3050				NULL,			/* lockarg */
3051				&dma->dma_tag);
3052	if (error) {
3053		device_printf(adapter->dev,
3054		    "%s: bus_dma_tag_create failed: %d\n",
3055		    __func__, error);
3056		goto fail_0;
3057	}
3058
3059	error = bus_dmamem_alloc(dma->dma_tag, (void**) &dma->dma_vaddr,
3060	    BUS_DMA_NOWAIT | BUS_DMA_COHERENT, &dma->dma_map);
3061	if (error) {
3062		device_printf(adapter->dev,
3063		    "%s: bus_dmamem_alloc(%ju) failed: %d\n",
3064		    __func__, (uintmax_t)size, error);
3065		goto fail_2;
3066	}
3067
3068	dma->dma_paddr = 0;
3069	error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
3070	    size, em_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT);
3071	if (error || dma->dma_paddr == 0) {
3072		device_printf(adapter->dev,
3073		    "%s: bus_dmamap_load failed: %d\n",
3074		    __func__, error);
3075		goto fail_3;
3076	}
3077
3078	return (0);
3079
3080fail_3:
3081	bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3082	bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
3083fail_2:	/* tag created but descriptor memory was never allocated */
3084	bus_dma_tag_destroy(dma->dma_tag);
3085fail_0:
3086	dma->dma_map = NULL;
3087	dma->dma_tag = NULL;
3088
3089	return (error);
3090}
3091
3092static void
3093em_dma_free(struct adapter *adapter, struct em_dma_alloc *dma)
3094{
3095	if (dma->dma_tag == NULL)
3096		return;
3097	if (dma->dma_map != NULL) {
3098		bus_dmamap_sync(dma->dma_tag, dma->dma_map,
3099		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
3100		bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3101		bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
3102		dma->dma_map = NULL;
3103	}
3104	bus_dma_tag_destroy(dma->dma_tag);
3105	dma->dma_tag = NULL;
3106}
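/*
 * Usage sketch (illustrative): the pair above backs every descriptor
 * ring allocation, roughly:
 *
 *	struct em_dma_alloc dma;
 *
 *	if (em_dma_malloc(adapter, size, &dma, BUS_DMA_NOWAIT) == 0) {
 *		... use dma.dma_vaddr and dma.dma_paddr ...
 *		em_dma_free(adapter, &dma);
 *	}
 *
 * as em_allocate_queues() below does for txdma and rxdma.
 */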
3107
3108
3109/*********************************************************************
3110 *
3111 *  Allocate memory for the transmit and receive rings, and then
3112 *  the descriptors associated with each, called only once at attach.
3113 *
3114 **********************************************************************/
3115static int
3116em_allocate_queues(struct adapter *adapter)
3117{
3118	device_t		dev = adapter->dev;
3119	struct tx_ring		*txr = NULL;
3120	struct rx_ring		*rxr = NULL;
3121	int rsize, tsize, error = E1000_SUCCESS;
3122	int txconf = 0, rxconf = 0;
3123
3124
3125	/* Allocate the TX ring struct memory */
3126	if (!(adapter->tx_rings =
3127	    (struct tx_ring *) malloc(sizeof(struct tx_ring) *
3128	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3129		device_printf(dev, "Unable to allocate TX ring memory\n");
3130		error = ENOMEM;
3131		goto fail;
3132	}
3133
3134	/* Now allocate the RX */
3135	if (!(adapter->rx_rings =
3136	    (struct rx_ring *) malloc(sizeof(struct rx_ring) *
3137	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3138		device_printf(dev, "Unable to allocate RX ring memory\n");
3139		error = ENOMEM;
3140		goto rx_fail;
3141	}
3142
3143	tsize = roundup2(adapter->num_tx_desc *
3144	    sizeof(struct e1000_tx_desc), EM_DBA_ALIGN);
3145	/*
3146	 * Now set up the TX queues, txconf is needed to handle the
3147	 * possibility that things fail midcourse and we need to
3148	 * undo memory gracefully
3149	 */
3150	for (int i = 0; i < adapter->num_queues; i++, txconf++) {
3151		/* Set up some basics */
3152		txr = &adapter->tx_rings[i];
3153		txr->adapter = adapter;
3154		txr->me = i;
3155
3156		/* Initialize the TX lock */
3157		snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
3158		    device_get_nameunit(dev), txr->me);
3159		mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF);
3160
3161		if (em_dma_malloc(adapter, tsize,
3162			&txr->txdma, BUS_DMA_NOWAIT)) {
3163			device_printf(dev,
3164			    "Unable to allocate TX Descriptor memory\n");
3165			error = ENOMEM;
3166			goto err_tx_desc;
3167		}
3168		txr->tx_base = (struct e1000_tx_desc *)txr->txdma.dma_vaddr;
3169		bzero((void *)txr->tx_base, tsize);
3170
3171		if (em_allocate_transmit_buffers(txr)) {
3172			device_printf(dev,
3173			    "Critical Failure setting up transmit buffers\n");
3174			error = ENOMEM;
3175			goto err_tx_desc;
3176		}
3177#if __FreeBSD_version >= 800000
3178		/* Allocate a buf ring */
3179		txr->br = buf_ring_alloc(4096, M_DEVBUF,
3180		    M_WAITOK, &txr->tx_mtx);
3181#endif
3182	}
3183
3184	/*
3185	 * Next the RX queues...
3186	 */
3187	rsize = roundup2(adapter->num_rx_desc *
3188	    sizeof(struct e1000_rx_desc), EM_DBA_ALIGN);
3189	for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
3190		rxr = &adapter->rx_rings[i];
3191		rxr->adapter = adapter;
3192		rxr->me = i;
3193
3194		/* Initialize the RX lock */
3195		snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
3196		    device_get_nameunit(dev), rxr->me);
3197		mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF);
3198
3199		if (em_dma_malloc(adapter, rsize,
3200			&rxr->rxdma, BUS_DMA_NOWAIT)) {
3201			device_printf(dev,
3202			    "Unable to allocate RxDescriptor memory\n");
3203			error = ENOMEM;
3204			goto err_rx_desc;
3205		}
3206		rxr->rx_base = (struct e1000_rx_desc *)rxr->rxdma.dma_vaddr;
3207		bzero((void *)rxr->rx_base, rsize);
3208
3209		/* Allocate receive buffers for the ring */
3210		if (em_allocate_receive_buffers(rxr)) {
3211			device_printf(dev,
3212			    "Critical Failure setting up receive buffers\n");
3213			error = ENOMEM;
3214			goto err_rx_desc;
3215		}
3216	}
3217
3218	return (0);
3219
3220err_rx_desc:
3221	for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
3222		em_dma_free(adapter, &rxr->rxdma);
3223err_tx_desc:
3224	for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
3225		em_dma_free(adapter, &txr->txdma);
3226	free(adapter->rx_rings, M_DEVBUF);
3227rx_fail:
3228#if __FreeBSD_version >= 800000
3229	buf_ring_free(txr->br, M_DEVBUF);
3230#endif
3231	free(adapter->tx_rings, M_DEVBUF);
3232fail:
3233	return (error);
3234}
3235
3236
3237/*********************************************************************
3238 *
3239 *  Allocate memory for tx_buffer structures. The tx_buffer stores all
3240 *  the information needed to transmit a packet on the wire. This is
3241 *  called only once at attach, setup is done every reset.
3242 *
3243 **********************************************************************/
3244static int
3245em_allocate_transmit_buffers(struct tx_ring *txr)
3246{
3247	struct adapter *adapter = txr->adapter;
3248	device_t dev = adapter->dev;
3249	struct em_buffer *txbuf;
3250	int error, i;
3251
3252	/*
3253	 * Setup DMA descriptor areas.
3254	 */
3255	if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
3256			       1, 0,			/* alignment, bounds */
3257			       BUS_SPACE_MAXADDR,	/* lowaddr */
3258			       BUS_SPACE_MAXADDR,	/* highaddr */
3259			       NULL, NULL,		/* filter, filterarg */
3260			       EM_TSO_SIZE,		/* maxsize */
3261			       EM_MAX_SCATTER,		/* nsegments */
3262			       PAGE_SIZE,		/* maxsegsize */
3263			       0,			/* flags */
3264			       NULL,			/* lockfunc */
3265			       NULL,			/* lockfuncarg */
3266			       &txr->txtag))) {
3267		device_printf(dev,"Unable to allocate TX DMA tag\n");
3268		goto fail;
3269	}
3270
3271	if (!(txr->tx_buffers =
3272	    (struct em_buffer *) malloc(sizeof(struct em_buffer) *
3273	    adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3274		device_printf(dev, "Unable to allocate tx_buffer memory\n");
3275		error = ENOMEM;
3276		goto fail;
3277	}
3278
3279	/* Create the descriptor buffer dma maps */
3280	txbuf = txr->tx_buffers;
3281	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3282		error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
3283		if (error != 0) {
3284			device_printf(dev, "Unable to create TX DMA map\n");
3285			goto fail;
3286		}
3287	}
3288
3289	return (0);
3290fail:
3291	/* We free all, it handles case where we are in the middle */
3292	em_free_transmit_structures(adapter);
3293	return (error);
3294}
3295
3296/*********************************************************************
3297 *
3298 *  Initialize a transmit ring.
3299 *
3300 **********************************************************************/
3301static void
3302em_setup_transmit_ring(struct tx_ring *txr)
3303{
3304	struct adapter *adapter = txr->adapter;
3305	struct em_buffer *txbuf;
3306	int i;
3307#ifdef DEV_NETMAP
3308	struct netmap_adapter *na = NA(adapter->ifp);
3309	struct netmap_slot *slot;
3310#endif /* DEV_NETMAP */
3311
3312	/* Clear the old descriptor contents */
3313	EM_TX_LOCK(txr);
3314#ifdef DEV_NETMAP
3315	slot = netmap_reset(na, NR_TX, txr->me, 0);
3316#endif /* DEV_NETMAP */
3317
3318	bzero((void *)txr->tx_base,
3319	      (sizeof(struct e1000_tx_desc)) * adapter->num_tx_desc);
3320	/* Reset indices */
3321	txr->next_avail_desc = 0;
3322	txr->next_to_clean = 0;
3323
3324	/* Free any existing tx buffers. */
3325	txbuf = txr->tx_buffers;
3326	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3327		if (txbuf->m_head != NULL) {
3328			bus_dmamap_sync(txr->txtag, txbuf->map,
3329			    BUS_DMASYNC_POSTWRITE);
3330			bus_dmamap_unload(txr->txtag, txbuf->map);
3331			m_freem(txbuf->m_head);
3332			txbuf->m_head = NULL;
3333		}
3334#ifdef DEV_NETMAP
3335		if (slot) {
3336			int si = netmap_idx_n2k(&na->tx_rings[txr->me], i);
3337			uint64_t paddr;
3338			void *addr;
3339
3340			addr = PNMB(slot + si, &paddr);
3341			txr->tx_base[i].buffer_addr = htole64(paddr);
3342			/* reload the map for netmap mode */
3343			netmap_load_map(txr->txtag, txbuf->map, addr);
3344		}
3345#endif /* DEV_NETMAP */
3346
3347		/* clear the watch index */
3348		txbuf->next_eop = -1;
3349	}
3350
3351	/* Set number of descriptors available */
3352	txr->tx_avail = adapter->num_tx_desc;
3353	txr->queue_status = EM_QUEUE_IDLE;
3354
3355	/* Clear checksum offload context. */
3356	txr->last_hw_offload = 0;
3357	txr->last_hw_ipcss = 0;
3358	txr->last_hw_ipcso = 0;
3359	txr->last_hw_tucss = 0;
3360	txr->last_hw_tucso = 0;
3361
3362	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3363	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3364	EM_TX_UNLOCK(txr);
3365}
3366
3367/*********************************************************************
3368 *
3369 *  Initialize all transmit rings.
3370 *
3371 **********************************************************************/
3372static void
3373em_setup_transmit_structures(struct adapter *adapter)
3374{
3375	struct tx_ring *txr = adapter->tx_rings;
3376
3377	for (int i = 0; i < adapter->num_queues; i++, txr++)
3378		em_setup_transmit_ring(txr);
3379
3380	return;
3381}
3382
3383/*********************************************************************
3384 *
3385 *  Enable transmit unit.
3386 *
3387 **********************************************************************/
3388static void
3389em_initialize_transmit_unit(struct adapter *adapter)
3390{
3391	struct tx_ring	*txr = adapter->tx_rings;
3392	struct e1000_hw	*hw = &adapter->hw;
3393	u32	tctl, tarc, tipg = 0;
3394
3395	INIT_DEBUGOUT("em_initialize_transmit_unit: begin");
3396
3397	for (int i = 0; i < adapter->num_queues; i++, txr++) {
3398		u64 bus_addr = txr->txdma.dma_paddr;
3399		/* Base and Len of TX Ring */
3400		E1000_WRITE_REG(hw, E1000_TDLEN(i),
3401	    	    adapter->num_tx_desc * sizeof(struct e1000_tx_desc));
3402		E1000_WRITE_REG(hw, E1000_TDBAH(i),
3403	    	    (u32)(bus_addr >> 32));
3404		E1000_WRITE_REG(hw, E1000_TDBAL(i),
3405	    	    (u32)bus_addr);
3406		/* Init the HEAD/TAIL indices */
3407		E1000_WRITE_REG(hw, E1000_TDT(i), 0);
3408		E1000_WRITE_REG(hw, E1000_TDH(i), 0);
3409
3410		HW_DEBUGOUT2("Base = %x, Length = %x\n",
3411		    E1000_READ_REG(&adapter->hw, E1000_TDBAL(i)),
3412		    E1000_READ_REG(&adapter->hw, E1000_TDLEN(i)));
3413
3414		txr->queue_status = EM_QUEUE_IDLE;
3415	}
3416
3417	/* Set the default values for the Tx Inter Packet Gap timer */
3418	switch (adapter->hw.mac.type) {
3419	case e1000_80003es2lan:
3420		tipg = DEFAULT_82543_TIPG_IPGR1;
3421		tipg |= DEFAULT_80003ES2LAN_TIPG_IPGR2 <<
3422		    E1000_TIPG_IPGR2_SHIFT;
3423		break;
3424	default:
3425		if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
3426		    (adapter->hw.phy.media_type ==
3427		    e1000_media_type_internal_serdes))
3428			tipg = DEFAULT_82543_TIPG_IPGT_FIBER;
3429		else
3430			tipg = DEFAULT_82543_TIPG_IPGT_COPPER;
3431		tipg |= DEFAULT_82543_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT;
3432		tipg |= DEFAULT_82543_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT;
3433	}
3434
3435	E1000_WRITE_REG(&adapter->hw, E1000_TIPG, tipg);
3436	E1000_WRITE_REG(&adapter->hw, E1000_TIDV, adapter->tx_int_delay.value);
3437
3438	if (adapter->hw.mac.type >= e1000_82540)
3439		E1000_WRITE_REG(&adapter->hw, E1000_TADV,
3440		    adapter->tx_abs_int_delay.value);
3441
3442	if ((adapter->hw.mac.type == e1000_82571) ||
3443	    (adapter->hw.mac.type == e1000_82572)) {
3444		tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
3445		tarc |= SPEED_MODE_BIT;
3446		E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3447	} else if (adapter->hw.mac.type == e1000_80003es2lan) {
3448		tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
3449		tarc |= 1;
3450		E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3451		tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(1));
3452		tarc |= 1;
3453		E1000_WRITE_REG(&adapter->hw, E1000_TARC(1), tarc);
3454	}
3455
3456	adapter->txd_cmd = E1000_TXD_CMD_IFCS;
3457	if (adapter->tx_int_delay.value > 0)
3458		adapter->txd_cmd |= E1000_TXD_CMD_IDE;
3459
3460	/* Program the Transmit Control Register */
3461	tctl = E1000_READ_REG(&adapter->hw, E1000_TCTL);
3462	tctl &= ~E1000_TCTL_CT;
3463	tctl |= (E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN |
3464		   (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT));
3465
3466	if (adapter->hw.mac.type >= e1000_82571)
3467		tctl |= E1000_TCTL_MULR;
3468
3469	/* This write will effectively turn on the transmit unit. */
3470	E1000_WRITE_REG(&adapter->hw, E1000_TCTL, tctl);
3471
3472}
3473
3474
3475/*********************************************************************
3476 *
3477 *  Free all transmit rings.
3478 *
3479 **********************************************************************/
3480static void
3481em_free_transmit_structures(struct adapter *adapter)
3482{
3483	struct tx_ring *txr = adapter->tx_rings;
3484
3485	for (int i = 0; i < adapter->num_queues; i++, txr++) {
3486		EM_TX_LOCK(txr);
3487		em_free_transmit_buffers(txr);
3488		em_dma_free(adapter, &txr->txdma);
3489		EM_TX_UNLOCK(txr);
3490		EM_TX_LOCK_DESTROY(txr);
3491	}
3492
3493	free(adapter->tx_rings, M_DEVBUF);
3494}
3495
3496/*********************************************************************
3497 *
3498 *  Free transmit ring related data structures.
3499 *
3500 **********************************************************************/
3501static void
3502em_free_transmit_buffers(struct tx_ring *txr)
3503{
3504	struct adapter		*adapter = txr->adapter;
3505	struct em_buffer	*txbuf;
3506
3507	INIT_DEBUGOUT("free_transmit_ring: begin");
3508
3509	if (txr->tx_buffers == NULL)
3510		return;
3511
3512	for (int i = 0; i < adapter->num_tx_desc; i++) {
3513		txbuf = &txr->tx_buffers[i];
3514		if (txbuf->m_head != NULL) {
3515			bus_dmamap_sync(txr->txtag, txbuf->map,
3516			    BUS_DMASYNC_POSTWRITE);
3517			bus_dmamap_unload(txr->txtag,
3518			    txbuf->map);
3519			m_freem(txbuf->m_head);
3520			txbuf->m_head = NULL;
3521			if (txbuf->map != NULL) {
3522				bus_dmamap_destroy(txr->txtag,
3523				    txbuf->map);
3524				txbuf->map = NULL;
3525			}
3526		} else if (txbuf->map != NULL) {
3527			bus_dmamap_unload(txr->txtag,
3528			    txbuf->map);
3529			bus_dmamap_destroy(txr->txtag,
3530			    txbuf->map);
3531			txbuf->map = NULL;
3532		}
3533	}
3534#if __FreeBSD_version >= 800000
3535	if (txr->br != NULL)
3536		buf_ring_free(txr->br, M_DEVBUF);
3537#endif
3538	if (txr->tx_buffers != NULL) {
3539		free(txr->tx_buffers, M_DEVBUF);
3540		txr->tx_buffers = NULL;
3541	}
3542	if (txr->txtag != NULL) {
3543		bus_dma_tag_destroy(txr->txtag);
3544		txr->txtag = NULL;
3545	}
3546	return;
3547}
3548
3549
3550/*********************************************************************
3551 *  The offload context is protocol specific (TCP/UDP) and thus
3552 *  only needs to be set when the protocol changes. The occasion
3553 *  of a context change can be a performance detriment, and
3554 *  might be better just disabled. The reason arises in the way
3555 *  in which the controller supports pipelined requests from the
3556 *  Tx data DMA. Up to four requests can be pipelined, and they may
3557 *  belong to the same packet or to multiple packets. However all
3558 *  requests for one packet are issued before a request is issued
3559 *  for a subsequent packet and if a request for the next packet
3560 *  requires a context change, that request will be stalled
3561 *  until the previous request completes. This means setting up
3562 *  a new context effectively disables pipelined Tx data DMA which
3563 *  in turn greatly slows down performance when sending small
3564 *  frames.
3565 **********************************************************************/
3566static void
3567em_transmit_checksum_setup(struct tx_ring *txr, struct mbuf *mp, int ip_off,
3568    struct ip *ip, u32 *txd_upper, u32 *txd_lower)
3569{
3570	struct adapter			*adapter = txr->adapter;
3571	struct e1000_context_desc	*TXD = NULL;
3572	struct em_buffer		*tx_buffer;
3573	int				cur, hdr_len;
3574	u32				cmd = 0;
3575	u16				offload = 0;
3576	u8				ipcso, ipcss, tucso, tucss;
3577
3578	ipcss = ipcso = tucss = tucso = 0;
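	/*
	 * hdr_len is the offset of the transport header: the L2 header
	 * length (ip_off) plus the IP header length (ip_hl is in 32-bit
	 * words). E.g. with no VLAN tag and no IP options this is
	 * 14 + 20 = 34 bytes.
	 */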
3579	hdr_len = ip_off + (ip->ip_hl << 2);
3580	cur = txr->next_avail_desc;
3581
3582	/* Setup of IP header checksum. */
3583	if (mp->m_pkthdr.csum_flags & CSUM_IP) {
3584		*txd_upper |= E1000_TXD_POPTS_IXSM << 8;
3585		offload |= CSUM_IP;
3586		ipcss = ip_off;
3587		ipcso = ip_off + offsetof(struct ip, ip_sum);
3588		/*
3589		 * Start offset for header checksum calculation.
3590		 * End offset for header checksum calculation.
3591		 * Offset of place to put the checksum.
3592		 */
3593		TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
3594		TXD->lower_setup.ip_fields.ipcss = ipcss;
3595		TXD->lower_setup.ip_fields.ipcse = htole16(hdr_len);
3596		TXD->lower_setup.ip_fields.ipcso = ipcso;
3597		cmd |= E1000_TXD_CMD_IP;
3598	}
3599
3600	if (mp->m_pkthdr.csum_flags & CSUM_TCP) {
3601 		*txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
3602 		*txd_upper |= E1000_TXD_POPTS_TXSM << 8;
3603 		offload |= CSUM_TCP;
3604 		tucss = hdr_len;
3605 		tucso = hdr_len + offsetof(struct tcphdr, th_sum);
3606 		/*
3607 		 * Setting up a new checksum offload context for every
3608 		 * frame takes a lot of processing time for the hardware.
3609 		 * This also hurts performance a lot for small frames, so
3610 		 * avoid it if the driver can reuse the previously
3611 		 * configured checksum offload context.
3612 		 */
3613 		if (txr->last_hw_offload == offload) {
3614 			if (offload & CSUM_IP) {
3615 				if (txr->last_hw_ipcss == ipcss &&
3616 				    txr->last_hw_ipcso == ipcso &&
3617 				    txr->last_hw_tucss == tucss &&
3618 				    txr->last_hw_tucso == tucso)
3619 					return;
3620 			} else {
3621 				if (txr->last_hw_tucss == tucss &&
3622 				    txr->last_hw_tucso == tucso)
3623 					return;
3624 			}
3625  		}
3626 		txr->last_hw_offload = offload;
3627 		txr->last_hw_tucss = tucss;
3628 		txr->last_hw_tucso = tucso;
3629 		/*
3630 		 * Start offset for payload checksum calculation.
3631 		 * End offset for payload checksum calculation.
3632 		 * Offset of place to put the checksum.
3633 		 */
3634		TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
3635 		TXD->upper_setup.tcp_fields.tucss = hdr_len;
3636 		TXD->upper_setup.tcp_fields.tucse = htole16(0);
3637 		TXD->upper_setup.tcp_fields.tucso = tucso;
3638 		cmd |= E1000_TXD_CMD_TCP;
3639 	} else if (mp->m_pkthdr.csum_flags & CSUM_UDP) {
3640 		*txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
3641 		*txd_upper |= E1000_TXD_POPTS_TXSM << 8;
 		offload |= CSUM_UDP;	/* record UDP so a stale TCP context is not reused */
3642 		tucss = hdr_len;
3643 		tucso = hdr_len + offsetof(struct udphdr, uh_sum);
3644 		/*
3645 		 * Setting up a new checksum offload context for every
3646 		 * frame takes a lot of processing time for the hardware.
3647 		 * This also hurts performance a lot for small frames, so
3648 		 * avoid it if the driver can reuse the previously
3649 		 * configured checksum offload context.
3650 		 */
3651 		if (txr->last_hw_offload == offload) {
3652 			if (offload & CSUM_IP) {
3653 				if (txr->last_hw_ipcss == ipcss &&
3654 				    txr->last_hw_ipcso == ipcso &&
3655 				    txr->last_hw_tucss == tucss &&
3656 				    txr->last_hw_tucso == tucso)
3657 					return;
3658 			} else {
3659 				if (txr->last_hw_tucss == tucss &&
3660 				    txr->last_hw_tucso == tucso)
3661 					return;
3662 			}
3663 		}
3664 		txr->last_hw_offload = offload;
3665 		txr->last_hw_tucss = tucss;
3666 		txr->last_hw_tucso = tucso;
3667 		/*
3668 		 * Start offset for payload checksum calculation.
3669 		 * End offset for payload checksum calculation.
3670 		 * Offset of place to put the checksum.
3671 		 */
3672		TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
3673 		TXD->upper_setup.tcp_fields.tucss = tucss;
3674 		TXD->upper_setup.tcp_fields.tucse = htole16(0);
3675 		TXD->upper_setup.tcp_fields.tucso = tucso;
3676  	}
3677
3678 	if (offload & CSUM_IP) {
3679 		txr->last_hw_ipcss = ipcss;
3680 		txr->last_hw_ipcso = ipcso;
3681  	}
3682
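	/*
	 * Finalize the context descriptor: it occupies one TX ring slot
	 * just like a data descriptor, so advance the ring index and
	 * charge it against tx_avail below.
	 */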
3683	TXD->tcp_seg_setup.data = htole32(0);
3684	TXD->cmd_and_length =
3685	    htole32(adapter->txd_cmd | E1000_TXD_CMD_DEXT | cmd);
3686	tx_buffer = &txr->tx_buffers[cur];
3687	tx_buffer->m_head = NULL;
3688	tx_buffer->next_eop = -1;
3689
3690	if (++cur == adapter->num_tx_desc)
3691		cur = 0;
3692
3693	txr->tx_avail--;
3694	txr->next_avail_desc = cur;
3695}
3696
3697
3698/**********************************************************************
3699 *
3700 *  Setup work for hardware segmentation offload (TSO)
3701 *
3702 **********************************************************************/
3703static void
3704em_tso_setup(struct tx_ring *txr, struct mbuf *mp, int ip_off,
3705    struct ip *ip, struct tcphdr *tp, u32 *txd_upper, u32 *txd_lower)
3706{
3707	struct adapter			*adapter = txr->adapter;
3708	struct e1000_context_desc	*TXD;
3709	struct em_buffer		*tx_buffer;
3710	int cur, hdr_len;
3711
3712	/*
3713	 * In theory we could reuse the same TSO context if and only
3714	 * if the frame is the same type (IP/TCP) and has the same MSS.
3715	 * However, checking whether a frame has the same IP/TCP
3716	 * structure is hard, so just ignore that and always establish
3717	 * a new TSO context.
3718	 */
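	/* Total header length: L2 + IP + TCP (th_off is in 32-bit words). */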
3719	hdr_len = ip_off + (ip->ip_hl << 2) + (tp->th_off << 2);
3720	*txd_lower = (E1000_TXD_CMD_DEXT |	/* Extended descr type */
3721		      E1000_TXD_DTYP_D |	/* Data descr type */
3722		      E1000_TXD_CMD_TSE);	/* Do TSE on this packet */
3723
3724	/* IP and/or TCP header checksum calculation and insertion. */
3725	*txd_upper = (E1000_TXD_POPTS_IXSM | E1000_TXD_POPTS_TXSM) << 8;
3726
3727	cur = txr->next_avail_desc;
3728	tx_buffer = &txr->tx_buffers[cur];
3729	TXD = (struct e1000_context_desc *) &txr->tx_base[cur];
3730
3731	/*
3732	 * Start offset for header checksum calculation.
3733	 * End offset for header checksum calculation.
3734	 * Offset of place to put the checksum.
3735	 */
3736	TXD->lower_setup.ip_fields.ipcss = ip_off;
3737	TXD->lower_setup.ip_fields.ipcse =
3738	    htole16(ip_off + (ip->ip_hl << 2) - 1);
3739	TXD->lower_setup.ip_fields.ipcso = ip_off + offsetof(struct ip, ip_sum);
3740	/*
3741	 * Start offset for payload checksum calculation.
3742	 * End offset for payload checksum calculation.
3743	 * Offset of place to put the checksum.
3744	 */
3745	TXD->upper_setup.tcp_fields.tucss = ip_off + (ip->ip_hl << 2);
3746	TXD->upper_setup.tcp_fields.tucse = 0;
3747	TXD->upper_setup.tcp_fields.tucso =
3748	    ip_off + (ip->ip_hl << 2) + offsetof(struct tcphdr, th_sum);
3749	/*
3750	 * Payload size per packet w/o any headers.
3751	 * Length of all headers up to payload.
3752	 */
3753	TXD->tcp_seg_setup.fields.mss = htole16(mp->m_pkthdr.tso_segsz);
3754	TXD->tcp_seg_setup.fields.hdr_len = hdr_len;
3755
3756	TXD->cmd_and_length = htole32(adapter->txd_cmd |
3757				E1000_TXD_CMD_DEXT |	/* Extended descr */
3758				E1000_TXD_CMD_TSE |	/* TSE context */
3759				E1000_TXD_CMD_IP |	/* Do IP csum */
3760				E1000_TXD_CMD_TCP |	/* Do TCP checksum */
3761				(mp->m_pkthdr.len - (hdr_len))); /* Total len */
3762
3763	tx_buffer->m_head = NULL;
3764	tx_buffer->next_eop = -1;
3765
3766	if (++cur == adapter->num_tx_desc)
3767		cur = 0;
3768
3769	txr->tx_avail--;
3770	txr->next_avail_desc = cur;
3771	txr->tx_tso = TRUE;
3772}
3773
3774
3775/**********************************************************************
3776 *
3777 *  Examine each tx_buffer in the used queue. If the hardware is done
3778 *  processing the packet then free associated resources. The
3779 *  tx_buffer is put back on the free queue.
3780 *
3781 **********************************************************************/
3782static void
3783em_txeof(struct tx_ring *txr)
3784{
3785	struct adapter	*adapter = txr->adapter;
3786        int first, last, done, processed;
3787        struct em_buffer *tx_buffer;
3788        struct e1000_tx_desc   *tx_desc, *eop_desc;
3789	struct ifnet   *ifp = adapter->ifp;
3790
3791	EM_TX_LOCK_ASSERT(txr);
3792#ifdef DEV_NETMAP
3793	if (ifp->if_capenable & IFCAP_NETMAP) {
3794		struct netmap_adapter *na = NA(ifp);
3795
3796		selwakeuppri(&na->tx_rings[txr->me].si, PI_NET);
3797		EM_TX_UNLOCK(txr);
3798		EM_CORE_LOCK(adapter);
3799		selwakeuppri(&na->tx_si, PI_NET);
3800		EM_CORE_UNLOCK(adapter);
3801		EM_TX_LOCK(txr);
3802		return;
3803	}
3804#endif /* DEV_NETMAP */
3805
3806	/* No work, make sure watchdog is off */
3807        if (txr->tx_avail == adapter->num_tx_desc) {
3808		txr->queue_status = EM_QUEUE_IDLE;
3809                return;
3810	}
3811
3812	processed = 0;
3813        first = txr->next_to_clean;
3814        tx_desc = &txr->tx_base[first];
3815        tx_buffer = &txr->tx_buffers[first];
3816	last = tx_buffer->next_eop;
3817        eop_desc = &txr->tx_base[last];
3818
3819	/*
3820	 * What this does is get the index of the
3821	 * first descriptor AFTER the EOP of the
3822	 * first packet, that way we can do the
3823	 * simple comparison on the inner while loop.
3824	 */
3825	if (++last == adapter->num_tx_desc)
3826 		last = 0;
3827	done = last;
3828
3829        bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3830            BUS_DMASYNC_POSTREAD);
3831
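	/*
	 * DD (Descriptor Done) set in the EOP descriptor means the
	 * hardware has finished DMA for the whole packet, so every
	 * descriptor in its range can be reclaimed.
	 */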
3832        while (eop_desc->upper.fields.status & E1000_TXD_STAT_DD) {
3833		/* We clean the range of the packet */
3834		while (first != done) {
3835                	tx_desc->upper.data = 0;
3836                	tx_desc->lower.data = 0;
3837                	tx_desc->buffer_addr = 0;
3838                	++txr->tx_avail;
3839			++processed;
3840
3841			if (tx_buffer->m_head) {
3842				bus_dmamap_sync(txr->txtag,
3843				    tx_buffer->map,
3844				    BUS_DMASYNC_POSTWRITE);
3845				bus_dmamap_unload(txr->txtag,
3846				    tx_buffer->map);
3847                        	m_freem(tx_buffer->m_head);
3848                        	tx_buffer->m_head = NULL;
3849                	}
3850			tx_buffer->next_eop = -1;
3851			txr->watchdog_time = ticks;
3852
3853	                if (++first == adapter->num_tx_desc)
3854				first = 0;
3855
3856	                tx_buffer = &txr->tx_buffers[first];
3857			tx_desc = &txr->tx_base[first];
3858		}
3859		++ifp->if_opackets;
3860		/* See if we can continue to the next packet */
3861		last = tx_buffer->next_eop;
3862		if (last != -1) {
3863        		eop_desc = &txr->tx_base[last];
3864			/* Get new done point */
3865			if (++last == adapter->num_tx_desc)
				last = 0;
3866			done = last;
3867		} else
3868			break;
3869        }
3870        bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3871            BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3872
3873        txr->next_to_clean = first;
3874
3875	/*
3876	** Watchdog calculation: we know there's
3877	** work outstanding or the first return
3878	** would have been taken, so nothing processed
3879	** for too long indicates a hang. The local
3880	** timer will examine this and do a reset if needed.
3881	*/
3882	if ((!processed) && ((ticks - txr->watchdog_time) > EM_WATCHDOG))
3883		txr->queue_status = EM_QUEUE_HUNG;
3884
3885        /*
3886         * If we have a minimum free, clear IFF_DRV_OACTIVE
3887         * to tell the stack that it is OK to send packets.
3888	 * Notice that all writes of OACTIVE happen under the
3889	 * TX lock which, with a single queue, guarantees
3890	 * sanity.
3891         */
3892        if (txr->tx_avail >= EM_MAX_SCATTER)
3893		ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
3894
3895	/* Disable watchdog if all clean */
3896	if (txr->tx_avail == adapter->num_tx_desc) {
3897		txr->queue_status = EM_QUEUE_IDLE;
3898	}
3899}
3900
3901
3902/*********************************************************************
3903 *
3904 *  Refresh RX descriptor mbufs from system mbuf buffer pool.
3905 *
3906 **********************************************************************/
3907static void
3908em_refresh_mbufs(struct rx_ring *rxr, int limit)
3909{
3910	struct adapter		*adapter = rxr->adapter;
3911	struct mbuf		*m;
3912	bus_dma_segment_t	segs[1];
3913	struct em_buffer	*rxbuf;
3914	int			i, j, error, nsegs;
3915	bool			cleaned = FALSE;
3916
3917	i = j = rxr->next_to_refresh;
3918	/*
3919	** Get one descriptor beyond
3920	** our work mark to control
3921	** the loop.
3922	*/
3923	if (++j == adapter->num_rx_desc)
3924		j = 0;
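	/*
	 * j runs one slot ahead of i; stopping when j reaches 'limit'
	 * keeps the tail pointer one descriptor short of the point
	 * still being cleaned, so head and tail never meet.
	 */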
3925
3926	while (j != limit) {
3927		rxbuf = &rxr->rx_buffers[i];
3928		if (rxbuf->m_head == NULL) {
3929			m = m_getjcl(M_DONTWAIT, MT_DATA,
3930			    M_PKTHDR, adapter->rx_mbuf_sz);
3931			/*
3932			** If we have a temporary resource shortage
3933			** that causes a failure, just abort refresh
3934			** for now, we will return to this point when
3935			** reinvoked from em_rxeof.
3936			*/
3937			if (m == NULL)
3938				goto update;
3939		} else
3940			m = rxbuf->m_head;
3941
3942		m->m_len = m->m_pkthdr.len = adapter->rx_mbuf_sz;
3943		m->m_flags |= M_PKTHDR;
3944		m->m_data = m->m_ext.ext_buf;
3945
3946		/* Use bus_dma machinery to setup the memory mapping  */
3947		error = bus_dmamap_load_mbuf_sg(rxr->rxtag, rxbuf->map,
3948		    m, segs, &nsegs, BUS_DMA_NOWAIT);
3949		if (error != 0) {
3950			printf("Refresh mbufs: hdr dmamap load"
3951			    " failure - %d\n", error);
3952			m_free(m);
3953			rxbuf->m_head = NULL;
3954			goto update;
3955		}
3956		rxbuf->m_head = m;
3957		bus_dmamap_sync(rxr->rxtag,
3958		    rxbuf->map, BUS_DMASYNC_PREREAD);
3959		rxr->rx_base[i].buffer_addr = htole64(segs[0].ds_addr);
3960		cleaned = TRUE;
3961
3962		i = j; /* Next is precalculated for us */
3963		rxr->next_to_refresh = i;
3964		/* Calculate next controlling index */
3965		if (++j == adapter->num_rx_desc)
3966			j = 0;
3967	}
3968update:
3969	/*
3970	** Update the tail pointer only if,
3971	** and as far as, we have refreshed.
3972	*/
3973	if (cleaned)
3974		E1000_WRITE_REG(&adapter->hw,
3975		    E1000_RDT(rxr->me), rxr->next_to_refresh);
3976
3977	return;
3978}
3979
3980
3981/*********************************************************************
3982 *
3983 *  Allocate memory for rx_buffer structures. Since we use one
3984 *  rx_buffer per received packet, the maximum number of rx_buffer's
3985 *  that we'll need is equal to the number of receive descriptors
3986 *  that we've allocated.
3987 *
3988 **********************************************************************/
3989static int
3990em_allocate_receive_buffers(struct rx_ring *rxr)
3991{
3992	struct adapter		*adapter = rxr->adapter;
3993	device_t		dev = adapter->dev;
3994	struct em_buffer	*rxbuf;
3995	int			error;
3996
3997	rxr->rx_buffers = malloc(sizeof(struct em_buffer) *
3998	    adapter->num_rx_desc, M_DEVBUF, M_NOWAIT | M_ZERO);
3999	if (rxr->rx_buffers == NULL) {
4000		device_printf(dev, "Unable to allocate rx_buffer memory\n");
4001		return (ENOMEM);
4002	}
4003
4004	error = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */
4005				1, 0,			/* alignment, bounds */
4006				BUS_SPACE_MAXADDR,	/* lowaddr */
4007				BUS_SPACE_MAXADDR,	/* highaddr */
4008				NULL, NULL,		/* filter, filterarg */
4009				MJUM9BYTES,		/* maxsize */
4010				1,			/* nsegments */
4011				MJUM9BYTES,		/* maxsegsize */
4012				0,			/* flags */
4013				NULL,			/* lockfunc */
4014				NULL,			/* lockarg */
4015				&rxr->rxtag);
4016	if (error) {
4017		device_printf(dev, "%s: bus_dma_tag_create failed %d\n",
4018		    __func__, error);
4019		goto fail;
4020	}
4021
4022	rxbuf = rxr->rx_buffers;
4023	for (int i = 0; i < adapter->num_rx_desc; i++, rxbuf++) {
4025		error = bus_dmamap_create(rxr->rxtag, BUS_DMA_NOWAIT,
4026		    &rxbuf->map);
4027		if (error) {
4028			device_printf(dev, "%s: bus_dmamap_create failed: %d\n",
4029			    __func__, error);
4030			goto fail;
4031		}
4032	}
4033
4034	return (0);
4035
4036fail:
4037	em_free_receive_structures(adapter);
4038	return (error);
4039}
4040
4041
4042/*********************************************************************
4043 *
4044 *  Initialize a receive ring and its buffers.
4045 *
4046 **********************************************************************/
4047static int
4048em_setup_receive_ring(struct rx_ring *rxr)
4049{
4050	struct	adapter 	*adapter = rxr->adapter;
4051	struct em_buffer	*rxbuf;
4052	bus_dma_segment_t	seg[1];
4053	int			rsize, nsegs, error = 0;
4054#ifdef DEV_NETMAP
4055	struct netmap_adapter *na = NA(adapter->ifp);
4056	struct netmap_slot *slot;
4057#endif
4058
4059
4060	/* Clear the ring contents */
4061	EM_RX_LOCK(rxr);
4062	rsize = roundup2(adapter->num_rx_desc *
4063	    sizeof(struct e1000_rx_desc), EM_DBA_ALIGN);
4064	bzero((void *)rxr->rx_base, rsize);
4065#ifdef DEV_NETMAP
4066	slot = netmap_reset(na, NR_RX, 0, 0);
4067#endif
4068
4069	/*
4070	** Free current RX buffer structs and their mbufs
4071	*/
4072	for (int i = 0; i < adapter->num_rx_desc; i++) {
4073		rxbuf = &rxr->rx_buffers[i];
4074		if (rxbuf->m_head != NULL) {
4075			bus_dmamap_sync(rxr->rxtag, rxbuf->map,
4076			    BUS_DMASYNC_POSTREAD);
4077			bus_dmamap_unload(rxr->rxtag, rxbuf->map);
4078			m_freem(rxbuf->m_head);
4079			rxbuf->m_head = NULL; /* mark as freed */
4080		}
4081	}
4082
4083	/* Now replenish the mbufs */
4084        for (int j = 0; j != adapter->num_rx_desc; ++j) {
4085		rxbuf = &rxr->rx_buffers[j];
4086#ifdef DEV_NETMAP
4087		if (slot) {
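			/*
			 * Netmap mode: point the descriptor at the netmap
			 * buffer for this slot instead of allocating an
			 * mbuf cluster.
			 */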
4088			int si = netmap_idx_n2k(&na->rx_rings[rxr->me], j);
4089			uint64_t paddr;
4090			void *addr;
4091
4092			addr = PNMB(slot + si, &paddr);
4093			netmap_load_map(rxr->rxtag, rxbuf->map, addr);
4094			/* Update descriptor */
4095			rxr->rx_base[j].buffer_addr = htole64(paddr);
4096			continue;
4097		}
4098#endif /* DEV_NETMAP */
4099		rxbuf->m_head = m_getjcl(M_DONTWAIT, MT_DATA,
4100		    M_PKTHDR, adapter->rx_mbuf_sz);
4101		if (rxbuf->m_head == NULL) {
4102			error = ENOBUFS;
4103			goto fail;
4104		}
4105		rxbuf->m_head->m_len = adapter->rx_mbuf_sz;
4106		rxbuf->m_head->m_flags &= ~M_HASFCS; /* we strip it */
4107		rxbuf->m_head->m_pkthdr.len = adapter->rx_mbuf_sz;
4108
4109		/* Get the memory mapping */
4110		error = bus_dmamap_load_mbuf_sg(rxr->rxtag,
4111		    rxbuf->map, rxbuf->m_head, seg,
4112		    &nsegs, BUS_DMA_NOWAIT);
4113		if (error != 0) {
4114			m_freem(rxbuf->m_head);
4115			rxbuf->m_head = NULL;
4116			goto fail;
4117		}
4118		bus_dmamap_sync(rxr->rxtag,
4119		    rxbuf->map, BUS_DMASYNC_PREREAD);
4120
4121		/* Update descriptor */
4122		rxr->rx_base[j].buffer_addr = htole64(seg[0].ds_addr);
4123	}
4124	rxr->next_to_check = 0;
4125	rxr->next_to_refresh = 0;
4126	bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4127	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4128
4129fail:
4130	EM_RX_UNLOCK(rxr);
4131	return (error);
4132}
4133
4134/*********************************************************************
4135 *
4136 *  Initialize all receive rings.
4137 *
4138 **********************************************************************/
4139static int
4140em_setup_receive_structures(struct adapter *adapter)
4141{
4142	struct rx_ring *rxr = adapter->rx_rings;
4143	int q;
4144
4145	for (q = 0; q < adapter->num_queues; q++, rxr++)
4146		if (em_setup_receive_ring(rxr))
4147			goto fail;
4148
4149	return (0);
4150fail:
4151	/*
4152	 * Free the RX buffers allocated so far. We only handle
4153	 * the rings that completed; the failing ring will have
4154	 * cleaned up after itself. Ring 'q' failed, so it is the terminus.
4155	 */
4156	for (int i = 0; i < q; ++i) {
4157		rxr = &adapter->rx_rings[i];
4158		for (int n = 0; n < adapter->num_rx_desc; n++) {
4159			struct em_buffer *rxbuf;
4160			rxbuf = &rxr->rx_buffers[n];
4161			if (rxbuf->m_head != NULL) {
4162				bus_dmamap_sync(rxr->rxtag, rxbuf->map,
4163			  	  BUS_DMASYNC_POSTREAD);
4164				bus_dmamap_unload(rxr->rxtag, rxbuf->map);
4165				m_freem(rxbuf->m_head);
4166				rxbuf->m_head = NULL;
4167			}
4168		}
4169		rxr->next_to_check = 0;
4170		rxr->next_to_refresh = 0;
4171	}
4172
4173	return (ENOBUFS);
4174}
4175
4176/*********************************************************************
4177 *
4178 *  Free all receive rings.
4179 *
4180 **********************************************************************/
4181static void
4182em_free_receive_structures(struct adapter *adapter)
4183{
4184	struct rx_ring *rxr = adapter->rx_rings;
4185
4186	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4187		em_free_receive_buffers(rxr);
4188		/* Free the ring memory as well */
4189		em_dma_free(adapter, &rxr->rxdma);
4190		EM_RX_LOCK_DESTROY(rxr);
4191	}
4192
4193	free(adapter->rx_rings, M_DEVBUF);
4194}
4195
4196
4197/*********************************************************************
4198 *
4199 *  Free receive ring data structures
4200 *
4201 **********************************************************************/
4202static void
4203em_free_receive_buffers(struct rx_ring *rxr)
4204{
4205	struct adapter		*adapter = rxr->adapter;
4206	struct em_buffer	*rxbuf = NULL;
4207
4208	INIT_DEBUGOUT("free_receive_buffers: begin");
4209
4210	if (rxr->rx_buffers != NULL) {
4211		for (int i = 0; i < adapter->num_rx_desc; i++) {
4212			rxbuf = &rxr->rx_buffers[i];
4213			if (rxbuf->map != NULL) {
4214				bus_dmamap_sync(rxr->rxtag, rxbuf->map,
4215				    BUS_DMASYNC_POSTREAD);
4216				bus_dmamap_unload(rxr->rxtag, rxbuf->map);
4217				bus_dmamap_destroy(rxr->rxtag, rxbuf->map);
4218			}
4219			if (rxbuf->m_head != NULL) {
4220				m_freem(rxbuf->m_head);
4221				rxbuf->m_head = NULL;
4222			}
4223		}
4224		free(rxr->rx_buffers, M_DEVBUF);
4225		rxr->rx_buffers = NULL;
4226		rxr->next_to_check = 0;
4227		rxr->next_to_refresh = 0;
4228	}
4229
4230	if (rxr->rxtag != NULL) {
4231		bus_dma_tag_destroy(rxr->rxtag);
4232		rxr->rxtag = NULL;
4233	}
4234
4235	return;
4236}
4237
4238
4239/*********************************************************************
4240 *
4241 *  Enable receive unit.
4242 *
4243 **********************************************************************/
4244#define MAX_INTS_PER_SEC	8000
4245#define DEFAULT_ITR	     (1000000000/(MAX_INTS_PER_SEC * 256))
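/*
 * The ITR register counts in 256 ns units, so this works out to
 * 10^9 / (8000 * 256) ~= 488 units between interrupts.
 */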
4246
4247static void
4248em_initialize_receive_unit(struct adapter *adapter)
4249{
4250	struct rx_ring	*rxr = adapter->rx_rings;
4251	struct ifnet	*ifp = adapter->ifp;
4252	struct e1000_hw	*hw = &adapter->hw;
4253	u64	bus_addr;
4254	u32	rctl, rxcsum;
4255
4256	INIT_DEBUGOUT("em_initialize_receive_unit: begin");
4257
4258	/*
4259	 * Make sure receives are disabled while setting
4260	 * up the descriptor ring
4261	 */
4262	rctl = E1000_READ_REG(hw, E1000_RCTL);
4263	/* Do not disable if ever enabled on this hardware */
4264	if ((hw->mac.type != e1000_82574) && (hw->mac.type != e1000_82583))
4265		E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
4266
4267	E1000_WRITE_REG(&adapter->hw, E1000_RADV,
4268	    adapter->rx_abs_int_delay.value);
4269	/*
4270	 * Set the interrupt throttling rate. Value is calculated
4271	 * as DEFAULT_ITR = 1/(MAX_INTS_PER_SEC * 256ns)
4272	 */
4273	E1000_WRITE_REG(hw, E1000_ITR, DEFAULT_ITR);
4274
4275	/*
4276	** When using MSIX interrupts we need to throttle
4277	** using the EITR register (82574 only)
4278	*/
4279	if (hw->mac.type == e1000_82574) {
4280		for (int i = 0; i < 4; i++)
4281			E1000_WRITE_REG(hw, E1000_EITR_82574(i),
4282			    DEFAULT_ITR);
4283		/* Disable accelerated acknowledge */
4284		E1000_WRITE_REG(hw, E1000_RFCTL, E1000_RFCTL_ACK_DIS);
4285	}
4286
4287	if (ifp->if_capenable & IFCAP_RXCSUM) {
4288		rxcsum = E1000_READ_REG(hw, E1000_RXCSUM);
4289		rxcsum |= (E1000_RXCSUM_IPOFL | E1000_RXCSUM_TUOFL);
4290		E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum);
4291	}
4292
4293	/*
4294	** XXX TEMPORARY WORKAROUND: on some systems with 82573
4295	** long latencies are observed, like Lenovo X60. This
4296	** change eliminates the problem, but since having positive
4297	** values in RDTR is a known source of problems on other
4298	** platforms another solution is being sought.
4299	*/
4300	if (hw->mac.type == e1000_82573)
4301		E1000_WRITE_REG(hw, E1000_RDTR, 0x20);
4302
4303	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4304		/* Setup the Base and Length of the Rx Descriptor Ring */
4305		bus_addr = rxr->rxdma.dma_paddr;
4306		E1000_WRITE_REG(hw, E1000_RDLEN(i),
4307		    adapter->num_rx_desc * sizeof(struct e1000_rx_desc));
4308		E1000_WRITE_REG(hw, E1000_RDBAH(i), (u32)(bus_addr >> 32));
4309		E1000_WRITE_REG(hw, E1000_RDBAL(i), (u32)bus_addr);
4310		/* Setup the Head and Tail Descriptor Pointers */
4311		E1000_WRITE_REG(hw, E1000_RDH(i), 0);
4312#ifdef DEV_NETMAP
4313		/*
4314		 * an init() while a netmap client is active must
4315		 * preserve the rx buffers passed to userspace.
4316		 * In this driver it means we adjust RDT to
4317		 * something different from na->num_rx_desc - 1.
4318		 */
4319		if (ifp->if_capenable & IFCAP_NETMAP) {
4320			struct netmap_adapter *na = NA(adapter->ifp);
4321			struct netmap_kring *kring = &na->rx_rings[i];
4322			int t = na->num_rx_desc - 1 - kring->nr_hwavail;
4323
4324			E1000_WRITE_REG(hw, E1000_RDT(i), t);
4325		} else
4326#endif /* DEV_NETMAP */
4327		E1000_WRITE_REG(hw, E1000_RDT(i), adapter->num_rx_desc - 1);
4328	}
4329
4330	/* Set PTHRESH for improved jumbo performance */
4331	if (((adapter->hw.mac.type == e1000_ich9lan) ||
4332	    (adapter->hw.mac.type == e1000_pch2lan) ||
4333	    (adapter->hw.mac.type == e1000_ich10lan)) &&
4334	    (ifp->if_mtu > ETHERMTU)) {
4335		u32 rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(0));
4336		E1000_WRITE_REG(hw, E1000_RXDCTL(0), rxdctl | 3);
4337	}
4338
4339	if (adapter->hw.mac.type == e1000_pch2lan) {
4340		if (ifp->if_mtu > ETHERMTU)
4341			e1000_lv_jumbo_workaround_ich8lan(hw, TRUE);
4342		else
4343			e1000_lv_jumbo_workaround_ich8lan(hw, FALSE);
4344	}
4345
4346	/* Setup the Receive Control Register */
4347	rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
4348	rctl |= E1000_RCTL_EN | E1000_RCTL_BAM |
4349	    E1000_RCTL_LBM_NO | E1000_RCTL_RDMTS_HALF |
4350	    (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
4351
4352        /* Strip the CRC */
4353        rctl |= E1000_RCTL_SECRC;
4354
4355        /* Make sure VLAN Filters are off */
4356        rctl &= ~E1000_RCTL_VFE;
4357	rctl &= ~E1000_RCTL_SBP;
4358
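	/*
	 * Match the hardware buffer size to the mbuf cluster size in
	 * use: 2K standard clusters, 4K page-sized clusters, or 8K for
	 * anything larger; BSEX selects the extended size encodings.
	 */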
4359	if (adapter->rx_mbuf_sz == MCLBYTES)
4360		rctl |= E1000_RCTL_SZ_2048;
4361	else if (adapter->rx_mbuf_sz == MJUMPAGESIZE)
4362		rctl |= E1000_RCTL_SZ_4096 | E1000_RCTL_BSEX;
4363	else if (adapter->rx_mbuf_sz > MJUMPAGESIZE)
4364		rctl |= E1000_RCTL_SZ_8192 | E1000_RCTL_BSEX;
4365
4366	if (ifp->if_mtu > ETHERMTU)
4367		rctl |= E1000_RCTL_LPE;
4368	else
4369		rctl &= ~E1000_RCTL_LPE;
4370
4371	/* Write out the settings */
4372	E1000_WRITE_REG(hw, E1000_RCTL, rctl);
4373
4374	return;
4375}
4376
4377
4378/*********************************************************************
4379 *
4380 *  This routine executes in interrupt context. It replenishes
4381 *  the mbufs in the descriptor ring and sends data which has been
4382 *  DMA'ed into host memory to the upper layer.
4383 *
4384 *  We loop at most count times if count is > 0, or until done if
4385 *  count < 0.
4386 *
4387 *  For polling we also now return the number of cleaned packets
4388 *********************************************************************/
4389static bool
4390em_rxeof(struct rx_ring *rxr, int count, int *done)
4391{
4392	struct adapter		*adapter = rxr->adapter;
4393	struct ifnet		*ifp = adapter->ifp;
4394	struct mbuf		*mp, *sendmp;
4395	u8			status = 0;
4396	u16 			len;
4397	int			i, processed, rxdone = 0;
4398	bool			eop;
4399	struct e1000_rx_desc	*cur;
4400
4401	EM_RX_LOCK(rxr);
4402
4403#ifdef DEV_NETMAP
4404	if (ifp->if_capenable & IFCAP_NETMAP) {
4405		struct netmap_adapter *na = NA(ifp);
4406
4407		na->rx_rings[rxr->me].nr_kflags |= NKR_PENDINTR;
4408		selwakeuppri(&na->rx_rings[rxr->me].si, PI_NET);
4409		EM_RX_UNLOCK(rxr);
4410		EM_CORE_LOCK(adapter);
4411		selwakeuppri(&na->rx_si, PI_NET);
4412		EM_CORE_UNLOCK(adapter);
4413		return (0);
4414	}
4415#endif /* DEV_NETMAP */
4416
4417	for (i = rxr->next_to_check, processed = 0; count != 0;) {
4418
4419		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
4420			break;
4421
4422		bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4423		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
4424
4425		cur = &rxr->rx_base[i];
4426		status = cur->status;
4427		mp = sendmp = NULL;
4428
4429		if ((status & E1000_RXD_STAT_DD) == 0)
4430			break;
4431
4432		len = le16toh(cur->length);
4433		eop = (status & E1000_RXD_STAT_EOP) != 0;
4434
4435		if ((cur->errors & E1000_RXD_ERR_FRAME_ERR_MASK) ||
4436		    (rxr->discard == TRUE)) {
4437			ifp->if_ierrors++;
4438			++rxr->rx_discarded;
4439			if (!eop) /* Catch subsequent segs */
4440				rxr->discard = TRUE;
4441			else
4442				rxr->discard = FALSE;
4443			em_rx_discard(rxr, i);
4444			goto next_desc;
4445		}
4446
4447		/* Assign correct length to the current fragment */
4448		mp = rxr->rx_buffers[i].m_head;
4449		mp->m_len = len;
4450
4451		/* Trigger for refresh */
4452		rxr->rx_buffers[i].m_head = NULL;
4453
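		/*
		 * fmp/lmp track the first and last mbuf of a frame that
		 * spans multiple descriptors; segments are chained until
		 * the EOP descriptor completes the packet.
		 */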
4454		/* First segment? */
4455		if (rxr->fmp == NULL) {
4456			mp->m_pkthdr.len = len;
4457			rxr->fmp = rxr->lmp = mp;
4458		} else {
4459			/* Chain mbuf's together */
4460			mp->m_flags &= ~M_PKTHDR;
4461			rxr->lmp->m_next = mp;
4462			rxr->lmp = mp;
4463			rxr->fmp->m_pkthdr.len += len;
4464		}
4465
4466		if (eop) {
4467			--count;
4468			sendmp = rxr->fmp;
4469			sendmp->m_pkthdr.rcvif = ifp;
4470			ifp->if_ipackets++;
4471			em_receive_checksum(cur, sendmp);
4472#ifndef __NO_STRICT_ALIGNMENT
4473			if (adapter->max_frame_size >
4474			    (MCLBYTES - ETHER_ALIGN) &&
4475			    em_fixup_rx(rxr) != 0)
4476				goto skip;
4477#endif
4478			if (status & E1000_RXD_STAT_VP) {
4479				sendmp->m_pkthdr.ether_vtag =
4480				    le16toh(cur->special);
4481				sendmp->m_flags |= M_VLANTAG;
4482			}
4483#ifndef __NO_STRICT_ALIGNMENT
4484skip:
4485#endif
4486			rxr->fmp = rxr->lmp = NULL;
4487		}
4488next_desc:
4489		/* Zero out the receive descriptors status. */
4490		cur->status = 0;
4491		++rxdone;	/* cumulative for POLL */
4492		++processed;
4493
4494		/* Advance our pointers to the next descriptor. */
4495		if (++i == adapter->num_rx_desc)
4496			i = 0;
4497
4498		/* Send to the stack */
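		/*
		 * The RX lock is dropped across if_input() so the stack
		 * runs without holding it; next_to_check is saved and
		 * reloaded in case the ring moved underneath us.
		 */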
4499		if (sendmp != NULL) {
4500			rxr->next_to_check = i;
4501			EM_RX_UNLOCK(rxr);
4502			(*ifp->if_input)(ifp, sendmp);
4503			EM_RX_LOCK(rxr);
4504			i = rxr->next_to_check;
4505		}
4506
4507		/* Only refresh mbufs every 8 descriptors */
4508		if (processed == 8) {
4509			em_refresh_mbufs(rxr, i);
4510			processed = 0;
4511		}
4512	}
4513
4514	/* Catch any remaining refresh work */
4515	if (e1000_rx_unrefreshed(rxr))
4516		em_refresh_mbufs(rxr, i);
4517
4518	rxr->next_to_check = i;
4519	if (done != NULL)
4520		*done = rxdone;
4521	EM_RX_UNLOCK(rxr);
4522
4523	return ((status & E1000_RXD_STAT_DD) ? TRUE : FALSE);
4524}
4525
4526static __inline void
4527em_rx_discard(struct rx_ring *rxr, int i)
4528{
4529	struct em_buffer	*rbuf;
4530
4531	rbuf = &rxr->rx_buffers[i];
4532	/* Free any previous pieces */
4533	if (rxr->fmp != NULL) {
4534		rxr->fmp->m_flags |= M_PKTHDR;
4535		m_freem(rxr->fmp);
4536		rxr->fmp = NULL;
4537		rxr->lmp = NULL;
4538	}
4539	/*
4540	** Free buffer and allow em_refresh_mbufs()
4541	** to clean up and recharge buffer.
4542	*/
4543	if (rbuf->m_head) {
4544		m_free(rbuf->m_head);
4545		rbuf->m_head = NULL;
4546	}
4547	return;
4548}
4549
4550#ifndef __NO_STRICT_ALIGNMENT
4551/*
4552 * When jumbo frames are enabled we should realign the entire payload on
4553 * architectures with strict alignment. This is a serious design mistake of
4554 * the 8254x as it nullifies DMA operations. The 8254x only allows the RX
4555 * buffer size to be 2048/4096/8192/16384. What we really want is
4556 * 2048 - ETHER_ALIGN to align its payload. On architectures without strict
4557 * alignment restrictions the 8254x still performs unaligned memory accesses,
4558 * which reduces performance as well. To avoid copying an entire frame to
4559 * realign it, we allocate a new mbuf, copy the ethernet header into it, and
4560 * prepend the new mbuf to the existing mbuf chain.
4561 *
4562 * Be aware that the best performance of the 8254x is achieved only when jumbo
4563 * frames are not used at all on architectures with strict alignment.
4564 */
4565static int
4566em_fixup_rx(struct rx_ring *rxr)
4567{
4568	struct adapter *adapter = rxr->adapter;
4569	struct mbuf *m, *n;
4570	int error;
4571
4572	error = 0;
4573	m = rxr->fmp;
4574	if (m->m_len <= (MCLBYTES - ETHER_HDR_LEN)) {
4575		bcopy(m->m_data, m->m_data + ETHER_HDR_LEN, m->m_len);
4576		m->m_data += ETHER_HDR_LEN;
4577	} else {
4578		MGETHDR(n, M_DONTWAIT, MT_DATA);
4579		if (n != NULL) {
4580			bcopy(m->m_data, n->m_data, ETHER_HDR_LEN);
4581			m->m_data += ETHER_HDR_LEN;
4582			m->m_len -= ETHER_HDR_LEN;
4583			n->m_len = ETHER_HDR_LEN;
4584			M_MOVE_PKTHDR(n, m);
4585			n->m_next = m;
4586			rxr->fmp = n;
4587		} else {
4588			adapter->dropped_pkts++;
4589			m_freem(rxr->fmp);
4590			rxr->fmp = NULL;
4591			error = ENOMEM;
4592		}
4593	}
4594
4595	return (error);
4596}
4597#endif
4598
4599/*********************************************************************
4600 *
4601 *  Verify that the hardware indicated that the checksum is valid.
4602 *  Inform the stack about the status of checksum so that stack
4603 *  doesn't spend time verifying the checksum.
4604 *
4605 *********************************************************************/
4606static void
4607em_receive_checksum(struct e1000_rx_desc *rx_desc, struct mbuf *mp)
4608{
4609	/* Ignore Checksum bit is set */
4610	if (rx_desc->status & E1000_RXD_STAT_IXSM) {
4611		mp->m_pkthdr.csum_flags = 0;
4612		return;
4613	}
4614
4615	if (rx_desc->status & E1000_RXD_STAT_IPCS) {
4616		/* Did it pass? */
4617		if (!(rx_desc->errors & E1000_RXD_ERR_IPE)) {
4618			/* IP Checksum Good */
4619			mp->m_pkthdr.csum_flags = CSUM_IP_CHECKED;
4620			mp->m_pkthdr.csum_flags |= CSUM_IP_VALID;
4621
4622		} else {
4623			mp->m_pkthdr.csum_flags = 0;
4624		}
4625	}
4626
4627	if (rx_desc->status & E1000_RXD_STAT_TCPCS) {
4628		/* Did it pass? */
4629		if (!(rx_desc->errors & E1000_RXD_ERR_TCPE)) {
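			/*
			 * CSUM_PSEUDO_HDR with csum_data 0xffff tells the
			 * stack the checksum, pseudo-header included, has
			 * already been verified.
			 */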
4630			mp->m_pkthdr.csum_flags |=
4631			(CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
4632			mp->m_pkthdr.csum_data = htons(0xffff);
4633		}
4634	}
4635}
4636
4637/*
4638 * This routine is run via a vlan
4639 * config EVENT
4640 */
4641static void
4642em_register_vlan(void *arg, struct ifnet *ifp, u16 vtag)
4643{
4644	struct adapter	*adapter = ifp->if_softc;
4645	u32		index, bit;
4646
4647	if (ifp->if_softc !=  arg)   /* Not our event */
4648		return;
4649
4650	if ((vtag == 0) || (vtag > 4095))       /* Invalid ID */
4651                return;
4652
4653	EM_CORE_LOCK(adapter);
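	/*
	 * The VFTA is 128 32-bit words covering the 4096 VLAN IDs:
	 * bits 11:5 of the tag select the word, bits 4:0 the bit.
	 * E.g. vtag 100 -> index 3, bit 4.
	 */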
4654	index = (vtag >> 5) & 0x7F;
4655	bit = vtag & 0x1F;
4656	adapter->shadow_vfta[index] |= (1 << bit);
4657	++adapter->num_vlans;
4658	/* Re-init to load the changes */
4659	if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
4660		em_init_locked(adapter);
4661	EM_CORE_UNLOCK(adapter);
4662}
4663
4664/*
4665 * This routine is run via a vlan
4666 * unconfig EVENT
4667 */
4668static void
4669em_unregister_vlan(void *arg, struct ifnet *ifp, u16 vtag)
4670{
4671	struct adapter	*adapter = ifp->if_softc;
4672	u32		index, bit;
4673
4674	if (ifp->if_softc !=  arg)
4675		return;
4676
4677	if ((vtag == 0) || (vtag > 4095))       /* Invalid */
4678                return;
4679
4680	EM_CORE_LOCK(adapter);
4681	index = (vtag >> 5) & 0x7F;
4682	bit = vtag & 0x1F;
4683	adapter->shadow_vfta[index] &= ~(1 << bit);
4684	--adapter->num_vlans;
4685	/* Re-init to load the changes */
4686	if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
4687		em_init_locked(adapter);
4688	EM_CORE_UNLOCK(adapter);
4689}
4690
4691static void
4692em_setup_vlan_hw_support(struct adapter *adapter)
4693{
4694	struct e1000_hw *hw = &adapter->hw;
4695	u32             reg;
4696
4697	/*
4698	** We get here thru init_locked, meaning
4699	** a soft reset, which has already cleared
4700	** the VFTA and other state; so if no
4701	** vlans have been registered, do nothing.
4702	*/
4703	if (adapter->num_vlans == 0)
4704                return;
4705
4706	/*
4707	** A soft reset zeroes out the VFTA, so
4708	** we need to repopulate it now.
4709	*/
4710	for (int i = 0; i < EM_VFTA_SIZE; i++)
4711                if (adapter->shadow_vfta[i] != 0)
4712			E1000_WRITE_REG_ARRAY(hw, E1000_VFTA,
4713                            i, adapter->shadow_vfta[i]);
4714
4715	reg = E1000_READ_REG(hw, E1000_CTRL);
4716	reg |= E1000_CTRL_VME;
4717	E1000_WRITE_REG(hw, E1000_CTRL, reg);
4718
4719	/* Enable the Filter Table */
4720	reg = E1000_READ_REG(hw, E1000_RCTL);
4721	reg &= ~E1000_RCTL_CFIEN;
4722	reg |= E1000_RCTL_VFE;
4723	E1000_WRITE_REG(hw, E1000_RCTL, reg);
4724}
4725
4726static void
4727em_enable_intr(struct adapter *adapter)
4728{
4729	struct e1000_hw *hw = &adapter->hw;
4730	u32 ims_mask = IMS_ENABLE_MASK;
4731
4732	if (hw->mac.type == e1000_82574) {
4733		E1000_WRITE_REG(hw, EM_EIAC, EM_MSIX_MASK);
4734		ims_mask |= EM_MSIX_MASK;
4735	}
4736	E1000_WRITE_REG(hw, E1000_IMS, ims_mask);
4737}
4738
4739static void
4740em_disable_intr(struct adapter *adapter)
4741{
4742	struct e1000_hw *hw = &adapter->hw;
4743
4744	if (hw->mac.type == e1000_82574)
4745		E1000_WRITE_REG(hw, EM_EIAC, 0);
4746	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
4747}
4748
4749/*
4750 * Bit of a misnomer, what this really means is
4751 * to enable OS management of the system... aka
4752 * to disable special hardware management features
4753 */
4754static void
4755em_init_manageability(struct adapter *adapter)
4756{
4757	/* A shared code workaround */
4758#define E1000_82542_MANC2H E1000_MANC2H
4759	if (adapter->has_manage) {
4760		int manc2h = E1000_READ_REG(&adapter->hw, E1000_MANC2H);
4761		int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
4762
4763		/* disable hardware interception of ARP */
4764		manc &= ~(E1000_MANC_ARP_EN);
4765
4766                /* enable receiving management packets to the host */
4767		manc |= E1000_MANC_EN_MNG2HOST;
4768#define E1000_MNG2HOST_PORT_623 (1 << 5)
4769#define E1000_MNG2HOST_PORT_664 (1 << 6)
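		/* Forward RMCP management traffic (UDP ports 623/664) to the host */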
4770		manc2h |= E1000_MNG2HOST_PORT_623;
4771		manc2h |= E1000_MNG2HOST_PORT_664;
4772		E1000_WRITE_REG(&adapter->hw, E1000_MANC2H, manc2h);
4773		E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
4774	}
4775}
4776
4777/*
4778 * Give control back to hardware management
4779 * controller if there is one.
4780 */
4781static void
4782em_release_manageability(struct adapter *adapter)
4783{
4784	if (adapter->has_manage) {
4785		int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
4786
4787		/* re-enable hardware interception of ARP */
4788		manc |= E1000_MANC_ARP_EN;
4789		manc &= ~E1000_MANC_EN_MNG2HOST;
4790
4791		E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
4792	}
4793}
4794
4795/*
4796 * em_get_hw_control sets the {CTRL_EXT|FWSM}:DRV_LOAD bit.
4797 * For ASF and Pass Through versions of f/w this means
4798 * that the driver is loaded. For AMT versions of the f/w
4799 * this means that the network i/f is open.
4800 */
4801static void
4802em_get_hw_control(struct adapter *adapter)
4803{
4804	u32 ctrl_ext, swsm;
4805
4806	if (adapter->hw.mac.type == e1000_82573) {
4807		swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM);
4808		E1000_WRITE_REG(&adapter->hw, E1000_SWSM,
4809		    swsm | E1000_SWSM_DRV_LOAD);
4810		return;
4811	}
4812	/* else */
4813	ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4814	E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
4815	    ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
4816	return;
4817}
4818
4819/*
4820 * em_release_hw_control resets {CTRL_EXT|FWSM}:DRV_LOAD bit.
4821 * For ASF and Pass Through versions of f/w this means that
4822 * the driver is no longer loaded. For AMT versions of the
4823 * f/w this means that the network i/f is closed.
4824 */
4825static void
4826em_release_hw_control(struct adapter *adapter)
4827{
4828	u32 ctrl_ext, swsm;
4829
4830	if (!adapter->has_manage)
4831		return;
4832
4833	if (adapter->hw.mac.type == e1000_82573) {
4834		swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM);
4835		E1000_WRITE_REG(&adapter->hw, E1000_SWSM,
4836		    swsm & ~E1000_SWSM_DRV_LOAD);
4837		return;
4838	}
4839	/* else */
4840	ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4841	E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
4842	    ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
4843	return;
4844}
4845
4846static int
4847em_is_valid_ether_addr(u8 *addr)
4848{
4849	char zero_addr[6] = { 0, 0, 0, 0, 0, 0 };
4850
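	/* Reject multicast/broadcast (low bit of first octet) and all-zero addresses */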
4851	if ((addr[0] & 1) || (!bcmp(addr, zero_addr, ETHER_ADDR_LEN))) {
4852		return (FALSE);
4853	}
4854
4855	return (TRUE);
4856}
4857
4858/*
4859** Parse the interface capabilities with regard
4860** to both system management and wake-on-lan for
4861** later use.
4862*/
4863static void
4864em_get_wakeup(device_t dev)
4865{
4866	struct adapter	*adapter = device_get_softc(dev);
4867	u16		eeprom_data = 0, device_id, apme_mask;
4868
4869	adapter->has_manage = e1000_enable_mng_pass_thru(&adapter->hw);
4870	apme_mask = EM_EEPROM_APME;
4871
4872	switch (adapter->hw.mac.type) {
4873	case e1000_82573:
4874	case e1000_82583:
4875		adapter->has_amt = TRUE;
4876		/* Falls thru */
4877	case e1000_82571:
4878	case e1000_82572:
4879	case e1000_80003es2lan:
4880		if (adapter->hw.bus.func == 1) {
4881			e1000_read_nvm(&adapter->hw,
4882			    NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data);
4883			break;
4884		} else
4885			e1000_read_nvm(&adapter->hw,
4886			    NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
4887		break;
4888	case e1000_ich8lan:
4889	case e1000_ich9lan:
4890	case e1000_ich10lan:
4891	case e1000_pchlan:
4892	case e1000_pch2lan:
4893		apme_mask = E1000_WUC_APME;
4894		adapter->has_amt = TRUE;
4895		eeprom_data = E1000_READ_REG(&adapter->hw, E1000_WUC);
4896		break;
4897	default:
4898		e1000_read_nvm(&adapter->hw,
4899		    NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
4900		break;
4901	}
4902	if (eeprom_data & apme_mask)
4903		adapter->wol = (E1000_WUFC_MAG | E1000_WUFC_MC);
4904	/*
4905         * We have the eeprom settings, now apply the special cases
4906         * where the eeprom may be wrong or the board won't support
4907         * wake on lan on a particular port
4908	 */
4909	device_id = pci_get_device(dev);
4910        switch (device_id) {
4911	case E1000_DEV_ID_82571EB_FIBER:
4912		/* Wake events only supported on port A for dual fiber
4913		 * regardless of eeprom setting */
4914		if (E1000_READ_REG(&adapter->hw, E1000_STATUS) &
4915		    E1000_STATUS_FUNC_1)
4916			adapter->wol = 0;
4917		break;
4918	case E1000_DEV_ID_82571EB_QUAD_COPPER:
4919	case E1000_DEV_ID_82571EB_QUAD_FIBER:
4920	case E1000_DEV_ID_82571EB_QUAD_COPPER_LP:
4921                /* if quad port adapter, disable WoL on all but port A */
4922		if (global_quad_port_a != 0)
4923			adapter->wol = 0;
4924		/* Reset for multiple quad port adapters */
4925		if (++global_quad_port_a == 4)
4926			global_quad_port_a = 0;
4927                break;
4928	}
4929	return;
4930}
4931
4932
4933/*
4934 * Enable PCI Wake On Lan capability
4935 */
4936static void
4937em_enable_wakeup(device_t dev)
4938{
4939	struct adapter	*adapter = device_get_softc(dev);
4940	struct ifnet	*ifp = adapter->ifp;
4941	u32		pmc, ctrl, ctrl_ext, rctl;
4942	u16     	status;
4943
4944	if (pci_find_cap(dev, PCIY_PMG, &pmc) != 0)
4945		return;
4946
4947	/* Advertise the wakeup capability */
4948	ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
4949	ctrl |= (E1000_CTRL_SWDPIN2 | E1000_CTRL_SWDPIN3);
4950	E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
4951	E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
4952
4953	if ((adapter->hw.mac.type == e1000_ich8lan) ||
4954	    (adapter->hw.mac.type == e1000_pchlan) ||
4955	    (adapter->hw.mac.type == e1000_ich9lan) ||
4956	    (adapter->hw.mac.type == e1000_ich10lan))
4957		e1000_suspend_workarounds_ich8lan(&adapter->hw);
4958
4959	/* Keep the laser running on Fiber adapters */
4960	if (adapter->hw.phy.media_type == e1000_media_type_fiber ||
4961	    adapter->hw.phy.media_type == e1000_media_type_internal_serdes) {
4962		ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4963		ctrl_ext |= E1000_CTRL_EXT_SDP3_DATA;
4964		E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, ctrl_ext);
4965	}
4966
4967	/*
4968	** Determine type of Wakeup: note that wol
4969	** is set with all bits on by default.
4970	*/
4971	if ((ifp->if_capenable & IFCAP_WOL_MAGIC) == 0)
4972		adapter->wol &= ~E1000_WUFC_MAG;
4973
4974	if ((ifp->if_capenable & IFCAP_WOL_MCAST) == 0)
4975		adapter->wol &= ~E1000_WUFC_MC;
4976	else {
4977		rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
4978		rctl |= E1000_RCTL_MPE;
4979		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, rctl);
4980	}
4981
4982	if ((adapter->hw.mac.type == e1000_pchlan) ||
4983	    (adapter->hw.mac.type == e1000_pch2lan)) {
4984		if (em_enable_phy_wakeup(adapter))
4985			return;
4986	} else {
4987		E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
4988		E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
4989	}
4990
4991	if (adapter->hw.phy.type == e1000_phy_igp_3)
4992		e1000_igp3_phy_powerdown_workaround_ich8lan(&adapter->hw);
4993
4994        /* Request PME */
4995        status = pci_read_config(dev, pmc + PCIR_POWER_STATUS, 2);
4996	status &= ~(PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE);
4997	if (ifp->if_capenable & IFCAP_WOL)
4998		status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE;
4999        pci_write_config(dev, pmc + PCIR_POWER_STATUS, status, 2);
5000
5001	return;
5002}
5003
5004/*
5005** WOL in the newer chipset interfaces (pchlan)
5006** requires things to be copied into the phy
5007*/
5008static int
5009em_enable_phy_wakeup(struct adapter *adapter)
5010{
5011	struct e1000_hw *hw = &adapter->hw;
5012	u32 mreg, ret = 0;
5013	u16 preg;
5014
5015	/* copy MAC RARs to PHY RARs */
5016	e1000_copy_rx_addrs_to_phy_ich8lan(hw);
5017
5018	/* copy MAC MTA to PHY MTA */
5019	for (int i = 0; i < adapter->hw.mac.mta_reg_count; i++) {
5020		mreg = E1000_READ_REG_ARRAY(hw, E1000_MTA, i);
5021		e1000_write_phy_reg(hw, BM_MTA(i), (u16)(mreg & 0xFFFF));
5022		e1000_write_phy_reg(hw, BM_MTA(i) + 1,
5023		    (u16)((mreg >> 16) & 0xFFFF));
5024	}
5025
5026	/* configure PHY Rx Control register */
5027	e1000_read_phy_reg(&adapter->hw, BM_RCTL, &preg);
5028	mreg = E1000_READ_REG(hw, E1000_RCTL);
5029	if (mreg & E1000_RCTL_UPE)
5030		preg |= BM_RCTL_UPE;
5031	if (mreg & E1000_RCTL_MPE)
5032		preg |= BM_RCTL_MPE;
5033	preg &= ~(BM_RCTL_MO_MASK);
5034	if (mreg & E1000_RCTL_MO_3)
5035		preg |= (((mreg & E1000_RCTL_MO_3) >> E1000_RCTL_MO_SHIFT)
5036				<< BM_RCTL_MO_SHIFT);
5037	if (mreg & E1000_RCTL_BAM)
5038		preg |= BM_RCTL_BAM;
5039	if (mreg & E1000_RCTL_PMCF)
5040		preg |= BM_RCTL_PMCF;
5041	mreg = E1000_READ_REG(hw, E1000_CTRL);
5042	if (mreg & E1000_CTRL_RFCE)
5043		preg |= BM_RCTL_RFCE;
5044	e1000_write_phy_reg(&adapter->hw, BM_RCTL, preg);
5045
5046	/* enable PHY wakeup in MAC register */
5047	E1000_WRITE_REG(hw, E1000_WUC,
5048	    E1000_WUC_PHY_WAKE | E1000_WUC_PME_EN);
5049	E1000_WRITE_REG(hw, E1000_WUFC, adapter->wol);
5050
5051	/* configure and enable PHY wakeup in PHY registers */
5052	e1000_write_phy_reg(&adapter->hw, BM_WUFC, adapter->wol);
5053	e1000_write_phy_reg(&adapter->hw, BM_WUC, E1000_WUC_PME_EN);
5054
5055	/* activate PHY wakeup */
5056	ret = hw->phy.ops.acquire(hw);
5057	if (ret) {
5058		printf("Could not acquire PHY\n");
5059		return ret;
5060	}
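	/*
	 * Select PHY page 769 (the wakeup-control page) through the raw
	 * MDIC interface, then set the host-wakeup enable bits there.
	 */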
5061	e1000_write_phy_reg_mdic(hw, IGP01E1000_PHY_PAGE_SELECT,
5062	                         (BM_WUC_ENABLE_PAGE << IGP_PAGE_SHIFT));
5063	ret = e1000_read_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, &preg);
5064	if (ret) {
5065		printf("Could not read PHY page 769\n");
5066		goto out;
5067	}
5068	preg |= BM_WUC_ENABLE_BIT | BM_WUC_HOST_WU_BIT;
5069	ret = e1000_write_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, preg);
5070	if (ret)
5071		printf("Could not set PHY Host Wakeup bit\n");
5072out:
5073	hw->phy.ops.release(hw);
5074
5075	return ret;
5076}
5077
5078static void
5079em_led_func(void *arg, int onoff)
5080{
5081	struct adapter	*adapter = arg;
5082
5083	EM_CORE_LOCK(adapter);
5084	if (onoff) {
5085		e1000_setup_led(&adapter->hw);
5086		e1000_led_on(&adapter->hw);
5087	} else {
5088		e1000_led_off(&adapter->hw);
5089		e1000_cleanup_led(&adapter->hw);
5090	}
5091	EM_CORE_UNLOCK(adapter);
5092}
5093
5094/*
5095** Disable the L0S and L1 LINK states
5096*/
5097static void
5098em_disable_aspm(struct adapter *adapter)
5099{
5100	int		base, reg;
5101	u16		link_cap, link_ctrl;
5102	device_t	dev = adapter->dev;
5103
5104	switch (adapter->hw.mac.type) {
5105		case e1000_82573:
5106		case e1000_82574:
5107		case e1000_82583:
5108			break;
5109		default:
5110			return;
5111	}
5112	if (pci_find_cap(dev, PCIY_EXPRESS, &base) != 0)
5113		return;
5114	reg = base + PCIR_EXPRESS_LINK_CAP;
5115	link_cap = pci_read_config(dev, reg, 2);
5116	if ((link_cap & PCIM_LINK_CAP_ASPM) == 0)
5117		return;
5118	reg = base + PCIR_EXPRESS_LINK_CTL;
5119	link_ctrl = pci_read_config(dev, reg, 2);
5120	link_ctrl &= 0xFFFC; /* clear bits 0 and 1, the ASPM L0s/L1 enables */
5121	pci_write_config(dev, reg, link_ctrl, 2);
5122	return;
5123}
5124
5125/**********************************************************************
5126 *
5127 *  Update the board statistics counters.
5128 *
5129 **********************************************************************/
5130static void
5131em_update_stats_counters(struct adapter *adapter)
5132{
5133	struct ifnet   *ifp;
5134
5135	if (adapter->hw.phy.media_type == e1000_media_type_copper ||
5136	   (E1000_READ_REG(&adapter->hw, E1000_STATUS) & E1000_STATUS_LU)) {
5137		adapter->stats.symerrs += E1000_READ_REG(&adapter->hw, E1000_SYMERRS);
5138		adapter->stats.sec += E1000_READ_REG(&adapter->hw, E1000_SEC);
5139	}
5140	adapter->stats.crcerrs += E1000_READ_REG(&adapter->hw, E1000_CRCERRS);
5141	adapter->stats.mpc += E1000_READ_REG(&adapter->hw, E1000_MPC);
5142	adapter->stats.scc += E1000_READ_REG(&adapter->hw, E1000_SCC);
5143	adapter->stats.ecol += E1000_READ_REG(&adapter->hw, E1000_ECOL);
5144
5145	adapter->stats.mcc += E1000_READ_REG(&adapter->hw, E1000_MCC);
5146	adapter->stats.latecol += E1000_READ_REG(&adapter->hw, E1000_LATECOL);
5147	adapter->stats.colc += E1000_READ_REG(&adapter->hw, E1000_COLC);
5148	adapter->stats.dc += E1000_READ_REG(&adapter->hw, E1000_DC);
5149	adapter->stats.rlec += E1000_READ_REG(&adapter->hw, E1000_RLEC);
5150	adapter->stats.xonrxc += E1000_READ_REG(&adapter->hw, E1000_XONRXC);
5151	adapter->stats.xontxc += E1000_READ_REG(&adapter->hw, E1000_XONTXC);
5152	/*
5153	** For watchdog management we need to know if we have been
5154	** paused during the last interval, so capture that here.
5155	*/
5156	adapter->pause_frames = E1000_READ_REG(&adapter->hw, E1000_XOFFRXC);
5157	adapter->stats.xoffrxc += adapter->pause_frames;
5158	adapter->stats.xofftxc += E1000_READ_REG(&adapter->hw, E1000_XOFFTXC);
5159	adapter->stats.fcruc += E1000_READ_REG(&adapter->hw, E1000_FCRUC);
5160	adapter->stats.prc64 += E1000_READ_REG(&adapter->hw, E1000_PRC64);
5161	adapter->stats.prc127 += E1000_READ_REG(&adapter->hw, E1000_PRC127);
5162	adapter->stats.prc255 += E1000_READ_REG(&adapter->hw, E1000_PRC255);
5163	adapter->stats.prc511 += E1000_READ_REG(&adapter->hw, E1000_PRC511);
5164	adapter->stats.prc1023 += E1000_READ_REG(&adapter->hw, E1000_PRC1023);
5165	adapter->stats.prc1522 += E1000_READ_REG(&adapter->hw, E1000_PRC1522);
5166	adapter->stats.gprc += E1000_READ_REG(&adapter->hw, E1000_GPRC);
5167	adapter->stats.bprc += E1000_READ_REG(&adapter->hw, E1000_BPRC);
5168	adapter->stats.mprc += E1000_READ_REG(&adapter->hw, E1000_MPRC);
5169	adapter->stats.gptc += E1000_READ_REG(&adapter->hw, E1000_GPTC);
5170
5171	/* For the 64-bit byte counters the low dword must be read first. */
5172	/* Both registers clear on the read of the high dword */
5173
5174	adapter->stats.gorc += E1000_READ_REG(&adapter->hw, E1000_GORCL) +
5175	    ((u64)E1000_READ_REG(&adapter->hw, E1000_GORCH) << 32);
5176	adapter->stats.gotc += E1000_READ_REG(&adapter->hw, E1000_GOTCL) +
5177	    ((u64)E1000_READ_REG(&adapter->hw, E1000_GOTCH) << 32);
5178
5179	adapter->stats.rnbc += E1000_READ_REG(&adapter->hw, E1000_RNBC);
5180	adapter->stats.ruc += E1000_READ_REG(&adapter->hw, E1000_RUC);
5181	adapter->stats.rfc += E1000_READ_REG(&adapter->hw, E1000_RFC);
5182	adapter->stats.roc += E1000_READ_REG(&adapter->hw, E1000_ROC);
5183	adapter->stats.rjc += E1000_READ_REG(&adapter->hw, E1000_RJC);
5184
5185	adapter->stats.tor += E1000_READ_REG(&adapter->hw, E1000_TORH);
5186	adapter->stats.tot += E1000_READ_REG(&adapter->hw, E1000_TOTH);
5187
	adapter->stats.tpr += E1000_READ_REG(&adapter->hw, E1000_TPR);
	adapter->stats.tpt += E1000_READ_REG(&adapter->hw, E1000_TPT);
	adapter->stats.ptc64 += E1000_READ_REG(&adapter->hw, E1000_PTC64);
	adapter->stats.ptc127 += E1000_READ_REG(&adapter->hw, E1000_PTC127);
	adapter->stats.ptc255 += E1000_READ_REG(&adapter->hw, E1000_PTC255);
	adapter->stats.ptc511 += E1000_READ_REG(&adapter->hw, E1000_PTC511);
	adapter->stats.ptc1023 += E1000_READ_REG(&adapter->hw, E1000_PTC1023);
	adapter->stats.ptc1522 += E1000_READ_REG(&adapter->hw, E1000_PTC1522);
	adapter->stats.mptc += E1000_READ_REG(&adapter->hw, E1000_MPTC);
	adapter->stats.bptc += E1000_READ_REG(&adapter->hw, E1000_BPTC);

	/* Interrupt Counts */

	adapter->stats.iac += E1000_READ_REG(&adapter->hw, E1000_IAC);
	adapter->stats.icrxptc += E1000_READ_REG(&adapter->hw, E1000_ICRXPTC);
	adapter->stats.icrxatc += E1000_READ_REG(&adapter->hw, E1000_ICRXATC);
	adapter->stats.ictxptc += E1000_READ_REG(&adapter->hw, E1000_ICTXPTC);
	adapter->stats.ictxatc += E1000_READ_REG(&adapter->hw, E1000_ICTXATC);
	adapter->stats.ictxqec += E1000_READ_REG(&adapter->hw, E1000_ICTXQEC);
	adapter->stats.ictxqmtc += E1000_READ_REG(&adapter->hw, E1000_ICTXQMTC);
	adapter->stats.icrxdmtc += E1000_READ_REG(&adapter->hw, E1000_ICRXDMTC);
	adapter->stats.icrxoc += E1000_READ_REG(&adapter->hw, E1000_ICRXOC);

	if (adapter->hw.mac.type >= e1000_82543) {
		adapter->stats.algnerrc +=
		    E1000_READ_REG(&adapter->hw, E1000_ALGNERRC);
		adapter->stats.rxerrc +=
		    E1000_READ_REG(&adapter->hw, E1000_RXERRC);
		adapter->stats.tncrs +=
		    E1000_READ_REG(&adapter->hw, E1000_TNCRS);
		adapter->stats.cexterr +=
		    E1000_READ_REG(&adapter->hw, E1000_CEXTERR);
		adapter->stats.tsctc +=
		    E1000_READ_REG(&adapter->hw, E1000_TSCTC);
		adapter->stats.tsctfc +=
		    E1000_READ_REG(&adapter->hw, E1000_TSCTFC);
	}
	ifp = adapter->ifp;

	ifp->if_collisions = adapter->stats.colc;

	/* Rx Errors */
	ifp->if_ierrors = adapter->dropped_pkts + adapter->stats.rxerrc +
	    adapter->stats.crcerrs + adapter->stats.algnerrc +
	    adapter->stats.ruc + adapter->stats.roc +
	    adapter->stats.mpc + adapter->stats.cexterr;

	/* Tx Errors */
	ifp->if_oerrors = adapter->stats.ecol +
	    adapter->stats.latecol + adapter->watchdog_events;
}

/* Export a single 32-bit register via a read-only sysctl. */
static int
em_sysctl_reg_handler(SYSCTL_HANDLER_ARGS)
{
	struct adapter *adapter;
	u_int val;

	adapter = oidp->oid_arg1;
	val = E1000_READ_REG(&adapter->hw, oidp->oid_arg2);
	return (sysctl_handle_int(oidp, &val, 0, req));
}
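
/*
** Example: em_add_hw_stats() below attaches this handler to OIDs such
** as device_control and rx_control, so a register like E1000_CTRL can
** be read from userland with (unit 0 shown):
**
**	sysctl dev.em.0.device_control
*/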

/*
 * Add sysctl variables, one per statistic, to the system.
 */
static void
em_add_hw_stats(struct adapter *adapter)
{
	device_t dev = adapter->dev;

	struct tx_ring *txr = adapter->tx_rings;
	struct rx_ring *rxr = adapter->rx_rings;

	struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev);
	struct sysctl_oid *tree = device_get_sysctl_tree(dev);
	struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree);
	struct e1000_hw_stats *stats = &adapter->stats;

	struct sysctl_oid *stat_node, *queue_node, *int_node;
	struct sysctl_oid_list *stat_list, *queue_list, *int_list;

#define QUEUE_NAME_LEN 32
	char namebuf[QUEUE_NAME_LEN];

	/* Driver Statistics */
	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "link_irq",
			CTLFLAG_RD, &adapter->link_irq,
			"Link MSI-X IRQ Handled");
	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "mbuf_alloc_fail",
			CTLFLAG_RD, &adapter->mbuf_alloc_failed,
			"Std mbuf failed");
	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "cluster_alloc_fail",
			CTLFLAG_RD, &adapter->mbuf_cluster_failed,
			"Std mbuf cluster failed");
	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "dropped",
			CTLFLAG_RD, &adapter->dropped_pkts,
			"Driver dropped packets");
	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_dma_fail",
			CTLFLAG_RD, &adapter->no_tx_dma_setup,
			"Driver tx dma failure in xmit");
	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_overruns",
			CTLFLAG_RD, &adapter->rx_overruns,
			"RX overruns");
	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "watchdog_timeouts",
			CTLFLAG_RD, &adapter->watchdog_events,
			"Watchdog timeouts");

	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "device_control",
			CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_CTRL,
			em_sysctl_reg_handler, "IU",
			"Device Control Register");
	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rx_control",
			CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_RCTL,
			em_sysctl_reg_handler, "IU",
			"Receiver Control Register");
	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_high_water",
			CTLFLAG_RD, &adapter->hw.fc.high_water, 0,
			"Flow Control High Watermark");
	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_low_water",
			CTLFLAG_RD, &adapter->hw.fc.low_water, 0,
			"Flow Control Low Watermark");

	for (int i = 0; i < adapter->num_queues; i++, rxr++, txr++) {
		snprintf(namebuf, QUEUE_NAME_LEN, "queue%d", i);
		queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
					    CTLFLAG_RD, NULL, "Queue Name");
		queue_list = SYSCTL_CHILDREN(queue_node);

		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_head",
				CTLTYPE_UINT | CTLFLAG_RD, adapter,
				E1000_TDH(txr->me),
				em_sysctl_reg_handler, "IU",
				"Transmit Descriptor Head");
		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_tail",
				CTLTYPE_UINT | CTLFLAG_RD, adapter,
				E1000_TDT(txr->me),
				em_sysctl_reg_handler, "IU",
				"Transmit Descriptor Tail");
		SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "tx_irq",
				CTLFLAG_RD, &txr->tx_irq,
				"Queue MSI-X Transmit Interrupts");
		SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "no_desc_avail",
				CTLFLAG_RD, &txr->no_desc_avail,
				"Queue No Descriptor Available");

		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_head",
				CTLTYPE_UINT | CTLFLAG_RD, adapter,
				E1000_RDH(rxr->me),
				em_sysctl_reg_handler, "IU",
				"Receive Descriptor Head");
		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_tail",
				CTLTYPE_UINT | CTLFLAG_RD, adapter,
				E1000_RDT(rxr->me),
				em_sysctl_reg_handler, "IU",
				"Receive Descriptor Tail");
		SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "rx_irq",
				CTLFLAG_RD, &rxr->rx_irq,
				"Queue MSI-X Receive Interrupts");
	}

	/* MAC stats get their own sub node */

	stat_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "mac_stats",
				    CTLFLAG_RD, NULL, "Statistics");
	stat_list = SYSCTL_CHILDREN(stat_node);

	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "excess_coll",
			CTLFLAG_RD, &stats->ecol,
			"Excessive collisions");
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "single_coll",
			CTLFLAG_RD, &stats->scc,
			"Single collisions");
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "multiple_coll",
			CTLFLAG_RD, &stats->mcc,
			"Multiple collisions");
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "late_coll",
			CTLFLAG_RD, &stats->latecol,
			"Late collisions");
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "collision_count",
			CTLFLAG_RD, &stats->colc,
			"Collision Count");
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "symbol_errors",
			CTLFLAG_RD, &stats->symerrs,
			"Symbol Errors");
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "sequence_errors",
			CTLFLAG_RD, &stats->sec,
			"Sequence Errors");
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "defer_count",
			CTLFLAG_RD, &stats->dc,
			"Defer Count");
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "missed_packets",
			CTLFLAG_RD, &stats->mpc,
			"Missed Packets");
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_no_buff",
			CTLFLAG_RD, &stats->rnbc,
			"Receive No Buffers");
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_undersize",
			CTLFLAG_RD, &stats->ruc,
			"Receive Undersize");
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_fragmented",
			CTLFLAG_RD, &stats->rfc,
			"Fragmented Packets Received");
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_oversize",
			CTLFLAG_RD, &stats->roc,
			"Oversized Packets Received");
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_jabber",
			CTLFLAG_RD, &stats->rjc,
			"Received Jabber");
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_errs",
			CTLFLAG_RD, &stats->rxerrc,
			"Receive Errors");
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "crc_errs",
			CTLFLAG_RD, &stats->crcerrs,
			"CRC errors");
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "alignment_errs",
			CTLFLAG_RD, &stats->algnerrc,
			"Alignment Errors");
	/* On 82575 these are collision counts */
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "coll_ext_errs",
			CTLFLAG_RD, &stats->cexterr,
			"Collision/Carrier extension errors");
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_recvd",
			CTLFLAG_RD, &stats->xonrxc,
			"XON Received");
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_txd",
			CTLFLAG_RD, &stats->xontxc,
			"XON Transmitted");
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_recvd",
			CTLFLAG_RD, &stats->xoffrxc,
			"XOFF Received");
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_txd",
			CTLFLAG_RD, &stats->xofftxc,
			"XOFF Transmitted");

	/* Packet Reception Stats */
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_recvd",
			CTLFLAG_RD, &stats->tpr,
			"Total Packets Received");
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd",
			CTLFLAG_RD, &stats->gprc,
			"Good Packets Received");
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_recvd",
			CTLFLAG_RD, &stats->bprc,
			"Broadcast Packets Received");
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd",
			CTLFLAG_RD, &stats->mprc,
			"Multicast Packets Received");
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_64",
			CTLFLAG_RD, &stats->prc64,
			"64 byte frames received");
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_65_127",
			CTLFLAG_RD, &stats->prc127,
			"65-127 byte frames received");
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_128_255",
			CTLFLAG_RD, &stats->prc255,
			"128-255 byte frames received");
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_256_511",
			CTLFLAG_RD, &stats->prc511,
			"256-511 byte frames received");
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_512_1023",
			CTLFLAG_RD, &stats->prc1023,
			"512-1023 byte frames received");
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_1024_1522",
			CTLFLAG_RD, &stats->prc1522,
			"1024-1522 byte frames received");
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd",
			CTLFLAG_RD, &stats->gorc,
			"Good Octets Received");

	/* Packet Transmission Stats */
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_txd",
			CTLFLAG_RD, &stats->gotc,
			"Good Octets Transmitted");
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_txd",
			CTLFLAG_RD, &stats->tpt,
			"Total Packets Transmitted");
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
			CTLFLAG_RD, &stats->gptc,
			"Good Packets Transmitted");
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_txd",
			CTLFLAG_RD, &stats->bptc,
			"Broadcast Packets Transmitted");
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_txd",
			CTLFLAG_RD, &stats->mptc,
			"Multicast Packets Transmitted");
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_64",
			CTLFLAG_RD, &stats->ptc64,
			"64 byte frames transmitted");
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_65_127",
			CTLFLAG_RD, &stats->ptc127,
			"65-127 byte frames transmitted");
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_128_255",
			CTLFLAG_RD, &stats->ptc255,
			"128-255 byte frames transmitted");
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_256_511",
			CTLFLAG_RD, &stats->ptc511,
			"256-511 byte frames transmitted");
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_512_1023",
			CTLFLAG_RD, &stats->ptc1023,
			"512-1023 byte frames transmitted");
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_1024_1522",
			CTLFLAG_RD, &stats->ptc1522,
			"1024-1522 byte frames transmitted");
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tso_txd",
			CTLFLAG_RD, &stats->tsctc,
			"TSO Contexts Transmitted");
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tso_ctx_fail",
			CTLFLAG_RD, &stats->tsctfc,
			"TSO Contexts Failed");

	/* Interrupt Stats */

	int_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "interrupts",
				    CTLFLAG_RD, NULL, "Interrupt Statistics");
	int_list = SYSCTL_CHILDREN(int_node);

	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "asserts",
			CTLFLAG_RD, &stats->iac,
			"Interrupt Assertion Count");

	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_pkt_timer",
			CTLFLAG_RD, &stats->icrxptc,
			"Interrupt Cause Rx Pkt Timer Expire Count");

	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_abs_timer",
			CTLFLAG_RD, &stats->icrxatc,
			"Interrupt Cause Rx Abs Timer Expire Count");

	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_pkt_timer",
			CTLFLAG_RD, &stats->ictxptc,
			"Interrupt Cause Tx Pkt Timer Expire Count");

	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_abs_timer",
			CTLFLAG_RD, &stats->ictxatc,
			"Interrupt Cause Tx Abs Timer Expire Count");

	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_queue_empty",
			CTLFLAG_RD, &stats->ictxqec,
			"Interrupt Cause Tx Queue Empty Count");

	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_queue_min_thresh",
			CTLFLAG_RD, &stats->ictxqmtc,
			"Interrupt Cause Tx Queue Min Thresh Count");

	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_desc_min_thresh",
			CTLFLAG_RD, &stats->icrxdmtc,
			"Interrupt Cause Rx Desc Min Thresh Count");

	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_overrun",
			CTLFLAG_RD, &stats->icrxoc,
			"Interrupt Cause Receiver Overrun Count");
}
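
/*
** Everything registered above lives under the device's sysctl tree,
** so it can be inspected from userland, e.g. for unit 0:
**
**	sysctl dev.em.0.mac_stats.good_pkts_recvd
**	sysctl dev.em.0.queue0.txd_head
*/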

/**********************************************************************
 *
 *  This routine provides a way to dump out the adapter eeprom,
 *  often a useful debug/service tool. It dumps only the first
 *  32 words, since everything that matters lives in that range.
 *
 **********************************************************************/
static int
em_sysctl_nvm_info(SYSCTL_HANDLER_ARGS)
{
	struct adapter *adapter = (struct adapter *)arg1;
	int error;
	int result;

	result = -1;
	error = sysctl_handle_int(oidp, &result, 0, req);

	if (error || !req->newptr)
		return (error);

	/*
	 * This value will cause a hex dump of the
	 * first 32 16-bit words of the EEPROM to
	 * the screen.
	 */
	if (result == 1)
		em_print_nvm_info(adapter);

	return (error);
}

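/*
** Dump the first 32 16-bit words of the EEPROM/NVM to the console,
** eight words per row, each row labelled with its byte offset.
*/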
static void
em_print_nvm_info(struct adapter *adapter)
{
	u16	eeprom_data;
	int	i, j, row = 0;

	/* It's a bit crude, but it gets the job done */
	printf("\nInterface EEPROM Dump:\n");
	printf("Offset\n0x0000  ");
	for (i = 0, j = 0; i < 32; i++, j++) {
		if (j == 8) { /* Make the offset block */
			j = 0; ++row;
			printf("\n0x00%x0  ", row);
		}
		e1000_read_nvm(&adapter->hw, i, 1, &eeprom_data);
		printf("%04x ", eeprom_data);
	}
	printf("\n");
}

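/*
** Sysctl handler for the interrupt-delay knobs: the user-visible value
** is in microseconds, which is converted to the hardware's delay ticks
** (1.024 usec units) and written into the register named by
** info->offset.
*/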
static int
em_sysctl_int_delay(SYSCTL_HANDLER_ARGS)
{
	struct em_int_delay_info *info;
	struct adapter *adapter;
	u32 regval;
	int error, usecs, ticks;

	info = (struct em_int_delay_info *)arg1;
	usecs = info->value;
	error = sysctl_handle_int(oidp, &usecs, 0, req);
	if (error != 0 || req->newptr == NULL)
		return (error);
	if (usecs < 0 || usecs > EM_TICKS_TO_USECS(65535))
		return (EINVAL);
	info->value = usecs;
	ticks = EM_USECS_TO_TICKS(usecs);

	adapter = info->adapter;

	EM_CORE_LOCK(adapter);
	regval = E1000_READ_OFFSET(&adapter->hw, info->offset);
	regval = (regval & ~0xffff) | (ticks & 0xffff);
	/* Handle a few special cases. */
	switch (info->offset) {
	case E1000_RDTR:
		break;
	case E1000_TIDV:
		if (ticks == 0) {
			adapter->txd_cmd &= ~E1000_TXD_CMD_IDE;
			/* Don't write 0 into the TIDV register. */
			regval++;
		} else
			adapter->txd_cmd |= E1000_TXD_CMD_IDE;
		break;
	}
	E1000_WRITE_OFFSET(&adapter->hw, info->offset, regval);
	EM_CORE_UNLOCK(adapter);
	return (0);
}

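/*
** Attach one interrupt-delay sysctl: stash the adapter, register
** offset and initial value in 'info', then register
** em_sysctl_int_delay() as its handler.
*/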
static void
em_add_int_delay_sysctl(struct adapter *adapter, const char *name,
	const char *description, struct em_int_delay_info *info,
	int offset, int value)
{
	info->adapter = adapter;
	info->offset = offset;
	info->value = value;
	SYSCTL_ADD_PROC(device_get_sysctl_ctx(adapter->dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
	    OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW,
	    info, 0, em_sysctl_int_delay, "I", description);
}

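/*
** Create a simple read/write sysctl backed directly by the integer
** that 'limit' points to, seeded with the given default value.
*/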
static void
em_set_sysctl_value(struct adapter *adapter, const char *name,
	const char *description, int *limit, int value)
{
	*limit = value;
	SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
	    OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW, limit, value, description);
}

/*
** Set flow control using sysctl:
** Flow control values:
**      0 - off
**      1 - rx pause
**      2 - tx pause
**      3 - full
*/
static int
em_set_flowcntl(SYSCTL_HANDLER_ARGS)
{
	int		error;
	static int	input = 3; /* default is full */
	struct adapter	*adapter = (struct adapter *) arg1;

	/*
	** Note: 'input' is static, so the value reported here is the
	** last one set through this handler, shared by all units.
	*/
	error = sysctl_handle_int(oidp, &input, 0, req);

	if ((error) || (req->newptr == NULL))
		return (error);

	if (input == adapter->fc) /* no change? */
		return (error);

	switch (input) {
	case e1000_fc_rx_pause:
	case e1000_fc_tx_pause:
	case e1000_fc_full:
	case e1000_fc_none:
		adapter->hw.fc.requested_mode = input;
		adapter->fc = input;
		break;
	default:
		/* Do nothing */
		return (error);
	}

	adapter->hw.fc.current_mode = adapter->hw.fc.requested_mode;
	e1000_force_mac_fc(&adapter->hw);
	return (error);
}
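
/*
** Example usage, assuming em_attach() registers this handler under
** the 'fc' OID as is conventional for this driver (unit 0 shown):
**
**	sysctl dev.em.0.fc=3	# request full flow control
*/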

/*
** Manage Energy Efficient Ethernet:
** Control values:
**	0 - EEE enabled
**	1 - EEE disabled
*/
static int
em_sysctl_eee(SYSCTL_HANDLER_ARGS)
{
	struct adapter *adapter = (struct adapter *) arg1;
	int		error, value;

	value = adapter->hw.dev_spec.ich8lan.eee_disable;
	error = sysctl_handle_int(oidp, &value, 0, req);
	if (error || req->newptr == NULL)
		return (error);
	/* Changing the setting requires reinitializing the interface. */
	EM_CORE_LOCK(adapter);
	adapter->hw.dev_spec.ich8lan.eee_disable = (value != 0);
	em_init_locked(adapter);
	EM_CORE_UNLOCK(adapter);
	return (0);
}

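/*
** Sysctl hook for em_print_debug_info(): writing a 1 to the OID dumps
** the current interface and ring state to the console.
*/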
static int
em_sysctl_debug_info(SYSCTL_HANDLER_ARGS)
{
	struct adapter *adapter;
	int error;
	int result;

	result = -1;
	error = sysctl_handle_int(oidp, &result, 0, req);

	if (error || !req->newptr)
		return (error);

	if (result == 1) {
		adapter = (struct adapter *)arg1;
		em_print_debug_info(adapter);
	}

	return (error);
}

/*
** This routine is meant to be fluid, add whatever is
** needed for debugging a problem.  -jfv
*/
static void
em_print_debug_info(struct adapter *adapter)
{
	device_t dev = adapter->dev;
	struct tx_ring *txr = adapter->tx_rings;
	struct rx_ring *rxr = adapter->rx_rings;

	/* Note: only the first TX/RX ring is reported here. */
	if (adapter->ifp->if_drv_flags & IFF_DRV_RUNNING)
		printf("Interface is RUNNING ");
	else
		printf("Interface is NOT RUNNING ");

	if (adapter->ifp->if_drv_flags & IFF_DRV_OACTIVE)
		printf("and INACTIVE\n");
	else
		printf("and ACTIVE\n");

	device_printf(dev, "hw tdh = %d, hw tdt = %d\n",
	    E1000_READ_REG(&adapter->hw, E1000_TDH(0)),
	    E1000_READ_REG(&adapter->hw, E1000_TDT(0)));
	device_printf(dev, "hw rdh = %d, hw rdt = %d\n",
	    E1000_READ_REG(&adapter->hw, E1000_RDH(0)),
	    E1000_READ_REG(&adapter->hw, E1000_RDT(0)));
	device_printf(dev, "Tx Queue Status = %d\n", txr->queue_status);
	device_printf(dev, "TX descriptors avail = %d\n",
	    txr->tx_avail);
	device_printf(dev, "TX descriptor avail failures = %lu\n",
	    txr->no_desc_avail);
	device_printf(dev, "RX discarded packets = %lu\n",
	    rxr->rx_discarded);
	device_printf(dev, "RX Next to Check = %d\n", rxr->next_to_check);
	device_printf(dev, "RX Next to Refresh = %d\n", rxr->next_to_refresh);
}

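/*
** Example usage, assuming em_attach() registers em_sysctl_debug_info()
** under the 'debug' OID as is conventional for this driver (unit 0):
**
**	sysctl dev.em.0.debug=1
*/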