/******************************************************************************

  Copyright (c) 2001-2011, Intel Corporation
  All rights reserved.

  Redistribution and use in source and binary forms, with or without
  modification, are permitted provided that the following conditions are met:

   1. Redistributions of source code must retain the above copyright notice,
      this list of conditions and the following disclaimer.

   2. Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.

   3. Neither the name of the Intel Corporation nor the names of its
      contributors may be used to endorse or promote products derived from
      this software without specific prior written permission.

  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  POSSIBILITY OF SUCH DAMAGE.

******************************************************************************/
/*$FreeBSD: stable/9/sys/dev/e1000/if_em.c 243440 2012-11-23 11:19:43Z glebius $*/

#ifdef HAVE_KERNEL_OPTION_HEADERS
#include "opt_device_polling.h"
#include "opt_inet.h"
#include "opt_inet6.h"
#endif

#include <sys/param.h>
#include <sys/systm.h>
#if __FreeBSD_version >= 800000
#include <sys/buf_ring.h>
#endif
#include <sys/bus.h>
#include <sys/endian.h>
#include <sys/kernel.h>
#include <sys/kthread.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/module.h>
#include <sys/rman.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/sysctl.h>
#include <sys/taskqueue.h>
#include <sys/eventhandler.h>
#include <machine/bus.h>
#include <machine/resource.h>

#include <net/bpf.h>
#include <net/ethernet.h>
#include <net/if.h>
#include <net/if_arp.h>
#include <net/if_dl.h>
#include <net/if_media.h>

#include <net/if_types.h>
#include <net/if_vlan_var.h>

#include <netinet/in_systm.h>
#include <netinet/in.h>
#include <netinet/if_ether.h>
#include <netinet/ip.h>
#include <netinet/ip6.h>
#include <netinet/tcp.h>
#include <netinet/udp.h>

#include <machine/in_cksum.h>
#include <dev/led/led.h>
#include <dev/pci/pcivar.h>
#include <dev/pci/pcireg.h>

#include "e1000_api.h"
#include "e1000_82571.h"
#include "if_em.h"

/*********************************************************************
 *  Set this to one to display debug statistics
 *********************************************************************/
int	em_display_debug_stats = 0;

/*********************************************************************
 *  Driver version:
 *********************************************************************/
char em_driver_version[] = "7.3.2";

/*********************************************************************
 *  PCI Device ID Table
 *
 *  Used by probe to select devices to load on
 *  Last field stores an index into e1000_strings
 *  Last entry must be all 0s
 *
 *  { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
 *********************************************************************/

static em_vendor_info_t em_vendor_info_array[] =
{
	/* Intel(R) PRO/1000 Network Connection */
	{ 0x8086, E1000_DEV_ID_82571EB_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_SERDES_DUAL,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_SERDES_QUAD,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER_LP,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_FIBER,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571PT_QUAD_COPPER,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82572EI_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82572EI_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82572EI_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82572EI,		PCI_ANY_ID, PCI_ANY_ID, 0},

	{ 0x8086, E1000_DEV_ID_82573E,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82573E_IAMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82573L,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82583V,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_SPT,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_SPT,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_DPT,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_DPT,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IGP_M_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IGP_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IGP_C,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IFE,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IFE_GT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IFE_G,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IGP_M,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_82567V_3,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IGP_M_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IGP_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IGP_C,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IGP_M,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IGP_M_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IFE,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IFE_GT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IFE_G,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_BM,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82574L,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82574LA,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_R_BM_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_R_BM_LF,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_R_BM_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_D_BM_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_D_BM_LF,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_D_BM_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_M_HV_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_M_HV_LC,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_D_HV_DM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_D_HV_DC,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH2_LV_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH2_LV_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
	/* required last entry */
	{ 0, 0, 0, 0, 0}
};

/*********************************************************************
 *  Table of branding strings for all supported NICs.
 *********************************************************************/

static char *em_strings[] = {
	"Intel(R) PRO/1000 Network Connection"
};

/*********************************************************************
 *  Function prototypes
 *********************************************************************/
static int	em_probe(device_t);
static int	em_attach(device_t);
static int	em_detach(device_t);
static int	em_shutdown(device_t);
static int	em_suspend(device_t);
static int	em_resume(device_t);
#ifdef EM_MULTIQUEUE
static int	em_mq_start(struct ifnet *, struct mbuf *);
static int	em_mq_start_locked(struct ifnet *,
		    struct tx_ring *, struct mbuf *);
static void	em_qflush(struct ifnet *);
#else
static void	em_start(struct ifnet *);
static void	em_start_locked(struct ifnet *, struct tx_ring *);
#endif
static int	em_ioctl(struct ifnet *, u_long, caddr_t);
static void	em_init(void *);
static void	em_init_locked(struct adapter *);
static void	em_stop(void *);
static void	em_media_status(struct ifnet *, struct ifmediareq *);
static int	em_media_change(struct ifnet *);
static void	em_identify_hardware(struct adapter *);
static int	em_allocate_pci_resources(struct adapter *);
static int	em_allocate_legacy(struct adapter *);
static int	em_allocate_msix(struct adapter *);
static int	em_allocate_queues(struct adapter *);
static int	em_setup_msix(struct adapter *);
static void	em_free_pci_resources(struct adapter *);
static void	em_local_timer(void *);
static void	em_reset(struct adapter *);
static int	em_setup_interface(device_t, struct adapter *);

static void	em_setup_transmit_structures(struct adapter *);
static void	em_initialize_transmit_unit(struct adapter *);
static int	em_allocate_transmit_buffers(struct tx_ring *);
static void	em_free_transmit_structures(struct adapter *);
static void	em_free_transmit_buffers(struct tx_ring *);

static int	em_setup_receive_structures(struct adapter *);
static int	em_allocate_receive_buffers(struct rx_ring *);
static void	em_initialize_receive_unit(struct adapter *);
static void	em_free_receive_structures(struct adapter *);
static void	em_free_receive_buffers(struct rx_ring *);

static void	em_enable_intr(struct adapter *);
static void	em_disable_intr(struct adapter *);
static void	em_update_stats_counters(struct adapter *);
static void	em_add_hw_stats(struct adapter *adapter);
static void	em_txeof(struct tx_ring *);
static bool	em_rxeof(struct rx_ring *, int, int *);
#ifndef __NO_STRICT_ALIGNMENT
static int	em_fixup_rx(struct rx_ring *);
#endif
static void	em_receive_checksum(struct e1000_rx_desc *, struct mbuf *);
static void	em_transmit_checksum_setup(struct tx_ring *, struct mbuf *, int,
		    struct ip *, u32 *, u32 *);
static void	em_tso_setup(struct tx_ring *, struct mbuf *, int, struct ip *,
		    struct tcphdr *, u32 *, u32 *);
static void	em_set_promisc(struct adapter *);
static void	em_disable_promisc(struct adapter *);
static void	em_set_multi(struct adapter *);
static void	em_update_link_status(struct adapter *);
static void	em_refresh_mbufs(struct rx_ring *, int);
static void	em_register_vlan(void *, struct ifnet *, u16);
static void	em_unregister_vlan(void *, struct ifnet *, u16);
static void	em_setup_vlan_hw_support(struct adapter *);
static int	em_xmit(struct tx_ring *, struct mbuf **);
static int	em_dma_malloc(struct adapter *, bus_size_t,
		    struct em_dma_alloc *, int);
static void	em_dma_free(struct adapter *, struct em_dma_alloc *);
static int	em_sysctl_nvm_info(SYSCTL_HANDLER_ARGS);
static void	em_print_nvm_info(struct adapter *);
static int	em_sysctl_debug_info(SYSCTL_HANDLER_ARGS);
static void	em_print_debug_info(struct adapter *);
static int	em_is_valid_ether_addr(u8 *);
static int	em_sysctl_int_delay(SYSCTL_HANDLER_ARGS);
static void	em_add_int_delay_sysctl(struct adapter *, const char *,
		    const char *, struct em_int_delay_info *, int, int);
/* Management and WOL Support */
static void	em_init_manageability(struct adapter *);
static void	em_release_manageability(struct adapter *);
static void	em_get_hw_control(struct adapter *);
static void	em_release_hw_control(struct adapter *);
static void	em_get_wakeup(device_t);
static void	em_enable_wakeup(device_t);
static int	em_enable_phy_wakeup(struct adapter *);
static void	em_led_func(void *, int);
static void	em_disable_aspm(struct adapter *);

static int	em_irq_fast(void *);

/* MSIX handlers */
static void	em_msix_tx(void *);
static void	em_msix_rx(void *);
static void	em_msix_link(void *);
static void	em_handle_tx(void *context, int pending);
static void	em_handle_rx(void *context, int pending);
static void	em_handle_link(void *context, int pending);

static void	em_set_sysctl_value(struct adapter *, const char *,
		    const char *, int *, int);
static int	em_set_flowcntl(SYSCTL_HANDLER_ARGS);
static int	em_sysctl_eee(SYSCTL_HANDLER_ARGS);

static __inline void em_rx_discard(struct rx_ring *, int);

#ifdef DEVICE_POLLING
static poll_handler_t em_poll;
#endif /* DEVICE_POLLING */

/*********************************************************************
 *  FreeBSD Device Interface Entry Points
 *********************************************************************/

static device_method_t em_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe, em_probe),
	DEVMETHOD(device_attach, em_attach),
	DEVMETHOD(device_detach, em_detach),
	DEVMETHOD(device_shutdown, em_shutdown),
	DEVMETHOD(device_suspend, em_suspend),
	DEVMETHOD(device_resume, em_resume),
	{0, 0}
};

static driver_t em_driver = {
	"em", em_methods, sizeof(struct adapter),
};

devclass_t em_devclass;
DRIVER_MODULE(em, pci, em_driver, em_devclass, 0, 0);
MODULE_DEPEND(em, pci, 1, 1, 1);
MODULE_DEPEND(em, ether, 1, 1, 1);
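
/*
 * DRIVER_MODULE() registers the driver with the pci bus so newbus can
 * probe and attach it; the MODULE_DEPEND() lines record load-order
 * dependencies on the pci and ether modules.
 */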

/*********************************************************************
 *  Tunable default values.
 *********************************************************************/

#define EM_TICKS_TO_USECS(ticks)	((1024 * (ticks) + 500) / 1000)
#define EM_USECS_TO_TICKS(usecs)	((1000 * (usecs) + 512) / 1024)
#define M_TSO_LEN			66

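/*
 * The adapter's interrupt delay registers (TIDV/TADV/RDTR/RADV) count
 * in units of 1.024 usecs, which is what the two macros above convert
 * to and from, with rounding.  A worked example:
 * EM_TICKS_TO_USECS(68) = (1024 * 68 + 500) / 1000 = 70, so a
 * register value of 68 ticks reads back as 70 usecs.
 */
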
/* Allow common code without TSO */
#ifndef CSUM_TSO
#define CSUM_TSO	0
#endif

static SYSCTL_NODE(_hw, OID_AUTO, em, CTLFLAG_RD, 0, "EM driver parameters");

static int em_tx_int_delay_dflt = EM_TICKS_TO_USECS(EM_TIDV);
static int em_rx_int_delay_dflt = EM_TICKS_TO_USECS(EM_RDTR);
TUNABLE_INT("hw.em.tx_int_delay", &em_tx_int_delay_dflt);
TUNABLE_INT("hw.em.rx_int_delay", &em_rx_int_delay_dflt);
SYSCTL_INT(_hw_em, OID_AUTO, tx_int_delay, CTLFLAG_RDTUN, &em_tx_int_delay_dflt,
    0, "Default transmit interrupt delay in usecs");
SYSCTL_INT(_hw_em, OID_AUTO, rx_int_delay, CTLFLAG_RDTUN, &em_rx_int_delay_dflt,
    0, "Default receive interrupt delay in usecs");

static int em_tx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_TADV);
static int em_rx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_RADV);
TUNABLE_INT("hw.em.tx_abs_int_delay", &em_tx_abs_int_delay_dflt);
TUNABLE_INT("hw.em.rx_abs_int_delay", &em_rx_abs_int_delay_dflt);
SYSCTL_INT(_hw_em, OID_AUTO, tx_abs_int_delay, CTLFLAG_RDTUN,
    &em_tx_abs_int_delay_dflt, 0,
    "Default transmit interrupt delay limit in usecs");
SYSCTL_INT(_hw_em, OID_AUTO, rx_abs_int_delay, CTLFLAG_RDTUN,
    &em_rx_abs_int_delay_dflt, 0,
    "Default receive interrupt delay limit in usecs");

static int em_rxd = EM_DEFAULT_RXD;
static int em_txd = EM_DEFAULT_TXD;
TUNABLE_INT("hw.em.rxd", &em_rxd);
TUNABLE_INT("hw.em.txd", &em_txd);
SYSCTL_INT(_hw_em, OID_AUTO, rxd, CTLFLAG_RDTUN, &em_rxd, 0,
    "Number of receive descriptors per queue");
SYSCTL_INT(_hw_em, OID_AUTO, txd, CTLFLAG_RDTUN, &em_txd, 0,
    "Number of transmit descriptors per queue");

static int em_smart_pwr_down = FALSE;
TUNABLE_INT("hw.em.smart_pwr_down", &em_smart_pwr_down);
SYSCTL_INT(_hw_em, OID_AUTO, smart_pwr_down, CTLFLAG_RDTUN, &em_smart_pwr_down,
    0, "Set to true to leave smart power down enabled on newer adapters");

/* Controls whether promiscuous also shows bad packets */
static int em_debug_sbp = FALSE;
TUNABLE_INT("hw.em.sbp", &em_debug_sbp);
SYSCTL_INT(_hw_em, OID_AUTO, sbp, CTLFLAG_RDTUN, &em_debug_sbp, 0,
    "Show bad packets in promiscuous mode");

static int em_enable_msix = TRUE;
TUNABLE_INT("hw.em.enable_msix", &em_enable_msix);
SYSCTL_INT(_hw_em, OID_AUTO, enable_msix, CTLFLAG_RDTUN, &em_enable_msix, 0,
    "Enable MSI-X interrupts");

/* How many packets rxeof tries to clean at a time */
static int em_rx_process_limit = 100;
TUNABLE_INT("hw.em.rx_process_limit", &em_rx_process_limit);
SYSCTL_INT(_hw_em, OID_AUTO, rx_process_limit, CTLFLAG_RDTUN,
    &em_rx_process_limit, 0,
    "Maximum number of received packets to process "
    "at a time, -1 means unlimited");
/* Energy efficient ethernet - default to OFF */
static int eee_setting = 1;
TUNABLE_INT("hw.em.eee_setting", &eee_setting);
SYSCTL_INT(_hw_em, OID_AUTO, eee_setting, CTLFLAG_RDTUN, &eee_setting, 0,
    "Disable Energy Efficient Ethernet (1 = disabled)");

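/*
 * All of the above are boot-time tunables; for example (values are
 * illustrative only), in /boot/loader.conf:
 *
 *	hw.em.txd="1024"
 *	hw.em.rxd="1024"
 *	hw.em.rx_int_delay="32"
 *
 * Being CTLFLAG_RDTUN, the same names are readable but not writable
 * through sysctl(8) at runtime.
 */
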
/* Global used in WOL setup with multiport cards */
static int global_quad_port_a = 0;

#ifdef DEV_NETMAP	/* see ixgbe.c for details */
#include <dev/netmap/if_em_netmap.h>
#endif /* DEV_NETMAP */

/*********************************************************************
 *  Device identification routine
 *
 *  em_probe determines whether the driver should be loaded for an
 *  adapter, based on the adapter's PCI vendor/device ID.
 *
 *  return BUS_PROBE_DEFAULT on success, positive on failure
 *********************************************************************/

static int
em_probe(device_t dev)
{
	char		adapter_name[60];
	u16		pci_vendor_id = 0;
	u16		pci_device_id = 0;
	u16		pci_subvendor_id = 0;
	u16		pci_subdevice_id = 0;
	em_vendor_info_t *ent;

	INIT_DEBUGOUT("em_probe: begin");

	pci_vendor_id = pci_get_vendor(dev);
	if (pci_vendor_id != EM_VENDOR_ID)
		return (ENXIO);

	pci_device_id = pci_get_device(dev);
	pci_subvendor_id = pci_get_subvendor(dev);
	pci_subdevice_id = pci_get_subdevice(dev);

	ent = em_vendor_info_array;
	while (ent->vendor_id != 0) {
		if ((pci_vendor_id == ent->vendor_id) &&
		    (pci_device_id == ent->device_id) &&
		    ((pci_subvendor_id == ent->subvendor_id) ||
		    (ent->subvendor_id == PCI_ANY_ID)) &&
		    ((pci_subdevice_id == ent->subdevice_id) ||
		    (ent->subdevice_id == PCI_ANY_ID))) {
			sprintf(adapter_name, "%s %s",
				em_strings[ent->index],
				em_driver_version);
			device_set_desc_copy(dev, adapter_name);
			return (BUS_PROBE_DEFAULT);
		}
		ent++;
	}

	return (ENXIO);
}

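/*
 * For example, an 82574L (vendor 0x8086, device 0x10d3) matches the
 * E1000_DEV_ID_82574L entry in em_vendor_info_array above no matter
 * what its subsystem IDs are, since that row wildcards both of them
 * with PCI_ANY_ID.
 */
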
/*********************************************************************
 *  Device initialization routine
 *
 *  The attach entry point is called when the driver is being loaded.
 *  This routine identifies the type of hardware, allocates all resources
 *  and initializes the hardware.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/

static int
em_attach(device_t dev)
{
	struct adapter	*adapter;
	struct e1000_hw	*hw;
	int		error = 0;

	INIT_DEBUGOUT("em_attach: begin");

	if (resource_disabled("em", device_get_unit(dev))) {
		device_printf(dev, "Disabled by device hint\n");
		return (ENXIO);
	}

	adapter = device_get_softc(dev);
	adapter->dev = adapter->osdep.dev = dev;
	hw = &adapter->hw;
	EM_CORE_LOCK_INIT(adapter, device_get_nameunit(dev));

	/* SYSCTL stuff */
	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
	    OID_AUTO, "nvm", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
	    em_sysctl_nvm_info, "I", "NVM Information");

	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
	    OID_AUTO, "debug", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
	    em_sysctl_debug_info, "I", "Debug Information");

	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
	    OID_AUTO, "fc", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
	    em_set_flowcntl, "I", "Flow Control");

	callout_init_mtx(&adapter->timer, &adapter->core_mtx, 0);

	/* Determine hardware and mac info */
	em_identify_hardware(adapter);

	/* Setup PCI resources */
	if (em_allocate_pci_resources(adapter)) {
		device_printf(dev, "Allocation of PCI resources failed\n");
		error = ENXIO;
		goto err_pci;
	}

	/*
	** For ICH8 and family we need to
	** map the flash memory, and this
	** must happen after the MAC is
	** identified
	*/
	if ((hw->mac.type == e1000_ich8lan) ||
	    (hw->mac.type == e1000_ich9lan) ||
	    (hw->mac.type == e1000_ich10lan) ||
	    (hw->mac.type == e1000_pchlan) ||
	    (hw->mac.type == e1000_pch2lan)) {
		int rid = EM_BAR_TYPE_FLASH;
		adapter->flash = bus_alloc_resource_any(dev,
		    SYS_RES_MEMORY, &rid, RF_ACTIVE);
		if (adapter->flash == NULL) {
			device_printf(dev, "Mapping of Flash failed\n");
			error = ENXIO;
			goto err_pci;
		}
		/* This is used in the shared code */
		hw->flash_address = (u8 *)adapter->flash;
		adapter->osdep.flash_bus_space_tag =
		    rman_get_bustag(adapter->flash);
		adapter->osdep.flash_bus_space_handle =
		    rman_get_bushandle(adapter->flash);
	}

	/* Do Shared Code initialization */
	if (e1000_setup_init_funcs(hw, TRUE)) {
		device_printf(dev, "Setup of Shared code failed\n");
		error = ENXIO;
		goto err_pci;
	}

	e1000_get_bus_info(hw);

	/* Set up some sysctls for the tunable interrupt delays */
	em_add_int_delay_sysctl(adapter, "rx_int_delay",
	    "receive interrupt delay in usecs", &adapter->rx_int_delay,
	    E1000_REGISTER(hw, E1000_RDTR), em_rx_int_delay_dflt);
	em_add_int_delay_sysctl(adapter, "tx_int_delay",
	    "transmit interrupt delay in usecs", &adapter->tx_int_delay,
	    E1000_REGISTER(hw, E1000_TIDV), em_tx_int_delay_dflt);
	em_add_int_delay_sysctl(adapter, "rx_abs_int_delay",
	    "receive interrupt delay limit in usecs",
	    &adapter->rx_abs_int_delay,
	    E1000_REGISTER(hw, E1000_RADV),
	    em_rx_abs_int_delay_dflt);
	em_add_int_delay_sysctl(adapter, "tx_abs_int_delay",
	    "transmit interrupt delay limit in usecs",
	    &adapter->tx_abs_int_delay,
	    E1000_REGISTER(hw, E1000_TADV),
	    em_tx_abs_int_delay_dflt);

	/* Sysctl for limiting the amount of work done in the taskqueue */
	em_set_sysctl_value(adapter, "rx_processing_limit",
	    "max number of rx packets to process", &adapter->rx_process_limit,
	    em_rx_process_limit);

	/*
	 * Validate the number of transmit and receive descriptors.  They
	 * must not exceed the hardware maximum, and the resulting ring
	 * byte size must be a multiple of EM_DBA_ALIGN.
	 */
	if (((em_txd * sizeof(struct e1000_tx_desc)) % EM_DBA_ALIGN) != 0 ||
	    (em_txd > EM_MAX_TXD) || (em_txd < EM_MIN_TXD)) {
		device_printf(dev, "Using %d TX descriptors instead of %d!\n",
		    EM_DEFAULT_TXD, em_txd);
		adapter->num_tx_desc = EM_DEFAULT_TXD;
	} else
		adapter->num_tx_desc = em_txd;

	if (((em_rxd * sizeof(struct e1000_rx_desc)) % EM_DBA_ALIGN) != 0 ||
	    (em_rxd > EM_MAX_RXD) || (em_rxd < EM_MIN_RXD)) {
		device_printf(dev, "Using %d RX descriptors instead of %d!\n",
		    EM_DEFAULT_RXD, em_rxd);
		adapter->num_rx_desc = EM_DEFAULT_RXD;
	} else
		adapter->num_rx_desc = em_rxd;

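	/*
	 * Both legacy descriptor types are 16 bytes, so with the
	 * 128-byte EM_DBA_ALIGN the checks above effectively require
	 * ring sizes that are a multiple of 8 entries: e.g.
	 * hw.em.txd=1024 is accepted, while hw.em.txd=1020 falls back
	 * to EM_DEFAULT_TXD.
	 */
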
	hw->mac.autoneg = DO_AUTO_NEG;
	hw->phy.autoneg_wait_to_complete = FALSE;
	hw->phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;

	/* Copper options */
	if (hw->phy.media_type == e1000_media_type_copper) {
		hw->phy.mdix = AUTO_ALL_MODES;
		hw->phy.disable_polarity_correction = FALSE;
		hw->phy.ms_type = EM_MASTER_SLAVE;
	}

	/*
	 * Set the frame limits assuming
	 * standard ethernet sized frames.
	 */
	adapter->max_frame_size = ETHERMTU + ETHER_HDR_LEN + ETHERNET_FCS_SIZE;
	adapter->min_frame_size = ETH_ZLEN + ETHERNET_FCS_SIZE;

	/*
	 * This controls when hardware reports transmit completion
	 * status.
	 */
	hw->mac.report_tx_early = 1;

	/*
	** Get queue/ring memory
	*/
	if (em_allocate_queues(adapter)) {
		error = ENOMEM;
		goto err_pci;
	}

	/* Allocate multicast array memory. */
	adapter->mta = malloc(sizeof(u8) * ETH_ADDR_LEN *
	    MAX_NUM_MULTICAST_ADDRESSES, M_DEVBUF, M_NOWAIT);
	if (adapter->mta == NULL) {
		device_printf(dev, "Can not allocate multicast setup array\n");
		error = ENOMEM;
		goto err_late;
	}

	/* Check SOL/IDER usage */
	if (e1000_check_reset_block(hw))
		device_printf(dev, "PHY reset is blocked"
		    " due to SOL/IDER session.\n");

	/* Sysctl for setting Energy Efficient Ethernet */
	hw->dev_spec.ich8lan.eee_disable = eee_setting;
	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
	    OID_AUTO, "eee_control", CTLTYPE_INT|CTLFLAG_RW,
	    adapter, 0, em_sysctl_eee, "I",
	    "Disable Energy Efficient Ethernet");

	/*
	** Start from a known state: reset the hardware before
	** reading the NVM and MAC address from it.
	*/
	e1000_reset_hw(hw);

	/* Make sure we have a good EEPROM before we read from it */
	if (e1000_validate_nvm_checksum(hw) < 0) {
		/*
		** Some PCI-E parts fail the first check due to
		** the link being in sleep state; call it again,
		** and if it fails a second time it's a real issue.
		*/
		if (e1000_validate_nvm_checksum(hw) < 0) {
			device_printf(dev,
			    "The EEPROM Checksum Is Not Valid\n");
			error = EIO;
			goto err_late;
		}
	}

	/* Copy the permanent MAC address out of the EEPROM */
	if (e1000_read_mac_addr(hw) < 0) {
		device_printf(dev, "EEPROM read error while reading MAC"
		    " address\n");
		error = EIO;
		goto err_late;
	}

	if (!em_is_valid_ether_addr(hw->mac.addr)) {
		device_printf(dev, "Invalid MAC address\n");
		error = EIO;
		goto err_late;
	}

	/*
	**  Do interrupt configuration
	*/
	if (adapter->msix > 1) /* Do MSIX */
		error = em_allocate_msix(adapter);
	else  /* MSI or Legacy */
		error = em_allocate_legacy(adapter);
	if (error)
		goto err_late;

	/*
	 * Get Wake-on-Lan and Management info for later use
	 */
	em_get_wakeup(dev);

	/* Setup OS specific network interface */
	if (em_setup_interface(dev, adapter) != 0)
		goto err_late;

	em_reset(adapter);

	/* Initialize statistics */
	em_update_stats_counters(adapter);

	hw->mac.get_link_status = 1;
	em_update_link_status(adapter);

	/* Register for VLAN events */
	adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
	    em_register_vlan, adapter, EVENTHANDLER_PRI_FIRST);
	adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
	    em_unregister_vlan, adapter, EVENTHANDLER_PRI_FIRST);

	em_add_hw_stats(adapter);

	/* Non-AMT based hardware can now take control from firmware */
	if (adapter->has_manage && !adapter->has_amt)
		em_get_hw_control(adapter);

	/* Tell the stack that the interface is not active */
	adapter->ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
	adapter->ifp->if_drv_flags |= IFF_DRV_OACTIVE;

	adapter->led_dev = led_create(em_led_func, adapter,
	    device_get_nameunit(dev));
#ifdef DEV_NETMAP
	em_netmap_attach(adapter);
#endif /* DEV_NETMAP */

	INIT_DEBUGOUT("em_attach: end");

	return (0);

err_late:
	em_free_transmit_structures(adapter);
	em_free_receive_structures(adapter);
	em_release_hw_control(adapter);
	if (adapter->ifp != NULL)
		if_free(adapter->ifp);
err_pci:
	em_free_pci_resources(adapter);
	free(adapter->mta, M_DEVBUF);
	EM_CORE_LOCK_DESTROY(adapter);

	return (error);
}

/*********************************************************************
 *  Device removal routine
 *
 *  The detach entry point is called when the driver is being removed.
 *  This routine stops the adapter and deallocates all the resources
 *  that were allocated for driver operation.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/

static int
em_detach(device_t dev)
{
	struct adapter	*adapter = device_get_softc(dev);
	struct ifnet	*ifp = adapter->ifp;

	INIT_DEBUGOUT("em_detach: begin");

	/* Make sure VLANS are not using driver */
	if (adapter->ifp->if_vlantrunk != NULL) {
		device_printf(dev, "Vlan in use, detach first\n");
		return (EBUSY);
	}

#ifdef DEVICE_POLLING
	if (ifp->if_capenable & IFCAP_POLLING)
		ether_poll_deregister(ifp);
#endif

	if (adapter->led_dev != NULL)
		led_destroy(adapter->led_dev);

	EM_CORE_LOCK(adapter);
	adapter->in_detach = 1;
	em_stop(adapter);
	EM_CORE_UNLOCK(adapter);
	EM_CORE_LOCK_DESTROY(adapter);

	e1000_phy_hw_reset(&adapter->hw);

	em_release_manageability(adapter);
	em_release_hw_control(adapter);

	/* Unregister VLAN events */
	if (adapter->vlan_attach != NULL)
		EVENTHANDLER_DEREGISTER(vlan_config, adapter->vlan_attach);
	if (adapter->vlan_detach != NULL)
		EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach);

	ether_ifdetach(adapter->ifp);
	callout_drain(&adapter->timer);

#ifdef DEV_NETMAP
	netmap_detach(ifp);
#endif /* DEV_NETMAP */

	em_free_pci_resources(adapter);
	bus_generic_detach(dev);
	if_free(ifp);

	em_free_transmit_structures(adapter);
	em_free_receive_structures(adapter);

	em_release_hw_control(adapter);
	free(adapter->mta, M_DEVBUF);

	return (0);
}

/*********************************************************************
 *
 *  Shutdown entry point
 *
 **********************************************************************/

static int
em_shutdown(device_t dev)
{
	return em_suspend(dev);
}

/*
 * Suspend/resume device methods.
 */
static int
em_suspend(device_t dev)
{
	struct adapter *adapter = device_get_softc(dev);

	EM_CORE_LOCK(adapter);

	em_release_manageability(adapter);
	em_release_hw_control(adapter);
	em_enable_wakeup(dev);

	EM_CORE_UNLOCK(adapter);

	return bus_generic_suspend(dev);
}

static int
em_resume(device_t dev)
{
	struct adapter *adapter = device_get_softc(dev);
	struct tx_ring	*txr = adapter->tx_rings;
	struct ifnet *ifp = adapter->ifp;

	EM_CORE_LOCK(adapter);
	if (adapter->hw.mac.type == e1000_pch2lan)
		e1000_resume_workarounds_pchlan(&adapter->hw);
	em_init_locked(adapter);
	em_init_manageability(adapter);

	if ((ifp->if_flags & IFF_UP) &&
	    (ifp->if_drv_flags & IFF_DRV_RUNNING) && adapter->link_active) {
		for (int i = 0; i < adapter->num_queues; i++, txr++) {
			EM_TX_LOCK(txr);
#ifdef EM_MULTIQUEUE
			if (!drbr_empty(ifp, txr->br))
				em_mq_start_locked(ifp, txr, NULL);
#else
			if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
				em_start_locked(ifp, txr);
#endif
			EM_TX_UNLOCK(txr);
		}
	}
	EM_CORE_UNLOCK(adapter);

	return bus_generic_resume(dev);
}

#ifdef EM_MULTIQUEUE
/*********************************************************************
 *  Multiqueue Transmit routines
 *
 *  em_mq_start is called by the stack to initiate a transmit.
 *  However, if the driver is busy the request can be queued in the
 *  buf_ring rather than sent immediately; that deferral, more than
 *  having multiple hardware TX queues, is the advantage of this path.
 **********************************************************************/
static int
em_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr, struct mbuf *m)
{
	struct adapter	*adapter = txr->adapter;
	struct mbuf	*next;
	int		err = 0, enq = 0;

	if ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) !=
	    IFF_DRV_RUNNING || adapter->link_active == 0) {
		if (m != NULL)
			err = drbr_enqueue(ifp, txr->br, m);
		return (err);
	}

	enq = 0;
	if (m == NULL) {
		next = drbr_dequeue(ifp, txr->br);
	} else if (drbr_needs_enqueue(ifp, txr->br)) {
		if ((err = drbr_enqueue(ifp, txr->br, m)) != 0)
			return (err);
		next = drbr_dequeue(ifp, txr->br);
	} else
		next = m;

	/* Process the queue */
	while (next != NULL) {
		if ((err = em_xmit(txr, &next)) != 0) {
			if (next != NULL)
				err = drbr_enqueue(ifp, txr->br, next);
			break;
		}
		enq++;
		ifp->if_obytes += next->m_pkthdr.len;
		if (next->m_flags & M_MCAST)
			ifp->if_omcasts++;
		ETHER_BPF_MTAP(ifp, next);
		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
			break;
		next = drbr_dequeue(ifp, txr->br);
	}

	if (enq > 0) {
		/* Set the watchdog */
		txr->queue_status = EM_QUEUE_WORKING;
		txr->watchdog_time = ticks;
	}

	if (txr->tx_avail < EM_MAX_SCATTER)
		em_txeof(txr);
	if (txr->tx_avail < EM_MAX_SCATTER)
		ifp->if_drv_flags |= IFF_DRV_OACTIVE;
	return (err);
}

/*
** Multiqueue capable stack interface
*/
static int
em_mq_start(struct ifnet *ifp, struct mbuf *m)
{
	struct adapter	*adapter = ifp->if_softc;
	struct tx_ring	*txr = adapter->tx_rings;
	int		error;

	if (EM_TX_TRYLOCK(txr)) {
		error = em_mq_start_locked(ifp, txr, m);
		EM_TX_UNLOCK(txr);
	} else
		error = drbr_enqueue(ifp, txr->br, m);

	return (error);
}
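
/*
 * If the TX lock is contended we do not spin: the frame is simply
 * enqueued on the buf_ring and will be drained later, either by the
 * thread that currently holds the lock or by the next TX
 * interrupt/taskqueue pass, keeping the stack's transmit path
 * non-blocking.
 */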

/*
** Flush all ring buffers
*/
static void
em_qflush(struct ifnet *ifp)
{
	struct adapter	*adapter = ifp->if_softc;
	struct tx_ring	*txr = adapter->tx_rings;
	struct mbuf	*m;

	for (int i = 0; i < adapter->num_queues; i++, txr++) {
		EM_TX_LOCK(txr);
		while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
			m_freem(m);
		EM_TX_UNLOCK(txr);
	}
	if_qflush(ifp);
}
#else  /* !EM_MULTIQUEUE */

static void
em_start_locked(struct ifnet *ifp, struct tx_ring *txr)
{
	struct adapter	*adapter = ifp->if_softc;
	struct mbuf	*m_head;

	EM_TX_LOCK_ASSERT(txr);

	if ((ifp->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
	    IFF_DRV_RUNNING)
		return;

	if (!adapter->link_active)
		return;

	while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
		/* Call cleanup if number of TX descriptors low */
		if (txr->tx_avail <= EM_TX_CLEANUP_THRESHOLD)
			em_txeof(txr);
		if (txr->tx_avail < EM_MAX_SCATTER) {
			ifp->if_drv_flags |= IFF_DRV_OACTIVE;
			break;
		}
		IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
		if (m_head == NULL)
			break;
		/*
		 *  Encapsulation can modify our pointer, and/or make it
		 *  NULL on failure.  In that event, we can't requeue.
		 */
		if (em_xmit(txr, &m_head)) {
			if (m_head == NULL)
				break;
			IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
			break;
		}

		/* Send a copy of the frame to the BPF listener */
		ETHER_BPF_MTAP(ifp, m_head);

		/* Set timeout in case hardware has problems transmitting. */
		txr->watchdog_time = ticks;
		txr->queue_status = EM_QUEUE_WORKING;
	}

	return;
}

static void
em_start(struct ifnet *ifp)
{
	struct adapter	*adapter = ifp->if_softc;
	struct tx_ring	*txr = adapter->tx_rings;

	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
		EM_TX_LOCK(txr);
		em_start_locked(ifp, txr);
		EM_TX_UNLOCK(txr);
	}
	return;
}
#endif /* EM_MULTIQUEUE */
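
/*
 * Both transmit paths above apply backpressure by setting
 * IFF_DRV_OACTIVE when fewer than EM_MAX_SCATTER descriptors remain;
 * em_txeof() clears the flag again once enough descriptors have been
 * reclaimed, after which the stack resumes handing us packets.
 */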

/*********************************************************************
 *  Ioctl entry point
 *
 *  em_ioctl is called when the user wants to configure the
 *  interface.
 *
 *  return 0 on success, positive on failure
 **********************************************************************/

static int
em_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
{
	struct adapter	*adapter = ifp->if_softc;
	struct ifreq	*ifr = (struct ifreq *)data;
#if defined(INET) || defined(INET6)
	struct ifaddr	*ifa = (struct ifaddr *)data;
#endif
	bool		avoid_reset = FALSE;
	int		error = 0;

	if (adapter->in_detach)
		return (error);

	switch (command) {
	case SIOCSIFADDR:
#ifdef INET
		if (ifa->ifa_addr->sa_family == AF_INET)
			avoid_reset = TRUE;
#endif
#ifdef INET6
		if (ifa->ifa_addr->sa_family == AF_INET6)
			avoid_reset = TRUE;
#endif
		/*
		** Calling init results in link renegotiation,
		** so we avoid doing it when possible.
		*/
		if (avoid_reset) {
			ifp->if_flags |= IFF_UP;
			if (!(ifp->if_drv_flags & IFF_DRV_RUNNING))
				em_init(adapter);
#ifdef INET
			if (!(ifp->if_flags & IFF_NOARP))
				arp_ifinit(ifp, ifa);
#endif
		} else
			error = ether_ioctl(ifp, command, data);
		break;
	case SIOCSIFMTU:
	    {
		int max_frame_size;

		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)");

		EM_CORE_LOCK(adapter);
		switch (adapter->hw.mac.type) {
		case e1000_82571:
		case e1000_82572:
		case e1000_ich9lan:
		case e1000_ich10lan:
		case e1000_pch2lan:
		case e1000_82574:
		case e1000_82583:
		case e1000_80003es2lan:	/* 9K Jumbo Frame size */
			max_frame_size = 9234;
			break;
		case e1000_pchlan:
			max_frame_size = 4096;
			break;
			/* Adapters that do not support jumbo frames */
		case e1000_ich8lan:
			max_frame_size = ETHER_MAX_LEN;
			break;
		default:
			max_frame_size = MAX_JUMBO_FRAME_SIZE;
		}
		if (ifr->ifr_mtu > max_frame_size - ETHER_HDR_LEN -
		    ETHER_CRC_LEN) {
			EM_CORE_UNLOCK(adapter);
			error = EINVAL;
			break;
		}

		ifp->if_mtu = ifr->ifr_mtu;
		adapter->max_frame_size =
		    ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
		em_init_locked(adapter);
		EM_CORE_UNLOCK(adapter);
		break;
	    }
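	/*
	 * Example: on an 82571 the 9234-byte hardware frame limit above
	 * allows an MTU of up to 9234 - ETHER_HDR_LEN (14) -
	 * ETHER_CRC_LEN (4) = 9216 bytes, while ich8lan parts are capped
	 * at the standard 1518-byte frame (1500-byte MTU).
	 */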
	case SIOCSIFFLAGS:
		IOCTL_DEBUGOUT("ioctl rcv'd:\
		    SIOCSIFFLAGS (Set Interface Flags)");
		EM_CORE_LOCK(adapter);
		if (ifp->if_flags & IFF_UP) {
			if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) {
				if ((ifp->if_flags ^ adapter->if_flags) &
				    (IFF_PROMISC | IFF_ALLMULTI)) {
					em_disable_promisc(adapter);
					em_set_promisc(adapter);
				}
			} else
				em_init_locked(adapter);
		} else
			if (ifp->if_drv_flags & IFF_DRV_RUNNING)
				em_stop(adapter);
		adapter->if_flags = ifp->if_flags;
		EM_CORE_UNLOCK(adapter);
		break;
	case SIOCADDMULTI:
	case SIOCDELMULTI:
		IOCTL_DEBUGOUT("ioctl rcv'd: SIOC(ADD|DEL)MULTI");
		if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
			EM_CORE_LOCK(adapter);
			em_disable_intr(adapter);
			em_set_multi(adapter);
#ifdef DEVICE_POLLING
			if (!(ifp->if_capenable & IFCAP_POLLING))
#endif
				em_enable_intr(adapter);
			EM_CORE_UNLOCK(adapter);
		}
		break;
	case SIOCSIFMEDIA:
		/* Check SOL/IDER usage */
		EM_CORE_LOCK(adapter);
		if (e1000_check_reset_block(&adapter->hw)) {
			EM_CORE_UNLOCK(adapter);
			device_printf(adapter->dev, "Media change is"
			    " blocked due to SOL/IDER session.\n");
			break;
		}
		EM_CORE_UNLOCK(adapter);
		/* falls thru */
	case SIOCGIFMEDIA:
		IOCTL_DEBUGOUT("ioctl rcv'd: \
		    SIOCxIFMEDIA (Get/Set Interface Media)");
		error = ifmedia_ioctl(ifp, ifr, &adapter->media, command);
		break;
	case SIOCSIFCAP:
	    {
		int mask, reinit;

		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFCAP (Set Capabilities)");
		reinit = 0;
		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
#ifdef DEVICE_POLLING
		if (mask & IFCAP_POLLING) {
			if (ifr->ifr_reqcap & IFCAP_POLLING) {
				error = ether_poll_register(em_poll, ifp);
				if (error)
					return (error);
				EM_CORE_LOCK(adapter);
				em_disable_intr(adapter);
				ifp->if_capenable |= IFCAP_POLLING;
				EM_CORE_UNLOCK(adapter);
			} else {
				error = ether_poll_deregister(ifp);
				/* Enable interrupt even in error case */
				EM_CORE_LOCK(adapter);
				em_enable_intr(adapter);
				ifp->if_capenable &= ~IFCAP_POLLING;
				EM_CORE_UNLOCK(adapter);
			}
		}
#endif
		if (mask & IFCAP_HWCSUM) {
			ifp->if_capenable ^= IFCAP_HWCSUM;
			reinit = 1;
		}
		if (mask & IFCAP_TSO4) {
			ifp->if_capenable ^= IFCAP_TSO4;
			reinit = 1;
		}
		if (mask & IFCAP_VLAN_HWTAGGING) {
			ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
			reinit = 1;
		}
		if (mask & IFCAP_VLAN_HWFILTER) {
			ifp->if_capenable ^= IFCAP_VLAN_HWFILTER;
			reinit = 1;
		}
		if (mask & IFCAP_VLAN_HWTSO) {
			ifp->if_capenable ^= IFCAP_VLAN_HWTSO;
			reinit = 1;
		}
		if ((mask & IFCAP_WOL) &&
		    (ifp->if_capabilities & IFCAP_WOL) != 0) {
			if (mask & IFCAP_WOL_MCAST)
				ifp->if_capenable ^= IFCAP_WOL_MCAST;
			if (mask & IFCAP_WOL_MAGIC)
				ifp->if_capenable ^= IFCAP_WOL_MAGIC;
		}
		if (reinit && (ifp->if_drv_flags & IFF_DRV_RUNNING))
			em_init(adapter);
		VLAN_CAPABILITIES(ifp);
		break;
	    }

	default:
		error = ether_ioctl(ifp, command, data);
		break;
	}

	return (error);
}

/*********************************************************************
 *  Init entry point
 *
 *  This routine is used in two ways. It is used by the stack as
 *  init entry point in network interface structure. It is also used
 *  by the driver as a hw/sw initialization routine to get to a
 *  consistent state.
 *
 *  return 0 on success, positive on failure
 **********************************************************************/

static void
em_init_locked(struct adapter *adapter)
{
	struct ifnet	*ifp = adapter->ifp;
	device_t	dev = adapter->dev;

	INIT_DEBUGOUT("em_init: begin");

	EM_CORE_LOCK_ASSERT(adapter);

	em_disable_intr(adapter);
	callout_stop(&adapter->timer);

	/* Get the latest mac address, User can use a LAA */
	bcopy(IF_LLADDR(adapter->ifp), adapter->hw.mac.addr,
	    ETHER_ADDR_LEN);

	/* Put the address into the Receive Address Array */
	e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);

	/*
	 * With the 82571 adapter, RAR[0] may be overwritten
	 * when the other port is reset, we make a duplicate
	 * in RAR[14] for that eventuality, this assures
	 * the interface continues to function.
	 */
	if (adapter->hw.mac.type == e1000_82571) {
		e1000_set_laa_state_82571(&adapter->hw, TRUE);
		e1000_rar_set(&adapter->hw, adapter->hw.mac.addr,
		    E1000_RAR_ENTRIES - 1);
	}

	/* Initialize the hardware */
	em_reset(adapter);
	em_update_link_status(adapter);

	/* Setup VLAN support, basic and offload if available */
	E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);

	/* Set hardware offload abilities */
	ifp->if_hwassist = 0;
	if (ifp->if_capenable & IFCAP_TXCSUM)
		ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
	if (ifp->if_capenable & IFCAP_TSO4)
		ifp->if_hwassist |= CSUM_TSO;

	/* Configure for OS presence */
	em_init_manageability(adapter);

	/* Prepare transmit descriptors and buffers */
	em_setup_transmit_structures(adapter);
	em_initialize_transmit_unit(adapter);

	/* Setup Multicast table */
	em_set_multi(adapter);

	/*
	** Figure out the desired mbuf
	** pool for doing jumbos
	*/
	if (adapter->max_frame_size <= 2048)
		adapter->rx_mbuf_sz = MCLBYTES;
	else if (adapter->max_frame_size <= 4096)
		adapter->rx_mbuf_sz = MJUMPAGESIZE;
	else
		adapter->rx_mbuf_sz = MJUM9BYTES;
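	/*
	 * E.g. a 9000-byte MTU yields a 9018-byte max_frame_size and
	 * lands in the 9k (MJUM9BYTES) pool, while the default
	 * 1500-byte MTU (1518-byte frames) uses 2k MCLBYTES clusters.
	 */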

	/* Prepare receive descriptors and buffers */
	if (em_setup_receive_structures(adapter)) {
		device_printf(dev, "Could not setup receive structures\n");
		em_stop(adapter);
		return;
	}
	em_initialize_receive_unit(adapter);

	/* Use real VLAN Filter support? */
	if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) {
		if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
			/* Use real VLAN Filter support */
			em_setup_vlan_hw_support(adapter);
		else {
			u32 ctrl;
			ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
			ctrl |= E1000_CTRL_VME;
			E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
		}
	}

	/* Don't lose promiscuous settings */
	em_set_promisc(adapter);

	/* Set the interface as ACTIVE */
	ifp->if_drv_flags |= IFF_DRV_RUNNING;
	ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;

	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
	e1000_clear_hw_cntrs_base_generic(&adapter->hw);

	/* MSI/X configuration for 82574 */
	if (adapter->hw.mac.type == e1000_82574) {
		int tmp;
		tmp = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
		tmp |= E1000_CTRL_EXT_PBA_CLR;
		E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, tmp);
		/* Set the IVAR - interrupt vector routing. */
		E1000_WRITE_REG(&adapter->hw, E1000_IVAR, adapter->ivars);
	}

#ifdef DEVICE_POLLING
	/*
	 * Only enable interrupts if we are not polling; make sure
	 * they are off otherwise.
	 */
	if (ifp->if_capenable & IFCAP_POLLING)
		em_disable_intr(adapter);
	else
#endif /* DEVICE_POLLING */
		em_enable_intr(adapter);

	/* AMT based hardware can now take control from firmware */
	if (adapter->has_manage && adapter->has_amt)
		em_get_hw_control(adapter);
}

static void
em_init(void *arg)
{
	struct adapter *adapter = arg;

	EM_CORE_LOCK(adapter);
	em_init_locked(adapter);
	EM_CORE_UNLOCK(adapter);
}

#ifdef DEVICE_POLLING
/*********************************************************************
 *
 *  Legacy polling routine: note this only works with single queue
 *
 *********************************************************************/
static int
em_poll(struct ifnet *ifp, enum poll_cmd cmd, int count)
{
	struct adapter *adapter = ifp->if_softc;
	struct tx_ring	*txr = adapter->tx_rings;
	struct rx_ring	*rxr = adapter->rx_rings;
	u32		reg_icr;
	int		rx_done;

	EM_CORE_LOCK(adapter);
	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
		EM_CORE_UNLOCK(adapter);
		return (0);
	}

	if (cmd == POLL_AND_CHECK_STATUS) {
		reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
		if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
			callout_stop(&adapter->timer);
			adapter->hw.mac.get_link_status = 1;
			em_update_link_status(adapter);
			callout_reset(&adapter->timer, hz,
			    em_local_timer, adapter);
		}
	}
	EM_CORE_UNLOCK(adapter);

	em_rxeof(rxr, count, &rx_done);

	EM_TX_LOCK(txr);
	em_txeof(txr);
#ifdef EM_MULTIQUEUE
	if (!drbr_empty(ifp, txr->br))
		em_mq_start_locked(ifp, txr, NULL);
#else
	if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
		em_start_locked(ifp, txr);
#endif
	EM_TX_UNLOCK(txr);

	return (rx_done);
}
#endif /* DEVICE_POLLING */

/*********************************************************************
 *
 *  Fast Legacy/MSI Combined Interrupt Service routine
 *
 *********************************************************************/
static int
em_irq_fast(void *arg)
{
	struct adapter	*adapter = arg;
	struct ifnet	*ifp;
	u32		reg_icr;

	ifp = adapter->ifp;

	reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);

	/* Hot eject? */
	if (reg_icr == 0xffffffff)
		return FILTER_STRAY;

	/* Definitely not our interrupt. */
	if (reg_icr == 0x0)
		return FILTER_STRAY;

	/*
	 * Starting with the 82571 chip, bit 31 should be used to
	 * determine whether the interrupt belongs to us.
	 */
	if (adapter->hw.mac.type >= e1000_82571 &&
	    (reg_icr & E1000_ICR_INT_ASSERTED) == 0)
		return FILTER_STRAY;

	em_disable_intr(adapter);
	taskqueue_enqueue(adapter->tq, &adapter->que_task);

	/* Link status change */
	if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
		adapter->hw.mac.get_link_status = 1;
		taskqueue_enqueue(taskqueue_fast, &adapter->link_task);
	}

	if (reg_icr & E1000_ICR_RXO)
		adapter->rx_overruns++;
	return FILTER_HANDLED;
}
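
/*
 * em_irq_fast() runs as an interrupt filter, so it may not sleep or
 * take regular mutexes: it just reads (and thereby acks) the ICR,
 * masks further interrupts, and defers the real RX/TX work to the
 * que taskqueue; FILTER_HANDLED tells the kernel no ithread run is
 * needed beyond that.
 */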

/* Combined RX/TX handler, used by Legacy and MSI */
static void
em_handle_que(void *context, int pending)
{
	struct adapter	*adapter = context;
	struct ifnet	*ifp = adapter->ifp;
	struct tx_ring	*txr = adapter->tx_rings;
	struct rx_ring	*rxr = adapter->rx_rings;

	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
		bool more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
		EM_TX_LOCK(txr);
		em_txeof(txr);
#ifdef EM_MULTIQUEUE
		if (!drbr_empty(ifp, txr->br))
			em_mq_start_locked(ifp, txr, NULL);
#else
		if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
			em_start_locked(ifp, txr);
#endif
		EM_TX_UNLOCK(txr);
		if (more) {
			taskqueue_enqueue(adapter->tq, &adapter->que_task);
			return;
		}
	}

	em_enable_intr(adapter);
	return;
}

/*********************************************************************
 *
 *  MSIX Interrupt Service Routines
 *
 **********************************************************************/
static void
em_msix_tx(void *arg)
{
	struct tx_ring *txr = arg;
	struct adapter *adapter = txr->adapter;
	struct ifnet	*ifp = adapter->ifp;

	++txr->tx_irq;
	EM_TX_LOCK(txr);
	em_txeof(txr);
#ifdef EM_MULTIQUEUE
	if (!drbr_empty(ifp, txr->br))
		em_mq_start_locked(ifp, txr, NULL);
#else
	if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
		em_start_locked(ifp, txr);
#endif
	/* Reenable this interrupt */
	E1000_WRITE_REG(&adapter->hw, E1000_IMS, txr->ims);
	EM_TX_UNLOCK(txr);
	return;
}

/*********************************************************************
 *
 *  MSIX RX Interrupt Service routine
 *
 **********************************************************************/

static void
em_msix_rx(void *arg)
{
	struct rx_ring	*rxr = arg;
	struct adapter	*adapter = rxr->adapter;
	bool		more;

	++rxr->rx_irq;
	more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
	if (more)
		taskqueue_enqueue(rxr->tq, &rxr->rx_task);
	else
		/* Reenable this interrupt */
		E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxr->ims);
	return;
}

/*********************************************************************
 *
 *  MSIX Link Fast Interrupt Service routine
 *
 **********************************************************************/
static void
em_msix_link(void *arg)
{
	struct adapter	*adapter = arg;
	u32		reg_icr;

	++adapter->link_irq;
	reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);

	if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
		adapter->hw.mac.get_link_status = 1;
		em_handle_link(adapter, 0);
	} else
		E1000_WRITE_REG(&adapter->hw, E1000_IMS,
		    EM_MSIX_LINK | E1000_IMS_LSC);
	return;
}

static void
em_handle_rx(void *context, int pending)
{
	struct rx_ring	*rxr = context;
	struct adapter	*adapter = rxr->adapter;
	bool		more;

	more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
	if (more)
		taskqueue_enqueue(rxr->tq, &rxr->rx_task);
	else
		/* Reenable this interrupt */
		E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxr->ims);
}

static void
em_handle_tx(void *context, int pending)
{
	struct tx_ring	*txr = context;
	struct adapter	*adapter = txr->adapter;
	struct ifnet	*ifp = adapter->ifp;

	EM_TX_LOCK(txr);
	em_txeof(txr);
#ifdef EM_MULTIQUEUE
	if (!drbr_empty(ifp, txr->br))
		em_mq_start_locked(ifp, txr, NULL);
#else
	if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
		em_start_locked(ifp, txr);
#endif
	E1000_WRITE_REG(&adapter->hw, E1000_IMS, txr->ims);
	EM_TX_UNLOCK(txr);
}

static void
em_handle_link(void *context, int pending)
{
	struct adapter	*adapter = context;
	struct tx_ring	*txr = adapter->tx_rings;
	struct ifnet *ifp = adapter->ifp;

	if (!(ifp->if_drv_flags & IFF_DRV_RUNNING))
		return;

	EM_CORE_LOCK(adapter);
	callout_stop(&adapter->timer);
	em_update_link_status(adapter);
	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
	E1000_WRITE_REG(&adapter->hw, E1000_IMS,
	    EM_MSIX_LINK | E1000_IMS_LSC);
	if (adapter->link_active) {
		for (int i = 0; i < adapter->num_queues; i++, txr++) {
			EM_TX_LOCK(txr);
#ifdef EM_MULTIQUEUE
			if (!drbr_empty(ifp, txr->br))
				em_mq_start_locked(ifp, txr, NULL);
#else
			if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
				em_start_locked(ifp, txr);
#endif
			EM_TX_UNLOCK(txr);
		}
	}
	EM_CORE_UNLOCK(adapter);
}
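
/*
 * Once em_update_link_status() reports the link up again, the loop
 * above immediately restarts any transmit queues that backed up while
 * the link was down, rather than waiting for the next transmit
 * request from the stack.
 */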

/*********************************************************************
 *
 *  Media Ioctl callback
 *
 *  This routine is called whenever the user queries the status of
 *  the interface using ifconfig.
 *
 **********************************************************************/
static void
em_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
{
	struct adapter *adapter = ifp->if_softc;
	u_char fiber_type = IFM_1000_SX;

	INIT_DEBUGOUT("em_media_status: begin");

	EM_CORE_LOCK(adapter);
	em_update_link_status(adapter);

	ifmr->ifm_status = IFM_AVALID;
	ifmr->ifm_active = IFM_ETHER;

	if (!adapter->link_active) {
		EM_CORE_UNLOCK(adapter);
		return;
	}

	ifmr->ifm_status |= IFM_ACTIVE;

	if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
	    (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
		ifmr->ifm_active |= fiber_type | IFM_FDX;
	} else {
		switch (adapter->link_speed) {
		case 10:
			ifmr->ifm_active |= IFM_10_T;
			break;
		case 100:
			ifmr->ifm_active |= IFM_100_TX;
			break;
		case 1000:
			ifmr->ifm_active |= IFM_1000_T;
			break;
		}
		if (adapter->link_duplex == FULL_DUPLEX)
			ifmr->ifm_active |= IFM_FDX;
		else
			ifmr->ifm_active |= IFM_HDX;
	}
	EM_CORE_UNLOCK(adapter);
}

/*********************************************************************
 *
 *  Media Ioctl callback
 *
 *  This routine is called when the user changes speed/duplex using
1732 *  media/mediaopt options with ifconfig.
1733 *
1734 **********************************************************************/
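/*
 * Example usage (assuming the device attached as em0):
 *     ifconfig em0 media 100baseTX mediaopt full-duplex
 *     ifconfig em0 media autoselect
 */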
1735static int
1736em_media_change(struct ifnet *ifp)
1737{
1738	struct adapter *adapter = ifp->if_softc;
1739	struct ifmedia  *ifm = &adapter->media;
1740
1741	INIT_DEBUGOUT("em_media_change: begin");
1742
1743	if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
1744		return (EINVAL);
1745
1746	EM_CORE_LOCK(adapter);
1747	switch (IFM_SUBTYPE(ifm->ifm_media)) {
1748	case IFM_AUTO:
1749		adapter->hw.mac.autoneg = DO_AUTO_NEG;
1750		adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
1751		break;
1752	case IFM_1000_LX:
1753	case IFM_1000_SX:
1754	case IFM_1000_T:
1755		adapter->hw.mac.autoneg = DO_AUTO_NEG;
1756		adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
1757		break;
1758	case IFM_100_TX:
1759		adapter->hw.mac.autoneg = FALSE;
1760		adapter->hw.phy.autoneg_advertised = 0;
1761		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1762			adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_FULL;
1763		else
1764			adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_HALF;
1765		break;
1766	case IFM_10_T:
1767		adapter->hw.mac.autoneg = FALSE;
1768		adapter->hw.phy.autoneg_advertised = 0;
1769		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1770			adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_FULL;
1771		else
1772			adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_HALF;
1773		break;
1774	default:
1775		device_printf(adapter->dev, "Unsupported media type\n");
1776	}
1777
1778	em_init_locked(adapter);
1779	EM_CORE_UNLOCK(adapter);
1780
1781	return (0);
1782}
1783
1784/*********************************************************************
1785 *
1786 *  This routine maps the mbufs to tx descriptors.
1787 *
1788 *  return 0 on success, positive on failure
1789 **********************************************************************/
1790
1791static int
1792em_xmit(struct tx_ring *txr, struct mbuf **m_headp)
1793{
1794	struct adapter		*adapter = txr->adapter;
1795	bus_dma_segment_t	segs[EM_MAX_SCATTER];
1796	bus_dmamap_t		map;
1797	struct em_buffer	*tx_buffer, *tx_buffer_mapped;
1798	struct e1000_tx_desc	*ctxd = NULL;
1799	struct mbuf		*m_head;
1800	struct ether_header	*eh;
1801	struct ip		*ip = NULL;
1802	struct tcphdr		*tp = NULL;
1803	u32			txd_upper, txd_lower, txd_used, txd_saved;
1804	int			ip_off, poff;
1805	int			nsegs, i, j, first, last = 0;
1806	int			error, do_tso, tso_desc = 0, remap = 1;
1807
1808retry:
1809	m_head = *m_headp;
1810	txd_upper = txd_lower = txd_used = txd_saved = 0;
1811	do_tso = ((m_head->m_pkthdr.csum_flags & CSUM_TSO) != 0);
1812	ip_off = poff = 0;
1813
1814	/*
1815	 * Intel recommends entire IP/TCP header length reside in a single
1816	 * buffer. If multiple descriptors are used to describe the IP and
1817	 * TCP header, each descriptor should describe one or more
1818	 * complete headers; descriptors referencing only parts of headers
1819	 * are not supported. If all layer headers are not coalesced into
1820	 * a single buffer, each buffer should not cross a 4KB boundary,
1821	 * or be larger than the maximum read request size.
1822	 * The controller also requires modifying the IP/TCP header for TSO
1823	 * to work, so we first get a writable mbuf chain, then coalesce the
1824	 * ethernet/IP/TCP headers into a single buffer to meet the
1825	 * controller's requirement. This also simplifies IP/TCP/UDP checksum
1826	 * offloading, which has similar restrictions.
1827	 */
1828	if (do_tso || m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD) {
1829		if (do_tso || (m_head->m_next != NULL &&
1830		    m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD)) {
1831			if (M_WRITABLE(*m_headp) == 0) {
1832				m_head = m_dup(*m_headp, M_DONTWAIT);
1833				m_freem(*m_headp);
1834				if (m_head == NULL) {
1835					*m_headp = NULL;
1836					return (ENOBUFS);
1837				}
1838				*m_headp = m_head;
1839			}
1840		}
1841		/*
1842		 * XXX
1843		 * Assume IPv4, we don't have TSO/checksum offload support
1844		 * for IPv6 yet.
1845		 */
1846		ip_off = sizeof(struct ether_header);
1847		m_head = m_pullup(m_head, ip_off);
1848		if (m_head == NULL) {
1849			*m_headp = NULL;
1850			return (ENOBUFS);
1851		}
1852		eh = mtod(m_head, struct ether_header *);
1853		if (eh->ether_type == htons(ETHERTYPE_VLAN)) {
1854			ip_off = sizeof(struct ether_vlan_header);
1855			m_head = m_pullup(m_head, ip_off);
1856			if (m_head == NULL) {
1857				*m_headp = NULL;
1858				return (ENOBUFS);
1859			}
1860		}
1861		m_head = m_pullup(m_head, ip_off + sizeof(struct ip));
1862		if (m_head == NULL) {
1863			*m_headp = NULL;
1864			return (ENOBUFS);
1865		}
1866		ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1867		poff = ip_off + (ip->ip_hl << 2);
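		/*
		 * ip_hl counts 32-bit words, so the << 2 above converts
		 * it to bytes; poff is now the byte offset of the TCP/UDP
		 * header within the frame.
		 */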
1868		if (do_tso) {
1869			m_head = m_pullup(m_head, poff + sizeof(struct tcphdr));
1870			if (m_head == NULL) {
1871				*m_headp = NULL;
1872				return (ENOBUFS);
1873			}
1874			tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1875			/*
1876			 * TSO workaround: pull 4 extra payload bytes into
1877			 * the first buffer along with the headers.
1878			 */
1879			m_head = m_pullup(m_head, poff + (tp->th_off << 2) + 4);
1880			if (m_head == NULL) {
1881				*m_headp = NULL;
1882				return (ENOBUFS);
1883			}
1884			ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1885			ip->ip_len = 0;
1886			ip->ip_sum = 0;
1887			/*
1888			 * The pseudo TCP checksum does not include the TCP
1889			 * payload length, so the driver must recompute here
1890			 * the checksum the hardware expects to see, per
1891			 * Microsoft's Large Send specification.
1892			 */
1893			tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1894			tp->th_sum = in_pseudo(ip->ip_src.s_addr,
1895			    ip->ip_dst.s_addr, htons(IPPROTO_TCP));
1896		} else if (m_head->m_pkthdr.csum_flags & CSUM_TCP) {
1897			m_head = m_pullup(m_head, poff + sizeof(struct tcphdr));
1898			if (m_head == NULL) {
1899				*m_headp = NULL;
1900				return (ENOBUFS);
1901			}
1902			tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1903			m_head = m_pullup(m_head, poff + (tp->th_off << 2));
1904			if (m_head == NULL) {
1905				*m_headp = NULL;
1906				return (ENOBUFS);
1907			}
1908			ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1909			tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1910		} else if (m_head->m_pkthdr.csum_flags & CSUM_UDP) {
1911			m_head = m_pullup(m_head, poff + sizeof(struct udphdr));
1912			if (m_head == NULL) {
1913				*m_headp = NULL;
1914				return (ENOBUFS);
1915			}
1916			ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1917		}
1918		*m_headp = m_head;
1919	}
1920
1921	/*
1922	 * Map the packet for DMA
1923	 *
1924	 * Capture the first descriptor index,
1925	 * this descriptor will have the index
1926	 * of the EOP which is the only one that
1927	 * now gets a DONE bit writeback.
1928	 */
1929	first = txr->next_avail_desc;
1930	tx_buffer = &txr->tx_buffers[first];
1931	tx_buffer_mapped = tx_buffer;
1932	map = tx_buffer->map;
1933
1934	error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
1935	    *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
1936
1937	/*
1938	 * There are two types of errors we can (try) to handle:
1939	 * - EFBIG means the mbuf chain was too long and bus_dma ran
1940	 *   out of segments.  Defragment the mbuf chain and try again.
1941	 * - ENOMEM means bus_dma could not obtain enough bounce buffers
1942	 *   at this point in time.  Defer sending and try again later.
1943	 * All other errors, in particular EINVAL, are fatal and prevent the
1944	 * mbuf chain from ever going through.  Drop it and report error.
1945	 */
1946	if (error == EFBIG && remap) {
1947		struct mbuf *m;
1948
1949		m = m_defrag(*m_headp, M_DONTWAIT);
1950		if (m == NULL) {
1951			adapter->mbuf_alloc_failed++;
1952			m_freem(*m_headp);
1953			*m_headp = NULL;
1954			return (ENOBUFS);
1955		}
1956		*m_headp = m;
1957
1958		/* Try it again, but only once */
1959		remap = 0;
1960		goto retry;
1961	} else if (error == ENOMEM) {
1962		adapter->no_tx_dma_setup++;
1963		return (error);
1964	} else if (error != 0) {
1965		adapter->no_tx_dma_setup++;
1966		m_freem(*m_headp);
1967		*m_headp = NULL;
1968		return (error);
1969	}
1970
1971	/*
1972	 * TSO Hardware workaround, if this packet is not
1973	 * TSO, and is only a single descriptor long, and
1974	 * it follows a TSO burst, then we need to add a
1975	 * sentinel descriptor to prevent premature writeback.
1976	 */
1977	if ((do_tso == 0) && (txr->tx_tso == TRUE)) {
1978		if (nsegs == 1)
1979			tso_desc = TRUE;
1980		txr->tx_tso = FALSE;
1981	}
1982
1983	if (nsegs > (txr->tx_avail - 2)) {
1984		txr->no_desc_avail++;
1985		bus_dmamap_unload(txr->txtag, map);
1986		return (ENOBUFS);
1987	}
1988	m_head = *m_headp;
1989
1990	/* Do hardware assists */
1991	if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
1992		em_tso_setup(txr, m_head, ip_off, ip, tp,
1993		    &txd_upper, &txd_lower);
1994		/* we need to make a final sentinel transmit desc */
1995		tso_desc = TRUE;
1996	} else if (m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD)
1997		em_transmit_checksum_setup(txr, m_head,
1998		    ip_off, ip, &txd_upper, &txd_lower);
1999
2000	if (m_head->m_flags & M_VLANTAG) {
2001		/* Set the vlan id. */
2002		txd_upper |=
2003		    (htole16(m_head->m_pkthdr.ether_vtag) << 16);
2004		/* Tell hardware to add tag */
2005		txd_lower |= htole32(E1000_TXD_CMD_VLE);
2006	}
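	/*
	 * Descriptive note: in the legacy TX descriptor the tag written
	 * above lands in upper.fields.special; VLE asks the MAC to insert
	 * it into the outgoing frame.
	 */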
2007
2008	i = txr->next_avail_desc;
2009
2010	/* Set up our transmit descriptors */
2011	for (j = 0; j < nsegs; j++) {
2012		bus_size_t seg_len;
2013		bus_addr_t seg_addr;
2014
2015		tx_buffer = &txr->tx_buffers[i];
2016		ctxd = &txr->tx_base[i];
2017		seg_addr = segs[j].ds_addr;
2018		seg_len  = segs[j].ds_len;
2019		/*
2020		** TSO Workaround:
2021		** If this is the last descriptor, we want to
2022		** split it so we have a small final sentinel
2023		*/
2024		if (tso_desc && (j == (nsegs -1)) && (seg_len > 8)) {
2025			seg_len -= 4;
2026			ctxd->buffer_addr = htole64(seg_addr);
2027			ctxd->lower.data = htole32(
2028			    adapter->txd_cmd | txd_lower | seg_len);
2029			ctxd->upper.data =
2030			    htole32(txd_upper);
2031			if (++i == adapter->num_tx_desc)
2032				i = 0;
2033			/* Now make the sentinel */
2034			++txd_used; /* using an extra txd */
2035			ctxd = &txr->tx_base[i];
2036			tx_buffer = &txr->tx_buffers[i];
2037			ctxd->buffer_addr =
2038			    htole64(seg_addr + seg_len);
2039			ctxd->lower.data = htole32(
2040			    adapter->txd_cmd | txd_lower | 4);
2041			ctxd->upper.data =
2042			    htole32(txd_upper);
2043			last = i;
2044			if (++i == adapter->num_tx_desc)
2045				i = 0;
2046		} else {
2047			ctxd->buffer_addr = htole64(seg_addr);
2048			ctxd->lower.data = htole32(
2049			    adapter->txd_cmd | txd_lower | seg_len);
2050			ctxd->upper.data =
2051			    htole32(txd_upper);
2052			last = i;
2053			if (++i == adapter->num_tx_desc)
2054				i = 0;
2055		}
2056		tx_buffer->m_head = NULL;
2057		tx_buffer->next_eop = -1;
2058	}
2059
2060	txr->next_avail_desc = i;
2061	txr->tx_avail -= nsegs;
2062	if (tso_desc) /* TSO used an extra for sentinel */
2063		txr->tx_avail -= txd_used;
2064
2065	tx_buffer->m_head = m_head;
2066	/*
2067	** Here we swap the map so the last descriptor,
2068	** which gets the completion interrupt has the
2069	** real map, and the first descriptor gets the
2070	** unused map from this descriptor.
2071	*/
2072	tx_buffer_mapped->map = tx_buffer->map;
2073	tx_buffer->map = map;
2074	bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);
2075
2076	/*
2077	 * Last Descriptor of Packet
2078	 * needs End Of Packet (EOP)
2079	 * and Report Status (RS)
2080	 */
2081	ctxd->lower.data |=
2082	    htole32(E1000_TXD_CMD_EOP | E1000_TXD_CMD_RS);
2083	/*
2084	 * Keep track in the first buffer which
2085	 * descriptor will be written back
2086	 */
2087	tx_buffer = &txr->tx_buffers[first];
2088	tx_buffer->next_eop = last;
2089	/* Update the watchdog time early and often */
2090	txr->watchdog_time = ticks;
2091
2092	/*
2093	 * Advance the Transmit Descriptor Tail (TDT); this tells the E1000
2094	 * that this frame is available to transmit.
2095	 */
2096	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
2097	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
2098	E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), i);
2099
2100	return (0);
2101}
2102
2103static void
2104em_set_promisc(struct adapter *adapter)
2105{
2106	struct ifnet	*ifp = adapter->ifp;
2107	u32		reg_rctl;
2108
2109	reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2110
2111	if (ifp->if_flags & IFF_PROMISC) {
2112		reg_rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
2113		/* Turn this on if you want to see bad packets */
2114		if (em_debug_sbp)
2115			reg_rctl |= E1000_RCTL_SBP;
2116		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2117	} else if (ifp->if_flags & IFF_ALLMULTI) {
2118		reg_rctl |= E1000_RCTL_MPE;
2119		reg_rctl &= ~E1000_RCTL_UPE;
2120		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2121	}
2122}
2123
2124static void
2125em_disable_promisc(struct adapter *adapter)
2126{
2127	u32	reg_rctl;
2128
2129	reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2130
2131	reg_rctl &= ~E1000_RCTL_UPE;
2132	reg_rctl &= ~E1000_RCTL_MPE;
2133	reg_rctl &= ~E1000_RCTL_SBP;
2134	E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2135}
2136
2137
2138/*********************************************************************
2139 *  Multicast Update
2140 *
2141 *  This routine is called whenever multicast address list is updated.
2142 *
2143 **********************************************************************/
2144
2145static void
2146em_set_multi(struct adapter *adapter)
2147{
2148	struct ifnet	*ifp = adapter->ifp;
2149	struct ifmultiaddr *ifma;
2150	u32 reg_rctl = 0;
2151	u8  *mta; /* Multicast array memory */
2152	int mcnt = 0;
2153
2154	IOCTL_DEBUGOUT("em_set_multi: begin");
2155
2156	mta = adapter->mta;
2157	bzero(mta, sizeof(u8) * ETH_ADDR_LEN * MAX_NUM_MULTICAST_ADDRESSES);
2158
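	/*
	 * Descriptive note: the 82542 rev 2.0 requires the receiver to be
	 * held in reset (and MWI disabled) while the multicast table array
	 * is rewritten; the two mac.type checks below bracket that
	 * workaround.
	 */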
2159	if (adapter->hw.mac.type == e1000_82542 &&
2160	    adapter->hw.revision_id == E1000_REVISION_2) {
2161		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2162		if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
2163			e1000_pci_clear_mwi(&adapter->hw);
2164		reg_rctl |= E1000_RCTL_RST;
2165		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2166		msec_delay(5);
2167	}
2168
2169#if __FreeBSD_version < 800000
2170	IF_ADDR_LOCK(ifp);
2171#else
2172	if_maddr_rlock(ifp);
2173#endif
2174	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
2175		if (ifma->ifma_addr->sa_family != AF_LINK)
2176			continue;
2177
2178		if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
2179			break;
2180
2181		bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
2182		    &mta[mcnt * ETH_ADDR_LEN], ETH_ADDR_LEN);
2183		mcnt++;
2184	}
2185#if __FreeBSD_version < 800000
2186	IF_ADDR_UNLOCK(ifp);
2187#else
2188	if_maddr_runlock(ifp);
2189#endif
2190	if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) {
2191		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2192		reg_rctl |= E1000_RCTL_MPE;
2193		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2194	} else
2195		e1000_update_mc_addr_list(&adapter->hw, mta, mcnt);
2196
2197	if (adapter->hw.mac.type == e1000_82542 &&
2198	    adapter->hw.revision_id == E1000_REVISION_2) {
2199		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2200		reg_rctl &= ~E1000_RCTL_RST;
2201		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2202		msec_delay(5);
2203		if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
2204			e1000_pci_set_mwi(&adapter->hw);
2205	}
2206}
2207
2208
2209/*********************************************************************
2210 *  Timer routine
2211 *
2212 *  This routine checks for link status and updates statistics.
2213 *
2214 **********************************************************************/
2215
2216static void
2217em_local_timer(void *arg)
2218{
2219	struct adapter	*adapter = arg;
2220	struct ifnet	*ifp = adapter->ifp;
2221	struct tx_ring	*txr = adapter->tx_rings;
2222	struct rx_ring	*rxr = adapter->rx_rings;
2223	u32		trigger;
2224
2225	EM_CORE_LOCK_ASSERT(adapter);
2226
2227	em_update_link_status(adapter);
2228	em_update_stats_counters(adapter);
2229
2230	/* Reset LAA into RAR[0] on 82571 */
2231	if ((adapter->hw.mac.type == e1000_82571) &&
2232	    e1000_get_laa_state_82571(&adapter->hw))
2233		e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);
2234
2235	/* Mask to use in the irq trigger */
2236	if (adapter->msix_mem)
2237		trigger = rxr->ims; /* RX for 82574 */
2238	else
2239		trigger = E1000_ICS_RXDMT0;
2240
2241	/*
2242	** Check on the state of the TX queue(s); this
2243	** can be done without the lock because it's RO
2244	** and the HUNG state will be static if set.
2245	*/
2246	for (int i = 0; i < adapter->num_queues; i++, txr++) {
2247		if ((txr->queue_status == EM_QUEUE_HUNG) &&
2248		    (adapter->pause_frames == 0))
2249			goto hung;
2250		/* Schedule a TX tasklet if needed */
2251		if (txr->tx_avail <= EM_MAX_SCATTER)
2252			taskqueue_enqueue(txr->tq, &txr->tx_task);
2253	}
2254
2255	adapter->pause_frames = 0;
2256	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
2257#ifndef DEVICE_POLLING
2258	/* Trigger an RX interrupt to guarantee mbuf refresh */
2259	E1000_WRITE_REG(&adapter->hw, E1000_ICS, trigger);
2260#endif
2261	return;
2262hung:
2263	/* Looks like we're hung */
2264	device_printf(adapter->dev, "Watchdog timeout -- resetting\n");
2265	device_printf(adapter->dev,
2266	    "Queue(%d) tdh = %d, hw tdt = %d\n", txr->me,
2267	    E1000_READ_REG(&adapter->hw, E1000_TDH(txr->me)),
2268	    E1000_READ_REG(&adapter->hw, E1000_TDT(txr->me)));
2269	device_printf(adapter->dev,"TX(%d) desc avail = %d,"
2270	    "Next TX to Clean = %d\n",
2271	    txr->me, txr->tx_avail, txr->next_to_clean);
2272	ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2273	adapter->watchdog_events++;
2274	adapter->pause_frames = 0;
2275	em_init_locked(adapter);
2276}
2277
2278
2279static void
2280em_update_link_status(struct adapter *adapter)
2281{
2282	struct e1000_hw *hw = &adapter->hw;
2283	struct ifnet *ifp = adapter->ifp;
2284	device_t dev = adapter->dev;
2285	struct tx_ring *txr = adapter->tx_rings;
2286	u32 link_check = 0;
2287
2288	/* Get the cached link value or read phy for real */
2289	switch (hw->phy.media_type) {
2290	case e1000_media_type_copper:
2291		if (hw->mac.get_link_status) {
2292			/* Do the work to read phy */
2293			e1000_check_for_link(hw);
2294			link_check = !hw->mac.get_link_status;
2295			if (link_check) /* ESB2 fix */
2296				e1000_cfg_on_link_up(hw);
2297		} else
2298			link_check = TRUE;
2299		break;
2300	case e1000_media_type_fiber:
2301		e1000_check_for_link(hw);
2302		link_check = (E1000_READ_REG(hw, E1000_STATUS) &
2303		    E1000_STATUS_LU);
2304		break;
2305	case e1000_media_type_internal_serdes:
2306		e1000_check_for_link(hw);
2307		link_check = adapter->hw.mac.serdes_has_link;
2308		break;
2309	default:
2310	case e1000_media_type_unknown:
2311		break;
2312	}
2313
2314	/* Now check for a transition */
2315	if (link_check && (adapter->link_active == 0)) {
2316		e1000_get_speed_and_duplex(hw, &adapter->link_speed,
2317		    &adapter->link_duplex);
2318		/* Check if we must disable SPEED_MODE bit on PCI-E */
2319		if ((adapter->link_speed != SPEED_1000) &&
2320		    ((hw->mac.type == e1000_82571) ||
2321		    (hw->mac.type == e1000_82572))) {
2322			int tarc0;
2323			tarc0 = E1000_READ_REG(hw, E1000_TARC(0));
2324			tarc0 &= ~SPEED_MODE_BIT;
2325			E1000_WRITE_REG(hw, E1000_TARC(0), tarc0);
2326		}
2327		if (bootverbose)
2328			device_printf(dev, "Link is up %d Mbps %s\n",
2329			    adapter->link_speed,
2330			    ((adapter->link_duplex == FULL_DUPLEX) ?
2331			    "Full Duplex" : "Half Duplex"));
2332		adapter->link_active = 1;
2333		adapter->smartspeed = 0;
2334		ifp->if_baudrate = adapter->link_speed * 1000000;
2335		if_link_state_change(ifp, LINK_STATE_UP);
2336	} else if (!link_check && (adapter->link_active == 1)) {
2337		ifp->if_baudrate = adapter->link_speed = 0;
2338		adapter->link_duplex = 0;
2339		if (bootverbose)
2340			device_printf(dev, "Link is Down\n");
2341		adapter->link_active = 0;
2342		/* Link down, disable watchdog */
2343		for (int i = 0; i < adapter->num_queues; i++, txr++)
2344			txr->queue_status = EM_QUEUE_IDLE;
2345		if_link_state_change(ifp, LINK_STATE_DOWN);
2346	}
2347}
2348
2349/*********************************************************************
2350 *
2351 *  This routine disables all traffic on the adapter by issuing a
2352 *  global reset on the MAC and deallocating TX/RX buffers.
2353 *
2354 *  This routine should always be called with BOTH the CORE
2355 *  and TX locks.
2356 **********************************************************************/
2357
2358static void
2359em_stop(void *arg)
2360{
2361	struct adapter	*adapter = arg;
2362	struct ifnet	*ifp = adapter->ifp;
2363	struct tx_ring	*txr = adapter->tx_rings;
2364
2365	EM_CORE_LOCK_ASSERT(adapter);
2366
2367	INIT_DEBUGOUT("em_stop: begin");
2368
2369	em_disable_intr(adapter);
2370	callout_stop(&adapter->timer);
2371
2372	/* Tell the stack that the interface is no longer active */
2373	ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2374	ifp->if_drv_flags |= IFF_DRV_OACTIVE;
2375
2376	/* Unarm watchdog timer. */
2377	for (int i = 0; i < adapter->num_queues; i++, txr++) {
2378		EM_TX_LOCK(txr);
2379		txr->queue_status = EM_QUEUE_IDLE;
2380		EM_TX_UNLOCK(txr);
2381	}
2382
2383	e1000_reset_hw(&adapter->hw);
2384	E1000_WRITE_REG(&adapter->hw, E1000_WUC, 0);
2385
2386	e1000_led_off(&adapter->hw);
2387	e1000_cleanup_led(&adapter->hw);
2388}
2389
2390
2391/*********************************************************************
2392 *
2393 *  Determine hardware revision.
2394 *
2395 **********************************************************************/
2396static void
2397em_identify_hardware(struct adapter *adapter)
2398{
2399	device_t dev = adapter->dev;
2400
2401	/* Make sure our PCI config space has the necessary stuff set */
2402	adapter->hw.bus.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2);
2403	if (!((adapter->hw.bus.pci_cmd_word & PCIM_CMD_BUSMASTEREN) &&
2404	    (adapter->hw.bus.pci_cmd_word & PCIM_CMD_MEMEN))) {
2405		device_printf(dev, "Memory Access and/or Bus Master bits "
2406		    "were not set!\n");
2407		adapter->hw.bus.pci_cmd_word |=
2408		(PCIM_CMD_BUSMASTEREN | PCIM_CMD_MEMEN);
2409		pci_write_config(dev, PCIR_COMMAND,
2410		    adapter->hw.bus.pci_cmd_word, 2);
2411	}
2412
2413	/* Save off the information about this board */
2414	adapter->hw.vendor_id = pci_get_vendor(dev);
2415	adapter->hw.device_id = pci_get_device(dev);
2416	adapter->hw.revision_id = pci_read_config(dev, PCIR_REVID, 1);
2417	adapter->hw.subsystem_vendor_id =
2418	    pci_read_config(dev, PCIR_SUBVEND_0, 2);
2419	adapter->hw.subsystem_device_id =
2420	    pci_read_config(dev, PCIR_SUBDEV_0, 2);
2421
2422	/* Do Shared Code Init and Setup */
2423	if (e1000_set_mac_type(&adapter->hw)) {
2424		device_printf(dev, "Setup init failure\n");
2425		return;
2426	}
2427}
2428
2429static int
2430em_allocate_pci_resources(struct adapter *adapter)
2431{
2432	device_t	dev = adapter->dev;
2433	int		rid;
2434
2435	rid = PCIR_BAR(0);
2436	adapter->memory = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
2437	    &rid, RF_ACTIVE);
2438	if (adapter->memory == NULL) {
2439		device_printf(dev, "Unable to allocate bus resource: memory\n");
2440		return (ENXIO);
2441	}
2442	adapter->osdep.mem_bus_space_tag =
2443	    rman_get_bustag(adapter->memory);
2444	adapter->osdep.mem_bus_space_handle =
2445	    rman_get_bushandle(adapter->memory);
2446	adapter->hw.hw_addr = (u8 *)&adapter->osdep.mem_bus_space_handle;
2447
2448	/* Default to a single queue */
2449	adapter->num_queues = 1;
2450
2451	/*
2452	 * Setup MSI/X or MSI if PCI Express
2453	 */
2454	adapter->msix = em_setup_msix(adapter);
2455
2456	adapter->hw.back = &adapter->osdep;
2457
2458	return (0);
2459}
2460
2461/*********************************************************************
2462 *
2463 *  Setup the Legacy or MSI Interrupt handler
2464 *
2465 **********************************************************************/
2466int
2467em_allocate_legacy(struct adapter *adapter)
2468{
2469	device_t dev = adapter->dev;
2470	struct tx_ring	*txr = adapter->tx_rings;
2471	int error, rid = 0;
2472
2473	/* Manually turn off all interrupts */
2474	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2475
2476	if (adapter->msix == 1) /* using MSI */
2477		rid = 1;
2478	/* We allocate a single interrupt resource */
2479	adapter->res = bus_alloc_resource_any(dev,
2480	    SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2481	if (adapter->res == NULL) {
2482		device_printf(dev, "Unable to allocate bus resource: "
2483		    "interrupt\n");
2484		return (ENXIO);
2485	}
2486
2487	/*
2488	 * Allocate a fast interrupt and the associated
2489	 * deferred processing contexts.
2490	 */
2491	TASK_INIT(&adapter->que_task, 0, em_handle_que, adapter);
2492	adapter->tq = taskqueue_create_fast("em_taskq", M_NOWAIT,
2493	    taskqueue_thread_enqueue, &adapter->tq);
2494	taskqueue_start_threads(&adapter->tq, 1, PI_NET, "%s que",
2495	    device_get_nameunit(adapter->dev));
2496	/* Use a TX only tasklet for local timer */
2497	TASK_INIT(&txr->tx_task, 0, em_handle_tx, txr);
2498	txr->tq = taskqueue_create_fast("em_txq", M_NOWAIT,
2499	    taskqueue_thread_enqueue, &txr->tq);
2500	taskqueue_start_threads(&txr->tq, 1, PI_NET, "%s txq",
2501	    device_get_nameunit(adapter->dev));
2502	TASK_INIT(&adapter->link_task, 0, em_handle_link, adapter);
2503	if ((error = bus_setup_intr(dev, adapter->res, INTR_TYPE_NET,
2504	    em_irq_fast, NULL, adapter, &adapter->tag)) != 0) {
2505		device_printf(dev, "Failed to register fast interrupt "
2506			    "handler: %d\n", error);
2507		taskqueue_free(adapter->tq);
2508		adapter->tq = NULL;
2509		return (error);
2510	}
2511
2512	return (0);
2513}
2514
2515/*********************************************************************
2516 *
2517 *  Setup the MSIX Interrupt handlers
2518 *   This is not really multiqueue; rather,
2519 *   it's just separate interrupt vectors
2520 *   for TX, RX, and Link.
2521 *
2522 **********************************************************************/
2523int
2524em_allocate_msix(struct adapter *adapter)
2525{
2526	device_t	dev = adapter->dev;
2527	struct		tx_ring *txr = adapter->tx_rings;
2528	struct		rx_ring *rxr = adapter->rx_rings;
2529	int		error, rid, vector = 0;
2530
2531
2532	/* Make sure all interrupts are disabled */
2533	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2534
2535	/* First set up ring resources */
2536	for (int i = 0; i < adapter->num_queues; i++, txr++, rxr++) {
2537
2538		/* RX ring */
2539		rid = vector + 1;
2540
2541		rxr->res = bus_alloc_resource_any(dev,
2542		    SYS_RES_IRQ, &rid, RF_ACTIVE);
2543		if (rxr->res == NULL) {
2544			device_printf(dev,
2545			    "Unable to allocate bus resource: "
2546			    "RX MSIX Interrupt %d\n", i);
2547			return (ENXIO);
2548		}
2549		if ((error = bus_setup_intr(dev, rxr->res,
2550		    INTR_TYPE_NET | INTR_MPSAFE, NULL, em_msix_rx,
2551		    rxr, &rxr->tag)) != 0) {
2552			device_printf(dev, "Failed to register RX handler");
2553			return (error);
2554		}
2555#if __FreeBSD_version >= 800504
2556		bus_describe_intr(dev, rxr->res, rxr->tag, "rx %d", i);
2557#endif
2558		rxr->msix = vector++; /* NOTE increment vector for TX */
2559		TASK_INIT(&rxr->rx_task, 0, em_handle_rx, rxr);
2560		rxr->tq = taskqueue_create_fast("em_rxq", M_NOWAIT,
2561		    taskqueue_thread_enqueue, &rxr->tq);
2562		taskqueue_start_threads(&rxr->tq, 1, PI_NET, "%s rxq",
2563		    device_get_nameunit(adapter->dev));
2564		/*
2565		** Set the bit to enable interrupt
2566		** in E1000_IMS -- bits 20 and 21
2567		** are for RX0 and RX1, note this has
2568		** NOTHING to do with the MSIX vector
2569		*/
2570		rxr->ims = 1 << (20 + i);
2571		adapter->ivars |= (8 | rxr->msix) << (i * 4);
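		/*
		 * Descriptive note: the 82574 IVAR register is split into
		 * 4-bit entries (RX queues, TX queues, then link); the low
		 * 3 bits pick the MSIX vector and bit 3 marks the entry
		 * valid, which is what the "8 |" above sets.
		 */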
2572
2573		/* TX ring */
2574		rid = vector + 1;
2575		txr->res = bus_alloc_resource_any(dev,
2576		    SYS_RES_IRQ, &rid, RF_ACTIVE);
2577		if (txr->res == NULL) {
2578			device_printf(dev,
2579			    "Unable to allocate bus resource: "
2580			    "TX MSIX Interrupt %d\n", i);
2581			return (ENXIO);
2582		}
2583		if ((error = bus_setup_intr(dev, txr->res,
2584		    INTR_TYPE_NET | INTR_MPSAFE, NULL, em_msix_tx,
2585		    txr, &txr->tag)) != 0) {
2586			device_printf(dev, "Failed to register TX handler");
2587			return (error);
2588		}
2589#if __FreeBSD_version >= 800504
2590		bus_describe_intr(dev, txr->res, txr->tag, "tx %d", i);
2591#endif
2592		txr->msix = vector++; /* Increment vector for next pass */
2593		TASK_INIT(&txr->tx_task, 0, em_handle_tx, txr);
2594		txr->tq = taskqueue_create_fast("em_txq", M_NOWAIT,
2595		    taskqueue_thread_enqueue, &txr->tq);
2596		taskqueue_start_threads(&txr->tq, 1, PI_NET, "%s txq",
2597		    device_get_nameunit(adapter->dev));
2598		/*
2599		** Set the bit to enable interrupt
2600		** in E1000_IMS -- bits 22 and 23
2601		** are for TX0 and TX1, note this has
2602		** NOTHING to do with the MSIX vector
2603		*/
2604		txr->ims = 1 << (22 + i);
2605		adapter->ivars |= (8 | txr->msix) << (8 + (i * 4));
2606	}
2607
2608	/* Link interrupt */
2609	++rid;
2610	adapter->res = bus_alloc_resource_any(dev,
2611	    SYS_RES_IRQ, &rid, RF_ACTIVE);
2612	if (!adapter->res) {
2613		device_printf(dev,"Unable to allocate "
2614		    "bus resource: Link interrupt [%d]\n", rid);
2615		return (ENXIO);
2616        }
2617	/* Set the link handler function */
2618	error = bus_setup_intr(dev, adapter->res,
2619	    INTR_TYPE_NET | INTR_MPSAFE, NULL,
2620	    em_msix_link, adapter, &adapter->tag);
2621	if (error) {
2622		adapter->res = NULL;
2623		device_printf(dev, "Failed to register LINK handler");
2624		return (error);
2625	}
2626#if __FreeBSD_version >= 800504
2627	bus_describe_intr(dev, adapter->res, adapter->tag, "link");
2628#endif
2629	adapter->linkvec = vector;
2630	adapter->ivars |=  (8 | vector) << 16;
2631	adapter->ivars |= 0x80000000;
2632
2633	return (0);
2634}
2635
2636
2637static void
2638em_free_pci_resources(struct adapter *adapter)
2639{
2640	device_t	dev = adapter->dev;
2641	struct tx_ring	*txr;
2642	struct rx_ring	*rxr;
2643	int		rid;
2644
2645
2646	/*
2647	** Release all the queue interrupt resources:
2648	*/
2649	for (int i = 0; i < adapter->num_queues; i++) {
2650		txr = &adapter->tx_rings[i];
2651		rxr = &adapter->rx_rings[i];
2652		/* an early abort? */
2653		if ((txr == NULL) || (rxr == NULL))
2654			break;
2655		rid = txr->msix +1;
2656		if (txr->tag != NULL) {
2657			bus_teardown_intr(dev, txr->res, txr->tag);
2658			txr->tag = NULL;
2659		}
2660		if (txr->res != NULL)
2661			bus_release_resource(dev, SYS_RES_IRQ,
2662			    rid, txr->res);
2663		rid = rxr->msix +1;
2664		if (rxr->tag != NULL) {
2665			bus_teardown_intr(dev, rxr->res, rxr->tag);
2666			rxr->tag = NULL;
2667		}
2668		if (rxr->res != NULL)
2669			bus_release_resource(dev, SYS_RES_IRQ,
2670			    rid, rxr->res);
2671	}
2672
2673	if (adapter->linkvec) /* we are doing MSIX */
2674		rid = adapter->linkvec + 1;
2675	else
2676		rid = (adapter->msix != 0) ? 1 : 0;
2677
2678	if (adapter->tag != NULL) {
2679		bus_teardown_intr(dev, adapter->res, adapter->tag);
2680		adapter->tag = NULL;
2681	}
2682
2683	if (adapter->res != NULL)
2684		bus_release_resource(dev, SYS_RES_IRQ, rid, adapter->res);
2685
2686
2687	if (adapter->msix)
2688		pci_release_msi(dev);
2689
2690	if (adapter->msix_mem != NULL)
2691		bus_release_resource(dev, SYS_RES_MEMORY,
2692		    PCIR_BAR(EM_MSIX_BAR), adapter->msix_mem);
2693
2694	if (adapter->memory != NULL)
2695		bus_release_resource(dev, SYS_RES_MEMORY,
2696		    PCIR_BAR(0), adapter->memory);
2697
2698	if (adapter->flash != NULL)
2699		bus_release_resource(dev, SYS_RES_MEMORY,
2700		    EM_FLASH, adapter->flash);
2701}
2702
2703/*
2704 * Setup MSI or MSI/X
2705 */
2706static int
2707em_setup_msix(struct adapter *adapter)
2708{
2709	device_t dev = adapter->dev;
2710	int val = 0;
2711
2712	/*
2713	** Setup MSI/X for Hartwell: tests have shown
2714	** use of two queues to be unstable, and to
2715	** provide no great gain anyway, so we simply
2716	** separate the interrupts and use a single queue.
2717	*/
2718	if ((adapter->hw.mac.type == e1000_82574) &&
2719	    (em_enable_msix == TRUE)) {
2720		/* Map the MSIX BAR */
2721		int rid = PCIR_BAR(EM_MSIX_BAR);
2722		adapter->msix_mem = bus_alloc_resource_any(dev,
2723		    SYS_RES_MEMORY, &rid, RF_ACTIVE);
2724		if (!adapter->msix_mem) {
2725			/* May not be enabled */
2726			device_printf(adapter->dev,
2727			    "Unable to map MSIX table\n");
2728			goto msi;
2729		}
2730		val = pci_msix_count(dev);
2731		/* We only need 3 vectors */
2732		if (val > 3)
2733			val = 3;
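		/*
		 * The three vectors map to one RX queue, one TX queue,
		 * and the link handler (wired up in em_allocate_msix).
		 */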
2734		if ((val != 3) && (val != 5)) {
2735			bus_release_resource(dev, SYS_RES_MEMORY,
2736			    PCIR_BAR(EM_MSIX_BAR), adapter->msix_mem);
2737			adapter->msix_mem = NULL;
2738			device_printf(adapter->dev,
2739			    "MSIX: incorrect vectors, using MSI\n");
2740			goto msi;
2741		}
2742
2743		if (pci_alloc_msix(dev, &val) == 0) {
2744			device_printf(adapter->dev,
2745			    "Using MSIX interrupts "
2746			    "with %d vectors\n", val);
2747		}
2748
2749		return (val);
2750	}
2751msi:
2752	val = pci_msi_count(dev);
2753	if (val == 1 && pci_alloc_msi(dev, &val) == 0) {
2754		adapter->msix = 1;
2755		device_printf(adapter->dev, "Using an MSI interrupt\n");
2756		return (val);
2757	}
2758	/* Should only happen due to manual configuration */
2759	device_printf(adapter->dev,"No MSI/MSIX using a Legacy IRQ\n");
2760	return (0);
2761}
2762
2763
2764/*********************************************************************
2765 *
2766 *  Initialize the hardware to a configuration
2767 *  as specified by the adapter structure.
2768 *
2769 **********************************************************************/
2770static void
2771em_reset(struct adapter *adapter)
2772{
2773	device_t	dev = adapter->dev;
2774	struct ifnet	*ifp = adapter->ifp;
2775	struct e1000_hw	*hw = &adapter->hw;
2776	u16		rx_buffer_size;
2777	u32		pba;
2778
2779	INIT_DEBUGOUT("em_reset: begin");
2780
2781	/* Set up smart power down as default off on newer adapters. */
2782	if (!em_smart_pwr_down && (hw->mac.type == e1000_82571 ||
2783	    hw->mac.type == e1000_82572)) {
2784		u16 phy_tmp = 0;
2785
2786		/* Speed up time to link by disabling smart power down. */
2787		e1000_read_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, &phy_tmp);
2788		phy_tmp &= ~IGP02E1000_PM_SPD;
2789		e1000_write_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, phy_tmp);
2790	}
2791
2792	/*
2793	 * Packet Buffer Allocation (PBA)
2794	 * Writing PBA sets the receive portion of the buffer
2795	 * the remainder is used for the transmit buffer.
2796	 */
2797	switch (hw->mac.type) {
2798	/* Total Packet Buffer on these is 48K */
2799	case e1000_82571:
2800	case e1000_82572:
2801	case e1000_80003es2lan:
2802		pba = E1000_PBA_32K; /* 32K for Rx, 16K for Tx */
2803		break;
2804	case e1000_82573: /* 82573: Total Packet Buffer is 32K */
2805		pba = E1000_PBA_12K; /* 12K for Rx, 20K for Tx */
2806		break;
2807	case e1000_82574:
2808	case e1000_82583:
2809		pba = E1000_PBA_20K; /* 20K for Rx, 20K for Tx */
2810		break;
2811	case e1000_ich8lan:
2812		pba = E1000_PBA_8K;
2813		break;
2814	case e1000_ich9lan:
2815	case e1000_ich10lan:
2816		/* Boost Receive side for jumbo frames */
2817		if (adapter->max_frame_size > 4096)
2818			pba = E1000_PBA_14K;
2819		else
2820			pba = E1000_PBA_10K;
2821		break;
2822	case e1000_pchlan:
2823	case e1000_pch2lan:
2824		pba = E1000_PBA_26K;
2825		break;
2826	default:
2827		if (adapter->max_frame_size > 8192)
2828			pba = E1000_PBA_40K; /* 40K for Rx, 24K for Tx */
2829		else
2830			pba = E1000_PBA_48K; /* 48K for Rx, 16K for Tx */
2831	}
2832	E1000_WRITE_REG(&adapter->hw, E1000_PBA, pba);
2833
2834	/*
2835	 * These parameters control the automatic generation (Tx) and
2836	 * response (Rx) to Ethernet PAUSE frames.
2837	 * - High water mark should allow for at least two frames to be
2838	 *   received after sending an XOFF.
2839	 * - Low water mark works best when it is very near the high water mark.
2840	 *   This allows the receiver to restart by sending XON when it has
2841	 *   drained a bit. Here we use an arbitrary value of 1500 which will
2842	 *   restart after one full frame is pulled from the buffer. There
2843	 *   could be several smaller frames in the buffer and if so they will
2844	 *   not trigger the XON until their total number reduces the buffer
2845	 *   by 1500.
2846	 * - The pause time is fairly large at 1000 x 512ns = 512 usec.
2847	 */
2848	rx_buffer_size = ((E1000_READ_REG(hw, E1000_PBA) & 0xffff) << 10);
2849	hw->fc.high_water = rx_buffer_size -
2850	    roundup2(adapter->max_frame_size, 1024);
2851	hw->fc.low_water = hw->fc.high_water - 1500;
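	/*
	 * Worked example (hypothetical: a 20K RX PBA and a standard
	 * 1518-byte max frame): rx_buffer_size = 20 * 1024 = 20480 and
	 * roundup2(1518, 1024) = 2048, giving high_water = 18432 and
	 * low_water = 16932.
	 */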
2852
2853	if (adapter->fc) /* locally set flow control value? */
2854		hw->fc.requested_mode = adapter->fc;
2855	else
2856		hw->fc.requested_mode = e1000_fc_full;
2857
2858	if (hw->mac.type == e1000_80003es2lan)
2859		hw->fc.pause_time = 0xFFFF;
2860	else
2861		hw->fc.pause_time = EM_FC_PAUSE_TIME;
2862
2863	hw->fc.send_xon = TRUE;
2864
2865	/* Device specific overrides/settings */
2866	switch (hw->mac.type) {
2867	case e1000_pchlan:
2868		/* Workaround: no TX flow ctrl for PCH */
2869		hw->fc.requested_mode = e1000_fc_rx_pause;
2870		hw->fc.pause_time = 0xFFFF; /* override */
2871		if (ifp->if_mtu > ETHERMTU) {
2872			hw->fc.high_water = 0x3500;
2873			hw->fc.low_water = 0x1500;
2874		} else {
2875			hw->fc.high_water = 0x5000;
2876			hw->fc.low_water = 0x3000;
2877		}
2878		hw->fc.refresh_time = 0x1000;
2879		break;
2880	case e1000_pch2lan:
2881		hw->fc.high_water = 0x5C20;
2882		hw->fc.low_water = 0x5048;
2883		hw->fc.pause_time = 0x0650;
2884		hw->fc.refresh_time = 0x0400;
2885		/* Jumbos need adjusted PBA */
2886		if (ifp->if_mtu > ETHERMTU)
2887			E1000_WRITE_REG(hw, E1000_PBA, 12);
2888		else
2889			E1000_WRITE_REG(hw, E1000_PBA, 26);
2890		break;
2891	case e1000_ich9lan:
2892	case e1000_ich10lan:
2893		if (ifp->if_mtu > ETHERMTU) {
2894			hw->fc.high_water = 0x2800;
2895			hw->fc.low_water = hw->fc.high_water - 8;
2896			break;
2897		}
2898		/* else fall through */
2899	default:
2900		if (hw->mac.type == e1000_80003es2lan)
2901			hw->fc.pause_time = 0xFFFF;
2902		break;
2903	}
2904
2905	/* Issue a global reset */
2906	e1000_reset_hw(hw);
2907	E1000_WRITE_REG(hw, E1000_WUC, 0);
2908	em_disable_aspm(adapter);
2909	/* and a re-init */
2910	if (e1000_init_hw(hw) < 0) {
2911		device_printf(dev, "Hardware Initialization Failed\n");
2912		return;
2913	}
2914
2915	E1000_WRITE_REG(hw, E1000_VET, ETHERTYPE_VLAN);
2916	e1000_get_phy_info(hw);
2917	e1000_check_for_link(hw);
2918	return;
2919}
2920
2921/*********************************************************************
2922 *
2923 *  Setup networking device structure and register an interface.
2924 *
2925 **********************************************************************/
2926static int
2927em_setup_interface(device_t dev, struct adapter *adapter)
2928{
2929	struct ifnet   *ifp;
2930
2931	INIT_DEBUGOUT("em_setup_interface: begin");
2932
2933	ifp = adapter->ifp = if_alloc(IFT_ETHER);
2934	if (ifp == NULL) {
2935		device_printf(dev, "can not allocate ifnet structure\n");
2936		return (-1);
2937	}
2938	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
2939	ifp->if_init =  em_init;
2940	ifp->if_softc = adapter;
2941	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
2942	ifp->if_ioctl = em_ioctl;
2943#ifdef EM_MULTIQUEUE
2944	/* Multiqueue stack interface */
2945	ifp->if_transmit = em_mq_start;
2946	ifp->if_qflush = em_qflush;
2947#else
2948	ifp->if_start = em_start;
2949	IFQ_SET_MAXLEN(&ifp->if_snd, adapter->num_tx_desc - 1);
2950	ifp->if_snd.ifq_drv_maxlen = adapter->num_tx_desc - 1;
2951	IFQ_SET_READY(&ifp->if_snd);
2952#endif
2953
2954	ether_ifattach(ifp, adapter->hw.mac.addr);
2955
2956	ifp->if_capabilities = ifp->if_capenable = 0;
2957
2958
2959	ifp->if_capabilities |= IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM;
2960	ifp->if_capabilities |= IFCAP_TSO4;
2961	/*
2962	 * Tell the upper layer(s) we
2963	 * support full VLAN capability
2964	 */
2965	ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);
2966	ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING
2967			     |  IFCAP_VLAN_HWTSO
2968			     |  IFCAP_VLAN_MTU;
2969	ifp->if_capenable = ifp->if_capabilities;
2970
2971	/*
2972	** Don't turn this on by default: if vlans are
2973	** created on another pseudo device (e.g. lagg),
2974	** then vlan events are not passed through, breaking
2975	** operation, but with HW FILTER off it works. If
2976	** using vlans directly on the em driver you can
2977	** enable this and get full hardware tag filtering.
2978	*/
2979	ifp->if_capabilities |= IFCAP_VLAN_HWFILTER;
2980
2981#ifdef DEVICE_POLLING
2982	ifp->if_capabilities |= IFCAP_POLLING;
2983#endif
2984
2985	/* Enable only WOL MAGIC by default */
2986	if (adapter->wol) {
2987		ifp->if_capabilities |= IFCAP_WOL;
2988		ifp->if_capenable |= IFCAP_WOL_MAGIC;
2989	}
2990
2991	/*
2992	 * Specify the media types supported by this adapter and register
2993	 * callbacks to update media and link information
2994	 */
2995	ifmedia_init(&adapter->media, IFM_IMASK,
2996	    em_media_change, em_media_status);
2997	if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
2998	    (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
2999		u_char fiber_type = IFM_1000_SX;	/* default type */
3000
3001		ifmedia_add(&adapter->media, IFM_ETHER | fiber_type | IFM_FDX,
3002			    0, NULL);
3003		ifmedia_add(&adapter->media, IFM_ETHER | fiber_type, 0, NULL);
3004	} else {
3005		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T, 0, NULL);
3006		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T | IFM_FDX,
3007			    0, NULL);
3008		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX,
3009			    0, NULL);
3010		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
3011			    0, NULL);
3012		if (adapter->hw.phy.type != e1000_phy_ife) {
3013			ifmedia_add(&adapter->media,
3014				IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
3015			ifmedia_add(&adapter->media,
3016				IFM_ETHER | IFM_1000_T, 0, NULL);
3017		}
3018	}
3019	ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
3020	ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
3021	return (0);
3022}
3023
3024
3025/*
3026 * Manage DMA'able memory.
3027 */
3028static void
3029em_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
3030{
3031	if (error)
3032		return;
3033	*(bus_addr_t *) arg = segs[0].ds_addr;
3034}
3035
3036static int
3037em_dma_malloc(struct adapter *adapter, bus_size_t size,
3038        struct em_dma_alloc *dma, int mapflags)
3039{
3040	int error;
3041
3042	error = bus_dma_tag_create(bus_get_dma_tag(adapter->dev), /* parent */
3043				EM_DBA_ALIGN, 0,	/* alignment, bounds */
3044				BUS_SPACE_MAXADDR,	/* lowaddr */
3045				BUS_SPACE_MAXADDR,	/* highaddr */
3046				NULL, NULL,		/* filter, filterarg */
3047				size,			/* maxsize */
3048				1,			/* nsegments */
3049				size,			/* maxsegsize */
3050				0,			/* flags */
3051				NULL,			/* lockfunc */
3052				NULL,			/* lockarg */
3053				&dma->dma_tag);
3054	if (error) {
3055		device_printf(adapter->dev,
3056		    "%s: bus_dma_tag_create failed: %d\n",
3057		    __func__, error);
3058		goto fail_0;
3059	}
3060
3061	error = bus_dmamem_alloc(dma->dma_tag, (void**) &dma->dma_vaddr,
3062	    BUS_DMA_NOWAIT | BUS_DMA_COHERENT, &dma->dma_map);
3063	if (error) {
3064		device_printf(adapter->dev,
3065		    "%s: bus_dmamem_alloc(%ju) failed: %d\n",
3066		    __func__, (uintmax_t)size, error);
3067		goto fail_2;
3068	}
3069
3070	dma->dma_paddr = 0;
3071	error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
3072	    size, em_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT);
3073	if (error || dma->dma_paddr == 0) {
3074		device_printf(adapter->dev,
3075		    "%s: bus_dmamap_load failed: %d\n",
3076		    __func__, error);
3077		goto fail_3;
3078	}
3079
3080	return (0);
3081
3082fail_3:
3083	bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3084fail_2:
3085	bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
3086	bus_dma_tag_destroy(dma->dma_tag);
3087fail_0:
3088	dma->dma_map = NULL;
3089	dma->dma_tag = NULL;
3090
3091	return (error);
3092}
3093
3094static void
3095em_dma_free(struct adapter *adapter, struct em_dma_alloc *dma)
3096{
3097	if (dma->dma_tag == NULL)
3098		return;
3099	if (dma->dma_map != NULL) {
3100		bus_dmamap_sync(dma->dma_tag, dma->dma_map,
3101		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
3102		bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3103		bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
3104		dma->dma_map = NULL;
3105	}
3106	bus_dma_tag_destroy(dma->dma_tag);
3107	dma->dma_tag = NULL;
3108}
3109
3110
3111/*********************************************************************
3112 *
3113 *  Allocate memory for the transmit and receive rings, and then
3114 *  the descriptors associated with each, called only once at attach.
3115 *
3116 **********************************************************************/
3117static int
3118em_allocate_queues(struct adapter *adapter)
3119{
3120	device_t		dev = adapter->dev;
3121	struct tx_ring		*txr = NULL;
3122	struct rx_ring		*rxr = NULL;
3123	int rsize, tsize, error = E1000_SUCCESS;
3124	int txconf = 0, rxconf = 0;
3125
3126
3127	/* Allocate the TX ring struct memory */
3128	if (!(adapter->tx_rings =
3129	    (struct tx_ring *) malloc(sizeof(struct tx_ring) *
3130	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3131		device_printf(dev, "Unable to allocate TX ring memory\n");
3132		error = ENOMEM;
3133		goto fail;
3134	}
3135
3136	/* Now allocate the RX */
3137	if (!(adapter->rx_rings =
3138	    (struct rx_ring *) malloc(sizeof(struct rx_ring) *
3139	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3140		device_printf(dev, "Unable to allocate RX ring memory\n");
3141		error = ENOMEM;
3142		goto rx_fail;
3143	}
3144
3145	tsize = roundup2(adapter->num_tx_desc *
3146	    sizeof(struct e1000_tx_desc), EM_DBA_ALIGN);
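	/*
	 * A legacy TX descriptor is 16 bytes, so e.g. 1024 descriptors
	 * need 16KB, rounded up here to the EM_DBA_ALIGN boundary the
	 * descriptor base registers require.
	 */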
3147	/*
3148	 * Now set up the TX queues, txconf is needed to handle the
3149	 * possibility that things fail midcourse and we need to
3150	 * undo memory gracefully
3151	 */
3152	for (int i = 0; i < adapter->num_queues; i++, txconf++) {
3153		/* Set up some basics */
3154		txr = &adapter->tx_rings[i];
3155		txr->adapter = adapter;
3156		txr->me = i;
3157
3158		/* Initialize the TX lock */
3159		snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
3160		    device_get_nameunit(dev), txr->me);
3161		mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF);
3162
3163		if (em_dma_malloc(adapter, tsize,
3164			&txr->txdma, BUS_DMA_NOWAIT)) {
3165			device_printf(dev,
3166			    "Unable to allocate TX Descriptor memory\n");
3167			error = ENOMEM;
3168			goto err_tx_desc;
3169		}
3170		txr->tx_base = (struct e1000_tx_desc *)txr->txdma.dma_vaddr;
3171		bzero((void *)txr->tx_base, tsize);
3172
3173		if (em_allocate_transmit_buffers(txr)) {
3174			device_printf(dev,
3175			    "Critical Failure setting up transmit buffers\n");
3176			error = ENOMEM;
3177			goto err_tx_desc;
3178		}
3179#if __FreeBSD_version >= 800000
3180		/* Allocate a buf ring */
3181		txr->br = buf_ring_alloc(4096, M_DEVBUF,
3182		    M_WAITOK, &txr->tx_mtx);
3183#endif
3184	}
3185
3186	/*
3187	 * Next the RX queues...
3188	 */
3189	rsize = roundup2(adapter->num_rx_desc *
3190	    sizeof(struct e1000_rx_desc), EM_DBA_ALIGN);
3191	for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
3192		rxr = &adapter->rx_rings[i];
3193		rxr->adapter = adapter;
3194		rxr->me = i;
3195
3196		/* Initialize the RX lock */
3197		snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
3198		    device_get_nameunit(dev), rxr->me);
3199		mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF);
3200
3201		if (em_dma_malloc(adapter, rsize,
3202			&rxr->rxdma, BUS_DMA_NOWAIT)) {
3203			device_printf(dev,
3204			    "Unable to allocate RxDescriptor memory\n");
3205			error = ENOMEM;
3206			goto err_rx_desc;
3207		}
3208		rxr->rx_base = (struct e1000_rx_desc *)rxr->rxdma.dma_vaddr;
3209		bzero((void *)rxr->rx_base, rsize);
3210
3211		/* Allocate receive buffers for the ring */
3212		if (em_allocate_receive_buffers(rxr)) {
3213			device_printf(dev,
3214			    "Critical Failure setting up receive buffers\n");
3215			error = ENOMEM;
3216			goto err_rx_desc;
3217		}
3218	}
3219
3220	return (0);
3221
3222err_rx_desc:
3223	for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
3224		em_dma_free(adapter, &rxr->rxdma);
3225err_tx_desc:
3226	for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
3227		em_dma_free(adapter, &txr->txdma);
3228	free(adapter->rx_rings, M_DEVBUF);
3229rx_fail:
3230#if __FreeBSD_version >= 800000
3231	buf_ring_free(txr->br, M_DEVBUF);
3232#endif
3233	free(adapter->tx_rings, M_DEVBUF);
3234fail:
3235	return (error);
3236}
3237
3238
3239/*********************************************************************
3240 *
3241 *  Allocate memory for tx_buffer structures. The tx_buffer stores all
3242 *  the information needed to transmit a packet on the wire. This is
3243 *  called only once at attach, setup is done every reset.
3244 *
3245 **********************************************************************/
3246static int
3247em_allocate_transmit_buffers(struct tx_ring *txr)
3248{
3249	struct adapter *adapter = txr->adapter;
3250	device_t dev = adapter->dev;
3251	struct em_buffer *txbuf;
3252	int error, i;
3253
3254	/*
3255	 * Setup DMA descriptor areas.
3256	 */
3257	if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
3258			       1, 0,			/* alignment, bounds */
3259			       BUS_SPACE_MAXADDR,	/* lowaddr */
3260			       BUS_SPACE_MAXADDR,	/* highaddr */
3261			       NULL, NULL,		/* filter, filterarg */
3262			       EM_TSO_SIZE,		/* maxsize */
3263			       EM_MAX_SCATTER,		/* nsegments */
3264			       PAGE_SIZE,		/* maxsegsize */
3265			       0,			/* flags */
3266			       NULL,			/* lockfunc */
3267			       NULL,			/* lockfuncarg */
3268			       &txr->txtag))) {
3269		device_printf(dev,"Unable to allocate TX DMA tag\n");
3270		goto fail;
3271	}
3272
3273	if (!(txr->tx_buffers =
3274	    (struct em_buffer *) malloc(sizeof(struct em_buffer) *
3275	    adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3276		device_printf(dev, "Unable to allocate tx_buffer memory\n");
3277		error = ENOMEM;
3278		goto fail;
3279	}
3280
3281        /* Create the descriptor buffer dma maps */
3282	txbuf = txr->tx_buffers;
3283	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3284		error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
3285		if (error != 0) {
3286			device_printf(dev, "Unable to create TX DMA map\n");
3287			goto fail;
3288		}
3289	}
3290
3291	return 0;
3292fail:
3293	/* Free everything; this handles the case where we failed midway. */
3294	em_free_transmit_structures(adapter);
3295	return (error);
3296}
3297
3298/*********************************************************************
3299 *
3300 *  Initialize a transmit ring.
3301 *
3302 **********************************************************************/
3303static void
3304em_setup_transmit_ring(struct tx_ring *txr)
3305{
3306	struct adapter *adapter = txr->adapter;
3307	struct em_buffer *txbuf;
3308	int i;
3309#ifdef DEV_NETMAP
3310	struct netmap_adapter *na = NA(adapter->ifp);
3311	struct netmap_slot *slot;
3312#endif /* DEV_NETMAP */
3313
3314	/* Clear the old descriptor contents */
3315	EM_TX_LOCK(txr);
3316#ifdef DEV_NETMAP
3317	slot = netmap_reset(na, NR_TX, txr->me, 0);
3318#endif /* DEV_NETMAP */
3319
3320	bzero((void *)txr->tx_base,
3321	      (sizeof(struct e1000_tx_desc)) * adapter->num_tx_desc);
3322	/* Reset indices */
3323	txr->next_avail_desc = 0;
3324	txr->next_to_clean = 0;
3325
3326	/* Free any existing tx buffers. */
3327	txbuf = txr->tx_buffers;
3328	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3329		if (txbuf->m_head != NULL) {
3330			bus_dmamap_sync(txr->txtag, txbuf->map,
3331			    BUS_DMASYNC_POSTWRITE);
3332			bus_dmamap_unload(txr->txtag, txbuf->map);
3333			m_freem(txbuf->m_head);
3334			txbuf->m_head = NULL;
3335		}
3336#ifdef DEV_NETMAP
3337		if (slot) {
3338			int si = netmap_idx_n2k(&na->tx_rings[txr->me], i);
3339			uint64_t paddr;
3340			void *addr;
3341
3342			addr = PNMB(slot + si, &paddr);
3343			txr->tx_base[i].buffer_addr = htole64(paddr);
3344			/* reload the map for netmap mode */
3345			netmap_load_map(txr->txtag, txbuf->map, addr);
3346		}
3347#endif /* DEV_NETMAP */
3348
3349		/* clear the watch index */
3350		txbuf->next_eop = -1;
3351	}
3352
3353	/* Set number of descriptors available */
3354	txr->tx_avail = adapter->num_tx_desc;
3355	txr->queue_status = EM_QUEUE_IDLE;
3356
3357	/* Clear checksum offload context. */
3358	txr->last_hw_offload = 0;
3359	txr->last_hw_ipcss = 0;
3360	txr->last_hw_ipcso = 0;
3361	txr->last_hw_tucss = 0;
3362	txr->last_hw_tucso = 0;
3363
3364	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3365	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3366	EM_TX_UNLOCK(txr);
3367}
3368
3369/*********************************************************************
3370 *
3371 *  Initialize all transmit rings.
3372 *
3373 **********************************************************************/
3374static void
3375em_setup_transmit_structures(struct adapter *adapter)
3376{
3377	struct tx_ring *txr = adapter->tx_rings;
3378
3379	for (int i = 0; i < adapter->num_queues; i++, txr++)
3380		em_setup_transmit_ring(txr);
3381
3382	return;
3383}
3384
3385/*********************************************************************
3386 *
3387 *  Enable transmit unit.
3388 *
3389 **********************************************************************/
3390static void
3391em_initialize_transmit_unit(struct adapter *adapter)
3392{
3393	struct tx_ring	*txr = adapter->tx_rings;
3394	struct e1000_hw	*hw = &adapter->hw;
3395	u32	tctl, tarc, tipg = 0;
3396
3397	INIT_DEBUGOUT("em_initialize_transmit_unit: begin");
3398
3399	for (int i = 0; i < adapter->num_queues; i++, txr++) {
3400		u64 bus_addr = txr->txdma.dma_paddr;
3401		/* Base and Len of TX Ring */
3402		E1000_WRITE_REG(hw, E1000_TDLEN(i),
3403	    	    adapter->num_tx_desc * sizeof(struct e1000_tx_desc));
3404		E1000_WRITE_REG(hw, E1000_TDBAH(i),
3405	    	    (u32)(bus_addr >> 32));
3406		E1000_WRITE_REG(hw, E1000_TDBAL(i),
3407	    	    (u32)bus_addr);
3408		/* Init the HEAD/TAIL indices */
3409		E1000_WRITE_REG(hw, E1000_TDT(i), 0);
3410		E1000_WRITE_REG(hw, E1000_TDH(i), 0);
3411
3412		HW_DEBUGOUT2("Base = %x, Length = %x\n",
3413		    E1000_READ_REG(&adapter->hw, E1000_TDBAL(i)),
3414		    E1000_READ_REG(&adapter->hw, E1000_TDLEN(i)));
3415
3416		txr->queue_status = EM_QUEUE_IDLE;
3417	}
3418
3419	/* Set the default values for the Tx Inter Packet Gap timer */
3420	switch (adapter->hw.mac.type) {
3421	case e1000_80003es2lan:
3422		tipg = DEFAULT_82543_TIPG_IPGR1;
3423		tipg |= DEFAULT_80003ES2LAN_TIPG_IPGR2 <<
3424		    E1000_TIPG_IPGR2_SHIFT;
3425		break;
3426	default:
3427		if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
3428		    (adapter->hw.phy.media_type ==
3429		    e1000_media_type_internal_serdes))
3430			tipg = DEFAULT_82543_TIPG_IPGT_FIBER;
3431		else
3432			tipg = DEFAULT_82543_TIPG_IPGT_COPPER;
3433		tipg |= DEFAULT_82543_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT;
3434		tipg |= DEFAULT_82543_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT;
3435	}
3436
3437	E1000_WRITE_REG(&adapter->hw, E1000_TIPG, tipg);
3438	E1000_WRITE_REG(&adapter->hw, E1000_TIDV, adapter->tx_int_delay.value);
3439
3440	if (adapter->hw.mac.type >= e1000_82540)
3441		E1000_WRITE_REG(&adapter->hw, E1000_TADV,
3442		    adapter->tx_abs_int_delay.value);
3443
3444	if ((adapter->hw.mac.type == e1000_82571) ||
3445	    (adapter->hw.mac.type == e1000_82572)) {
3446		tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
3447		tarc |= SPEED_MODE_BIT;
3448		E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3449	} else if (adapter->hw.mac.type == e1000_80003es2lan) {
3450		tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
3451		tarc |= 1;
3452		E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3453		tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(1));
3454		tarc |= 1;
3455		E1000_WRITE_REG(&adapter->hw, E1000_TARC(1), tarc);
3456	}
3457
3458	adapter->txd_cmd = E1000_TXD_CMD_IFCS;
3459	if (adapter->tx_int_delay.value > 0)
3460		adapter->txd_cmd |= E1000_TXD_CMD_IDE;
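	/*
	 * With IDE set in the descriptor command, transmit write-back
	 * interrupts are delayed per the TIDV/TADV timers programmed
	 * above instead of firing immediately.
	 */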
3461
3462	/* Program the Transmit Control Register */
3463	tctl = E1000_READ_REG(&adapter->hw, E1000_TCTL);
3464	tctl &= ~E1000_TCTL_CT;
3465	tctl |= (E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN |
3466		   (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT));
3467
3468	if (adapter->hw.mac.type >= e1000_82571)
3469		tctl |= E1000_TCTL_MULR;
3470
3471	/* This write will effectively turn on the transmit unit. */
3472	E1000_WRITE_REG(&adapter->hw, E1000_TCTL, tctl);
3473
3474}
3475
3476
3477/*********************************************************************
3478 *
3479 *  Free all transmit rings.
3480 *
3481 **********************************************************************/
3482static void
3483em_free_transmit_structures(struct adapter *adapter)
3484{
3485	struct tx_ring *txr = adapter->tx_rings;
3486
3487	for (int i = 0; i < adapter->num_queues; i++, txr++) {
3488		EM_TX_LOCK(txr);
3489		em_free_transmit_buffers(txr);
3490		em_dma_free(adapter, &txr->txdma);
3491		EM_TX_UNLOCK(txr);
3492		EM_TX_LOCK_DESTROY(txr);
3493	}
3494
3495	free(adapter->tx_rings, M_DEVBUF);
3496}
3497
3498/*********************************************************************
3499 *
3500 *  Free transmit ring related data structures.
3501 *
3502 **********************************************************************/
3503static void
3504em_free_transmit_buffers(struct tx_ring *txr)
3505{
3506	struct adapter		*adapter = txr->adapter;
3507	struct em_buffer	*txbuf;
3508
3509	INIT_DEBUGOUT("free_transmit_ring: begin");
3510
3511	if (txr->tx_buffers == NULL)
3512		return;
3513
3514	for (int i = 0; i < adapter->num_tx_desc; i++) {
3515		txbuf = &txr->tx_buffers[i];
3516		if (txbuf->m_head != NULL) {
3517			bus_dmamap_sync(txr->txtag, txbuf->map,
3518			    BUS_DMASYNC_POSTWRITE);
3519			bus_dmamap_unload(txr->txtag,
3520			    txbuf->map);
3521			m_freem(txbuf->m_head);
3522			txbuf->m_head = NULL;
3523			if (txbuf->map != NULL) {
3524				bus_dmamap_destroy(txr->txtag,
3525				    txbuf->map);
3526				txbuf->map = NULL;
3527			}
3528		} else if (txbuf->map != NULL) {
3529			bus_dmamap_unload(txr->txtag,
3530			    txbuf->map);
3531			bus_dmamap_destroy(txr->txtag,
3532			    txbuf->map);
3533			txbuf->map = NULL;
3534		}
3535	}
3536#if __FreeBSD_version >= 800000
3537	if (txr->br != NULL)
3538		buf_ring_free(txr->br, M_DEVBUF);
3539#endif
3540	if (txr->tx_buffers != NULL) {
3541		free(txr->tx_buffers, M_DEVBUF);
3542		txr->tx_buffers = NULL;
3543	}
3544	if (txr->txtag != NULL) {
3545		bus_dma_tag_destroy(txr->txtag);
3546		txr->txtag = NULL;
3547	}
3548	return;
3549}
3550
3551
3552/*********************************************************************
3553 *  The offload context is protocol specific (TCP/UDP) and thus
3554 *  only needs to be set when the protocol changes. A context
3555 *  change can be a performance detriment, and might be better
3556 *  just disabled. The reason arises in the way the controller
3557 *  supports pipelined requests from the Tx data DMA. Up to four
3558 *  requests can be pipelined, and they may belong to the same
3559 *  packet or to multiple packets. However, all requests for one
3560 *  packet are issued before a request is issued for a subsequent
3561 *  packet, and if a request for the next packet requires a
3562 *  context change, that request will be stalled until the
3563 *  previous request completes. This means that setting up a new
3564 *  context effectively disables pipelined Tx data DMA, which in
3565 *  turn greatly slows down performance when sending small sized
3566 *  frames.
3567 **********************************************************************/
3568static void
3569em_transmit_checksum_setup(struct tx_ring *txr, struct mbuf *mp, int ip_off,
3570    struct ip *ip, u32 *txd_upper, u32 *txd_lower)
3571{
3572	struct adapter			*adapter = txr->adapter;
3573	struct e1000_context_desc	*TXD = NULL;
3574	struct em_buffer		*tx_buffer;
3575	int				cur, hdr_len;
3576	u32				cmd = 0;
3577	u16				offload = 0;
3578	u8				ipcso, ipcss, tucso, tucss;
3579
3580	ipcss = ipcso = tucss = tucso = 0;
3581	hdr_len = ip_off + (ip->ip_hl << 2);
3582	cur = txr->next_avail_desc;
3583
3584	/* Setup of IP header checksum. */
3585	if (mp->m_pkthdr.csum_flags & CSUM_IP) {
3586		*txd_upper |= E1000_TXD_POPTS_IXSM << 8;
3587		offload |= CSUM_IP;
3588		ipcss = ip_off;
3589		ipcso = ip_off + offsetof(struct ip, ip_sum);
3590		/*
3591		 * Start offset for header checksum calculation.
3592		 * End offset for header checksum calculation.
3593		 * Offset of place to put the checksum.
3594		 */
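		/*
		 * For illustration: on an untagged Ethernet frame with
		 * a 20-byte IP header, ip_off is 14, so ipcss = 14,
		 * ipcso = 24 (14 + offsetof(struct ip, ip_sum)) and
		 * ipcse = hdr_len = 34.
		 */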
3595		TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
3596		TXD->lower_setup.ip_fields.ipcss = ipcss;
3597		TXD->lower_setup.ip_fields.ipcse = htole16(hdr_len);
3598		TXD->lower_setup.ip_fields.ipcso = ipcso;
3599		cmd |= E1000_TXD_CMD_IP;
3600	}
3601
3602	if (mp->m_pkthdr.csum_flags & CSUM_TCP) {
3603 		*txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
3604 		*txd_upper |= E1000_TXD_POPTS_TXSM << 8;
3605 		offload |= CSUM_TCP;
3606 		tucss = hdr_len;
3607 		tucso = hdr_len + offsetof(struct tcphdr, th_sum);
3608 		/*
3609 		 * Setting up a new checksum offload context for every
3610 		 * frame takes a lot of processing time for the hardware.
3611 		 * This also reduces performance a lot for small sized
3612 		 * frames, so avoid it if the driver can reuse a previously
3613 		 * configured checksum offload context.
3614 		 */
3615 		if (txr->last_hw_offload == offload) {
3616 			if (offload & CSUM_IP) {
3617 				if (txr->last_hw_ipcss == ipcss &&
3618 				    txr->last_hw_ipcso == ipcso &&
3619 				    txr->last_hw_tucss == tucss &&
3620 				    txr->last_hw_tucso == tucso)
3621 					return;
3622 			} else {
3623 				if (txr->last_hw_tucss == tucss &&
3624 				    txr->last_hw_tucso == tucso)
3625 					return;
3626 			}
3627  		}
3628 		txr->last_hw_offload = offload;
3629 		txr->last_hw_tucss = tucss;
3630 		txr->last_hw_tucso = tucso;
3631 		/*
3632 		 * Start offset for payload checksum calculation.
3633 		 * End offset for payload checksum calculation.
3634 		 * Offset of place to put the checksum.
3635 		 */
3636		TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
3637 		TXD->upper_setup.tcp_fields.tucss = hdr_len;
3638 		TXD->upper_setup.tcp_fields.tucse = htole16(0);
3639 		TXD->upper_setup.tcp_fields.tucso = tucso;
3640 		cmd |= E1000_TXD_CMD_TCP;
3641 	} else if (mp->m_pkthdr.csum_flags & CSUM_UDP) {
3642 		*txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
3643 		*txd_upper |= E1000_TXD_POPTS_TXSM << 8;
3644 		tucss = hdr_len;
3645 		tucso = hdr_len + offsetof(struct udphdr, uh_sum);
 		offload |= CSUM_UDP;
3646 		/*
3647 		 * Setting up a new checksum offload context for every
3648 		 * frame takes a lot of processing time for the hardware.
3649 		 * This also reduces performance a lot for small sized
3650 		 * frames, so avoid it if the driver can reuse a previously
3651 		 * configured checksum offload context.
3652 		 */
3653 		if (txr->last_hw_offload == offload) {
3654 			if (offload & CSUM_IP) {
3655 				if (txr->last_hw_ipcss == ipcss &&
3656 				    txr->last_hw_ipcso == ipcso &&
3657 				    txr->last_hw_tucss == tucss &&
3658 				    txr->last_hw_tucso == tucso)
3659 					return;
3660 			} else {
3661 				if (txr->last_hw_tucss == tucss &&
3662 				    txr->last_hw_tucso == tucso)
3663 					return;
3664 			}
3665 		}
3666 		txr->last_hw_offload = offload;
3667 		txr->last_hw_tucss = tucss;
3668 		txr->last_hw_tucso = tucso;
3669 		/*
3670 		 * Start offset for payload checksum calculation.
3671 		 * End offset for payload checksum calculation.
3672 		 * Offset of place to put the checksum.
3673 		 */
3674		TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
3675 		TXD->upper_setup.tcp_fields.tucss = tucss;
3676 		TXD->upper_setup.tcp_fields.tucse = htole16(0);
3677 		TXD->upper_setup.tcp_fields.tucso = tucso;
3678  	}
3679
3680 	if (offload & CSUM_IP) {
3681 		txr->last_hw_ipcss = ipcss;
3682 		txr->last_hw_ipcso = ipcso;
3683  	}
3684
3685	TXD->tcp_seg_setup.data = htole32(0);
3686	TXD->cmd_and_length =
3687	    htole32(adapter->txd_cmd | E1000_TXD_CMD_DEXT | cmd);
3688	tx_buffer = &txr->tx_buffers[cur];
3689	tx_buffer->m_head = NULL;
3690	tx_buffer->next_eop = -1;
3691
3692	if (++cur == adapter->num_tx_desc)
3693		cur = 0;
3694
3695	txr->tx_avail--;
3696	txr->next_avail_desc = cur;
3697}
3698
3699
3700/**********************************************************************
3701 *
3702 *  Setup work for hardware segmentation offload (TSO)
3703 *
3704 **********************************************************************/
3705static void
3706em_tso_setup(struct tx_ring *txr, struct mbuf *mp, int ip_off,
3707    struct ip *ip, struct tcphdr *tp, u32 *txd_upper, u32 *txd_lower)
3708{
3709	struct adapter			*adapter = txr->adapter;
3710	struct e1000_context_desc	*TXD;
3711	struct em_buffer		*tx_buffer;
3712	int cur, hdr_len;
3713
3714	/*
3715	 * In theory we can reuse the same TSO context if and only if
3716	 * the frame is the same type (IP/TCP) and has the same MSS.
3717	 * However, checking whether a frame has the same IP/TCP
3718	 * structure is a hard thing, so just ignore that and always
3719	 * re-establish a new TSO context.
3720	 */
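	/*
	 * For a typical untagged TCP/IPv4 frame with no options this
	 * works out to hdr_len = 14 + 20 + 20 = 54 bytes.
	 */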
3721	hdr_len = ip_off + (ip->ip_hl << 2) + (tp->th_off << 2);
3722	*txd_lower = (E1000_TXD_CMD_DEXT |	/* Extended descr type */
3723		      E1000_TXD_DTYP_D |	/* Data descr type */
3724		      E1000_TXD_CMD_TSE);	/* Do TSE on this packet */
3725
3726	/* IP and/or TCP header checksum calculation and insertion. */
3727	*txd_upper = (E1000_TXD_POPTS_IXSM | E1000_TXD_POPTS_TXSM) << 8;
3728
3729	cur = txr->next_avail_desc;
3730	tx_buffer = &txr->tx_buffers[cur];
3731	TXD = (struct e1000_context_desc *) &txr->tx_base[cur];
3732
3733	/*
3734	 * Start offset for header checksum calculation.
3735	 * End offset for header checksum calculation.
3736	 * Offset of place to put the checksum.
3737	 */
3738	TXD->lower_setup.ip_fields.ipcss = ip_off;
3739	TXD->lower_setup.ip_fields.ipcse =
3740	    htole16(ip_off + (ip->ip_hl << 2) - 1);
3741	TXD->lower_setup.ip_fields.ipcso = ip_off + offsetof(struct ip, ip_sum);
3742	/*
3743	 * Start offset for payload checksum calculation.
3744	 * End offset for payload checksum calculation.
3745	 * Offset of place to put the checksum.
3746	 */
3747	TXD->upper_setup.tcp_fields.tucss = ip_off + (ip->ip_hl << 2);
3748	TXD->upper_setup.tcp_fields.tucse = 0;
3749	TXD->upper_setup.tcp_fields.tucso =
3750	    ip_off + (ip->ip_hl << 2) + offsetof(struct tcphdr, th_sum);
3751	/*
3752	 * Payload size per packet w/o any headers.
3753	 * Length of all headers up to payload.
3754	 */
3755	TXD->tcp_seg_setup.fields.mss = htole16(mp->m_pkthdr.tso_segsz);
3756	TXD->tcp_seg_setup.fields.hdr_len = hdr_len;
3757
3758	TXD->cmd_and_length = htole32(adapter->txd_cmd |
3759				E1000_TXD_CMD_DEXT |	/* Extended descr */
3760				E1000_TXD_CMD_TSE |	/* TSE context */
3761				E1000_TXD_CMD_IP |	/* Do IP csum */
3762				E1000_TXD_CMD_TCP |	/* Do TCP checksum */
3763				(mp->m_pkthdr.len - (hdr_len))); /* Total len */
3764
3765	tx_buffer->m_head = NULL;
3766	tx_buffer->next_eop = -1;
3767
3768	if (++cur == adapter->num_tx_desc)
3769		cur = 0;
3770
3771	txr->tx_avail--;
3772	txr->next_avail_desc = cur;
3773	txr->tx_tso = TRUE;
3774}
3775
3776
3777/**********************************************************************
3778 *
3779 *  Examine each tx_buffer in the used queue. If the hardware is done
3780 *  processing the packet then free associated resources. The
3781 *  tx_buffer is put back on the free queue.
3782 *
3783 **********************************************************************/
3784static void
3785em_txeof(struct tx_ring *txr)
3786{
3787	struct adapter	*adapter = txr->adapter;
3788        int first, last, done, processed;
3789        struct em_buffer *tx_buffer;
3790        struct e1000_tx_desc   *tx_desc, *eop_desc;
3791	struct ifnet   *ifp = adapter->ifp;
3792
3793	EM_TX_LOCK_ASSERT(txr);
3794#ifdef DEV_NETMAP
3795	if (ifp->if_capenable & IFCAP_NETMAP) {
3796		struct netmap_adapter *na = NA(ifp);
3797
3798		selwakeuppri(&na->tx_rings[txr->me].si, PI_NET);
3799		EM_TX_UNLOCK(txr);
3800		EM_CORE_LOCK(adapter);
3801		selwakeuppri(&na->tx_si, PI_NET);
3802		EM_CORE_UNLOCK(adapter);
3803		EM_TX_LOCK(txr);
3804		return;
3805	}
3806#endif /* DEV_NETMAP */
3807
3808	/* No work, make sure watchdog is off */
3809        if (txr->tx_avail == adapter->num_tx_desc) {
3810		txr->queue_status = EM_QUEUE_IDLE;
3811                return;
3812	}
3813
3814	processed = 0;
3815        first = txr->next_to_clean;
3816        tx_desc = &txr->tx_base[first];
3817        tx_buffer = &txr->tx_buffers[first];
3818	last = tx_buffer->next_eop;
3819        eop_desc = &txr->tx_base[last];
3820
3821	/*
3822	 * Get the index of the first descriptor
3823	 * AFTER the EOP of the first packet, so
3824	 * that we can do the simple comparison in
3825	 * the inner while loop below.
3826	 */
3827	if (++last == adapter->num_tx_desc)
3828 		last = 0;
3829	done = last;
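	/*
	 * Example: if the first packet's EOP is descriptor 7, 'done'
	 * becomes 8 and the inner loop below cleans descriptors from
	 * 'first' up to, but not including, 8.
	 */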
3830
3831        bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3832            BUS_DMASYNC_POSTREAD);
3833
3834        while (eop_desc->upper.fields.status & E1000_TXD_STAT_DD) {
3835		/* We clean the range of the packet */
3836		while (first != done) {
3837                	tx_desc->upper.data = 0;
3838                	tx_desc->lower.data = 0;
3839                	tx_desc->buffer_addr = 0;
3840                	++txr->tx_avail;
3841			++processed;
3842
3843			if (tx_buffer->m_head) {
3844				bus_dmamap_sync(txr->txtag,
3845				    tx_buffer->map,
3846				    BUS_DMASYNC_POSTWRITE);
3847				bus_dmamap_unload(txr->txtag,
3848				    tx_buffer->map);
3849                        	m_freem(tx_buffer->m_head);
3850                        	tx_buffer->m_head = NULL;
3851                	}
3852			tx_buffer->next_eop = -1;
3853			txr->watchdog_time = ticks;
3854
3855	                if (++first == adapter->num_tx_desc)
3856				first = 0;
3857
3858	                tx_buffer = &txr->tx_buffers[first];
3859			tx_desc = &txr->tx_base[first];
3860		}
3861		++ifp->if_opackets;
3862		/* See if we can continue to the next packet */
3863		last = tx_buffer->next_eop;
3864		if (last != -1) {
3865        		eop_desc = &txr->tx_base[last];
3866			/* Get new done point */
3867			if (++last == adapter->num_tx_desc)
				last = 0;
3868			done = last;
3869		} else
3870			break;
3871        }
3872        bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3873            BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3874
3875        txr->next_to_clean = first;
3876
3877	/*
3878	** Watchdog calculation: we know there's
3879	** work outstanding or the first return
3880	** would have been taken, so nothing processed
3881	** for too long indicates a hang. The local
3882	** timer will examine this and do a reset if needed.
3883	*/
3884	if ((!processed) && ((ticks - txr->watchdog_time) > EM_WATCHDOG))
3885		txr->queue_status = EM_QUEUE_HUNG;
3886
3887        /*
3888         * If we have a minimum free, clear IFF_DRV_OACTIVE
3889         * to tell the stack that it is OK to send packets.
3890	 * Notice that all writes of OACTIVE happen under the
3891	 * TX lock which, with a single queue, guarantees
3892	 * sanity.
3893         */
3894        if (txr->tx_avail >= EM_MAX_SCATTER)
3895		ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
3896
3897	/* Disable watchdog if all clean */
3898	if (txr->tx_avail == adapter->num_tx_desc) {
3899		txr->queue_status = EM_QUEUE_IDLE;
3900	}
3901}
3902
3903
3904/*********************************************************************
3905 *
3906 *  Refresh RX descriptor mbufs from system mbuf buffer pool.
3907 *
3908 **********************************************************************/
3909static void
3910em_refresh_mbufs(struct rx_ring *rxr, int limit)
3911{
3912	struct adapter		*adapter = rxr->adapter;
3913	struct mbuf		*m;
3914	bus_dma_segment_t	segs[1];
3915	struct em_buffer	*rxbuf;
3916	int			i, j, error, nsegs;
3917	bool			cleaned = FALSE;
3918
3919	i = j = rxr->next_to_refresh;
3920	/*
3921	** Get one descriptor beyond
3922	** our work mark to control
3923	** the loop.
3924	*/
3925	if (++j == adapter->num_rx_desc)
3926		j = 0;
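	/*
	** Example: with next_to_refresh == 255 on a
	** 256-descriptor ring, j wraps to 0 and the
	** loop below stops once j reaches 'limit'.
	*/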
3927
3928	while (j != limit) {
3929		rxbuf = &rxr->rx_buffers[i];
3930		if (rxbuf->m_head == NULL) {
3931			m = m_getjcl(M_DONTWAIT, MT_DATA,
3932			    M_PKTHDR, adapter->rx_mbuf_sz);
3933			/*
3934			** If we have a temporary resource shortage
3935			** that causes a failure, just abort refresh
3936			** for now, we will return to this point when
3937			** reinvoked from em_rxeof.
3938			*/
3939			if (m == NULL)
3940				goto update;
3941		} else
3942			m = rxbuf->m_head;
3943
3944		m->m_len = m->m_pkthdr.len = adapter->rx_mbuf_sz;
3945		m->m_flags |= M_PKTHDR;
3946		m->m_data = m->m_ext.ext_buf;
3947
3948		/* Use bus_dma machinery to setup the memory mapping  */
3949		error = bus_dmamap_load_mbuf_sg(rxr->rxtag, rxbuf->map,
3950		    m, segs, &nsegs, BUS_DMA_NOWAIT);
3951		if (error != 0) {
3952			printf("Refresh mbufs: hdr dmamap load"
3953			    " failure - %d\n", error);
3954			m_free(m);
3955			rxbuf->m_head = NULL;
3956			goto update;
3957		}
3958		rxbuf->m_head = m;
3959		bus_dmamap_sync(rxr->rxtag,
3960		    rxbuf->map, BUS_DMASYNC_PREREAD);
3961		rxr->rx_base[i].buffer_addr = htole64(segs[0].ds_addr);
3962		cleaned = TRUE;
3963
3964		i = j; /* Next is precalculated for us */
3965		rxr->next_to_refresh = i;
3966		/* Calculate next controlling index */
3967		if (++j == adapter->num_rx_desc)
3968			j = 0;
3969	}
3970update:
3971	/*
3972	** Update the tail pointer only if, and
3973	** only as far as, we have refreshed.
3974	*/
3975	if (cleaned)
3976		E1000_WRITE_REG(&adapter->hw,
3977		    E1000_RDT(rxr->me), rxr->next_to_refresh);
3978
3979	return;
3980}
3981
3982
3983/*********************************************************************
3984 *
3985 *  Allocate memory for rx_buffer structures. Since we use one
3986 *  rx_buffer per received packet, the maximum number of rx_buffer's
3987 *  that we'll need is equal to the number of receive descriptors
3988 *  that we've allocated.
3989 *
3990 **********************************************************************/
3991static int
3992em_allocate_receive_buffers(struct rx_ring *rxr)
3993{
3994	struct adapter		*adapter = rxr->adapter;
3995	device_t		dev = adapter->dev;
3996	struct em_buffer	*rxbuf;
3997	int			error;
3998
3999	rxr->rx_buffers = malloc(sizeof(struct em_buffer) *
4000	    adapter->num_rx_desc, M_DEVBUF, M_NOWAIT | M_ZERO);
4001	if (rxr->rx_buffers == NULL) {
4002		device_printf(dev, "Unable to allocate rx_buffer memory\n");
4003		return (ENOMEM);
4004	}
4005
4006	error = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */
4007				1, 0,			/* alignment, bounds */
4008				BUS_SPACE_MAXADDR,	/* lowaddr */
4009				BUS_SPACE_MAXADDR,	/* highaddr */
4010				NULL, NULL,		/* filter, filterarg */
4011				MJUM9BYTES,		/* maxsize */
4012				1,			/* nsegments */
4013				MJUM9BYTES,		/* maxsegsize */
4014				0,			/* flags */
4015				NULL,			/* lockfunc */
4016				NULL,			/* lockarg */
4017				&rxr->rxtag);
4018	if (error) {
4019		device_printf(dev, "%s: bus_dma_tag_create failed %d\n",
4020		    __func__, error);
4021		goto fail;
4022	}
4023
4024	rxbuf = rxr->rx_buffers;
4025	for (int i = 0; i < adapter->num_rx_desc; i++, rxbuf++) {
4027		error = bus_dmamap_create(rxr->rxtag, BUS_DMA_NOWAIT,
4028		    &rxbuf->map);
4029		if (error) {
4030			device_printf(dev, "%s: bus_dmamap_create failed: %d\n",
4031			    __func__, error);
4032			goto fail;
4033		}
4034	}
4035
4036	return (0);
4037
4038fail:
4039	em_free_receive_structures(adapter);
4040	return (error);
4041}
4042
4043
4044/*********************************************************************
4045 *
4046 *  Initialize a receive ring and its buffers.
4047 *
4048 **********************************************************************/
4049static int
4050em_setup_receive_ring(struct rx_ring *rxr)
4051{
4052	struct	adapter 	*adapter = rxr->adapter;
4053	struct em_buffer	*rxbuf;
4054	bus_dma_segment_t	seg[1];
4055	int			rsize, nsegs, error = 0;
4056#ifdef DEV_NETMAP
4057	struct netmap_adapter *na = NA(adapter->ifp);
4058	struct netmap_slot *slot;
4059#endif
4060
4061
4062	/* Clear the ring contents */
4063	EM_RX_LOCK(rxr);
4064	rsize = roundup2(adapter->num_rx_desc *
4065	    sizeof(struct e1000_rx_desc), EM_DBA_ALIGN);
4066	bzero((void *)rxr->rx_base, rsize);
4067#ifdef DEV_NETMAP
4068	slot = netmap_reset(na, NR_RX, 0, 0);
4069#endif
4070
4071	/*
4072	** Free current RX buffer structs and their mbufs
4073	*/
4074	for (int i = 0; i < adapter->num_rx_desc; i++) {
4075		rxbuf = &rxr->rx_buffers[i];
4076		if (rxbuf->m_head != NULL) {
4077			bus_dmamap_sync(rxr->rxtag, rxbuf->map,
4078			    BUS_DMASYNC_POSTREAD);
4079			bus_dmamap_unload(rxr->rxtag, rxbuf->map);
4080			m_freem(rxbuf->m_head);
4081			rxbuf->m_head = NULL; /* mark as freed */
4082		}
4083	}
4084
4085	/* Now replenish the mbufs */
4086        for (int j = 0; j != adapter->num_rx_desc; ++j) {
4087		rxbuf = &rxr->rx_buffers[j];
4088#ifdef DEV_NETMAP
4089		if (slot) {
4090			int si = netmap_idx_n2k(&na->rx_rings[rxr->me], j);
4091			uint64_t paddr;
4092			void *addr;
4093
4094			addr = PNMB(slot + si, &paddr);
4095			netmap_load_map(rxr->rxtag, rxbuf->map, addr);
4096			/* Update descriptor */
4097			rxr->rx_base[j].buffer_addr = htole64(paddr);
4098			continue;
4099		}
4100#endif /* DEV_NETMAP */
4101		rxbuf->m_head = m_getjcl(M_DONTWAIT, MT_DATA,
4102		    M_PKTHDR, adapter->rx_mbuf_sz);
4103		if (rxbuf->m_head == NULL) {
4104			error = ENOBUFS;
4105			goto fail;
4106		}
4107		rxbuf->m_head->m_len = adapter->rx_mbuf_sz;
4108		rxbuf->m_head->m_flags &= ~M_HASFCS; /* we strip it */
4109		rxbuf->m_head->m_pkthdr.len = adapter->rx_mbuf_sz;
4110
4111		/* Get the memory mapping */
4112		error = bus_dmamap_load_mbuf_sg(rxr->rxtag,
4113		    rxbuf->map, rxbuf->m_head, seg,
4114		    &nsegs, BUS_DMA_NOWAIT);
4115		if (error != 0) {
4116			m_freem(rxbuf->m_head);
4117			rxbuf->m_head = NULL;
4118			goto fail;
4119		}
4120		bus_dmamap_sync(rxr->rxtag,
4121		    rxbuf->map, BUS_DMASYNC_PREREAD);
4122
4123		/* Update descriptor */
4124		rxr->rx_base[j].buffer_addr = htole64(seg[0].ds_addr);
4125	}
4126	rxr->next_to_check = 0;
4127	rxr->next_to_refresh = 0;
4128	bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4129	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4130
4131fail:
4132	EM_RX_UNLOCK(rxr);
4133	return (error);
4134}
4135
4136/*********************************************************************
4137 *
4138 *  Initialize all receive rings.
4139 *
4140 **********************************************************************/
4141static int
4142em_setup_receive_structures(struct adapter *adapter)
4143{
4144	struct rx_ring *rxr = adapter->rx_rings;
4145	int q;
4146
4147	for (q = 0; q < adapter->num_queues; q++, rxr++)
4148		if (em_setup_receive_ring(rxr))
4149			goto fail;
4150
4151	return (0);
4152fail:
4153	/*
4154	 * Free RX buffers allocated so far. We will only handle
4155	 * the rings that completed; the failing case will have
4156	 * cleaned up for itself. 'q' failed, so it's the terminus.
4157	 */
4158	for (int i = 0; i < q; ++i) {
4159		rxr = &adapter->rx_rings[i];
4160		for (int n = 0; n < adapter->num_rx_desc; n++) {
4161			struct em_buffer *rxbuf;
4162			rxbuf = &rxr->rx_buffers[n];
4163			if (rxbuf->m_head != NULL) {
4164				bus_dmamap_sync(rxr->rxtag, rxbuf->map,
4165			  	  BUS_DMASYNC_POSTREAD);
4166				bus_dmamap_unload(rxr->rxtag, rxbuf->map);
4167				m_freem(rxbuf->m_head);
4168				rxbuf->m_head = NULL;
4169			}
4170		}
4171		rxr->next_to_check = 0;
4172		rxr->next_to_refresh = 0;
4173	}
4174
4175	return (ENOBUFS);
4176}
4177
4178/*********************************************************************
4179 *
4180 *  Free all receive rings.
4181 *
4182 **********************************************************************/
4183static void
4184em_free_receive_structures(struct adapter *adapter)
4185{
4186	struct rx_ring *rxr = adapter->rx_rings;
4187
4188	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4189		em_free_receive_buffers(rxr);
4190		/* Free the ring memory as well */
4191		em_dma_free(adapter, &rxr->rxdma);
4192		EM_RX_LOCK_DESTROY(rxr);
4193	}
4194
4195	free(adapter->rx_rings, M_DEVBUF);
4196}
4197
4198
4199/*********************************************************************
4200 *
4201 *  Free receive ring data structures
4202 *
4203 **********************************************************************/
4204static void
4205em_free_receive_buffers(struct rx_ring *rxr)
4206{
4207	struct adapter		*adapter = rxr->adapter;
4208	struct em_buffer	*rxbuf = NULL;
4209
4210	INIT_DEBUGOUT("free_receive_buffers: begin");
4211
4212	if (rxr->rx_buffers != NULL) {
4213		for (int i = 0; i < adapter->num_rx_desc; i++) {
4214			rxbuf = &rxr->rx_buffers[i];
4215			if (rxbuf->map != NULL) {
4216				bus_dmamap_sync(rxr->rxtag, rxbuf->map,
4217				    BUS_DMASYNC_POSTREAD);
4218				bus_dmamap_unload(rxr->rxtag, rxbuf->map);
4219				bus_dmamap_destroy(rxr->rxtag, rxbuf->map);
4220			}
4221			if (rxbuf->m_head != NULL) {
4222				m_freem(rxbuf->m_head);
4223				rxbuf->m_head = NULL;
4224			}
4225		}
4226		free(rxr->rx_buffers, M_DEVBUF);
4227		rxr->rx_buffers = NULL;
4228		rxr->next_to_check = 0;
4229		rxr->next_to_refresh = 0;
4230	}
4231
4232	if (rxr->rxtag != NULL) {
4233		bus_dma_tag_destroy(rxr->rxtag);
4234		rxr->rxtag = NULL;
4235	}
4236
4237	return;
4238}
4239
4240
4241/*********************************************************************
4242 *
4243 *  Enable receive unit.
4244 *
4245 **********************************************************************/
4246#define MAX_INTS_PER_SEC	8000
4247#define DEFAULT_ITR	     (1000000000/(MAX_INTS_PER_SEC * 256))
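/* e.g. 8000 ints/sec -> 1000000000/(8000 * 256) = 488, in 256ns units */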
4248
4249static void
4250em_initialize_receive_unit(struct adapter *adapter)
4251{
4252	struct rx_ring	*rxr = adapter->rx_rings;
4253	struct ifnet	*ifp = adapter->ifp;
4254	struct e1000_hw	*hw = &adapter->hw;
4255	u64	bus_addr;
4256	u32	rctl, rxcsum;
4257
4258	INIT_DEBUGOUT("em_initialize_receive_unit: begin");
4259
4260	/*
4261	 * Make sure receives are disabled while setting
4262	 * up the descriptor ring
4263	 */
4264	rctl = E1000_READ_REG(hw, E1000_RCTL);
4265	/* Do not disable if ever enabled on this hardware */
4266	if ((hw->mac.type != e1000_82574) && (hw->mac.type != e1000_82583))
4267		E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
4268
4269	E1000_WRITE_REG(&adapter->hw, E1000_RADV,
4270	    adapter->rx_abs_int_delay.value);
4271	/*
4272	 * Set the interrupt throttling rate. Value is calculated
4273	 * as DEFAULT_ITR = 1/(MAX_INTS_PER_SEC * 256ns)
4274	 */
4275	E1000_WRITE_REG(hw, E1000_ITR, DEFAULT_ITR);
4276
4277	/*
4278	** When using MSIX interrupts we need to throttle
4279	** using the EITR register (82574 only)
4280	*/
4281	if (hw->mac.type == e1000_82574) {
4282		for (int i = 0; i < 4; i++)
4283			E1000_WRITE_REG(hw, E1000_EITR_82574(i),
4284			    DEFAULT_ITR);
4285		/* Disable accelerated acknowledge */
4286		E1000_WRITE_REG(hw, E1000_RFCTL, E1000_RFCTL_ACK_DIS);
4287	}
4288
4289	if (ifp->if_capenable & IFCAP_RXCSUM) {
4290		rxcsum = E1000_READ_REG(hw, E1000_RXCSUM);
4291		rxcsum |= (E1000_RXCSUM_IPOFL | E1000_RXCSUM_TUOFL);
4292		E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum);
4293	}
4294
4295	/*
4296	** XXX TEMPORARY WORKAROUND: on some systems with 82573
4297	** long latencies are observed, like Lenovo X60. This
4298	** change eliminates the problem, but since having positive
4299	** values in RDTR is a known source of problems on other
4300	** platforms another solution is being sought.
4301	*/
4302	if (hw->mac.type == e1000_82573)
4303		E1000_WRITE_REG(hw, E1000_RDTR, 0x20);
4304
4305	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4306		/* Setup the Base and Length of the Rx Descriptor Ring */
4307		bus_addr = rxr->rxdma.dma_paddr;
4308		E1000_WRITE_REG(hw, E1000_RDLEN(i),
4309		    adapter->num_rx_desc * sizeof(struct e1000_rx_desc));
4310		E1000_WRITE_REG(hw, E1000_RDBAH(i), (u32)(bus_addr >> 32));
4311		E1000_WRITE_REG(hw, E1000_RDBAL(i), (u32)bus_addr);
4312		/* Setup the Head and Tail Descriptor Pointers */
4313		E1000_WRITE_REG(hw, E1000_RDH(i), 0);
4314#ifdef DEV_NETMAP
4315		/*
4316		 * An init() while a netmap client is active must
4317		 * preserve the rx buffers passed to userspace.
4318		 * In this driver it means we adjust RDT to
4319		 * something different from na->num_rx_desc - 1.
4320		 */
4321		if (ifp->if_capenable & IFCAP_NETMAP) {
4322			struct netmap_adapter *na = NA(adapter->ifp);
4323			struct netmap_kring *kring = &na->rx_rings[i];
4324			int t = na->num_rx_desc - 1 - kring->nr_hwavail;
4325
4326			E1000_WRITE_REG(hw, E1000_RDT(i), t);
4327		} else
4328#endif /* DEV_NETMAP */
4329		E1000_WRITE_REG(hw, E1000_RDT(i), adapter->num_rx_desc - 1);
4330	}
4331
4332	/* Set PTHRESH for improved jumbo performance */
4333	if (((adapter->hw.mac.type == e1000_ich9lan) ||
4334	    (adapter->hw.mac.type == e1000_pch2lan) ||
4335	    (adapter->hw.mac.type == e1000_ich10lan)) &&
4336	    (ifp->if_mtu > ETHERMTU)) {
4337		u32 rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(0));
4338		E1000_WRITE_REG(hw, E1000_RXDCTL(0), rxdctl | 3);
4339	}
4340
4341	if (adapter->hw.mac.type == e1000_pch2lan) {
4342		if (ifp->if_mtu > ETHERMTU)
4343			e1000_lv_jumbo_workaround_ich8lan(hw, TRUE);
4344		else
4345			e1000_lv_jumbo_workaround_ich8lan(hw, FALSE);
4346	}
4347
4348	/* Setup the Receive Control Register */
4349	rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
4350	rctl |= E1000_RCTL_EN | E1000_RCTL_BAM |
4351	    E1000_RCTL_LBM_NO | E1000_RCTL_RDMTS_HALF |
4352	    (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
4353
4354        /* Strip the CRC */
4355        rctl |= E1000_RCTL_SECRC;
4356
4357        /* Make sure VLAN Filters are off */
4358        rctl &= ~E1000_RCTL_VFE;
4359	rctl &= ~E1000_RCTL_SBP;
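	/*
	 * Map the receive mbuf size chosen at init time to the RCTL
	 * buffer-size encoding; BSEX selects the extended (16x)
	 * size set used for the 4K and 8K buffers below.
	 */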
4360
4361	if (adapter->rx_mbuf_sz == MCLBYTES)
4362		rctl |= E1000_RCTL_SZ_2048;
4363	else if (adapter->rx_mbuf_sz == MJUMPAGESIZE)
4364		rctl |= E1000_RCTL_SZ_4096 | E1000_RCTL_BSEX;
4365	else if (adapter->rx_mbuf_sz > MJUMPAGESIZE)
4366		rctl |= E1000_RCTL_SZ_8192 | E1000_RCTL_BSEX;
4367
4368	if (ifp->if_mtu > ETHERMTU)
4369		rctl |= E1000_RCTL_LPE;
4370	else
4371		rctl &= ~E1000_RCTL_LPE;
4372
4373	/* Write out the settings */
4374	E1000_WRITE_REG(hw, E1000_RCTL, rctl);
4375
4376	return;
4377}
4378
4379
4380/*********************************************************************
4381 *
4382 *  This routine executes in interrupt context. It replenishes
4383 *  the mbufs in the descriptor ring and sends data which has
4384 *  been DMA'ed into host memory to the upper layer.
4385 *
4386 *  We loop at most count times if count is > 0, or until done if
4387 *  count < 0.
4388 *
4389 *  For polling we also now return the number of cleaned packets
4390 *********************************************************************/
4391static bool
4392em_rxeof(struct rx_ring *rxr, int count, int *done)
4393{
4394	struct adapter		*adapter = rxr->adapter;
4395	struct ifnet		*ifp = adapter->ifp;
4396	struct mbuf		*mp, *sendmp;
4397	u8			status = 0;
4398	u16 			len;
4399	int			i, processed, rxdone = 0;
4400	bool			eop;
4401	struct e1000_rx_desc	*cur;
4402
4403	EM_RX_LOCK(rxr);
4404
4405#ifdef DEV_NETMAP
4406	if (ifp->if_capenable & IFCAP_NETMAP) {
4407		struct netmap_adapter *na = NA(ifp);
4408
4409		na->rx_rings[rxr->me].nr_kflags |= NKR_PENDINTR;
4410		selwakeuppri(&na->rx_rings[rxr->me].si, PI_NET);
4411		EM_RX_UNLOCK(rxr);
4412		EM_CORE_LOCK(adapter);
4413		selwakeuppri(&na->rx_si, PI_NET);
4414		EM_CORE_UNLOCK(adapter);
4415		return (FALSE);
4416	}
4417#endif /* DEV_NETMAP */
4418
4419	for (i = rxr->next_to_check, processed = 0; count != 0;) {
4420
4421		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
4422			break;
4423
4424		bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4425		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
4426
4427		cur = &rxr->rx_base[i];
4428		status = cur->status;
4429		mp = sendmp = NULL;
4430
4431		if ((status & E1000_RXD_STAT_DD) == 0)
4432			break;
4433
4434		len = le16toh(cur->length);
4435		eop = (status & E1000_RXD_STAT_EOP) != 0;
4436
4437		if ((cur->errors & E1000_RXD_ERR_FRAME_ERR_MASK) ||
4438		    (rxr->discard == TRUE)) {
4439			adapter->dropped_pkts++;
4440			++rxr->rx_discarded;
4441			if (!eop) /* Catch subsequent segs */
4442				rxr->discard = TRUE;
4443			else
4444				rxr->discard = FALSE;
4445			em_rx_discard(rxr, i);
4446			goto next_desc;
4447		}
4448
4449		/* Assign correct length to the current fragment */
4450		mp = rxr->rx_buffers[i].m_head;
4451		mp->m_len = len;
4452
4453		/* Trigger for refresh */
4454		rxr->rx_buffers[i].m_head = NULL;
4455
4456		/* First segment? */
4457		if (rxr->fmp == NULL) {
4458			mp->m_pkthdr.len = len;
4459			rxr->fmp = rxr->lmp = mp;
4460		} else {
4461			/* Chain mbuf's together */
4462			mp->m_flags &= ~M_PKTHDR;
4463			rxr->lmp->m_next = mp;
4464			rxr->lmp = mp;
4465			rxr->fmp->m_pkthdr.len += len;
4466		}
4467
4468		if (eop) {
4469			--count;
4470			sendmp = rxr->fmp;
4471			sendmp->m_pkthdr.rcvif = ifp;
4472			ifp->if_ipackets++;
4473			em_receive_checksum(cur, sendmp);
4474#ifndef __NO_STRICT_ALIGNMENT
4475			if (adapter->max_frame_size >
4476			    (MCLBYTES - ETHER_ALIGN) &&
4477			    em_fixup_rx(rxr) != 0)
4478				goto skip;
4479#endif
4480			if (status & E1000_RXD_STAT_VP) {
4481				sendmp->m_pkthdr.ether_vtag =
4482				    le16toh(cur->special);
4483				sendmp->m_flags |= M_VLANTAG;
4484			}
4485#ifndef __NO_STRICT_ALIGNMENT
4486skip:
4487#endif
4488			rxr->fmp = rxr->lmp = NULL;
4489		}
4490next_desc:
4491		/* Zero out the receive descriptors status. */
4492		cur->status = 0;
4493		++rxdone;	/* cumulative for POLL */
4494		++processed;
4495
4496		/* Advance our pointers to the next descriptor. */
4497		if (++i == adapter->num_rx_desc)
4498			i = 0;
4499
4500		/* Send to the stack */
4501		if (sendmp != NULL) {
4502			rxr->next_to_check = i;
4503			EM_RX_UNLOCK(rxr);
4504			(*ifp->if_input)(ifp, sendmp);
4505			EM_RX_LOCK(rxr);
4506			i = rxr->next_to_check;
4507		}
4508
4509		/* Only refresh mbufs every 8 descriptors */
4510		if (processed == 8) {
4511			em_refresh_mbufs(rxr, i);
4512			processed = 0;
4513		}
4514	}
4515
4516	/* Catch any remaining refresh work */
4517	if (e1000_rx_unrefreshed(rxr))
4518		em_refresh_mbufs(rxr, i);
4519
4520	rxr->next_to_check = i;
4521	if (done != NULL)
4522		*done = rxdone;
4523	EM_RX_UNLOCK(rxr);
4524
4525	return ((status & E1000_RXD_STAT_DD) ? TRUE : FALSE);
4526}
4527
4528static __inline void
4529em_rx_discard(struct rx_ring *rxr, int i)
4530{
4531	struct em_buffer	*rbuf;
4532
4533	rbuf = &rxr->rx_buffers[i];
4534	/* Free any previous pieces */
4535	if (rxr->fmp != NULL) {
4536		rxr->fmp->m_flags |= M_PKTHDR;
4537		m_freem(rxr->fmp);
4538		rxr->fmp = NULL;
4539		rxr->lmp = NULL;
4540	}
4541	/*
4542	** Free buffer and allow em_refresh_mbufs()
4543	** to clean up and recharge buffer.
4544	*/
4545	if (rbuf->m_head) {
4546		m_free(rbuf->m_head);
4547		rbuf->m_head = NULL;
4548	}
4549	return;
4550}
4551
4552#ifndef __NO_STRICT_ALIGNMENT
4553/*
4554 * When jumbo frames are enabled we should realign the entire payload on
4555 * architectures with strict alignment. This is a serious design mistake
4556 * of the 8254x as it nullifies DMA operations: the 8254x only allows the
4557 * RX buffer size to be 2048/4096/8192/16384, while what we really want
4558 * is 2048 - ETHER_ALIGN, to align the payload. On architectures without
4559 * strict alignment restrictions the 8254x still performs unaligned memory
4560 * accesses, which reduces performance too. To avoid copying an entire
4561 * frame to align it, we allocate a new mbuf and copy the ethernet header
4562 * to the new mbuf, which is then prepended to the existing mbuf chain.
4563 *
4564 * Be aware, the best performance of the 8254x is achieved only when
4565 * jumbo frames are not used at all on architectures with strict alignment.
4566 */
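/*
 * The fixup below takes one of two paths: when there is room in the
 * cluster, the whole frame is shifted up by ETHER_HDR_LEN in place;
 * otherwise a fresh mbuf takes the 14-byte ethernet header and is
 * prepended to the existing chain.
 */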
4567static int
4568em_fixup_rx(struct rx_ring *rxr)
4569{
4570	struct adapter *adapter = rxr->adapter;
4571	struct mbuf *m, *n;
4572	int error;
4573
4574	error = 0;
4575	m = rxr->fmp;
4576	if (m->m_len <= (MCLBYTES - ETHER_HDR_LEN)) {
4577		bcopy(m->m_data, m->m_data + ETHER_HDR_LEN, m->m_len);
4578		m->m_data += ETHER_HDR_LEN;
4579	} else {
4580		MGETHDR(n, M_DONTWAIT, MT_DATA);
4581		if (n != NULL) {
4582			bcopy(m->m_data, n->m_data, ETHER_HDR_LEN);
4583			m->m_data += ETHER_HDR_LEN;
4584			m->m_len -= ETHER_HDR_LEN;
4585			n->m_len = ETHER_HDR_LEN;
4586			M_MOVE_PKTHDR(n, m);
4587			n->m_next = m;
4588			rxr->fmp = n;
4589		} else {
4590			adapter->dropped_pkts++;
4591			m_freem(rxr->fmp);
4592			rxr->fmp = NULL;
4593			error = ENOMEM;
4594		}
4595	}
4596
4597	return (error);
4598}
4599#endif
4600
4601/*********************************************************************
4602 *
4603 *  Verify that the hardware indicated that the checksum is valid.
4604 *  Inform the stack about the status of checksum so that stack
4605 *  doesn't spend time verifying the checksum.
4606 *
4607 *********************************************************************/
4608static void
4609em_receive_checksum(struct e1000_rx_desc *rx_desc, struct mbuf *mp)
4610{
4611	/* The Ignore Checksum Indication bit is set, report nothing */
4612	if (rx_desc->status & E1000_RXD_STAT_IXSM) {
4613		mp->m_pkthdr.csum_flags = 0;
4614		return;
4615	}
4616
4617	if (rx_desc->status & E1000_RXD_STAT_IPCS) {
4618		/* Did it pass? */
4619		if (!(rx_desc->errors & E1000_RXD_ERR_IPE)) {
4620			/* IP Checksum Good */
4621			mp->m_pkthdr.csum_flags = CSUM_IP_CHECKED;
4622			mp->m_pkthdr.csum_flags |= CSUM_IP_VALID;
4623
4624		} else {
4625			mp->m_pkthdr.csum_flags = 0;
4626		}
4627	}
4628
4629	if (rx_desc->status & E1000_RXD_STAT_TCPCS) {
4630		/* Did it pass? */
4631		if (!(rx_desc->errors & E1000_RXD_ERR_TCPE)) {
4632			mp->m_pkthdr.csum_flags |=
4633			(CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
4634			mp->m_pkthdr.csum_data = htons(0xffff);
4635		}
4636	}
4637}
4638
4639/*
4640 * This routine is run via a vlan
4641 * config EVENT
4642 */
4643static void
4644em_register_vlan(void *arg, struct ifnet *ifp, u16 vtag)
4645{
4646	struct adapter	*adapter = ifp->if_softc;
4647	u32		index, bit;
4648
4649	if (ifp->if_softc !=  arg)   /* Not our event */
4650		return;
4651
4652	if ((vtag == 0) || (vtag > 4095))       /* Invalid ID */
4653                return;
4654
4655	EM_CORE_LOCK(adapter);
4656	index = (vtag >> 5) & 0x7F;
4657	bit = vtag & 0x1F;
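	/* e.g. vtag 100: index = 100 >> 5 = 3, bit = 100 & 0x1F = 4 */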
4658	adapter->shadow_vfta[index] |= (1 << bit);
4659	++adapter->num_vlans;
4660	/* Re-init to load the changes */
4661	if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
4662		em_init_locked(adapter);
4663	EM_CORE_UNLOCK(adapter);
4664}
4665
4666/*
4667 * This routine is run via a vlan
4668 * unconfig EVENT
4669 */
4670static void
4671em_unregister_vlan(void *arg, struct ifnet *ifp, u16 vtag)
4672{
4673	struct adapter	*adapter = ifp->if_softc;
4674	u32		index, bit;
4675
4676	if (ifp->if_softc !=  arg)
4677		return;
4678
4679	if ((vtag == 0) || (vtag > 4095))       /* Invalid */
4680                return;
4681
4682	EM_CORE_LOCK(adapter);
4683	index = (vtag >> 5) & 0x7F;
4684	bit = vtag & 0x1F;
4685	adapter->shadow_vfta[index] &= ~(1 << bit);
4686	--adapter->num_vlans;
4687	/* Re-init to load the changes */
4688	if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
4689		em_init_locked(adapter);
4690	EM_CORE_UNLOCK(adapter);
4691}
4692
4693static void
4694em_setup_vlan_hw_support(struct adapter *adapter)
4695{
4696	struct e1000_hw *hw = &adapter->hw;
4697	u32             reg;
4698
4699	/*
4700	** We get here thru init_locked, meaning
4701	** a soft reset; this has already cleared
4702	** the VFTA and other state, so if no
4703	** vlans have been registered, do nothing.
4704	*/
4705	if (adapter->num_vlans == 0)
4706                return;
4707
4708	/*
4709	** A soft reset zeroes out the VFTA, so
4710	** we need to repopulate it now.
4711	*/
4712	for (int i = 0; i < EM_VFTA_SIZE; i++)
4713                if (adapter->shadow_vfta[i] != 0)
4714			E1000_WRITE_REG_ARRAY(hw, E1000_VFTA,
4715                            i, adapter->shadow_vfta[i]);
4716
4717	reg = E1000_READ_REG(hw, E1000_CTRL);
4718	reg |= E1000_CTRL_VME;
4719	E1000_WRITE_REG(hw, E1000_CTRL, reg);
4720
4721	/* Enable the Filter Table */
4722	reg = E1000_READ_REG(hw, E1000_RCTL);
4723	reg &= ~E1000_RCTL_CFIEN;
4724	reg |= E1000_RCTL_VFE;
4725	E1000_WRITE_REG(hw, E1000_RCTL, reg);
4726}
4727
4728static void
4729em_enable_intr(struct adapter *adapter)
4730{
4731	struct e1000_hw *hw = &adapter->hw;
4732	u32 ims_mask = IMS_ENABLE_MASK;
4733
4734	if (hw->mac.type == e1000_82574) {
4735		E1000_WRITE_REG(hw, EM_EIAC, EM_MSIX_MASK);
4736		ims_mask |= EM_MSIX_MASK;
4737	}
4738	E1000_WRITE_REG(hw, E1000_IMS, ims_mask);
4739}
4740
4741static void
4742em_disable_intr(struct adapter *adapter)
4743{
4744	struct e1000_hw *hw = &adapter->hw;
4745
4746	if (hw->mac.type == e1000_82574)
4747		E1000_WRITE_REG(hw, EM_EIAC, 0);
4748	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
4749}
4750
4751/*
4752 * Bit of a misnomer: what this really means is
4753 * to enable OS management of the system, i.e.
4754 * to disable special hardware management features.
4755 */
4756static void
4757em_init_manageability(struct adapter *adapter)
4758{
4759	/* A shared code workaround */
4760#define E1000_82542_MANC2H E1000_MANC2H
4761	if (adapter->has_manage) {
4762		int manc2h = E1000_READ_REG(&adapter->hw, E1000_MANC2H);
4763		int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
4764
4765		/* disable hardware interception of ARP */
4766		manc &= ~(E1000_MANC_ARP_EN);
4767
4768                /* enable receiving management packets to the host */
4769		manc |= E1000_MANC_EN_MNG2HOST;
4770#define E1000_MNG2HOST_PORT_623 (1 << 5)
4771#define E1000_MNG2HOST_PORT_664 (1 << 6)
4772		manc2h |= E1000_MNG2HOST_PORT_623;
4773		manc2h |= E1000_MNG2HOST_PORT_664;
4774		E1000_WRITE_REG(&adapter->hw, E1000_MANC2H, manc2h);
4775		E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
4776	}
4777}
4778
4779/*
4780 * Give control back to hardware management
4781 * controller if there is one.
4782 */
4783static void
4784em_release_manageability(struct adapter *adapter)
4785{
4786	if (adapter->has_manage) {
4787		int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
4788
4789		/* re-enable hardware interception of ARP */
4790		manc |= E1000_MANC_ARP_EN;
4791		manc &= ~E1000_MANC_EN_MNG2HOST;
4792
4793		E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
4794	}
4795}
4796
4797/*
4798 * em_get_hw_control sets the {CTRL_EXT|FWSM}:DRV_LOAD bit.
4799 * For ASF and Pass Through versions of f/w this means
4800 * that the driver is loaded. For AMT version type f/w
4801 * this means that the network i/f is open.
4802 */
4803static void
4804em_get_hw_control(struct adapter *adapter)
4805{
4806	u32 ctrl_ext, swsm;
4807
4808	if (adapter->hw.mac.type == e1000_82573) {
4809		swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM);
4810		E1000_WRITE_REG(&adapter->hw, E1000_SWSM,
4811		    swsm | E1000_SWSM_DRV_LOAD);
4812		return;
4813	}
4814	/* else */
4815	ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4816	E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
4817	    ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
4818	return;
4819}
4820
4821/*
4822 * em_release_hw_control resets {CTRL_EXT|FWSM}:DRV_LOAD bit.
4823 * For ASF and Pass Through versions of f/w this means that
4824 * the driver is no longer loaded. For AMT versions of the
4825 * f/w this means that the network i/f is closed.
4826 */
4827static void
4828em_release_hw_control(struct adapter *adapter)
4829{
4830	u32 ctrl_ext, swsm;
4831
4832	if (!adapter->has_manage)
4833		return;
4834
4835	if (adapter->hw.mac.type == e1000_82573) {
4836		swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM);
4837		E1000_WRITE_REG(&adapter->hw, E1000_SWSM,
4838		    swsm & ~E1000_SWSM_DRV_LOAD);
4839		return;
4840	}
4841	/* else */
4842	ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4843	E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
4844	    ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
4845	return;
4846}
4847
4848static int
4849em_is_valid_ether_addr(u8 *addr)
4850{
4851	char zero_addr[6] = { 0, 0, 0, 0, 0, 0 };
4852
4853	if ((addr[0] & 1) || (!bcmp(addr, zero_addr, ETHER_ADDR_LEN))) {
4854		return (FALSE);
4855	}
4856
4857	return (TRUE);
4858}
4859
4860/*
4861** Parse the interface capabilities with regard
4862** to both system management and wake-on-lan for
4863** later use.
4864*/
4865static void
4866em_get_wakeup(device_t dev)
4867{
4868	struct adapter	*adapter = device_get_softc(dev);
4869	u16		eeprom_data = 0, device_id, apme_mask;
4870
4871	adapter->has_manage = e1000_enable_mng_pass_thru(&adapter->hw);
4872	apme_mask = EM_EEPROM_APME;
4873
4874	switch (adapter->hw.mac.type) {
4875	case e1000_82573:
4876	case e1000_82583:
4877		adapter->has_amt = TRUE;
4878		/* Falls thru */
4879	case e1000_82571:
4880	case e1000_82572:
4881	case e1000_80003es2lan:
4882		if (adapter->hw.bus.func == 1) {
4883			e1000_read_nvm(&adapter->hw,
4884			    NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data);
4885			break;
4886		} else
4887			e1000_read_nvm(&adapter->hw,
4888			    NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
4889		break;
4890	case e1000_ich8lan:
4891	case e1000_ich9lan:
4892	case e1000_ich10lan:
4893	case e1000_pchlan:
4894	case e1000_pch2lan:
4895		apme_mask = E1000_WUC_APME;
4896		adapter->has_amt = TRUE;
4897		eeprom_data = E1000_READ_REG(&adapter->hw, E1000_WUC);
4898		break;
4899	default:
4900		e1000_read_nvm(&adapter->hw,
4901		    NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
4902		break;
4903	}
4904	if (eeprom_data & apme_mask)
4905		adapter->wol = (E1000_WUFC_MAG | E1000_WUFC_MC);
4906	/*
4907	 * We have the eeprom settings; now apply the special cases
4908	 * where the eeprom may be wrong or the board won't support
4909	 * wake on lan on a particular port.
4910	 */
4911	device_id = pci_get_device(dev);
4912        switch (device_id) {
4913	case E1000_DEV_ID_82571EB_FIBER:
4914		/* Wake events only supported on port A for dual fiber
4915		 * regardless of eeprom setting */
4916		if (E1000_READ_REG(&adapter->hw, E1000_STATUS) &
4917		    E1000_STATUS_FUNC_1)
4918			adapter->wol = 0;
4919		break;
4920	case E1000_DEV_ID_82571EB_QUAD_COPPER:
4921	case E1000_DEV_ID_82571EB_QUAD_FIBER:
4922	case E1000_DEV_ID_82571EB_QUAD_COPPER_LP:
4923                /* if quad port adapter, disable WoL on all but port A */
4924		if (global_quad_port_a != 0)
4925			adapter->wol = 0;
4926		/* Reset for multiple quad port adapters */
4927		if (++global_quad_port_a == 4)
4928			global_quad_port_a = 0;
4929                break;
4930	}
4931	return;
4932}
4933
4934
4935/*
4936 * Enable PCI Wake On Lan capability
4937 */
4938static void
4939em_enable_wakeup(device_t dev)
4940{
4941	struct adapter	*adapter = device_get_softc(dev);
4942	struct ifnet	*ifp = adapter->ifp;
4943	u32		pmc, ctrl, ctrl_ext, rctl;
4944	u16     	status;
4945
4946	if (pci_find_cap(dev, PCIY_PMG, &pmc) != 0)
4947		return;
4948
4949	/* Advertise the wakeup capability */
4950	ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
4951	ctrl |= (E1000_CTRL_SWDPIN2 | E1000_CTRL_SWDPIN3);
4952	E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
4953	E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
4954
4955	if ((adapter->hw.mac.type == e1000_ich8lan) ||
4956	    (adapter->hw.mac.type == e1000_pchlan) ||
4957	    (adapter->hw.mac.type == e1000_ich9lan) ||
4958	    (adapter->hw.mac.type == e1000_ich10lan))
4959		e1000_suspend_workarounds_ich8lan(&adapter->hw);
4960
4961	/* Keep the laser running on Fiber adapters */
4962	if (adapter->hw.phy.media_type == e1000_media_type_fiber ||
4963	    adapter->hw.phy.media_type == e1000_media_type_internal_serdes) {
4964		ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4965		ctrl_ext |= E1000_CTRL_EXT_SDP3_DATA;
4966		E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, ctrl_ext);
4967	}
4968
4969	/*
4970	** Determine type of Wakeup: note that wol
4971	** is set with all bits on by default.
4972	*/
4973	if ((ifp->if_capenable & IFCAP_WOL_MAGIC) == 0)
4974		adapter->wol &= ~E1000_WUFC_MAG;
4975
4976	if ((ifp->if_capenable & IFCAP_WOL_MCAST) == 0)
4977		adapter->wol &= ~E1000_WUFC_MC;
4978	else {
4979		rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
4980		rctl |= E1000_RCTL_MPE;
4981		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, rctl);
4982	}
4983
4984	if ((adapter->hw.mac.type == e1000_pchlan) ||
4985	    (adapter->hw.mac.type == e1000_pch2lan)) {
4986		if (em_enable_phy_wakeup(adapter))
4987			return;
4988	} else {
4989		E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
4990		E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
4991	}
4992
4993	if (adapter->hw.phy.type == e1000_phy_igp_3)
4994		e1000_igp3_phy_powerdown_workaround_ich8lan(&adapter->hw);
4995
4996        /* Request PME */
4997        status = pci_read_config(dev, pmc + PCIR_POWER_STATUS, 2);
4998	status &= ~(PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE);
4999	if (ifp->if_capenable & IFCAP_WOL)
5000		status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE;
5001        pci_write_config(dev, pmc + PCIR_POWER_STATUS, status, 2);
5002
5003	return;
5004}
5005
5006/*
5007** WOL in the newer chipset interfaces (pchlan)
5008** requires things to be copied into the phy
5009*/
5010static int
5011em_enable_phy_wakeup(struct adapter *adapter)
5012{
5013	struct e1000_hw *hw = &adapter->hw;
5014	u32 mreg, ret = 0;
5015	u16 preg;
5016
5017	/* copy MAC RARs to PHY RARs */
5018	e1000_copy_rx_addrs_to_phy_ich8lan(hw);
5019
5020	/* copy MAC MTA to PHY MTA */
5021	for (int i = 0; i < adapter->hw.mac.mta_reg_count; i++) {
5022		mreg = E1000_READ_REG_ARRAY(hw, E1000_MTA, i);
5023		e1000_write_phy_reg(hw, BM_MTA(i), (u16)(mreg & 0xFFFF));
5024		e1000_write_phy_reg(hw, BM_MTA(i) + 1,
5025		    (u16)((mreg >> 16) & 0xFFFF));
5026	}
5027
5028	/* configure PHY Rx Control register */
5029	e1000_read_phy_reg(&adapter->hw, BM_RCTL, &preg);
5030	mreg = E1000_READ_REG(hw, E1000_RCTL);
5031	if (mreg & E1000_RCTL_UPE)
5032		preg |= BM_RCTL_UPE;
5033	if (mreg & E1000_RCTL_MPE)
5034		preg |= BM_RCTL_MPE;
5035	preg &= ~(BM_RCTL_MO_MASK);
5036	if (mreg & E1000_RCTL_MO_3)
5037		preg |= (((mreg & E1000_RCTL_MO_3) >> E1000_RCTL_MO_SHIFT)
5038				<< BM_RCTL_MO_SHIFT);
5039	if (mreg & E1000_RCTL_BAM)
5040		preg |= BM_RCTL_BAM;
5041	if (mreg & E1000_RCTL_PMCF)
5042		preg |= BM_RCTL_PMCF;
5043	mreg = E1000_READ_REG(hw, E1000_CTRL);
5044	if (mreg & E1000_CTRL_RFCE)
5045		preg |= BM_RCTL_RFCE;
5046	e1000_write_phy_reg(&adapter->hw, BM_RCTL, preg);
5047
5048	/* enable PHY wakeup in MAC register */
5049	E1000_WRITE_REG(hw, E1000_WUC,
5050	    E1000_WUC_PHY_WAKE | E1000_WUC_PME_EN);
5051	E1000_WRITE_REG(hw, E1000_WUFC, adapter->wol);
5052
5053	/* configure and enable PHY wakeup in PHY registers */
5054	e1000_write_phy_reg(&adapter->hw, BM_WUFC, adapter->wol);
5055	e1000_write_phy_reg(&adapter->hw, BM_WUC, E1000_WUC_PME_EN);
5056
5057	/* activate PHY wakeup */
5058	ret = hw->phy.ops.acquire(hw);
5059	if (ret) {
5060		printf("Could not acquire PHY\n");
5061		return ret;
5062	}
5063	e1000_write_phy_reg_mdic(hw, IGP01E1000_PHY_PAGE_SELECT,
5064	                         (BM_WUC_ENABLE_PAGE << IGP_PAGE_SHIFT));
5065	ret = e1000_read_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, &preg);
5066	if (ret) {
5067		printf("Could not read PHY page 769\n");
5068		goto out;
5069	}
5070	preg |= BM_WUC_ENABLE_BIT | BM_WUC_HOST_WU_BIT;
5071	ret = e1000_write_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, preg);
5072	if (ret)
5073		printf("Could not set PHY Host Wakeup bit\n");
5074out:
5075	hw->phy.ops.release(hw);
5076
5077	return ret;
5078}
5079
5080static void
5081em_led_func(void *arg, int onoff)
5082{
5083	struct adapter	*adapter = arg;
5084
5085	EM_CORE_LOCK(adapter);
5086	if (onoff) {
5087		e1000_setup_led(&adapter->hw);
5088		e1000_led_on(&adapter->hw);
5089	} else {
5090		e1000_led_off(&adapter->hw);
5091		e1000_cleanup_led(&adapter->hw);
5092	}
5093	EM_CORE_UNLOCK(adapter);
5094}
5095
5096/*
5097** Disable the L0S and L1 LINK states
5098*/
5099static void
5100em_disable_aspm(struct adapter *adapter)
5101{
5102	int		base, reg;
5103	u16		link_cap, link_ctrl;
5104	device_t	dev = adapter->dev;
5105
5106	switch (adapter->hw.mac.type) {
5107		case e1000_82573:
5108		case e1000_82574:
5109		case e1000_82583:
5110			break;
5111		default:
5112			return;
5113	}
5114	if (pci_find_cap(dev, PCIY_EXPRESS, &base) != 0)
5115		return;
5116	reg = base + PCIER_LINK_CAP;
5117	link_cap = pci_read_config(dev, reg, 2);
5118	if ((link_cap & PCIEM_LINK_CAP_ASPM) == 0)
5119		return;
5120	reg = base + PCIER_LINK_CTL;
5121	link_ctrl = pci_read_config(dev, reg, 2);
5122	link_ctrl &= 0xFFFC; /* turn off bits 0 and 1, the ASPM L0s/L1 enables */
5123	pci_write_config(dev, reg, link_ctrl, 2);
5124	return;
5125}
5126
5127/**********************************************************************
5128 *
5129 *  Update the board statistics counters.
5130 *
5131 **********************************************************************/
5132static void
5133em_update_stats_counters(struct adapter *adapter)
5134{
5135	struct ifnet   *ifp;
5136
5137	if (adapter->hw.phy.media_type == e1000_media_type_copper ||
5138	   (E1000_READ_REG(&adapter->hw, E1000_STATUS) & E1000_STATUS_LU)) {
5139		adapter->stats.symerrs += E1000_READ_REG(&adapter->hw, E1000_SYMERRS);
5140		adapter->stats.sec += E1000_READ_REG(&adapter->hw, E1000_SEC);
5141	}
5142	adapter->stats.crcerrs += E1000_READ_REG(&adapter->hw, E1000_CRCERRS);
5143	adapter->stats.mpc += E1000_READ_REG(&adapter->hw, E1000_MPC);
5144	adapter->stats.scc += E1000_READ_REG(&adapter->hw, E1000_SCC);
5145	adapter->stats.ecol += E1000_READ_REG(&adapter->hw, E1000_ECOL);
5146
5147	adapter->stats.mcc += E1000_READ_REG(&adapter->hw, E1000_MCC);
5148	adapter->stats.latecol += E1000_READ_REG(&adapter->hw, E1000_LATECOL);
5149	adapter->stats.colc += E1000_READ_REG(&adapter->hw, E1000_COLC);
5150	adapter->stats.dc += E1000_READ_REG(&adapter->hw, E1000_DC);
5151	adapter->stats.rlec += E1000_READ_REG(&adapter->hw, E1000_RLEC);
5152	adapter->stats.xonrxc += E1000_READ_REG(&adapter->hw, E1000_XONRXC);
5153	adapter->stats.xontxc += E1000_READ_REG(&adapter->hw, E1000_XONTXC);
5154	/*
5155	** For watchdog management we need to know if we have been
5156	** paused during the last interval, so capture that here.
5157	*/
5158	adapter->pause_frames = E1000_READ_REG(&adapter->hw, E1000_XOFFRXC);
5159	adapter->stats.xoffrxc += adapter->pause_frames;
5160	adapter->stats.xofftxc += E1000_READ_REG(&adapter->hw, E1000_XOFFTXC);
5161	adapter->stats.fcruc += E1000_READ_REG(&adapter->hw, E1000_FCRUC);
5162	adapter->stats.prc64 += E1000_READ_REG(&adapter->hw, E1000_PRC64);
5163	adapter->stats.prc127 += E1000_READ_REG(&adapter->hw, E1000_PRC127);
5164	adapter->stats.prc255 += E1000_READ_REG(&adapter->hw, E1000_PRC255);
5165	adapter->stats.prc511 += E1000_READ_REG(&adapter->hw, E1000_PRC511);
5166	adapter->stats.prc1023 += E1000_READ_REG(&adapter->hw, E1000_PRC1023);
5167	adapter->stats.prc1522 += E1000_READ_REG(&adapter->hw, E1000_PRC1522);
5168	adapter->stats.gprc += E1000_READ_REG(&adapter->hw, E1000_GPRC);
5169	adapter->stats.bprc += E1000_READ_REG(&adapter->hw, E1000_BPRC);
5170	adapter->stats.mprc += E1000_READ_REG(&adapter->hw, E1000_MPRC);
5171	adapter->stats.gptc += E1000_READ_REG(&adapter->hw, E1000_GPTC);
5172
5173	/* For the 64-bit byte counters the low dword must be read first. */
5174	/* Both registers clear on the read of the high dword */
5175
5176	adapter->stats.gorc += E1000_READ_REG(&adapter->hw, E1000_GORCL) +
5177	    ((u64)E1000_READ_REG(&adapter->hw, E1000_GORCH) << 32);
5178	adapter->stats.gotc += E1000_READ_REG(&adapter->hw, E1000_GOTCL) +
5179	    ((u64)E1000_READ_REG(&adapter->hw, E1000_GOTCH) << 32);
5180
5181	adapter->stats.rnbc += E1000_READ_REG(&adapter->hw, E1000_RNBC);
5182	adapter->stats.ruc += E1000_READ_REG(&adapter->hw, E1000_RUC);
5183	adapter->stats.rfc += E1000_READ_REG(&adapter->hw, E1000_RFC);
5184	adapter->stats.roc += E1000_READ_REG(&adapter->hw, E1000_ROC);
5185	adapter->stats.rjc += E1000_READ_REG(&adapter->hw, E1000_RJC);
5186
5187	adapter->stats.tor += E1000_READ_REG(&adapter->hw, E1000_TORH);
5188	adapter->stats.tot += E1000_READ_REG(&adapter->hw, E1000_TOTH);
5189
5190	adapter->stats.tpr += E1000_READ_REG(&adapter->hw, E1000_TPR);
5191	adapter->stats.tpt += E1000_READ_REG(&adapter->hw, E1000_TPT);
5192	adapter->stats.ptc64 += E1000_READ_REG(&adapter->hw, E1000_PTC64);
5193	adapter->stats.ptc127 += E1000_READ_REG(&adapter->hw, E1000_PTC127);
5194	adapter->stats.ptc255 += E1000_READ_REG(&adapter->hw, E1000_PTC255);
5195	adapter->stats.ptc511 += E1000_READ_REG(&adapter->hw, E1000_PTC511);
5196	adapter->stats.ptc1023 += E1000_READ_REG(&adapter->hw, E1000_PTC1023);
5197	adapter->stats.ptc1522 += E1000_READ_REG(&adapter->hw, E1000_PTC1522);
5198	adapter->stats.mptc += E1000_READ_REG(&adapter->hw, E1000_MPTC);
5199	adapter->stats.bptc += E1000_READ_REG(&adapter->hw, E1000_BPTC);
5200
5201	/* Interrupt Counts */
5202
5203	adapter->stats.iac += E1000_READ_REG(&adapter->hw, E1000_IAC);
5204	adapter->stats.icrxptc += E1000_READ_REG(&adapter->hw, E1000_ICRXPTC);
5205	adapter->stats.icrxatc += E1000_READ_REG(&adapter->hw, E1000_ICRXATC);
5206	adapter->stats.ictxptc += E1000_READ_REG(&adapter->hw, E1000_ICTXPTC);
5207	adapter->stats.ictxatc += E1000_READ_REG(&adapter->hw, E1000_ICTXATC);
5208	adapter->stats.ictxqec += E1000_READ_REG(&adapter->hw, E1000_ICTXQEC);
5209	adapter->stats.ictxqmtc += E1000_READ_REG(&adapter->hw, E1000_ICTXQMTC);
5210	adapter->stats.icrxdmtc += E1000_READ_REG(&adapter->hw, E1000_ICRXDMTC);
5211	adapter->stats.icrxoc += E1000_READ_REG(&adapter->hw, E1000_ICRXOC);
5212
5213	if (adapter->hw.mac.type >= e1000_82543) {
5214		adapter->stats.algnerrc +=
5215		E1000_READ_REG(&adapter->hw, E1000_ALGNERRC);
5216		adapter->stats.rxerrc +=
5217		E1000_READ_REG(&adapter->hw, E1000_RXERRC);
5218		adapter->stats.tncrs +=
5219		E1000_READ_REG(&adapter->hw, E1000_TNCRS);
5220		adapter->stats.cexterr +=
5221		E1000_READ_REG(&adapter->hw, E1000_CEXTERR);
5222		adapter->stats.tsctc +=
5223		E1000_READ_REG(&adapter->hw, E1000_TSCTC);
5224		adapter->stats.tsctfc +=
5225		E1000_READ_REG(&adapter->hw, E1000_TSCTFC);
5226	}
5227	ifp = adapter->ifp;
5228
5229	ifp->if_collisions = adapter->stats.colc;
5230
5231	/* Rx Errors */
5232	ifp->if_ierrors = adapter->dropped_pkts + adapter->stats.rxerrc +
5233	    adapter->stats.crcerrs + adapter->stats.algnerrc +
5234	    adapter->stats.ruc + adapter->stats.roc +
5235	    adapter->stats.mpc + adapter->stats.cexterr;
5236
5237	/* Tx Errors */
5238	ifp->if_oerrors = adapter->stats.ecol +
5239	    adapter->stats.latecol + adapter->watchdog_events;
5240}
5241
/* Export a single 32-bit register via a read-only sysctl. */
static int
em_sysctl_reg_handler(SYSCTL_HANDLER_ARGS)
{
	struct adapter *adapter;
	u_int val;

	adapter = oidp->oid_arg1;
	val = E1000_READ_REG(&adapter->hw, oidp->oid_arg2);
	return (sysctl_handle_int(oidp, &val, 0, req));
}
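
/*
** A minimal registration sketch: the register offset rides in
** oid_arg2, so this one handler can export any 32-bit register
** without per-register code.  em_add_hw_stats() below uses exactly
** this pattern, e.g.:
**
**	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "device_control",
**	    CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_CTRL,
**	    em_sysctl_reg_handler, "IU", "Device Control Register");
*/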

/*
 * Add sysctl variables, one per statistic, to the system.
 */
static void
em_add_hw_stats(struct adapter *adapter)
{
	device_t dev = adapter->dev;

	struct tx_ring *txr = adapter->tx_rings;
	struct rx_ring *rxr = adapter->rx_rings;

	struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev);
	struct sysctl_oid *tree = device_get_sysctl_tree(dev);
	struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree);
	struct e1000_hw_stats *stats = &adapter->stats;

	struct sysctl_oid *stat_node, *queue_node, *int_node;
	struct sysctl_oid_list *stat_list, *queue_list, *int_list;

#define QUEUE_NAME_LEN 32
	char namebuf[QUEUE_NAME_LEN];

	/* Driver Statistics */
	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "link_irq",
			CTLFLAG_RD, &adapter->link_irq,
			"Link MSIX IRQ Handled");
	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "mbuf_alloc_fail",
			CTLFLAG_RD, &adapter->mbuf_alloc_failed,
			"Std mbuf failed");
	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "cluster_alloc_fail",
			CTLFLAG_RD, &adapter->mbuf_cluster_failed,
			"Std mbuf cluster failed");
	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "dropped",
			CTLFLAG_RD, &adapter->dropped_pkts,
			"Driver dropped packets");
	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_dma_fail",
			CTLFLAG_RD, &adapter->no_tx_dma_setup,
			"Driver tx dma failure in xmit");
	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_overruns",
			CTLFLAG_RD, &adapter->rx_overruns,
			"RX overruns");
	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "watchdog_timeouts",
			CTLFLAG_RD, &adapter->watchdog_events,
			"Watchdog timeouts");

	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "device_control",
			CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_CTRL,
			em_sysctl_reg_handler, "IU",
			"Device Control Register");
	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rx_control",
			CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_RCTL,
			em_sysctl_reg_handler, "IU",
			"Receiver Control Register");
	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_high_water",
			CTLFLAG_RD, &adapter->hw.fc.high_water, 0,
			"Flow Control High Watermark");
	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_low_water",
			CTLFLAG_RD, &adapter->hw.fc.low_water, 0,
			"Flow Control Low Watermark");

	for (int i = 0; i < adapter->num_queues; i++, rxr++, txr++) {
		snprintf(namebuf, QUEUE_NAME_LEN, "queue%d", i);
		queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
					    CTLFLAG_RD, NULL, "Queue Name");
		queue_list = SYSCTL_CHILDREN(queue_node);

		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_head",
				CTLTYPE_UINT | CTLFLAG_RD, adapter,
				E1000_TDH(txr->me),
				em_sysctl_reg_handler, "IU",
				"Transmit Descriptor Head");
		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_tail",
				CTLTYPE_UINT | CTLFLAG_RD, adapter,
				E1000_TDT(txr->me),
				em_sysctl_reg_handler, "IU",
				"Transmit Descriptor Tail");
		SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "tx_irq",
				CTLFLAG_RD, &txr->tx_irq,
				"Queue MSI-X Transmit Interrupts");
		SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "no_desc_avail",
				CTLFLAG_RD, &txr->no_desc_avail,
				"Queue No Descriptor Available");

		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_head",
				CTLTYPE_UINT | CTLFLAG_RD, adapter,
				E1000_RDH(rxr->me),
				em_sysctl_reg_handler, "IU",
				"Receive Descriptor Head");
		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_tail",
				CTLTYPE_UINT | CTLFLAG_RD, adapter,
				E1000_RDT(rxr->me),
				em_sysctl_reg_handler, "IU",
				"Receive Descriptor Tail");
		SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "rx_irq",
				CTLFLAG_RD, &rxr->rx_irq,
				"Queue MSI-X Receive Interrupts");
	}

	/* MAC stats get their own sub node */

	stat_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "mac_stats",
				    CTLFLAG_RD, NULL, "Statistics");
	stat_list = SYSCTL_CHILDREN(stat_node);

	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "excess_coll",
			CTLFLAG_RD, &stats->ecol,
			"Excessive collisions");
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "single_coll",
			CTLFLAG_RD, &stats->scc,
			"Single collisions");
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "multiple_coll",
			CTLFLAG_RD, &stats->mcc,
			"Multiple collisions");
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "late_coll",
			CTLFLAG_RD, &stats->latecol,
			"Late collisions");
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "collision_count",
			CTLFLAG_RD, &stats->colc,
			"Collision Count");
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "symbol_errors",
			CTLFLAG_RD, &adapter->stats.symerrs,
			"Symbol Errors");
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "sequence_errors",
			CTLFLAG_RD, &adapter->stats.sec,
			"Sequence Errors");
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "defer_count",
			CTLFLAG_RD, &adapter->stats.dc,
			"Defer Count");
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "missed_packets",
			CTLFLAG_RD, &adapter->stats.mpc,
			"Missed Packets");
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_no_buff",
			CTLFLAG_RD, &adapter->stats.rnbc,
			"Receive No Buffers");
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_undersize",
			CTLFLAG_RD, &adapter->stats.ruc,
			"Receive Undersize");
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_fragmented",
			CTLFLAG_RD, &adapter->stats.rfc,
			"Fragmented Packets Received");
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_oversize",
			CTLFLAG_RD, &adapter->stats.roc,
			"Oversized Packets Received");
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_jabber",
			CTLFLAG_RD, &adapter->stats.rjc,
			"Received Jabber");
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_errs",
			CTLFLAG_RD, &adapter->stats.rxerrc,
			"Receive Errors");
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "crc_errs",
			CTLFLAG_RD, &adapter->stats.crcerrs,
			"CRC errors");
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "alignment_errs",
			CTLFLAG_RD, &adapter->stats.algnerrc,
			"Alignment Errors");
	/* On 82575 these are collision counts */
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "coll_ext_errs",
			CTLFLAG_RD, &adapter->stats.cexterr,
			"Collision/Carrier extension errors");
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_recvd",
			CTLFLAG_RD, &adapter->stats.xonrxc,
			"XON Received");
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_txd",
			CTLFLAG_RD, &adapter->stats.xontxc,
			"XON Transmitted");
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_recvd",
			CTLFLAG_RD, &adapter->stats.xoffrxc,
			"XOFF Received");
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_txd",
			CTLFLAG_RD, &adapter->stats.xofftxc,
			"XOFF Transmitted");

	/* Packet Reception Stats */
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_recvd",
			CTLFLAG_RD, &adapter->stats.tpr,
			"Total Packets Received");
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd",
			CTLFLAG_RD, &adapter->stats.gprc,
			"Good Packets Received");
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_recvd",
			CTLFLAG_RD, &adapter->stats.bprc,
			"Broadcast Packets Received");
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd",
			CTLFLAG_RD, &adapter->stats.mprc,
			"Multicast Packets Received");
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_64",
			CTLFLAG_RD, &adapter->stats.prc64,
			"64 byte frames received");
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_65_127",
			CTLFLAG_RD, &adapter->stats.prc127,
			"65-127 byte frames received");
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_128_255",
			CTLFLAG_RD, &adapter->stats.prc255,
			"128-255 byte frames received");
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_256_511",
			CTLFLAG_RD, &adapter->stats.prc511,
			"256-511 byte frames received");
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_512_1023",
			CTLFLAG_RD, &adapter->stats.prc1023,
			"512-1023 byte frames received");
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_1024_1522",
			CTLFLAG_RD, &adapter->stats.prc1522,
			"1024-1522 byte frames received");
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd",
			CTLFLAG_RD, &adapter->stats.gorc,
			"Good Octets Received");

	/* Packet Transmission Stats */
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_txd",
			CTLFLAG_RD, &adapter->stats.gotc,
			"Good Octets Transmitted");
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_txd",
			CTLFLAG_RD, &adapter->stats.tpt,
			"Total Packets Transmitted");
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
			CTLFLAG_RD, &adapter->stats.gptc,
			"Good Packets Transmitted");
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_txd",
			CTLFLAG_RD, &adapter->stats.bptc,
			"Broadcast Packets Transmitted");
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_txd",
			CTLFLAG_RD, &adapter->stats.mptc,
			"Multicast Packets Transmitted");
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_64",
			CTLFLAG_RD, &adapter->stats.ptc64,
			"64 byte frames transmitted");
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_65_127",
			CTLFLAG_RD, &adapter->stats.ptc127,
			"65-127 byte frames transmitted");
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_128_255",
			CTLFLAG_RD, &adapter->stats.ptc255,
			"128-255 byte frames transmitted");
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_256_511",
			CTLFLAG_RD, &adapter->stats.ptc511,
			"256-511 byte frames transmitted");
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_512_1023",
			CTLFLAG_RD, &adapter->stats.ptc1023,
			"512-1023 byte frames transmitted");
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_1024_1522",
			CTLFLAG_RD, &adapter->stats.ptc1522,
			"1024-1522 byte frames transmitted");
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tso_txd",
			CTLFLAG_RD, &adapter->stats.tsctc,
			"TSO Contexts Transmitted");
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tso_ctx_fail",
			CTLFLAG_RD, &adapter->stats.tsctfc,
			"TSO Contexts Failed");

	/* Interrupt Stats */

	int_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "interrupts",
				    CTLFLAG_RD, NULL, "Interrupt Statistics");
	int_list = SYSCTL_CHILDREN(int_node);

	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "asserts",
			CTLFLAG_RD, &adapter->stats.iac,
			"Interrupt Assertion Count");

	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_pkt_timer",
			CTLFLAG_RD, &adapter->stats.icrxptc,
			"Interrupt Cause Rx Pkt Timer Expire Count");

	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_abs_timer",
			CTLFLAG_RD, &adapter->stats.icrxatc,
			"Interrupt Cause Rx Abs Timer Expire Count");

	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_pkt_timer",
			CTLFLAG_RD, &adapter->stats.ictxptc,
			"Interrupt Cause Tx Pkt Timer Expire Count");

	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_abs_timer",
			CTLFLAG_RD, &adapter->stats.ictxatc,
			"Interrupt Cause Tx Abs Timer Expire Count");

	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_queue_empty",
			CTLFLAG_RD, &adapter->stats.ictxqec,
			"Interrupt Cause Tx Queue Empty Count");

	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_queue_min_thresh",
			CTLFLAG_RD, &adapter->stats.ictxqmtc,
			"Interrupt Cause Tx Queue Min Thresh Count");

	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_desc_min_thresh",
			CTLFLAG_RD, &adapter->stats.icrxdmtc,
			"Interrupt Cause Rx Desc Min Thresh Count");

	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_overrun",
			CTLFLAG_RD, &adapter->stats.icrxoc,
			"Interrupt Cause Receiver Overrun Count");
}
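
/*
** Once attached, the whole tree is browsable from userland; a usage
** sketch, assuming the standard dev.<driver>.<unit> sysctl layout:
**
**	sysctl dev.em.0.mac_stats
**	sysctl dev.em.0.queue0.txd_head
**	sysctl dev.em.0.interrupts.asserts
*/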

/**********************************************************************
 *
 *  This routine provides a way to dump out the adapter eeprom,
 *  often a useful debug/service tool. This only dumps the first
 *  32 words; everything that matters lives in that range.
 *
 **********************************************************************/
static int
em_sysctl_nvm_info(SYSCTL_HANDLER_ARGS)
{
	struct adapter *adapter = (struct adapter *)arg1;
	int error;
	int result;

	result = -1;
	error = sysctl_handle_int(oidp, &result, 0, req);

	if (error || !req->newptr)
		return (error);

	/*
	 * This value will cause a hex dump of the
	 * first 32 16-bit words of the EEPROM to
	 * the screen.
	 */
	if (result == 1)
		em_print_nvm_info(adapter);

	return (error);
}
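
/*
** Usage sketch, assuming this handler is attached as "nvm" under the
** device's sysctl tree; a write of 1 triggers the dump:
**
**	sysctl dev.em.0.nvm=1
*/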

static void
em_print_nvm_info(struct adapter *adapter)
{
	u16	eeprom_data;
	int	i, j, row = 0;

	/* It's a bit crude, but it gets the job done */
	printf("\nInterface EEPROM Dump:\n");
	printf("Offset\n0x0000  ");
	for (i = 0, j = 0; i < 32; i++, j++) {
		if (j == 8) { /* Make the offset block */
			j = 0; ++row;
			printf("\n0x00%x0  ", row);
		}
		e1000_read_nvm(&adapter->hw, i, 1, &eeprom_data);
		printf("%04x ", eeprom_data);
	}
	printf("\n");
}

static int
em_sysctl_int_delay(SYSCTL_HANDLER_ARGS)
{
	struct em_int_delay_info *info;
	struct adapter *adapter;
	u32 regval;
	int error, usecs, ticks;

	info = (struct em_int_delay_info *)arg1;
	usecs = info->value;
	error = sysctl_handle_int(oidp, &usecs, 0, req);
	if (error != 0 || req->newptr == NULL)
		return (error);
	if (usecs < 0 || usecs > EM_TICKS_TO_USECS(65535))
		return (EINVAL);
	info->value = usecs;
	ticks = EM_USECS_TO_TICKS(usecs);

	adapter = info->adapter;

	EM_CORE_LOCK(adapter);
	regval = E1000_READ_OFFSET(&adapter->hw, info->offset);
	regval = (regval & ~0xffff) | (ticks & 0xffff);
	/* Handle a few special cases. */
	switch (info->offset) {
	case E1000_RDTR:
		break;
	case E1000_TIDV:
		if (ticks == 0) {
			adapter->txd_cmd &= ~E1000_TXD_CMD_IDE;
			/* Don't write 0 into the TIDV register. */
			regval++;
		} else
			adapter->txd_cmd |= E1000_TXD_CMD_IDE;
		break;
	}
	E1000_WRITE_OFFSET(&adapter->hw, info->offset, regval);
	EM_CORE_UNLOCK(adapter);
	return (0);
}

static void
em_add_int_delay_sysctl(struct adapter *adapter, const char *name,
	const char *description, struct em_int_delay_info *info,
	int offset, int value)
{
	info->adapter = adapter;
	info->offset = offset;
	info->value = value;
	SYSCTL_ADD_PROC(device_get_sysctl_ctx(adapter->dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
	    OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW,
	    info, 0, em_sysctl_int_delay, "I", description);
}
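
/*
** A typical call, sketched from this driver's attach path (the name,
** field and default shown are assumptions): the value is given in
** microseconds and converted to the hardware's ~1.024 usec delay
** ticks by EM_USECS_TO_TICKS() in the handler above:
**
**	em_add_int_delay_sysctl(adapter, "rx_int_delay",
**	    "receive interrupt delay in usecs", &adapter->rx_int_delay,
**	    E1000_REGISTER(&adapter->hw, E1000_RDTR), em_rx_int_delay_dflt);
*/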

static void
em_set_sysctl_value(struct adapter *adapter, const char *name,
	const char *description, int *limit, int value)
{
	*limit = value;
	SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
	    OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW, limit, value, description);
}

/*
** Set flow control using sysctl:
** Flow control values:
**      0 - off
**      1 - rx pause
**      2 - tx pause
**      3 - full
*/
static int
em_set_flowcntl(SYSCTL_HANDLER_ARGS)
{
	struct adapter	*adapter = (struct adapter *) arg1;
	int		error, input;

	input = adapter->fc;	/* current mode, full by default */
	error = sysctl_handle_int(oidp, &input, 0, req);

	if ((error) || (req->newptr == NULL))
		return (error);

	if (input == adapter->fc) /* no change? */
		return (error);

	switch (input) {
	case e1000_fc_rx_pause:
	case e1000_fc_tx_pause:
	case e1000_fc_full:
	case e1000_fc_none:
		adapter->hw.fc.requested_mode = input;
		adapter->fc = input;
		break;
	default:
		/* Do nothing */
		return (error);
	}

	adapter->hw.fc.current_mode = adapter->hw.fc.requested_mode;
	e1000_force_mac_fc(&adapter->hw);
	return (error);
}
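
/*
** Usage sketch, assuming the handler is attached as "fc"; the values
** mirror the e1000_fc_mode enumeration listed above:
**
**	sysctl dev.em.0.fc=3	# full rx/tx pause
**	sysctl dev.em.0.fc=0	# flow control off
*/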

/*
** Manage Energy Efficient Ethernet:
** Control values:
**     0/1 - enabled/disabled
*/
static int
em_sysctl_eee(SYSCTL_HANDLER_ARGS)
{
	struct adapter *adapter = (struct adapter *) arg1;
	int		error, value;

	value = adapter->hw.dev_spec.ich8lan.eee_disable;
	error = sysctl_handle_int(oidp, &value, 0, req);
	if (error || req->newptr == NULL)
		return (error);
	EM_CORE_LOCK(adapter);
	adapter->hw.dev_spec.ich8lan.eee_disable = (value != 0);
	em_init_locked(adapter);
	EM_CORE_UNLOCK(adapter);
	return (0);
}
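
/*
** Usage sketch, assuming the handler is attached as "eee_control";
** the value feeds eee_disable, so writing 1 turns EEE off and 0 turns
** it back on, and the interface re-initializes on every write:
**
**	sysctl dev.em.0.eee_control=1
*/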

static int
em_sysctl_debug_info(SYSCTL_HANDLER_ARGS)
{
	struct adapter *adapter;
	int error;
	int result;

	result = -1;
	error = sysctl_handle_int(oidp, &result, 0, req);

	if (error || !req->newptr)
		return (error);

	if (result == 1) {
		adapter = (struct adapter *)arg1;
		em_print_debug_info(adapter);
	}

	return (error);
}
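
/*
** Usage sketch, assuming the handler is attached as "debug"; a write
** of 1 dumps the ring state below to the console:
**
**	sysctl dev.em.0.debug=1
*/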

/*
** This routine is meant to be fluid; add whatever is
** needed for debugging a problem.  -jfv
*/
static void
em_print_debug_info(struct adapter *adapter)
{
	device_t dev = adapter->dev;
	struct tx_ring *txr = adapter->tx_rings;
	struct rx_ring *rxr = adapter->rx_rings;

	if (adapter->ifp->if_drv_flags & IFF_DRV_RUNNING)
		printf("Interface is RUNNING ");
	else
		printf("Interface is NOT RUNNING ");

	if (adapter->ifp->if_drv_flags & IFF_DRV_OACTIVE)
		printf("and INACTIVE\n");
	else
		printf("and ACTIVE\n");

	device_printf(dev, "hw tdh = %d, hw tdt = %d\n",
	    E1000_READ_REG(&adapter->hw, E1000_TDH(0)),
	    E1000_READ_REG(&adapter->hw, E1000_TDT(0)));
	device_printf(dev, "hw rdh = %d, hw rdt = %d\n",
	    E1000_READ_REG(&adapter->hw, E1000_RDH(0)),
	    E1000_READ_REG(&adapter->hw, E1000_RDT(0)));
	device_printf(dev, "Tx Queue Status = %d\n", txr->queue_status);
	device_printf(dev, "TX descriptors avail = %d\n",
	    txr->tx_avail);
	device_printf(dev, "Tx Descriptors avail failure = %ld\n",
	    txr->no_desc_avail);
	device_printf(dev, "RX discarded packets = %ld\n",
	    rxr->rx_discarded);
	device_printf(dev, "RX Next to Check = %d\n", rxr->next_to_check);
	device_printf(dev, "RX Next to Refresh = %d\n", rxr->next_to_refresh);
}
