1/******************************************************************************
2
3  Copyright (c) 2001-2011, Intel Corporation
4  All rights reserved.
5
6  Redistribution and use in source and binary forms, with or without
7  modification, are permitted provided that the following conditions are met:
8
9   1. Redistributions of source code must retain the above copyright notice,
10      this list of conditions and the following disclaimer.
11
12   2. Redistributions in binary form must reproduce the above copyright
13      notice, this list of conditions and the following disclaimer in the
14      documentation and/or other materials provided with the distribution.
15
16   3. Neither the name of the Intel Corporation nor the names of its
17      contributors may be used to endorse or promote products derived from
18      this software without specific prior written permission.
19
20  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
24  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30  POSSIBILITY OF SUCH DAMAGE.
31
32******************************************************************************/
33/*$FreeBSD$*/
34
35#ifdef HAVE_KERNEL_OPTION_HEADERS
36#include "opt_device_polling.h"
37#include "opt_inet.h"
38#include "opt_inet6.h"
39#endif
40
41#include <sys/param.h>
42#include <sys/systm.h>
43#ifndef __HAIKU__
44#if __FreeBSD_version >= 800000
45#include <sys/buf_ring.h>
46#endif
47#endif
48#include <sys/bus.h>
49#include <sys/endian.h>
50#include <sys/kernel.h>
51#include <sys/kthread.h>
52#include <sys/malloc.h>
53#include <sys/mbuf.h>
54#include <sys/module.h>
55#include <sys/rman.h>
56#include <sys/socket.h>
57#include <sys/sockio.h>
58#include <sys/sysctl.h>
59#include <sys/taskqueue.h>
60#include <sys/eventhandler.h>
61#include <machine/bus.h>
62#include <machine/resource.h>
63
64#include <net/bpf.h>
65#include <net/ethernet.h>
66#include <net/if.h>
67#include <net/if_arp.h>
68#include <net/if_dl.h>
69#include <net/if_media.h>
70
71#include <net/if_types.h>
72#include <net/if_vlan_var.h>
73
74#include <netinet/in_systm.h>
75#include <netinet/in.h>
76#include <netinet/if_ether.h>
77#include <netinet/ip.h>
78#include <netinet/ip6.h>
79#include <netinet/tcp.h>
80#include <netinet/udp.h>
81
82#include <machine/in_cksum.h>
83#ifndef __HAIKU__
84#include <dev/led/led.h>
85#endif
86#include <dev/pci/pcivar.h>
87#include <dev/pci/pcireg.h>
88
89#include "e1000_api.h"
90#include "e1000_82571.h"
91#include "if_em.h"
92
93/*********************************************************************
94 *  Set this to one to display debug statistics
95 *********************************************************************/
96int	em_display_debug_stats = 0;
97
98/*********************************************************************
99 *  Driver version:
100 *********************************************************************/
101char em_driver_version[] = "7.3.2";
102
103/*********************************************************************
104 *  PCI Device ID Table
105 *
106 *  Used by probe to select devices to load on
107 *  Last field stores an index into e1000_strings
108 *  Last entry must be all 0s
109 *
110 *  { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
111 *********************************************************************/
112
113static em_vendor_info_t em_vendor_info_array[] =
114{
115	/* Intel(R) PRO/1000 Network Connection */
116	{ 0x8086, E1000_DEV_ID_82571EB_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
117	{ 0x8086, E1000_DEV_ID_82571EB_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
118	{ 0x8086, E1000_DEV_ID_82571EB_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
119	{ 0x8086, E1000_DEV_ID_82571EB_SERDES_DUAL,
120						PCI_ANY_ID, PCI_ANY_ID, 0},
121	{ 0x8086, E1000_DEV_ID_82571EB_SERDES_QUAD,
122						PCI_ANY_ID, PCI_ANY_ID, 0},
123	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER,
124						PCI_ANY_ID, PCI_ANY_ID, 0},
125	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER_LP,
126						PCI_ANY_ID, PCI_ANY_ID, 0},
127	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_FIBER,
128						PCI_ANY_ID, PCI_ANY_ID, 0},
129	{ 0x8086, E1000_DEV_ID_82571PT_QUAD_COPPER,
130						PCI_ANY_ID, PCI_ANY_ID, 0},
131	{ 0x8086, E1000_DEV_ID_82572EI_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
132	{ 0x8086, E1000_DEV_ID_82572EI_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
133	{ 0x8086, E1000_DEV_ID_82572EI_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
134	{ 0x8086, E1000_DEV_ID_82572EI,		PCI_ANY_ID, PCI_ANY_ID, 0},
135
136	{ 0x8086, E1000_DEV_ID_82573E,		PCI_ANY_ID, PCI_ANY_ID, 0},
137	{ 0x8086, E1000_DEV_ID_82573E_IAMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
138	{ 0x8086, E1000_DEV_ID_82573L,		PCI_ANY_ID, PCI_ANY_ID, 0},
139	{ 0x8086, E1000_DEV_ID_82583V,		PCI_ANY_ID, PCI_ANY_ID, 0},
140	{ 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_SPT,
141						PCI_ANY_ID, PCI_ANY_ID, 0},
142	{ 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_SPT,
143						PCI_ANY_ID, PCI_ANY_ID, 0},
144	{ 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_DPT,
145						PCI_ANY_ID, PCI_ANY_ID, 0},
146	{ 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_DPT,
147						PCI_ANY_ID, PCI_ANY_ID, 0},
148	{ 0x8086, E1000_DEV_ID_ICH8_IGP_M_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
149	{ 0x8086, E1000_DEV_ID_ICH8_IGP_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
150	{ 0x8086, E1000_DEV_ID_ICH8_IGP_C,	PCI_ANY_ID, PCI_ANY_ID, 0},
151	{ 0x8086, E1000_DEV_ID_ICH8_IFE,	PCI_ANY_ID, PCI_ANY_ID, 0},
152	{ 0x8086, E1000_DEV_ID_ICH8_IFE_GT,	PCI_ANY_ID, PCI_ANY_ID, 0},
153	{ 0x8086, E1000_DEV_ID_ICH8_IFE_G,	PCI_ANY_ID, PCI_ANY_ID, 0},
154	{ 0x8086, E1000_DEV_ID_ICH8_IGP_M,	PCI_ANY_ID, PCI_ANY_ID, 0},
155	{ 0x8086, E1000_DEV_ID_ICH8_82567V_3,	PCI_ANY_ID, PCI_ANY_ID, 0},
156	{ 0x8086, E1000_DEV_ID_ICH9_IGP_M_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
157	{ 0x8086, E1000_DEV_ID_ICH9_IGP_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
158	{ 0x8086, E1000_DEV_ID_ICH9_IGP_C,	PCI_ANY_ID, PCI_ANY_ID, 0},
159	{ 0x8086, E1000_DEV_ID_ICH9_IGP_M,	PCI_ANY_ID, PCI_ANY_ID, 0},
160	{ 0x8086, E1000_DEV_ID_ICH9_IGP_M_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
161	{ 0x8086, E1000_DEV_ID_ICH9_IFE,	PCI_ANY_ID, PCI_ANY_ID, 0},
162	{ 0x8086, E1000_DEV_ID_ICH9_IFE_GT,	PCI_ANY_ID, PCI_ANY_ID, 0},
163	{ 0x8086, E1000_DEV_ID_ICH9_IFE_G,	PCI_ANY_ID, PCI_ANY_ID, 0},
164	{ 0x8086, E1000_DEV_ID_ICH9_BM,		PCI_ANY_ID, PCI_ANY_ID, 0},
165	{ 0x8086, E1000_DEV_ID_82574L,		PCI_ANY_ID, PCI_ANY_ID, 0},
166	{ 0x8086, E1000_DEV_ID_82574LA,		PCI_ANY_ID, PCI_ANY_ID, 0},
167	{ 0x8086, E1000_DEV_ID_ICH10_R_BM_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
168	{ 0x8086, E1000_DEV_ID_ICH10_R_BM_LF,	PCI_ANY_ID, PCI_ANY_ID, 0},
169	{ 0x8086, E1000_DEV_ID_ICH10_R_BM_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
170	{ 0x8086, E1000_DEV_ID_ICH10_D_BM_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
171	{ 0x8086, E1000_DEV_ID_ICH10_D_BM_LF,	PCI_ANY_ID, PCI_ANY_ID, 0},
172	{ 0x8086, E1000_DEV_ID_ICH10_D_BM_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
173	{ 0x8086, E1000_DEV_ID_PCH_M_HV_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
174	{ 0x8086, E1000_DEV_ID_PCH_M_HV_LC,	PCI_ANY_ID, PCI_ANY_ID, 0},
175	{ 0x8086, E1000_DEV_ID_PCH_D_HV_DM,	PCI_ANY_ID, PCI_ANY_ID, 0},
176	{ 0x8086, E1000_DEV_ID_PCH_D_HV_DC,	PCI_ANY_ID, PCI_ANY_ID, 0},
177	{ 0x8086, E1000_DEV_ID_PCH2_LV_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
178	{ 0x8086, E1000_DEV_ID_PCH2_LV_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
179	/* required last entry */
180	{ 0, 0, 0, 0, 0}
181};
182
183/*********************************************************************
184 *  Table of branding strings for all supported NICs.
185 *********************************************************************/
186
187static char *em_strings[] = {
188	"Intel(R) PRO/1000 Network Connection"
189};
190
191/*********************************************************************
192 *  Function prototypes
193 *********************************************************************/
194static int	em_probe(device_t);
195static int	em_attach(device_t);
196static int	em_detach(device_t);
197static int	em_shutdown(device_t);
198static int	em_suspend(device_t);
199static int	em_resume(device_t);
200#ifdef EM_MULTIQUEUE
201static int	em_mq_start(struct ifnet *, struct mbuf *);
202static int	em_mq_start_locked(struct ifnet *,
203		    struct tx_ring *, struct mbuf *);
204static void	em_qflush(struct ifnet *);
205#else
206static void	em_start(struct ifnet *);
207static void	em_start_locked(struct ifnet *, struct tx_ring *);
208#endif
209static int	em_ioctl(struct ifnet *, u_long, caddr_t);
210static void	em_init(void *);
211static void	em_init_locked(struct adapter *);
212static void	em_stop(void *);
213static void	em_media_status(struct ifnet *, struct ifmediareq *);
214static int	em_media_change(struct ifnet *);
215static void	em_identify_hardware(struct adapter *);
216static int	em_allocate_pci_resources(struct adapter *);
217static int	em_allocate_legacy(struct adapter *);
218static int	em_allocate_msix(struct adapter *);
219static int	em_allocate_queues(struct adapter *);
220static int	em_setup_msix(struct adapter *);
221static void	em_free_pci_resources(struct adapter *);
222static void	em_local_timer(void *);
223static void	em_reset(struct adapter *);
224static int	em_setup_interface(device_t, struct adapter *);
225
226static void	em_setup_transmit_structures(struct adapter *);
227static void	em_initialize_transmit_unit(struct adapter *);
228static int	em_allocate_transmit_buffers(struct tx_ring *);
229static void	em_free_transmit_structures(struct adapter *);
230static void	em_free_transmit_buffers(struct tx_ring *);
231
232static int	em_setup_receive_structures(struct adapter *);
233static int	em_allocate_receive_buffers(struct rx_ring *);
234static void	em_initialize_receive_unit(struct adapter *);
235static void	em_free_receive_structures(struct adapter *);
236static void	em_free_receive_buffers(struct rx_ring *);
237
238static void	em_enable_intr(struct adapter *);
239static void	em_disable_intr(struct adapter *);
240static void	em_update_stats_counters(struct adapter *);
241static void	em_add_hw_stats(struct adapter *adapter);
242static void	em_txeof(struct tx_ring *);
243static bool	em_rxeof(struct rx_ring *, int, int *);
244#ifndef __NO_STRICT_ALIGNMENT
245static int	em_fixup_rx(struct rx_ring *);
246#endif
247static void	em_receive_checksum(struct e1000_rx_desc *, struct mbuf *);
248static void	em_transmit_checksum_setup(struct tx_ring *, struct mbuf *, int,
249		    struct ip *, u32 *, u32 *);
250static void	em_tso_setup(struct tx_ring *, struct mbuf *, int, struct ip *,
251		    struct tcphdr *, u32 *, u32 *);
252static void	em_set_promisc(struct adapter *);
253static void	em_disable_promisc(struct adapter *);
254static void	em_set_multi(struct adapter *);
255static void	em_update_link_status(struct adapter *);
256static void	em_refresh_mbufs(struct rx_ring *, int);
257static void	em_register_vlan(void *, struct ifnet *, u16);
258static void	em_unregister_vlan(void *, struct ifnet *, u16);
259static void	em_setup_vlan_hw_support(struct adapter *);
260static int	em_xmit(struct tx_ring *, struct mbuf **);
261static int	em_dma_malloc(struct adapter *, bus_size_t,
262		    struct em_dma_alloc *, int);
263static void	em_dma_free(struct adapter *, struct em_dma_alloc *);
264static int	em_sysctl_nvm_info(SYSCTL_HANDLER_ARGS);
265static void	em_print_nvm_info(struct adapter *);
266static int	em_sysctl_debug_info(SYSCTL_HANDLER_ARGS);
267static void	em_print_debug_info(struct adapter *);
268static int 	em_is_valid_ether_addr(u8 *);
269static int	em_sysctl_int_delay(SYSCTL_HANDLER_ARGS);
270static void	em_add_int_delay_sysctl(struct adapter *, const char *,
271		    const char *, struct em_int_delay_info *, int, int);
272/* Management and WOL Support */
273static void	em_init_manageability(struct adapter *);
274static void	em_release_manageability(struct adapter *);
275static void     em_get_hw_control(struct adapter *);
276static void     em_release_hw_control(struct adapter *);
277static void	em_get_wakeup(device_t);
278static void     em_enable_wakeup(device_t);
279static int	em_enable_phy_wakeup(struct adapter *);
280static void	em_led_func(void *, int);
281static void	em_disable_aspm(struct adapter *);
282
283static int	em_irq_fast(void *);
284
285/* MSIX handlers */
286static void	em_msix_tx(void *);
287static void	em_msix_rx(void *);
288static void	em_msix_link(void *);
289static void	em_handle_tx(void *context, int pending);
290static void	em_handle_rx(void *context, int pending);
291static void	em_handle_link(void *context, int pending);
292
293static void	em_set_sysctl_value(struct adapter *, const char *,
294		    const char *, int *, int);
295static int	em_set_flowcntl(SYSCTL_HANDLER_ARGS);
296static int	em_sysctl_eee(SYSCTL_HANDLER_ARGS);
297
298static __inline void em_rx_discard(struct rx_ring *, int);
299
300#ifdef DEVICE_POLLING
301static poll_handler_t em_poll;
302#endif /* POLLING */
303
304/*********************************************************************
305 *  FreeBSD Device Interface Entry Points
306 *********************************************************************/
307
308static device_method_t em_methods[] = {
309	/* Device interface */
310	DEVMETHOD(device_probe, em_probe),
311	DEVMETHOD(device_attach, em_attach),
312	DEVMETHOD(device_detach, em_detach),
313	DEVMETHOD(device_shutdown, em_shutdown),
314	DEVMETHOD(device_suspend, em_suspend),
315	DEVMETHOD(device_resume, em_resume),
316	{0, 0}
317};
318
319static driver_t em_driver = {
320	"em", em_methods, sizeof(struct adapter),
321};
322
323devclass_t em_devclass;
324DRIVER_MODULE(em, pci, em_driver, em_devclass, 0, 0);
325MODULE_DEPEND(em, pci, 1, 1, 1);
326MODULE_DEPEND(em, ether, 1, 1, 1);
327
328/*********************************************************************
329 *  Tunable default values.
330 *********************************************************************/
331
332#define EM_TICKS_TO_USECS(ticks)	((1024 * (ticks) + 500) / 1000)
333#define EM_USECS_TO_TICKS(usecs)	((1000 * (usecs) + 512) / 1024)
334#define M_TSO_LEN			66
335
336/* Allow common code without TSO */
337#ifndef CSUM_TSO
338#define CSUM_TSO	0
339#endif
340
341static SYSCTL_NODE(_hw, OID_AUTO, em, CTLFLAG_RD, 0, "EM driver parameters");
342
343static int em_tx_int_delay_dflt = EM_TICKS_TO_USECS(EM_TIDV);
344static int em_rx_int_delay_dflt = EM_TICKS_TO_USECS(EM_RDTR);
345TUNABLE_INT("hw.em.tx_int_delay", &em_tx_int_delay_dflt);
346TUNABLE_INT("hw.em.rx_int_delay", &em_rx_int_delay_dflt);
347SYSCTL_INT(_hw_em, OID_AUTO, tx_int_delay, CTLFLAG_RDTUN, &em_tx_int_delay_dflt,
348    0, "Default transmit interrupt delay in usecs");
349SYSCTL_INT(_hw_em, OID_AUTO, rx_int_delay, CTLFLAG_RDTUN, &em_rx_int_delay_dflt,
350    0, "Default receive interrupt delay in usecs");
351
352static int em_tx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_TADV);
353static int em_rx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_RADV);
354TUNABLE_INT("hw.em.tx_abs_int_delay", &em_tx_abs_int_delay_dflt);
355TUNABLE_INT("hw.em.rx_abs_int_delay", &em_rx_abs_int_delay_dflt);
356SYSCTL_INT(_hw_em, OID_AUTO, tx_abs_int_delay, CTLFLAG_RDTUN,
357    &em_tx_abs_int_delay_dflt, 0,
358    "Default transmit interrupt delay limit in usecs");
359SYSCTL_INT(_hw_em, OID_AUTO, rx_abs_int_delay, CTLFLAG_RDTUN,
360    &em_rx_abs_int_delay_dflt, 0,
361    "Default receive interrupt delay limit in usecs");
362
363static int em_rxd = EM_DEFAULT_RXD;
364static int em_txd = EM_DEFAULT_TXD;
365TUNABLE_INT("hw.em.rxd", &em_rxd);
366TUNABLE_INT("hw.em.txd", &em_txd);
367SYSCTL_INT(_hw_em, OID_AUTO, rxd, CTLFLAG_RDTUN, &em_rxd, 0,
368    "Number of receive descriptors per queue");
369SYSCTL_INT(_hw_em, OID_AUTO, txd, CTLFLAG_RDTUN, &em_txd, 0,
370    "Number of transmit descriptors per queue");
371
372static int em_smart_pwr_down = FALSE;
373TUNABLE_INT("hw.em.smart_pwr_down", &em_smart_pwr_down);
374SYSCTL_INT(_hw_em, OID_AUTO, smart_pwr_down, CTLFLAG_RDTUN, &em_smart_pwr_down,
375    0, "Set to true to leave smart power down enabled on newer adapters");
376
377/* Controls whether promiscuous also shows bad packets */
378static int em_debug_sbp = FALSE;
379TUNABLE_INT("hw.em.sbp", &em_debug_sbp);
380SYSCTL_INT(_hw_em, OID_AUTO, sbp, CTLFLAG_RDTUN, &em_debug_sbp, 0,
381    "Show bad packets in promiscuous mode");
382
383static int em_enable_msix = TRUE;
384TUNABLE_INT("hw.em.enable_msix", &em_enable_msix);
385SYSCTL_INT(_hw_em, OID_AUTO, enable_msix, CTLFLAG_RDTUN, &em_enable_msix, 0,
386    "Enable MSI-X interrupts");
387
388/* How many packets rxeof tries to clean at a time */
389static int em_rx_process_limit = 100;
390TUNABLE_INT("hw.em.rx_process_limit", &em_rx_process_limit);
391SYSCTL_INT(_hw_em, OID_AUTO, rx_process_limit, CTLFLAG_RDTUN,
392    &em_rx_process_limit, 0,
393    "Maximum number of received packets to process "
394    "at a time, -1 means unlimited");
395
396/* Energy efficient ethernet - default to OFF */
397static int eee_setting = 1;
398TUNABLE_INT("hw.em.eee_setting", &eee_setting);
399SYSCTL_INT(_hw_em, OID_AUTO, eee_setting, CTLFLAG_RDTUN, &eee_setting, 0,
400    "Enable Energy Efficient Ethernet");
401
402/* Global used in WOL setup with multiport cards */
403static int global_quad_port_a = 0;
404
405#ifdef DEV_NETMAP	/* see ixgbe.c for details */
406#include <dev/netmap/if_em_netmap.h>
407#endif /* DEV_NETMAP */
408
409/*********************************************************************
410 *  Device identification routine
411 *
412 *  em_probe determines if the driver should be loaded on
413 *  adapter based on PCI vendor/device id of the adapter.
414 *
415 *  return BUS_PROBE_DEFAULT on success, positive on failure
416 *********************************************************************/
417
418static int
419em_probe(device_t dev)
420{
421	char		adapter_name[60];
422	u16		pci_vendor_id = 0;
423	u16		pci_device_id = 0;
424	u16		pci_subvendor_id = 0;
425	u16		pci_subdevice_id = 0;
426	em_vendor_info_t *ent;
427
428	INIT_DEBUGOUT("em_probe: begin");
429
430	pci_vendor_id = pci_get_vendor(dev);
431	if (pci_vendor_id != EM_VENDOR_ID)
432		return (ENXIO);
433
434	pci_device_id = pci_get_device(dev);
435	pci_subvendor_id = pci_get_subvendor(dev);
436	pci_subdevice_id = pci_get_subdevice(dev);
437
438	ent = em_vendor_info_array;
439	while (ent->vendor_id != 0) {
440		if ((pci_vendor_id == ent->vendor_id) &&
441		    (pci_device_id == ent->device_id) &&
442
443		    ((pci_subvendor_id == ent->subvendor_id) ||
444		    (ent->subvendor_id == PCI_ANY_ID)) &&
445
446		    ((pci_subdevice_id == ent->subdevice_id) ||
447		    (ent->subdevice_id == PCI_ANY_ID))) {
448			sprintf(adapter_name, "%s %s",
449				em_strings[ent->index],
450				em_driver_version);
451			device_set_desc_copy(dev, adapter_name);
452			return (BUS_PROBE_DEFAULT);
453		}
454		ent++;
455	}
456
457	return (ENXIO);
458}
459
460/*********************************************************************
461 *  Device initialization routine
462 *
463 *  The attach entry point is called when the driver is being loaded.
464 *  This routine identifies the type of hardware, allocates all resources
465 *  and initializes the hardware.
466 *
467 *  return 0 on success, positive on failure
468 *********************************************************************/
469
470static int
471em_attach(device_t dev)
472{
473	struct adapter	*adapter;
474	struct e1000_hw	*hw;
475	int		error = 0;
476
477	INIT_DEBUGOUT("em_attach: begin");
478
479#ifndef __HAIKU__
480	if (resource_disabled("em", device_get_unit(dev))) {
481		device_printf(dev, "Disabled by device hint\n");
482		return (ENXIO);
483	}
484#endif
485
486	adapter = device_get_softc(dev);
487	adapter->dev = adapter->osdep.dev = dev;
488	hw = &adapter->hw;
489	EM_CORE_LOCK_INIT(adapter, device_get_nameunit(dev));
490
491	/* SYSCTL stuff */
492	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
493	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
494	    OID_AUTO, "nvm", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
495	    em_sysctl_nvm_info, "I", "NVM Information");
496
497	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
498	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
499	    OID_AUTO, "debug", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
500	    em_sysctl_debug_info, "I", "Debug Information");
501
502	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
503	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
504	    OID_AUTO, "fc", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
505	    em_set_flowcntl, "I", "Flow Control");
506
507	callout_init_mtx(&adapter->timer, &adapter->core_mtx, 0);
508
509	/* Determine hardware and mac info */
510	em_identify_hardware(adapter);
511
512	/* Setup PCI resources */
513	if (em_allocate_pci_resources(adapter)) {
514		device_printf(dev, "Allocation of PCI resources failed\n");
515		error = ENXIO;
516		goto err_pci;
517	}
518
519	/*
520	** For ICH8 and family we need to
521	** map the flash memory, and this
522	** must happen after the MAC is
523	** identified
524	*/
525	if ((hw->mac.type == e1000_ich8lan) ||
526	    (hw->mac.type == e1000_ich9lan) ||
527	    (hw->mac.type == e1000_ich10lan) ||
528	    (hw->mac.type == e1000_pchlan) ||
529	    (hw->mac.type == e1000_pch2lan)) {
530		int rid = EM_BAR_TYPE_FLASH;
531		adapter->flash = bus_alloc_resource_any(dev,
532		    SYS_RES_MEMORY, &rid, RF_ACTIVE);
533		if (adapter->flash == NULL) {
534			device_printf(dev, "Mapping of Flash failed\n");
535			error = ENXIO;
536			goto err_pci;
537		}
538		/* This is used in the shared code */
539		hw->flash_address = (u8 *)adapter->flash;
540		adapter->osdep.flash_bus_space_tag =
541		    rman_get_bustag(adapter->flash);
542		adapter->osdep.flash_bus_space_handle =
543		    rman_get_bushandle(adapter->flash);
544	}
545
546	/* Do Shared Code initialization */
547	if (e1000_setup_init_funcs(hw, TRUE)) {
548		device_printf(dev, "Setup of Shared code failed\n");
549		error = ENXIO;
550		goto err_pci;
551	}
552
553	e1000_get_bus_info(hw);
554
555	/* Set up some sysctls for the tunable interrupt delays */
556	em_add_int_delay_sysctl(adapter, "rx_int_delay",
557	    "receive interrupt delay in usecs", &adapter->rx_int_delay,
558	    E1000_REGISTER(hw, E1000_RDTR), em_rx_int_delay_dflt);
559	em_add_int_delay_sysctl(adapter, "tx_int_delay",
560	    "transmit interrupt delay in usecs", &adapter->tx_int_delay,
561	    E1000_REGISTER(hw, E1000_TIDV), em_tx_int_delay_dflt);
562	em_add_int_delay_sysctl(adapter, "rx_abs_int_delay",
563	    "receive interrupt delay limit in usecs",
564	    &adapter->rx_abs_int_delay,
565	    E1000_REGISTER(hw, E1000_RADV),
566	    em_rx_abs_int_delay_dflt);
567	em_add_int_delay_sysctl(adapter, "tx_abs_int_delay",
568	    "transmit interrupt delay limit in usecs",
569	    &adapter->tx_abs_int_delay,
570	    E1000_REGISTER(hw, E1000_TADV),
571	    em_tx_abs_int_delay_dflt);
572
573	/* Sysctl for limiting the amount of work done in the taskqueue */
574	em_set_sysctl_value(adapter, "rx_processing_limit",
575	    "max number of rx packets to process", &adapter->rx_process_limit,
576	    em_rx_process_limit);
577
578	/*
579	 * Validate number of transmit and receive descriptors. It
580	 * must not exceed hardware maximum, and must be multiple
581	 * of E1000_DBA_ALIGN.
582	 */
583	if (((em_txd * sizeof(struct e1000_tx_desc)) % EM_DBA_ALIGN) != 0 ||
584	    (em_txd > EM_MAX_TXD) || (em_txd < EM_MIN_TXD)) {
585		device_printf(dev, "Using %d TX descriptors instead of %d!\n",
586		    EM_DEFAULT_TXD, em_txd);
587		adapter->num_tx_desc = EM_DEFAULT_TXD;
588	} else
589		adapter->num_tx_desc = em_txd;
590
591	if (((em_rxd * sizeof(struct e1000_rx_desc)) % EM_DBA_ALIGN) != 0 ||
592	    (em_rxd > EM_MAX_RXD) || (em_rxd < EM_MIN_RXD)) {
593		device_printf(dev, "Using %d RX descriptors instead of %d!\n",
594		    EM_DEFAULT_RXD, em_rxd);
595		adapter->num_rx_desc = EM_DEFAULT_RXD;
596	} else
597		adapter->num_rx_desc = em_rxd;
598
599	hw->mac.autoneg = DO_AUTO_NEG;
600	hw->phy.autoneg_wait_to_complete = FALSE;
601	hw->phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
602
603	/* Copper options */
604	if (hw->phy.media_type == e1000_media_type_copper) {
605		hw->phy.mdix = AUTO_ALL_MODES;
606		hw->phy.disable_polarity_correction = FALSE;
607		hw->phy.ms_type = EM_MASTER_SLAVE;
608	}
609
610	/*
611	 * Set the frame limits assuming
612	 * standard ethernet sized frames.
613	 */
614	adapter->max_frame_size = ETHERMTU + ETHER_HDR_LEN + ETHERNET_FCS_SIZE;
615	adapter->min_frame_size = ETH_ZLEN + ETHERNET_FCS_SIZE;
616
617	/*
618	 * This controls when hardware reports transmit completion
619	 * status.
620	 */
621	hw->mac.report_tx_early = 1;
622
623	/*
624	** Get queue/ring memory
625	*/
626	if (em_allocate_queues(adapter)) {
627		error = ENOMEM;
628		goto err_pci;
629	}
630
631	/* Allocate multicast array memory. */
632	adapter->mta = malloc(sizeof(u8) * ETH_ADDR_LEN *
633	    MAX_NUM_MULTICAST_ADDRESSES, M_DEVBUF, M_NOWAIT);
634	if (adapter->mta == NULL) {
635		device_printf(dev, "Can not allocate multicast setup array\n");
636		error = ENOMEM;
637		goto err_late;
638	}
639
640	/* Check SOL/IDER usage */
641	if (e1000_check_reset_block(hw))
642		device_printf(dev, "PHY reset is blocked"
643		    " due to SOL/IDER session.\n");
644
645	/* Sysctl for setting Energy Efficient Ethernet */
646	hw->dev_spec.ich8lan.eee_disable = eee_setting;
647	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
648	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
649	    OID_AUTO, "eee_control", CTLTYPE_INT|CTLFLAG_RW,
650	    adapter, 0, em_sysctl_eee, "I",
651	    "Disable Energy Efficient Ethernet");
652
653	/*
654	** Start from a known state, this is
655	** important in reading the nvm and
656	** mac from that.
657	*/
658	e1000_reset_hw(hw);
659
660
661	/* Make sure we have a good EEPROM before we read from it */
662	if (e1000_validate_nvm_checksum(hw) < 0) {
663		/*
664		** Some PCI-E parts fail the first check due to
665		** the link being in sleep state, call it again,
666		** if it fails a second time its a real issue.
667		*/
668		if (e1000_validate_nvm_checksum(hw) < 0) {
669			device_printf(dev,
670			    "The EEPROM Checksum Is Not Valid\n");
671			error = EIO;
672			goto err_late;
673		}
674	}
675
676	/* Copy the permanent MAC address out of the EEPROM */
677	if (e1000_read_mac_addr(hw) < 0) {
678		device_printf(dev, "EEPROM read error while reading MAC"
679		    " address\n");
680		error = EIO;
681		goto err_late;
682	}
683
684	if (!em_is_valid_ether_addr(hw->mac.addr)) {
685		device_printf(dev, "Invalid MAC address\n");
686		error = EIO;
687		goto err_late;
688	}
689
690	/*
691	**  Do interrupt configuration
692	*/
693	if (adapter->msix > 1) /* Do MSIX */
694		error = em_allocate_msix(adapter);
695	else  /* MSI or Legacy */
696		error = em_allocate_legacy(adapter);
697	if (error)
698		goto err_late;
699
700	/*
701	 * Get Wake-on-Lan and Management info for later use
702	 */
703	em_get_wakeup(dev);
704
705	/* Setup OS specific network interface */
706	if (em_setup_interface(dev, adapter) != 0)
707		goto err_late;
708
709	em_reset(adapter);
710
711	/* Initialize statistics */
712	em_update_stats_counters(adapter);
713
714	hw->mac.get_link_status = 1;
715	em_update_link_status(adapter);
716
717	/* Register for VLAN events */
718	adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
719	    em_register_vlan, adapter, EVENTHANDLER_PRI_FIRST);
720	adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
721	    em_unregister_vlan, adapter, EVENTHANDLER_PRI_FIRST);
722
723	em_add_hw_stats(adapter);
724
725	/* Non-AMT based hardware can now take control from firmware */
726	if (adapter->has_manage && !adapter->has_amt)
727		em_get_hw_control(adapter);
728
729	/* Tell the stack that the interface is not active */
730	adapter->ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
731	adapter->ifp->if_drv_flags |= IFF_DRV_OACTIVE;
732
733#ifndef __HAIKU__
734	adapter->led_dev = led_create(em_led_func, adapter,
735	    device_get_nameunit(dev));
736#endif
737#ifdef DEV_NETMAP
738	em_netmap_attach(adapter);
739#endif /* DEV_NETMAP */
740
741	INIT_DEBUGOUT("em_attach: end");
742
743	return (0);
744
745err_late:
746	em_free_transmit_structures(adapter);
747	em_free_receive_structures(adapter);
748	em_release_hw_control(adapter);
749	if (adapter->ifp != NULL)
750		if_free(adapter->ifp);
751err_pci:
752	em_free_pci_resources(adapter);
753	free(adapter->mta, M_DEVBUF);
754	EM_CORE_LOCK_DESTROY(adapter);
755
756	return (error);
757}
758
759/*********************************************************************
760 *  Device removal routine
761 *
762 *  The detach entry point is called when the driver is being removed.
763 *  This routine stops the adapter and deallocates all the resources
764 *  that were allocated for driver operation.
765 *
766 *  return 0 on success, positive on failure
767 *********************************************************************/
768
769static int
770em_detach(device_t dev)
771{
772	struct adapter	*adapter = device_get_softc(dev);
773	struct ifnet	*ifp = adapter->ifp;
774
775	INIT_DEBUGOUT("em_detach: begin");
776
777	/* Make sure VLANS are not using driver */
778	if (adapter->ifp->if_vlantrunk != NULL) {
779		device_printf(dev,"Vlan in use, detach first\n");
780		return (EBUSY);
781	}
782
783#ifdef DEVICE_POLLING
784	if (ifp->if_capenable & IFCAP_POLLING)
785		ether_poll_deregister(ifp);
786#endif
787#ifndef __HAIKU__
788	if (adapter->led_dev != NULL)
789		led_destroy(adapter->led_dev);
790#endif
791
792	EM_CORE_LOCK(adapter);
793	adapter->in_detach = 1;
794	em_stop(adapter);
795	EM_CORE_UNLOCK(adapter);
796	EM_CORE_LOCK_DESTROY(adapter);
797
798	e1000_phy_hw_reset(&adapter->hw);
799
800	em_release_manageability(adapter);
801	em_release_hw_control(adapter);
802
803	/* Unregister VLAN events */
804	if (adapter->vlan_attach != NULL)
805		EVENTHANDLER_DEREGISTER(vlan_config, adapter->vlan_attach);
806	if (adapter->vlan_detach != NULL)
807		EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach);
808
809	ether_ifdetach(adapter->ifp);
810	callout_drain(&adapter->timer);
811
812#ifdef DEV_NETMAP
813	netmap_detach(ifp);
814#endif /* DEV_NETMAP */
815
816	em_free_pci_resources(adapter);
817	bus_generic_detach(dev);
818	if_free(ifp);
819
820	em_free_transmit_structures(adapter);
821	em_free_receive_structures(adapter);
822
823	em_release_hw_control(adapter);
824	free(adapter->mta, M_DEVBUF);
825
826	return (0);
827}
828
829/*********************************************************************
830 *
831 *  Shutdown entry point
832 *
833 **********************************************************************/
834
835static int
836em_shutdown(device_t dev)
837{
838	return em_suspend(dev);
839}
840
841/*
842 * Suspend/resume device methods.
843 */
844static int
845em_suspend(device_t dev)
846{
847	struct adapter *adapter = device_get_softc(dev);
848
849	EM_CORE_LOCK(adapter);
850
851        em_release_manageability(adapter);
852	em_release_hw_control(adapter);
853	em_enable_wakeup(dev);
854
855	EM_CORE_UNLOCK(adapter);
856
857	return bus_generic_suspend(dev);
858}
859
860static int
861em_resume(device_t dev)
862{
863	struct adapter *adapter = device_get_softc(dev);
864	struct tx_ring	*txr = adapter->tx_rings;
865	struct ifnet *ifp = adapter->ifp;
866
867	EM_CORE_LOCK(adapter);
868	if (adapter->hw.mac.type == e1000_pch2lan)
869		e1000_resume_workarounds_pchlan(&adapter->hw);
870	em_init_locked(adapter);
871	em_init_manageability(adapter);
872
873	if ((ifp->if_flags & IFF_UP) &&
874	    (ifp->if_drv_flags & IFF_DRV_RUNNING) && adapter->link_active) {
875		int i;
876		for (i = 0; i < adapter->num_queues; i++, txr++) {
877			EM_TX_LOCK(txr);
878#ifdef EM_MULTIQUEUE
879			if (!drbr_empty(ifp, txr->br))
880				em_mq_start_locked(ifp, txr, NULL);
881#else
882			if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
883				em_start_locked(ifp, txr);
884#endif
885			EM_TX_UNLOCK(txr);
886		}
887	}
888	EM_CORE_UNLOCK(adapter);
889
890	return bus_generic_resume(dev);
891}
892
893
894#ifdef EM_MULTIQUEUE
895/*********************************************************************
896 *  Multiqueue Transmit routines
897 *
898 *  em_mq_start is called by the stack to initiate a transmit.
899 *  however, if busy the driver can queue the request rather
900 *  than do an immediate send. It is this that is an advantage
901 *  in this driver, rather than also having multiple tx queues.
902 **********************************************************************/
903static int
904em_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr, struct mbuf *m)
905{
906	struct adapter  *adapter = txr->adapter;
907        struct mbuf     *next;
908        int             err = 0, enq = 0;
909
910	if ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) !=
911	    IFF_DRV_RUNNING || adapter->link_active == 0) {
912		if (m != NULL)
913			err = drbr_enqueue(ifp, txr->br, m);
914		return (err);
915	}
916
917	enq = 0;
918	if (m == NULL) {
919		next = drbr_dequeue(ifp, txr->br);
920	} else if (drbr_needs_enqueue(ifp, txr->br)) {
921		if ((err = drbr_enqueue(ifp, txr->br, m)) != 0)
922			return (err);
923		next = drbr_dequeue(ifp, txr->br);
924	} else
925		next = m;
926
927	/* Process the queue */
928	while (next != NULL) {
929		if ((err = em_xmit(txr, &next)) != 0) {
930                        if (next != NULL)
931                                err = drbr_enqueue(ifp, txr->br, next);
932                        break;
933		}
934		enq++;
935		drbr_stats_update(ifp, next->m_pkthdr.len, next->m_flags);
936		ETHER_BPF_MTAP(ifp, next);
937		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
938                        break;
939		next = drbr_dequeue(ifp, txr->br);
940	}
941
942	if (enq > 0) {
943                /* Set the watchdog */
944                txr->queue_status = EM_QUEUE_WORKING;
945		txr->watchdog_time = ticks;
946	}
947
948	if (txr->tx_avail < EM_MAX_SCATTER)
949		em_txeof(txr);
950	if (txr->tx_avail < EM_MAX_SCATTER)
951		ifp->if_drv_flags |= IFF_DRV_OACTIVE;
952	return (err);
953}
954
955/*
956** Multiqueue capable stack interface
957*/
958static int
959em_mq_start(struct ifnet *ifp, struct mbuf *m)
960{
961	struct adapter	*adapter = ifp->if_softc;
962	struct tx_ring	*txr = adapter->tx_rings;
963	int 		error;
964
965	if (EM_TX_TRYLOCK(txr)) {
966		error = em_mq_start_locked(ifp, txr, m);
967		EM_TX_UNLOCK(txr);
968	} else
969		error = drbr_enqueue(ifp, txr->br, m);
970
971	return (error);
972}
973
974/*
975** Flush all ring buffers
976*/
977static void
978em_qflush(struct ifnet *ifp)
979{
980	int i;
981	struct adapter  *adapter = ifp->if_softc;
982	struct tx_ring  *txr = adapter->tx_rings;
983	struct mbuf     *m;
984
985	for (i = 0; i < adapter->num_queues; i++, txr++) {
986		EM_TX_LOCK(txr);
987		while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
988			m_freem(m);
989		EM_TX_UNLOCK(txr);
990	}
991	if_qflush(ifp);
992}
993#else  /* !EM_MULTIQUEUE */
994
995static void
996em_start_locked(struct ifnet *ifp, struct tx_ring *txr)
997{
998	struct adapter	*adapter = ifp->if_softc;
999	struct mbuf	*m_head;
1000
1001	EM_TX_LOCK_ASSERT(txr);
1002
1003	if ((ifp->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
1004	    IFF_DRV_RUNNING)
1005		return;
1006
1007	if (!adapter->link_active)
1008		return;
1009
1010	while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
1011        	/* Call cleanup if number of TX descriptors low */
1012		if (txr->tx_avail <= EM_TX_CLEANUP_THRESHOLD)
1013			em_txeof(txr);
1014		if (txr->tx_avail < EM_MAX_SCATTER) {
1015			ifp->if_drv_flags |= IFF_DRV_OACTIVE;
1016			break;
1017		}
1018                IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
1019		if (m_head == NULL)
1020			break;
1021		/*
1022		 *  Encapsulation can modify our pointer, and or make it
1023		 *  NULL on failure.  In that event, we can't requeue.
1024		 */
1025		if (em_xmit(txr, &m_head)) {
1026			if (m_head == NULL)
1027				break;
1028			IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
1029			break;
1030		}
1031
1032		/* Send a copy of the frame to the BPF listener */
1033		ETHER_BPF_MTAP(ifp, m_head);
1034
1035		/* Set timeout in case hardware has problems transmitting. */
1036		txr->watchdog_time = ticks;
1037                txr->queue_status = EM_QUEUE_WORKING;
1038	}
1039
1040	return;
1041}
1042
1043static void
1044em_start(struct ifnet *ifp)
1045{
1046	struct adapter	*adapter = ifp->if_softc;
1047	struct tx_ring	*txr = adapter->tx_rings;
1048
1049	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1050		EM_TX_LOCK(txr);
1051		em_start_locked(ifp, txr);
1052		EM_TX_UNLOCK(txr);
1053	}
1054	return;
1055}
1056#endif /* EM_MULTIQUEUE */
1057
1058/*********************************************************************
1059 *  Ioctl entry point
1060 *
1061 *  em_ioctl is called when the user wants to configure the
1062 *  interface.
1063 *
1064 *  return 0 on success, positive on failure
1065 **********************************************************************/
1066
1067static int
1068em_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
1069{
1070	struct adapter	*adapter = ifp->if_softc;
1071	struct ifreq	*ifr = (struct ifreq *)data;
1072#if defined(INET) || defined(INET6)
1073	struct ifaddr	*ifa = (struct ifaddr *)data;
1074#endif
1075	bool		avoid_reset = FALSE;
1076	int		error = 0;
1077
1078	if (adapter->in_detach)
1079		return (error);
1080
1081	switch (command) {
1082	case SIOCSIFADDR:
1083#ifdef INET
1084		if (ifa->ifa_addr->sa_family == AF_INET)
1085			avoid_reset = TRUE;
1086#endif
1087#ifdef INET6
1088		if (ifa->ifa_addr->sa_family == AF_INET6)
1089			avoid_reset = TRUE;
1090#endif
1091		/*
1092		** Calling init results in link renegotiation,
1093		** so we avoid doing it when possible.
1094		*/
1095		if (avoid_reset) {
1096			ifp->if_flags |= IFF_UP;
1097			if (!(ifp->if_drv_flags & IFF_DRV_RUNNING))
1098				em_init(adapter);
1099#ifdef INET
1100			if (!(ifp->if_flags & IFF_NOARP))
1101				arp_ifinit(ifp, ifa);
1102#endif
1103		} else
1104			error = ether_ioctl(ifp, command, data);
1105		break;
1106	case SIOCSIFMTU:
1107	    {
1108		int max_frame_size;
1109
1110		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)");
1111
1112		EM_CORE_LOCK(adapter);
1113		switch (adapter->hw.mac.type) {
1114		case e1000_82571:
1115		case e1000_82572:
1116		case e1000_ich9lan:
1117		case e1000_ich10lan:
1118		case e1000_pch2lan:
1119		case e1000_82574:
1120		case e1000_82583:
1121		case e1000_80003es2lan:	/* 9K Jumbo Frame size */
1122			max_frame_size = 9234;
1123			break;
1124		case e1000_pchlan:
1125			max_frame_size = 4096;
1126			break;
1127			/* Adapters that do not support jumbo frames */
1128		case e1000_ich8lan:
1129			max_frame_size = ETHER_MAX_LEN;
1130			break;
1131		default:
1132			max_frame_size = MAX_JUMBO_FRAME_SIZE;
1133		}
1134		if (ifr->ifr_mtu > max_frame_size - ETHER_HDR_LEN -
1135		    ETHER_CRC_LEN) {
1136			EM_CORE_UNLOCK(adapter);
1137			error = EINVAL;
1138			break;
1139		}
1140
1141		ifp->if_mtu = ifr->ifr_mtu;
1142		adapter->max_frame_size =
1143		    ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
1144		em_init_locked(adapter);
1145		EM_CORE_UNLOCK(adapter);
1146		break;
1147	    }
1148	case SIOCSIFFLAGS:
1149		IOCTL_DEBUGOUT("ioctl rcv'd:\
1150		    SIOCSIFFLAGS (Set Interface Flags)");
1151		EM_CORE_LOCK(adapter);
1152		if (ifp->if_flags & IFF_UP) {
1153			if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) {
1154				if ((ifp->if_flags ^ adapter->if_flags) &
1155				    (IFF_PROMISC | IFF_ALLMULTI)) {
1156					em_disable_promisc(adapter);
1157					em_set_promisc(adapter);
1158				}
1159			} else
1160				em_init_locked(adapter);
1161		} else
1162			if (ifp->if_drv_flags & IFF_DRV_RUNNING)
1163				em_stop(adapter);
1164		adapter->if_flags = ifp->if_flags;
1165		EM_CORE_UNLOCK(adapter);
1166		break;
1167	case SIOCADDMULTI:
1168	case SIOCDELMULTI:
1169		IOCTL_DEBUGOUT("ioctl rcv'd: SIOC(ADD|DEL)MULTI");
1170		if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1171			EM_CORE_LOCK(adapter);
1172			em_disable_intr(adapter);
1173			em_set_multi(adapter);
1174#ifdef DEVICE_POLLING
1175			if (!(ifp->if_capenable & IFCAP_POLLING))
1176#endif
1177				em_enable_intr(adapter);
1178			EM_CORE_UNLOCK(adapter);
1179		}
1180		break;
1181	case SIOCSIFMEDIA:
1182		/* Check SOL/IDER usage */
1183		EM_CORE_LOCK(adapter);
1184		if (e1000_check_reset_block(&adapter->hw)) {
1185			EM_CORE_UNLOCK(adapter);
1186			device_printf(adapter->dev, "Media change is"
1187			    " blocked due to SOL/IDER session.\n");
1188			break;
1189		}
1190		EM_CORE_UNLOCK(adapter);
1191		/* falls thru */
1192	case SIOCGIFMEDIA:
1193		IOCTL_DEBUGOUT("ioctl rcv'd: \
1194		    SIOCxIFMEDIA (Get/Set Interface Media)");
1195		error = ifmedia_ioctl(ifp, ifr, &adapter->media, command);
1196		break;
1197	case SIOCSIFCAP:
1198	    {
1199		int mask, reinit;
1200
1201		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFCAP (Set Capabilities)");
1202		reinit = 0;
1203		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
1204#ifdef DEVICE_POLLING
1205		if (mask & IFCAP_POLLING) {
1206			if (ifr->ifr_reqcap & IFCAP_POLLING) {
1207				error = ether_poll_register(em_poll, ifp);
1208				if (error)
1209					return (error);
1210				EM_CORE_LOCK(adapter);
1211				em_disable_intr(adapter);
1212				ifp->if_capenable |= IFCAP_POLLING;
1213				EM_CORE_UNLOCK(adapter);
1214			} else {
1215				error = ether_poll_deregister(ifp);
1216				/* Enable interrupt even in error case */
1217				EM_CORE_LOCK(adapter);
1218				em_enable_intr(adapter);
1219				ifp->if_capenable &= ~IFCAP_POLLING;
1220				EM_CORE_UNLOCK(adapter);
1221			}
1222		}
1223#endif
1224		if (mask & IFCAP_HWCSUM) {
1225			ifp->if_capenable ^= IFCAP_HWCSUM;
1226			reinit = 1;
1227		}
1228		if (mask & IFCAP_TSO4) {
1229			ifp->if_capenable ^= IFCAP_TSO4;
1230			reinit = 1;
1231		}
1232		if (mask & IFCAP_VLAN_HWTAGGING) {
1233			ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
1234			reinit = 1;
1235		}
1236		if (mask & IFCAP_VLAN_HWFILTER) {
1237			ifp->if_capenable ^= IFCAP_VLAN_HWFILTER;
1238			reinit = 1;
1239		}
1240		if (mask & IFCAP_VLAN_HWTSO) {
1241			ifp->if_capenable ^= IFCAP_VLAN_HWTSO;
1242			reinit = 1;
1243		}
1244		if ((mask & IFCAP_WOL) &&
1245		    (ifp->if_capabilities & IFCAP_WOL) != 0) {
1246			if (mask & IFCAP_WOL_MCAST)
1247				ifp->if_capenable ^= IFCAP_WOL_MCAST;
1248			if (mask & IFCAP_WOL_MAGIC)
1249				ifp->if_capenable ^= IFCAP_WOL_MAGIC;
1250		}
1251		if (reinit && (ifp->if_drv_flags & IFF_DRV_RUNNING))
1252			em_init(adapter);
1253		VLAN_CAPABILITIES(ifp);
1254		break;
1255	    }
1256
1257	default:
1258		error = ether_ioctl(ifp, command, data);
1259		break;
1260	}
1261
1262	return (error);
1263}
1264
1265
1266/*********************************************************************
1267 *  Init entry point
1268 *
1269 *  This routine is used in two ways. It is used by the stack as
1270 *  init entry point in network interface structure. It is also used
1271 *  by the driver as a hw/sw initialization routine to get to a
1272 *  consistent state.
1273 *
1274 *  return 0 on success, positive on failure
1275 **********************************************************************/
1276
1277static void
1278em_init_locked(struct adapter *adapter)
1279{
1280	struct ifnet	*ifp = adapter->ifp;
1281	device_t	dev = adapter->dev;
1282
1283	INIT_DEBUGOUT("em_init: begin");
1284
1285	EM_CORE_LOCK_ASSERT(adapter);
1286
1287	em_disable_intr(adapter);
1288	callout_stop(&adapter->timer);
1289
1290	/* Get the latest mac address, User can use a LAA */
1291        bcopy(IF_LLADDR(adapter->ifp), adapter->hw.mac.addr,
1292              ETHER_ADDR_LEN);
1293
1294	/* Put the address into the Receive Address Array */
1295	e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);
1296
1297	/*
1298	 * With the 82571 adapter, RAR[0] may be overwritten
1299	 * when the other port is reset, we make a duplicate
1300	 * in RAR[14] for that eventuality, this assures
1301	 * the interface continues to function.
1302	 */
1303	if (adapter->hw.mac.type == e1000_82571) {
1304		e1000_set_laa_state_82571(&adapter->hw, TRUE);
1305		e1000_rar_set(&adapter->hw, adapter->hw.mac.addr,
1306		    E1000_RAR_ENTRIES - 1);
1307	}
1308
1309	/* Initialize the hardware */
1310	em_reset(adapter);
1311	em_update_link_status(adapter);
1312
1313	/* Setup VLAN support, basic and offload if available */
1314	E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);
1315
1316	/* Set hardware offload abilities */
1317	ifp->if_hwassist = 0;
1318	if (ifp->if_capenable & IFCAP_TXCSUM)
1319		ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
1320	if (ifp->if_capenable & IFCAP_TSO4)
1321		ifp->if_hwassist |= CSUM_TSO;
1322
1323	/* Configure for OS presence */
1324	em_init_manageability(adapter);
1325
1326	/* Prepare transmit descriptors and buffers */
1327	em_setup_transmit_structures(adapter);
1328	em_initialize_transmit_unit(adapter);
1329
1330	/* Setup Multicast table */
1331	em_set_multi(adapter);
1332
1333	/*
1334	** Figure out the desired mbuf
1335	** pool for doing jumbos
1336	*/
1337	if (adapter->max_frame_size <= 2048)
1338		adapter->rx_mbuf_sz = MCLBYTES;
1339	else if (adapter->max_frame_size <= 4096)
1340		adapter->rx_mbuf_sz = MJUMPAGESIZE;
1341	else
1342		adapter->rx_mbuf_sz = MJUM9BYTES;
1343
1344	/* Prepare receive descriptors and buffers */
1345	if (em_setup_receive_structures(adapter)) {
1346		device_printf(dev, "Could not setup receive structures\n");
1347		em_stop(adapter);
1348		return;
1349	}
1350	em_initialize_receive_unit(adapter);
1351
1352	/* Use real VLAN Filter support? */
1353	if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) {
1354		if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
1355			/* Use real VLAN Filter support */
1356			em_setup_vlan_hw_support(adapter);
1357		else {
1358			u32 ctrl;
1359			ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
1360			ctrl |= E1000_CTRL_VME;
1361			E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
1362		}
1363	}
1364
1365	/* Don't lose promiscuous settings */
1366	em_set_promisc(adapter);
1367
1368	/* Set the interface as ACTIVE */
1369	ifp->if_drv_flags |= IFF_DRV_RUNNING;
1370	ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
1371
1372	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
1373	e1000_clear_hw_cntrs_base_generic(&adapter->hw);
1374
1375	/* MSI/X configuration for 82574 */
1376	if (adapter->hw.mac.type == e1000_82574) {
1377		int tmp;
1378		tmp = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
1379		tmp |= E1000_CTRL_EXT_PBA_CLR;
1380		E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, tmp);
1381		/* Set the IVAR - interrupt vector routing. */
1382		E1000_WRITE_REG(&adapter->hw, E1000_IVAR, adapter->ivars);
1383	}
1384
1385#ifdef DEVICE_POLLING
1386	/*
1387	 * Only enable interrupts if we are not polling, make sure
1388	 * they are off otherwise.
1389	 */
1390	if (ifp->if_capenable & IFCAP_POLLING)
1391		em_disable_intr(adapter);
1392	else
1393#endif /* DEVICE_POLLING */
1394		em_enable_intr(adapter);
1395
1396	/* AMT based hardware can now take control from firmware */
1397	if (adapter->has_manage && adapter->has_amt)
1398		em_get_hw_control(adapter);
1399}
1400
1401static void
1402em_init(void *arg)
1403{
1404	struct adapter *adapter = arg;
1405
1406	EM_CORE_LOCK(adapter);
1407	em_init_locked(adapter);
1408	EM_CORE_UNLOCK(adapter);
1409}
1410
1411
1412#ifdef DEVICE_POLLING
1413/*********************************************************************
1414 *
1415 *  Legacy polling routine: note this only works with single queue
1416 *
1417 *********************************************************************/
1418static int
1419em_poll(struct ifnet *ifp, enum poll_cmd cmd, int count)
1420{
1421	struct adapter *adapter = ifp->if_softc;
1422	struct tx_ring	*txr = adapter->tx_rings;
1423	struct rx_ring	*rxr = adapter->rx_rings;
1424	u32		reg_icr;
1425	int		rx_done;
1426
1427	EM_CORE_LOCK(adapter);
1428	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
1429		EM_CORE_UNLOCK(adapter);
1430		return (0);
1431	}
1432
1433	if (cmd == POLL_AND_CHECK_STATUS) {
1434		reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1435		if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
1436			callout_stop(&adapter->timer);
1437			adapter->hw.mac.get_link_status = 1;
1438			em_update_link_status(adapter);
1439			callout_reset(&adapter->timer, hz,
1440			    em_local_timer, adapter);
1441		}
1442	}
1443	EM_CORE_UNLOCK(adapter);
1444
1445	em_rxeof(rxr, count, &rx_done);
1446
1447	EM_TX_LOCK(txr);
1448	em_txeof(txr);
1449#ifdef EM_MULTIQUEUE
1450	if (!drbr_empty(ifp, txr->br))
1451		em_mq_start_locked(ifp, txr, NULL);
1452#else
1453	if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1454		em_start_locked(ifp, txr);
1455#endif
1456	EM_TX_UNLOCK(txr);
1457
1458	return (rx_done);
1459}
1460#endif /* DEVICE_POLLING */
1461
1462
1463/*********************************************************************
1464 *
1465 *  Fast Legacy/MSI Combined Interrupt Service routine
1466 *
1467 *********************************************************************/
1468static int
1469em_irq_fast(void *arg)
1470{
1471	struct adapter	*adapter = arg;
1472	struct ifnet	*ifp;
1473	u32		reg_icr;
1474
1475	ifp = adapter->ifp;
1476
1477	reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1478
1479	/* Hot eject?  */
1480	if (reg_icr == 0xffffffff)
1481		return FILTER_STRAY;
1482
1483	/* Definitely not our interrupt.  */
1484	if (reg_icr == 0x0)
1485		return FILTER_STRAY;
1486
1487	/*
1488	 * Starting with the 82571 chip, bit 31 should be used to
1489	 * determine whether the interrupt belongs to us.
1490	 */
1491	if (adapter->hw.mac.type >= e1000_82571 &&
1492	    (reg_icr & E1000_ICR_INT_ASSERTED) == 0)
1493		return FILTER_STRAY;
1494
1495	em_disable_intr(adapter);
1496	taskqueue_enqueue(adapter->tq, &adapter->que_task);
1497
1498	/* Link status change */
1499	if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
1500		adapter->hw.mac.get_link_status = 1;
1501		taskqueue_enqueue(taskqueue_fast, &adapter->link_task);
1502	}
1503
1504	if (reg_icr & E1000_ICR_RXO)
1505		adapter->rx_overruns++;
1506	return FILTER_HANDLED;
1507}
1508
1509/* Combined RX/TX handler, used by Legacy and MSI */
1510static void
1511em_handle_que(void *context, int pending)
1512{
1513	struct adapter	*adapter = context;
1514	struct ifnet	*ifp = adapter->ifp;
1515	struct tx_ring	*txr = adapter->tx_rings;
1516	struct rx_ring	*rxr = adapter->rx_rings;
1517
1518
1519	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1520		bool more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
1521		EM_TX_LOCK(txr);
1522		em_txeof(txr);
1523#ifdef EM_MULTIQUEUE
1524		if (!drbr_empty(ifp, txr->br))
1525			em_mq_start_locked(ifp, txr, NULL);
1526#else
1527		if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1528			em_start_locked(ifp, txr);
1529#endif
1530		EM_TX_UNLOCK(txr);
1531		if (more) {
1532			taskqueue_enqueue(adapter->tq, &adapter->que_task);
1533			return;
1534		}
1535	}
1536
1537	em_enable_intr(adapter);
1538	return;
1539}
1540
1541
1542/*********************************************************************
1543 *
1544 *  MSIX Interrupt Service Routines
1545 *
1546 **********************************************************************/
1547static void
1548em_msix_tx(void *arg)
1549{
1550	struct tx_ring *txr = arg;
1551	struct adapter *adapter = txr->adapter;
1552	struct ifnet	*ifp = adapter->ifp;
1553
1554	++txr->tx_irq;
1555	EM_TX_LOCK(txr);
1556	em_txeof(txr);
1557#ifdef EM_MULTIQUEUE
1558	if (!drbr_empty(ifp, txr->br))
1559		em_mq_start_locked(ifp, txr, NULL);
1560#else
1561	if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1562		em_start_locked(ifp, txr);
1563#endif
1564	/* Reenable this interrupt */
1565	E1000_WRITE_REG(&adapter->hw, E1000_IMS, txr->ims);
1566	EM_TX_UNLOCK(txr);
1567	return;
1568}
1569
1570/*********************************************************************
1571 *
1572 *  MSIX RX Interrupt Service routine
1573 *
1574 **********************************************************************/
1575
1576static void
1577em_msix_rx(void *arg)
1578{
1579	struct rx_ring	*rxr = arg;
1580	struct adapter	*adapter = rxr->adapter;
1581	bool		more;
1582
1583	++rxr->rx_irq;
1584	more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
1585	if (more)
1586		taskqueue_enqueue(rxr->tq, &rxr->rx_task);
1587	else
1588		/* Reenable this interrupt */
1589		E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxr->ims);
1590	return;
1591}
1592
1593/*********************************************************************
1594 *
1595 *  MSIX Link Fast Interrupt Service routine
1596 *
1597 **********************************************************************/
1598static void
1599em_msix_link(void *arg)
1600{
1601	struct adapter	*adapter = arg;
1602	u32		reg_icr;
1603
1604	++adapter->link_irq;
1605	reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1606
1607	if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
1608		adapter->hw.mac.get_link_status = 1;
1609		em_handle_link(adapter, 0);
1610	} else
1611		E1000_WRITE_REG(&adapter->hw, E1000_IMS,
1612		    EM_MSIX_LINK | E1000_IMS_LSC);
1613	return;
1614}
1615
1616static void
1617em_handle_rx(void *context, int pending)
1618{
1619	struct rx_ring	*rxr = context;
1620	struct adapter	*adapter = rxr->adapter;
1621        bool            more;
1622
1623	more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
1624	if (more)
1625		taskqueue_enqueue(rxr->tq, &rxr->rx_task);
1626	else
1627		/* Reenable this interrupt */
1628		E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxr->ims);
1629}
1630
1631static void
1632em_handle_tx(void *context, int pending)
1633{
1634	struct tx_ring	*txr = context;
1635	struct adapter	*adapter = txr->adapter;
1636	struct ifnet	*ifp = adapter->ifp;
1637
1638	EM_TX_LOCK(txr);
1639	em_txeof(txr);
1640#ifdef EM_MULTIQUEUE
1641	if (!drbr_empty(ifp, txr->br))
1642		em_mq_start_locked(ifp, txr, NULL);
1643#else
1644	if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1645		em_start_locked(ifp, txr);
1646#endif
1647	E1000_WRITE_REG(&adapter->hw, E1000_IMS, txr->ims);
1648	EM_TX_UNLOCK(txr);
1649}
1650
1651static void
1652em_handle_link(void *context, int pending)
1653{
1654	struct adapter	*adapter = context;
1655	struct tx_ring	*txr = adapter->tx_rings;
1656	struct ifnet *ifp = adapter->ifp;
1657
1658	if (!(ifp->if_drv_flags & IFF_DRV_RUNNING))
1659		return;
1660
1661	EM_CORE_LOCK(adapter);
1662	callout_stop(&adapter->timer);
1663	em_update_link_status(adapter);
1664	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
1665	E1000_WRITE_REG(&adapter->hw, E1000_IMS,
1666	    EM_MSIX_LINK | E1000_IMS_LSC);
1667	if (adapter->link_active) {
1668		int i;
1669		for (i = 0; i < adapter->num_queues; i++, txr++) {
1670			EM_TX_LOCK(txr);
1671#ifdef EM_MULTIQUEUE
1672			if (!drbr_empty(ifp, txr->br))
1673				em_mq_start_locked(ifp, txr, NULL);
1674#else
1675			if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1676				em_start_locked(ifp, txr);
1677#endif
1678			EM_TX_UNLOCK(txr);
1679		}
1680	}
1681	EM_CORE_UNLOCK(adapter);
1682}
1683
1684
1685/*********************************************************************
1686 *
1687 *  Media Ioctl callback
1688 *
1689 *  This routine is called whenever the user queries the status of
1690 *  the interface using ifconfig.
1691 *
1692 **********************************************************************/
1693static void
1694em_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
1695{
1696	struct adapter *adapter = ifp->if_softc;
1697	u_char fiber_type = IFM_1000_SX;
1698
1699	INIT_DEBUGOUT("em_media_status: begin");
1700
1701	EM_CORE_LOCK(adapter);
1702	em_update_link_status(adapter);
1703
1704	ifmr->ifm_status = IFM_AVALID;
1705	ifmr->ifm_active = IFM_ETHER;
1706
1707	if (!adapter->link_active) {
1708		EM_CORE_UNLOCK(adapter);
1709		return;
1710	}
1711
1712	ifmr->ifm_status |= IFM_ACTIVE;
1713
1714	if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
1715	    (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
1716		ifmr->ifm_active |= fiber_type | IFM_FDX;
1717	} else {
1718		switch (adapter->link_speed) {
1719		case 10:
1720			ifmr->ifm_active |= IFM_10_T;
1721			break;
1722		case 100:
1723			ifmr->ifm_active |= IFM_100_TX;
1724			break;
1725		case 1000:
1726			ifmr->ifm_active |= IFM_1000_T;
1727			break;
1728		}
1729		if (adapter->link_duplex == FULL_DUPLEX)
1730			ifmr->ifm_active |= IFM_FDX;
1731		else
1732			ifmr->ifm_active |= IFM_HDX;
1733	}
1734	EM_CORE_UNLOCK(adapter);
1735}
1736
1737/*********************************************************************
1738 *
1739 *  Media Ioctl callback
1740 *
1741 *  This routine is called when the user changes speed/duplex using
1742 *  media/mediopt option with ifconfig.
1743 *
1744 **********************************************************************/
1745static int
1746em_media_change(struct ifnet *ifp)
1747{
1748	struct adapter *adapter = ifp->if_softc;
1749	struct ifmedia  *ifm = &adapter->media;
1750
1751	INIT_DEBUGOUT("em_media_change: begin");
1752
1753	if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
1754		return (EINVAL);
1755
1756	EM_CORE_LOCK(adapter);
1757	switch (IFM_SUBTYPE(ifm->ifm_media)) {
1758	case IFM_AUTO:
1759		adapter->hw.mac.autoneg = DO_AUTO_NEG;
1760		adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
1761		break;
1762	case IFM_1000_LX:
1763	case IFM_1000_SX:
1764	case IFM_1000_T:
1765		adapter->hw.mac.autoneg = DO_AUTO_NEG;
1766		adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
1767		break;
1768	case IFM_100_TX:
1769		adapter->hw.mac.autoneg = FALSE;
1770		adapter->hw.phy.autoneg_advertised = 0;
1771		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1772			adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_FULL;
1773		else
1774			adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_HALF;
1775		break;
1776	case IFM_10_T:
1777		adapter->hw.mac.autoneg = FALSE;
1778		adapter->hw.phy.autoneg_advertised = 0;
1779		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1780			adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_FULL;
1781		else
1782			adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_HALF;
1783		break;
1784	default:
1785		device_printf(adapter->dev, "Unsupported media type\n");
1786	}
1787
1788	em_init_locked(adapter);
1789	EM_CORE_UNLOCK(adapter);
1790
1791	return (0);
1792}
1793
1794/*********************************************************************
1795 *
1796 *  This routine maps the mbufs to tx descriptors.
1797 *
1798 *  return 0 on success, positive on failure
1799 **********************************************************************/
1800
1801static int
1802em_xmit(struct tx_ring *txr, struct mbuf **m_headp)
1803{
1804	struct adapter		*adapter = txr->adapter;
1805	bus_dma_segment_t	segs[EM_MAX_SCATTER];
1806	bus_dmamap_t		map;
1807	struct em_buffer	*tx_buffer, *tx_buffer_mapped;
1808	struct e1000_tx_desc	*ctxd = NULL;
1809	struct mbuf		*m_head;
1810	struct ether_header	*eh;
1811	struct ip		*ip = NULL;
1812	struct tcphdr		*tp = NULL;
1813	u32			txd_upper, txd_lower, txd_used, txd_saved;
1814	int			ip_off, poff;
1815	int			nsegs, i, j, first, last = 0;
1816	int			error, do_tso, tso_desc = 0, remap = 1;
1817
1818retry:
1819	m_head = *m_headp;
1820	txd_upper = txd_lower = txd_used = txd_saved = 0;
1821	do_tso = ((m_head->m_pkthdr.csum_flags & CSUM_TSO) != 0);
1822	ip_off = poff = 0;
1823
1824	/*
1825	 * Intel recommends entire IP/TCP header length reside in a single
1826	 * buffer. If multiple descriptors are used to describe the IP and
1827	 * TCP header, each descriptor should describe one or more
1828	 * complete headers; descriptors referencing only parts of headers
1829	 * are not supported. If all layer headers are not coalesced into
1830	 * a single buffer, each buffer should not cross a 4KB boundary,
1831	 * or be larger than the maximum read request size.
1832	 * Controller also requires modifing IP/TCP header to make TSO work
1833	 * so we firstly get a writable mbuf chain then coalesce ethernet/
1834	 * IP/TCP header into a single buffer to meet the requirement of
1835	 * controller. This also simplifies IP/TCP/UDP checksum offloading
1836	 * which also has similiar restrictions.
1837	 */
1838	if (do_tso || m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD) {
1839		if (do_tso || (m_head->m_next != NULL &&
1840		    m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD)) {
1841			if (M_WRITABLE(*m_headp) == 0) {
1842				m_head = m_dup(*m_headp, M_DONTWAIT);
1843				m_freem(*m_headp);
1844				if (m_head == NULL) {
1845					*m_headp = NULL;
1846					return (ENOBUFS);
1847				}
1848				*m_headp = m_head;
1849			}
1850		}
1851		/*
1852		 * XXX
1853		 * Assume IPv4, we don't have TSO/checksum offload support
1854		 * for IPv6 yet.
1855		 */
1856		ip_off = sizeof(struct ether_header);
1857		m_head = m_pullup(m_head, ip_off);
1858		if (m_head == NULL) {
1859			*m_headp = NULL;
1860			return (ENOBUFS);
1861		}
1862		eh = mtod(m_head, struct ether_header *);
1863		if (eh->ether_type == htons(ETHERTYPE_VLAN)) {
1864			ip_off = sizeof(struct ether_vlan_header);
1865			m_head = m_pullup(m_head, ip_off);
1866			if (m_head == NULL) {
1867				*m_headp = NULL;
1868				return (ENOBUFS);
1869			}
1870		}
1871		m_head = m_pullup(m_head, ip_off + sizeof(struct ip));
1872		if (m_head == NULL) {
1873			*m_headp = NULL;
1874			return (ENOBUFS);
1875		}
1876		ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1877		poff = ip_off + (ip->ip_hl << 2);
1878		if (do_tso) {
1879			m_head = m_pullup(m_head, poff + sizeof(struct tcphdr));
1880			if (m_head == NULL) {
1881				*m_headp = NULL;
1882				return (ENOBUFS);
1883			}
1884			tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1885			/*
1886			 * TSO workaround:
1887			 *   pull 4 more bytes of data into it.
1888			 */
1889			m_head = m_pullup(m_head, poff + (tp->th_off << 2) + 4);
1890			if (m_head == NULL) {
1891				*m_headp = NULL;
1892				return (ENOBUFS);
1893			}
1894			ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1895			ip->ip_len = 0;
1896			ip->ip_sum = 0;
1897			/*
1898			 * The pseudo TCP checksum does not include TCP payload
1899			 * length so driver should recompute the checksum here
1900			 * what hardware expect to see. This is adherence of
1901			 * Microsoft's Large Send specification.
1902			 */
1903			tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1904			tp->th_sum = in_pseudo(ip->ip_src.s_addr,
1905			    ip->ip_dst.s_addr, htons(IPPROTO_TCP));
1906		} else if (m_head->m_pkthdr.csum_flags & CSUM_TCP) {
1907			m_head = m_pullup(m_head, poff + sizeof(struct tcphdr));
1908			if (m_head == NULL) {
1909				*m_headp = NULL;
1910				return (ENOBUFS);
1911			}
1912			tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1913			m_head = m_pullup(m_head, poff + (tp->th_off << 2));
1914			if (m_head == NULL) {
1915				*m_headp = NULL;
1916				return (ENOBUFS);
1917			}
1918			ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1919			tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1920		} else if (m_head->m_pkthdr.csum_flags & CSUM_UDP) {
1921			m_head = m_pullup(m_head, poff + sizeof(struct udphdr));
1922			if (m_head == NULL) {
1923				*m_headp = NULL;
1924				return (ENOBUFS);
1925			}
1926			ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1927		}
1928		*m_headp = m_head;
1929	}
1930
1931	/*
1932	 * Map the packet for DMA
1933	 *
1934	 * Capture the first descriptor index,
1935	 * this descriptor will have the index
1936	 * of the EOP which is the only one that
1937	 * now gets a DONE bit writeback.
1938	 */
1939	first = txr->next_avail_desc;
1940	tx_buffer = &txr->tx_buffers[first];
1941	tx_buffer_mapped = tx_buffer;
1942	map = tx_buffer->map;
1943
1944	error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
1945	    *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
1946
1947	/*
1948	 * There are two types of errors we can (try) to handle:
1949	 * - EFBIG means the mbuf chain was too long and bus_dma ran
1950	 *   out of segments.  Defragment the mbuf chain and try again.
1951	 * - ENOMEM means bus_dma could not obtain enough bounce buffers
1952	 *   at this point in time.  Defer sending and try again later.
1953	 * All other errors, in particular EINVAL, are fatal and prevent the
1954	 * mbuf chain from ever going through.  Drop it and report error.
1955	 */
1956	if (error == EFBIG && remap) {
1957		struct mbuf *m;
1958
1959		m = m_defrag(*m_headp, M_DONTWAIT);
1960		if (m == NULL) {
1961			adapter->mbuf_alloc_failed++;
1962			m_freem(*m_headp);
1963			*m_headp = NULL;
1964			return (ENOBUFS);
1965		}
1966		*m_headp = m;
1967
1968		/* Try it again, but only once */
1969		remap = 0;
1970		goto retry;
1971	} else if (error == ENOMEM) {
1972		adapter->no_tx_dma_setup++;
1973		return (error);
1974	} else if (error != 0) {
1975		adapter->no_tx_dma_setup++;
1976		m_freem(*m_headp);
1977		*m_headp = NULL;
1978		return (error);
1979	}
1980
1981	/*
1982	 * TSO Hardware workaround, if this packet is not
1983	 * TSO, and is only a single descriptor long, and
1984	 * it follows a TSO burst, then we need to add a
1985	 * sentinel descriptor to prevent premature writeback.
1986	 */
1987	if ((do_tso == 0) && (txr->tx_tso == TRUE)) {
1988		if (nsegs == 1)
1989			tso_desc = TRUE;
1990		txr->tx_tso = FALSE;
1991	}
1992
1993        if (nsegs > (txr->tx_avail - 2)) {
1994                txr->no_desc_avail++;
1995		bus_dmamap_unload(txr->txtag, map);
1996		return (ENOBUFS);
1997        }
1998	m_head = *m_headp;
1999
2000	/* Do hardware assists */
2001	if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
2002		em_tso_setup(txr, m_head, ip_off, ip, tp,
2003		    &txd_upper, &txd_lower);
2004		/* we need to make a final sentinel transmit desc */
2005		tso_desc = TRUE;
2006	} else if (m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD)
2007		em_transmit_checksum_setup(txr, m_head,
2008		    ip_off, ip, &txd_upper, &txd_lower);
2009
2010	if (m_head->m_flags & M_VLANTAG) {
2011		/* Set the vlan id. */
2012		txd_upper |=
2013		    (htole16(m_head->m_pkthdr.ether_vtag) << 16);
2014                /* Tell hardware to add tag */
2015                txd_lower |= htole32(E1000_TXD_CMD_VLE);
2016        }
2017
2018	i = txr->next_avail_desc;
2019
2020	/* Set up our transmit descriptors */
2021	for (j = 0; j < nsegs; j++) {
2022		bus_size_t seg_len;
2023		bus_addr_t seg_addr;
2024
2025		tx_buffer = &txr->tx_buffers[i];
2026		ctxd = &txr->tx_base[i];
2027		seg_addr = segs[j].ds_addr;
2028		seg_len  = segs[j].ds_len;
2029		/*
2030		** TSO Workaround:
2031		** If this is the last descriptor, we want to
2032		** split it so we have a small final sentinel
2033		*/
2034		if (tso_desc && (j == (nsegs -1)) && (seg_len > 8)) {
2035			seg_len -= 4;
2036			ctxd->buffer_addr = htole64(seg_addr);
2037			ctxd->lower.data = htole32(
2038			adapter->txd_cmd | txd_lower | seg_len);
2039			ctxd->upper.data =
2040			    htole32(txd_upper);
2041			if (++i == adapter->num_tx_desc)
2042				i = 0;
2043			/* Now make the sentinel */
2044			++txd_used; /* using an extra txd */
2045			ctxd = &txr->tx_base[i];
2046			tx_buffer = &txr->tx_buffers[i];
2047			ctxd->buffer_addr =
2048			    htole64(seg_addr + seg_len);
2049			ctxd->lower.data = htole32(
2050			adapter->txd_cmd | txd_lower | 4);
2051			ctxd->upper.data =
2052			    htole32(txd_upper);
2053			last = i;
2054			if (++i == adapter->num_tx_desc)
2055				i = 0;
2056		} else {
2057			ctxd->buffer_addr = htole64(seg_addr);
2058			ctxd->lower.data = htole32(
2059			adapter->txd_cmd | txd_lower | seg_len);
2060			ctxd->upper.data =
2061			    htole32(txd_upper);
2062			last = i;
2063			if (++i == adapter->num_tx_desc)
2064				i = 0;
2065		}
2066		tx_buffer->m_head = NULL;
2067		tx_buffer->next_eop = -1;
2068	}
2069
2070	txr->next_avail_desc = i;
2071	txr->tx_avail -= nsegs;
2072	if (tso_desc) /* TSO used an extra for sentinel */
2073		txr->tx_avail -= txd_used;
2074
2075        tx_buffer->m_head = m_head;
2076	/*
2077	** Here we swap the map so the last descriptor,
2078	** which gets the completion interrupt has the
2079	** real map, and the first descriptor gets the
2080	** unused map from this descriptor.
2081	*/
2082	tx_buffer_mapped->map = tx_buffer->map;
2083	tx_buffer->map = map;
2084        bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);
2085
2086        /*
2087         * Last Descriptor of Packet
2088	 * needs End Of Packet (EOP)
2089	 * and Report Status (RS)
2090         */
2091        ctxd->lower.data |=
2092	    htole32(E1000_TXD_CMD_EOP | E1000_TXD_CMD_RS);
2093	/*
2094	 * Keep track in the first buffer which
2095	 * descriptor will be written back
2096	 */
2097	tx_buffer = &txr->tx_buffers[first];
2098	tx_buffer->next_eop = last;
2099	/* Update the watchdog time early and often */
2100	txr->watchdog_time = ticks;
2101
2102	/*
2103	 * Advance the Transmit Descriptor Tail (TDT), this tells the E1000
2104	 * that this frame is available to transmit.
2105	 */
2106	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
2107	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
2108	E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), i);
2109
2110	return (0);
2111}
2112
2113static void
2114em_set_promisc(struct adapter *adapter)
2115{
2116	struct ifnet	*ifp = adapter->ifp;
2117	u32		reg_rctl;
2118
2119	reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2120
2121	if (ifp->if_flags & IFF_PROMISC) {
2122		reg_rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
2123		/* Turn this on if you want to see bad packets */
2124		if (em_debug_sbp)
2125			reg_rctl |= E1000_RCTL_SBP;
2126		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2127	} else if (ifp->if_flags & IFF_ALLMULTI) {
2128		reg_rctl |= E1000_RCTL_MPE;
2129		reg_rctl &= ~E1000_RCTL_UPE;
2130		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2131	}
2132}
2133
2134static void
2135em_disable_promisc(struct adapter *adapter)
2136{
2137	u32	reg_rctl;
2138
2139	reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2140
2141	reg_rctl &=  (~E1000_RCTL_UPE);
2142	reg_rctl &=  (~E1000_RCTL_MPE);
2143	reg_rctl &=  (~E1000_RCTL_SBP);
2144	E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2145}
2146
2147
2148/*********************************************************************
2149 *  Multicast Update
2150 *
2151 *  This routine is called whenever multicast address list is updated.
2152 *
2153 **********************************************************************/
2154
2155static void
2156em_set_multi(struct adapter *adapter)
2157{
2158	struct ifnet	*ifp = adapter->ifp;
2159	struct ifmultiaddr *ifma;
2160	u32 reg_rctl = 0;
2161	u8  *mta; /* Multicast array memory */
2162	int mcnt = 0;
2163
2164	IOCTL_DEBUGOUT("em_set_multi: begin");
2165
2166	mta = adapter->mta;
2167	bzero(mta, sizeof(u8) * ETH_ADDR_LEN * MAX_NUM_MULTICAST_ADDRESSES);
2168
2169	if (adapter->hw.mac.type == e1000_82542 &&
2170	    adapter->hw.revision_id == E1000_REVISION_2) {
2171		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2172		if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
2173			e1000_pci_clear_mwi(&adapter->hw);
2174		reg_rctl |= E1000_RCTL_RST;
2175		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2176		msec_delay(5);
2177	}
2178
2179#if __FreeBSD_version < 800000
2180	IF_ADDR_LOCK(ifp);
2181#else
2182	if_maddr_rlock(ifp);
2183#endif
2184	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
2185		if (ifma->ifma_addr->sa_family != AF_LINK)
2186			continue;
2187
2188		if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
2189			break;
2190
2191		bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
2192		    &mta[mcnt * ETH_ADDR_LEN], ETH_ADDR_LEN);
2193		mcnt++;
2194	}
2195#if __FreeBSD_version < 800000
2196	IF_ADDR_UNLOCK(ifp);
2197#else
2198	if_maddr_runlock(ifp);
2199#endif
2200	if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) {
2201		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2202		reg_rctl |= E1000_RCTL_MPE;
2203		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2204	} else
2205		e1000_update_mc_addr_list(&adapter->hw, mta, mcnt);
2206
2207	if (adapter->hw.mac.type == e1000_82542 &&
2208	    adapter->hw.revision_id == E1000_REVISION_2) {
2209		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2210		reg_rctl &= ~E1000_RCTL_RST;
2211		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2212		msec_delay(5);
2213		if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
2214			e1000_pci_set_mwi(&adapter->hw);
2215	}
2216}
2217
2218
2219/*********************************************************************
2220 *  Timer routine
2221 *
2222 *  This routine checks for link status and updates statistics.
2223 *
2224 **********************************************************************/
2225
2226static void
2227em_local_timer(void *arg)
2228{
2229	int i;
2230	struct adapter	*adapter = arg;
2231	struct ifnet	*ifp = adapter->ifp;
2232	struct tx_ring	*txr = adapter->tx_rings;
2233	struct rx_ring	*rxr = adapter->rx_rings;
2234	u32		trigger;
2235
2236	EM_CORE_LOCK_ASSERT(adapter);
2237
2238	em_update_link_status(adapter);
2239	em_update_stats_counters(adapter);
2240
2241	/* Reset LAA into RAR[0] on 82571 */
2242	if ((adapter->hw.mac.type == e1000_82571) &&
2243	    e1000_get_laa_state_82571(&adapter->hw))
2244		e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);
2245
2246	/* Mask to use in the irq trigger */
2247	if (adapter->msix_mem)
2248		trigger = rxr->ims; /* RX for 82574 */
2249	else
2250		trigger = E1000_ICS_RXDMT0;
2251
2252	/*
2253	** Check on the state of the TX queue(s), this
2254	** can be done without the lock because its RO
2255	** and the HUNG state will be static if set.
2256	*/
2257	for (i = 0; i < adapter->num_queues; i++, txr++) {
2258		if ((txr->queue_status == EM_QUEUE_HUNG) &&
2259		    (adapter->pause_frames == 0))
2260			goto hung;
2261		/* Schedule a TX tasklet if needed */
2262		if (txr->tx_avail <= EM_MAX_SCATTER)
2263			taskqueue_enqueue(txr->tq, &txr->tx_task);
2264	}
2265
2266	adapter->pause_frames = 0;
2267	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
2268#ifndef DEVICE_POLLING
2269	/* Trigger an RX interrupt to guarantee mbuf refresh */
2270	E1000_WRITE_REG(&adapter->hw, E1000_ICS, trigger);
2271#endif
2272	return;
2273hung:
2274	/* Looks like we're hung */
2275	device_printf(adapter->dev, "Watchdog timeout -- resetting\n");
2276	device_printf(adapter->dev,
2277	    "Queue(%d) tdh = %d, hw tdt = %d\n", txr->me,
2278	    E1000_READ_REG(&adapter->hw, E1000_TDH(txr->me)),
2279	    E1000_READ_REG(&adapter->hw, E1000_TDT(txr->me)));
2280	device_printf(adapter->dev,"TX(%d) desc avail = %d,"
2281	    "Next TX to Clean = %d\n",
2282	    txr->me, txr->tx_avail, txr->next_to_clean);
2283	ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2284	adapter->watchdog_events++;
2285	adapter->pause_frames = 0;
2286	em_init_locked(adapter);
2287}
2288
2289
2290static void
2291em_update_link_status(struct adapter *adapter)
2292{
2293	struct e1000_hw *hw = &adapter->hw;
2294	struct ifnet *ifp = adapter->ifp;
2295	device_t dev = adapter->dev;
2296	struct tx_ring *txr = adapter->tx_rings;
2297	u32 link_check = 0;
2298
2299	/* Get the cached link value or read phy for real */
2300	switch (hw->phy.media_type) {
2301	case e1000_media_type_copper:
2302		if (hw->mac.get_link_status) {
2303			/* Do the work to read phy */
2304			e1000_check_for_link(hw);
2305			link_check = !hw->mac.get_link_status;
2306			if (link_check) /* ESB2 fix */
2307				e1000_cfg_on_link_up(hw);
2308		} else
2309			link_check = TRUE;
2310		break;
2311	case e1000_media_type_fiber:
2312		e1000_check_for_link(hw);
2313		link_check = (E1000_READ_REG(hw, E1000_STATUS) &
2314                                 E1000_STATUS_LU);
2315		break;
2316	case e1000_media_type_internal_serdes:
2317		e1000_check_for_link(hw);
2318		link_check = adapter->hw.mac.serdes_has_link;
2319		break;
2320	default:
2321	case e1000_media_type_unknown:
2322		break;
2323	}
2324
2325	/* Now check for a transition */
2326	if (link_check && (adapter->link_active == 0)) {
2327		e1000_get_speed_and_duplex(hw, &adapter->link_speed,
2328		    &adapter->link_duplex);
2329		/* Check if we must disable SPEED_MODE bit on PCI-E */
2330		if ((adapter->link_speed != SPEED_1000) &&
2331		    ((hw->mac.type == e1000_82571) ||
2332		    (hw->mac.type == e1000_82572))) {
2333			int tarc0;
2334			tarc0 = E1000_READ_REG(hw, E1000_TARC(0));
2335			tarc0 &= ~SPEED_MODE_BIT;
2336			E1000_WRITE_REG(hw, E1000_TARC(0), tarc0);
2337		}
2338		if (bootverbose)
2339			device_printf(dev, "Link is up %d Mbps %s\n",
2340			    adapter->link_speed,
2341			    ((adapter->link_duplex == FULL_DUPLEX) ?
2342			    "Full Duplex" : "Half Duplex"));
2343		adapter->link_active = 1;
2344		adapter->smartspeed = 0;
2345		ifp->if_baudrate = adapter->link_speed * 1000000;
2346		if_link_state_change(ifp, LINK_STATE_UP);
2347	} else if (!link_check && (adapter->link_active == 1)) {
2348		int i;
2349		ifp->if_baudrate = adapter->link_speed = 0;
2350		adapter->link_duplex = 0;
2351		if (bootverbose)
2352			device_printf(dev, "Link is Down\n");
2353		adapter->link_active = 0;
2354		/* Link down, disable watchdog */
2355		for (i = 0; i < adapter->num_queues; i++, txr++)
2356			txr->queue_status = EM_QUEUE_IDLE;
2357		if_link_state_change(ifp, LINK_STATE_DOWN);
2358	}
2359}
2360
2361/*********************************************************************
2362 *
2363 *  This routine disables all traffic on the adapter by issuing a
2364 *  global reset on the MAC and deallocates TX/RX buffers.
2365 *
2366 *  This routine should always be called with BOTH the CORE
2367 *  and TX locks.
2368 **********************************************************************/
2369
2370static void
2371em_stop(void *arg)
2372{
2373	int i;
2374	struct adapter	*adapter = arg;
2375	struct ifnet	*ifp = adapter->ifp;
2376	struct tx_ring	*txr = adapter->tx_rings;
2377
2378	EM_CORE_LOCK_ASSERT(adapter);
2379
2380	INIT_DEBUGOUT("em_stop: begin");
2381
2382	em_disable_intr(adapter);
2383	callout_stop(&adapter->timer);
2384
2385	/* Tell the stack that the interface is no longer active */
2386	ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2387	ifp->if_drv_flags |= IFF_DRV_OACTIVE;
2388
2389        /* Unarm watchdog timer. */
2390	for (i = 0; i < adapter->num_queues; i++, txr++) {
2391		EM_TX_LOCK(txr);
2392		txr->queue_status = EM_QUEUE_IDLE;
2393		EM_TX_UNLOCK(txr);
2394	}
2395
2396	e1000_reset_hw(&adapter->hw);
2397	E1000_WRITE_REG(&adapter->hw, E1000_WUC, 0);
2398
2399	e1000_led_off(&adapter->hw);
2400	e1000_cleanup_led(&adapter->hw);
2401}
2402
2403
2404/*********************************************************************
2405 *
2406 *  Determine hardware revision.
2407 *
2408 **********************************************************************/
2409static void
2410em_identify_hardware(struct adapter *adapter)
2411{
2412	device_t dev = adapter->dev;
2413
2414	/* Make sure our PCI config space has the necessary stuff set */
2415	adapter->hw.bus.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2);
2416	if (!((adapter->hw.bus.pci_cmd_word & PCIM_CMD_BUSMASTEREN) &&
2417	    (adapter->hw.bus.pci_cmd_word & PCIM_CMD_MEMEN))) {
2418		device_printf(dev, "Memory Access and/or Bus Master bits "
2419		    "were not set!\n");
2420		adapter->hw.bus.pci_cmd_word |=
2421		(PCIM_CMD_BUSMASTEREN | PCIM_CMD_MEMEN);
2422		pci_write_config(dev, PCIR_COMMAND,
2423		    adapter->hw.bus.pci_cmd_word, 2);
2424	}
2425
2426	/* Save off the information about this board */
2427	adapter->hw.vendor_id = pci_get_vendor(dev);
2428	adapter->hw.device_id = pci_get_device(dev);
2429	adapter->hw.revision_id = pci_read_config(dev, PCIR_REVID, 1);
2430	adapter->hw.subsystem_vendor_id =
2431	    pci_read_config(dev, PCIR_SUBVEND_0, 2);
2432	adapter->hw.subsystem_device_id =
2433	    pci_read_config(dev, PCIR_SUBDEV_0, 2);
2434
2435	/* Do Shared Code Init and Setup */
2436	if (e1000_set_mac_type(&adapter->hw)) {
2437		device_printf(dev, "Setup init failure\n");
2438		return;
2439	}
2440}
2441
2442static int
2443em_allocate_pci_resources(struct adapter *adapter)
2444{
2445	device_t	dev = adapter->dev;
2446	int		rid;
2447
2448	rid = PCIR_BAR(0);
2449	adapter->memory = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
2450	    &rid, RF_ACTIVE);
2451	if (adapter->memory == NULL) {
2452		device_printf(dev, "Unable to allocate bus resource: memory\n");
2453		return (ENXIO);
2454	}
2455	adapter->osdep.mem_bus_space_tag =
2456	    rman_get_bustag(adapter->memory);
2457	adapter->osdep.mem_bus_space_handle =
2458	    rman_get_bushandle(adapter->memory);
2459	adapter->hw.hw_addr = (u8 *)&adapter->osdep.mem_bus_space_handle;
2460
2461	/* Default to a single queue */
2462	adapter->num_queues = 1;
2463
2464	/*
2465	 * Setup MSI/X or MSI if PCI Express
2466	 */
2467	adapter->msix = em_setup_msix(adapter);
2468
2469	adapter->hw.back = &adapter->osdep;
2470
2471	return (0);
2472}
2473
2474/*********************************************************************
2475 *
2476 *  Setup the Legacy or MSI Interrupt handler
2477 *
2478 **********************************************************************/
2479int
2480em_allocate_legacy(struct adapter *adapter)
2481{
2482	device_t dev = adapter->dev;
2483	struct tx_ring	*txr = adapter->tx_rings;
2484	int error, rid = 0;
2485
2486	/* Manually turn off all interrupts */
2487	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2488
2489	if (adapter->msix == 1) /* using MSI */
2490		rid = 1;
2491	/* We allocate a single interrupt resource */
2492	adapter->res = bus_alloc_resource_any(dev,
2493	    SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2494	if (adapter->res == NULL) {
2495		device_printf(dev, "Unable to allocate bus resource: "
2496		    "interrupt\n");
2497		return (ENXIO);
2498	}
2499
2500	/*
2501	 * Allocate a fast interrupt and the associated
2502	 * deferred processing contexts.
2503	 */
2504	TASK_INIT(&adapter->que_task, 0, em_handle_que, adapter);
2505	adapter->tq = taskqueue_create_fast("em_taskq", M_NOWAIT,
2506	    taskqueue_thread_enqueue, &adapter->tq);
2507	taskqueue_start_threads(&adapter->tq, 1, PI_NET, "%s que",
2508	    device_get_nameunit(adapter->dev));
2509	/* Use a TX only tasklet for local timer */
2510	TASK_INIT(&txr->tx_task, 0, em_handle_tx, txr);
2511	txr->tq = taskqueue_create_fast("em_txq", M_NOWAIT,
2512	    taskqueue_thread_enqueue, &txr->tq);
2513	taskqueue_start_threads(&txr->tq, 1, PI_NET, "%s txq",
2514	    device_get_nameunit(adapter->dev));
2515	TASK_INIT(&adapter->link_task, 0, em_handle_link, adapter);
2516	if ((error = bus_setup_intr(dev, adapter->res, INTR_TYPE_NET,
2517	    em_irq_fast, NULL, adapter, &adapter->tag)) != 0) {
2518		device_printf(dev, "Failed to register fast interrupt "
2519			    "handler: %d\n", error);
2520		taskqueue_free(adapter->tq);
2521		adapter->tq = NULL;
2522		return (error);
2523	}
2524
2525	return (0);
2526}
2527
2528/*********************************************************************
2529 *
2530 *  Setup the MSIX Interrupt handlers
2531 *   This is not really Multiqueue, rather
2532 *   its just seperate interrupt vectors
2533 *   for TX, RX, and Link.
2534 *
2535 **********************************************************************/
2536int
2537em_allocate_msix(struct adapter *adapter)
2538{
2539	int i;
2540	device_t	dev = adapter->dev;
2541	struct		tx_ring *txr = adapter->tx_rings;
2542	struct		rx_ring *rxr = adapter->rx_rings;
2543	int		error, rid, vector = 0;
2544
2545
2546	/* Make sure all interrupts are disabled */
2547	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2548
2549	/* First set up ring resources */
2550	for (i = 0; i < adapter->num_queues; i++, txr++, rxr++) {
2551
2552		/* RX ring */
2553		rid = vector + 1;
2554
2555		rxr->res = bus_alloc_resource_any(dev,
2556		    SYS_RES_IRQ, &rid, RF_ACTIVE);
2557		if (rxr->res == NULL) {
2558			device_printf(dev,
2559			    "Unable to allocate bus resource: "
2560			    "RX MSIX Interrupt %d\n", i);
2561			return (ENXIO);
2562		}
2563		if ((error = bus_setup_intr(dev, rxr->res,
2564		    INTR_TYPE_NET | INTR_MPSAFE, NULL, em_msix_rx,
2565		    rxr, &rxr->tag)) != 0) {
2566			device_printf(dev, "Failed to register RX handler");
2567			return (error);
2568		}
2569#if __FreeBSD_version >= 800504
2570		bus_describe_intr(dev, rxr->res, rxr->tag, "rx %d", i);
2571#endif
2572		rxr->msix = vector++; /* NOTE increment vector for TX */
2573		TASK_INIT(&rxr->rx_task, 0, em_handle_rx, rxr);
2574		rxr->tq = taskqueue_create_fast("em_rxq", M_NOWAIT,
2575		    taskqueue_thread_enqueue, &rxr->tq);
2576		taskqueue_start_threads(&rxr->tq, 1, PI_NET, "%s rxq",
2577		    device_get_nameunit(adapter->dev));
2578		/*
2579		** Set the bit to enable interrupt
2580		** in E1000_IMS -- bits 20 and 21
2581		** are for RX0 and RX1, note this has
2582		** NOTHING to do with the MSIX vector
2583		*/
2584		rxr->ims = 1 << (20 + i);
2585		adapter->ivars |= (8 | rxr->msix) << (i * 4);
2586
2587		/* TX ring */
2588		rid = vector + 1;
2589		txr->res = bus_alloc_resource_any(dev,
2590		    SYS_RES_IRQ, &rid, RF_ACTIVE);
2591		if (txr->res == NULL) {
2592			device_printf(dev,
2593			    "Unable to allocate bus resource: "
2594			    "TX MSIX Interrupt %d\n", i);
2595			return (ENXIO);
2596		}
2597		if ((error = bus_setup_intr(dev, txr->res,
2598		    INTR_TYPE_NET | INTR_MPSAFE, NULL, em_msix_tx,
2599		    txr, &txr->tag)) != 0) {
2600			device_printf(dev, "Failed to register TX handler");
2601			return (error);
2602		}
2603#if __FreeBSD_version >= 800504
2604		bus_describe_intr(dev, txr->res, txr->tag, "tx %d", i);
2605#endif
2606		txr->msix = vector++; /* Increment vector for next pass */
2607		TASK_INIT(&txr->tx_task, 0, em_handle_tx, txr);
2608		txr->tq = taskqueue_create_fast("em_txq", M_NOWAIT,
2609		    taskqueue_thread_enqueue, &txr->tq);
2610		taskqueue_start_threads(&txr->tq, 1, PI_NET, "%s txq",
2611		    device_get_nameunit(adapter->dev));
2612		/*
2613		** Set the bit to enable interrupt
2614		** in E1000_IMS -- bits 22 and 23
2615		** are for TX0 and TX1, note this has
2616		** NOTHING to do with the MSIX vector
2617		*/
2618		txr->ims = 1 << (22 + i);
2619		adapter->ivars |= (8 | txr->msix) << (8 + (i * 4));
2620	}
2621
2622	/* Link interrupt */
2623	++rid;
2624	adapter->res = bus_alloc_resource_any(dev,
2625	    SYS_RES_IRQ, &rid, RF_ACTIVE);
2626	if (!adapter->res) {
2627		device_printf(dev,"Unable to allocate "
2628		    "bus resource: Link interrupt [%d]\n", rid);
2629		return (ENXIO);
2630        }
2631	/* Set the link handler function */
2632	error = bus_setup_intr(dev, adapter->res,
2633	    INTR_TYPE_NET | INTR_MPSAFE, NULL,
2634	    em_msix_link, adapter, &adapter->tag);
2635	if (error) {
2636		adapter->res = NULL;
2637		device_printf(dev, "Failed to register LINK handler");
2638		return (error);
2639	}
2640#if __FreeBSD_version >= 800504
2641		bus_describe_intr(dev, adapter->res, adapter->tag, "link");
2642#endif
2643	adapter->linkvec = vector;
2644	adapter->ivars |=  (8 | vector) << 16;
2645	adapter->ivars |= 0x80000000;
2646
2647	return (0);
2648}
2649
2650
2651static void
2652em_free_pci_resources(struct adapter *adapter)
2653{
2654	int i;
2655	device_t	dev = adapter->dev;
2656	struct tx_ring	*txr;
2657	struct rx_ring	*rxr;
2658	int		rid;
2659
2660
2661	/*
2662	** Release all the queue interrupt resources:
2663	*/
2664	for (i = 0; i < adapter->num_queues; i++) {
2665		txr = &adapter->tx_rings[i];
2666		rxr = &adapter->rx_rings[i];
2667		/* an early abort? */
2668		if ((txr == NULL) || (rxr == NULL))
2669			break;
2670		rid = txr->msix +1;
2671		if (txr->tag != NULL) {
2672			bus_teardown_intr(dev, txr->res, txr->tag);
2673			txr->tag = NULL;
2674		}
2675		if (txr->res != NULL)
2676			bus_release_resource(dev, SYS_RES_IRQ,
2677			    rid, txr->res);
2678		rid = rxr->msix +1;
2679		if (rxr->tag != NULL) {
2680			bus_teardown_intr(dev, rxr->res, rxr->tag);
2681			rxr->tag = NULL;
2682		}
2683		if (rxr->res != NULL)
2684			bus_release_resource(dev, SYS_RES_IRQ,
2685			    rid, rxr->res);
2686	}
2687
2688        if (adapter->linkvec) /* we are doing MSIX */
2689                rid = adapter->linkvec + 1;
2690        else
2691                (adapter->msix != 0) ? (rid = 1):(rid = 0);
2692
2693	if (adapter->tag != NULL) {
2694		bus_teardown_intr(dev, adapter->res, adapter->tag);
2695		adapter->tag = NULL;
2696	}
2697
2698	if (adapter->res != NULL)
2699		bus_release_resource(dev, SYS_RES_IRQ, rid, adapter->res);
2700
2701
2702	if (adapter->msix)
2703		pci_release_msi(dev);
2704
2705	if (adapter->msix_mem != NULL)
2706		bus_release_resource(dev, SYS_RES_MEMORY,
2707		    PCIR_BAR(EM_MSIX_BAR), adapter->msix_mem);
2708
2709	if (adapter->memory != NULL)
2710		bus_release_resource(dev, SYS_RES_MEMORY,
2711		    PCIR_BAR(0), adapter->memory);
2712
2713	if (adapter->flash != NULL)
2714		bus_release_resource(dev, SYS_RES_MEMORY,
2715		    EM_FLASH, adapter->flash);
2716}
2717
2718/*
2719 * Setup MSI or MSI/X
2720 */
2721static int
2722em_setup_msix(struct adapter *adapter)
2723{
2724	device_t dev = adapter->dev;
2725	int val = 0;
2726
2727	/*
2728	** Setup MSI/X for Hartwell: tests have shown
2729	** use of two queues to be unstable, and to
2730	** provide no great gain anyway, so we simply
2731	** seperate the interrupts and use a single queue.
2732	*/
2733	if ((adapter->hw.mac.type == e1000_82574) &&
2734	    (em_enable_msix == TRUE)) {
2735		/* Map the MSIX BAR */
2736		int rid = PCIR_BAR(EM_MSIX_BAR);
2737		adapter->msix_mem = bus_alloc_resource_any(dev,
2738		    SYS_RES_MEMORY, &rid, RF_ACTIVE);
2739       		if (!adapter->msix_mem) {
2740			/* May not be enabled */
2741               		device_printf(adapter->dev,
2742			    "Unable to map MSIX table \n");
2743			goto msi;
2744       		}
2745		val = pci_msix_count(dev);
2746		/* We only need 3 vectors */
2747		if (val > 3)
2748			val = 3;
2749		if ((val != 3) && (val != 5)) {
2750			bus_release_resource(dev, SYS_RES_MEMORY,
2751			    PCIR_BAR(EM_MSIX_BAR), adapter->msix_mem);
2752			adapter->msix_mem = NULL;
2753               		device_printf(adapter->dev,
2754			    "MSIX: incorrect vectors, using MSI\n");
2755			goto msi;
2756		}
2757
2758		if (pci_alloc_msix(dev, &val) == 0) {
2759			device_printf(adapter->dev,
2760			    "Using MSIX interrupts "
2761			    "with %d vectors\n", val);
2762		}
2763
2764		return (val);
2765	}
2766msi:
2767       	val = pci_msi_count(dev);
2768       	if (val == 1 && pci_alloc_msi(dev, &val) == 0) {
2769               	adapter->msix = 1;
2770               	device_printf(adapter->dev,"Using an MSI interrupt\n");
2771		return (val);
2772	}
2773	/* Should only happen due to manual configuration */
2774	device_printf(adapter->dev,"No MSI/MSIX using a Legacy IRQ\n");
2775	return (0);
2776}
2777
2778
2779/*********************************************************************
2780 *
2781 *  Initialize the hardware to a configuration
2782 *  as specified by the adapter structure.
2783 *
2784 **********************************************************************/
2785static void
2786em_reset(struct adapter *adapter)
2787{
2788	device_t	dev = adapter->dev;
2789	struct ifnet	*ifp = adapter->ifp;
2790	struct e1000_hw	*hw = &adapter->hw;
2791	u16		rx_buffer_size;
2792	u32		pba;
2793
2794	INIT_DEBUGOUT("em_reset: begin");
2795
2796	/* Set up smart power down as default off on newer adapters. */
2797	if (!em_smart_pwr_down && (hw->mac.type == e1000_82571 ||
2798	    hw->mac.type == e1000_82572)) {
2799		u16 phy_tmp = 0;
2800
2801		/* Speed up time to link by disabling smart power down. */
2802		e1000_read_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, &phy_tmp);
2803		phy_tmp &= ~IGP02E1000_PM_SPD;
2804		e1000_write_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, phy_tmp);
2805	}
2806
2807	/*
2808	 * Packet Buffer Allocation (PBA)
2809	 * Writing PBA sets the receive portion of the buffer
2810	 * the remainder is used for the transmit buffer.
2811	 */
2812	switch (hw->mac.type) {
2813	/* Total Packet Buffer on these is 48K */
2814	case e1000_82571:
2815	case e1000_82572:
2816	case e1000_80003es2lan:
2817			pba = E1000_PBA_32K; /* 32K for Rx, 16K for Tx */
2818		break;
2819	case e1000_82573: /* 82573: Total Packet Buffer is 32K */
2820			pba = E1000_PBA_12K; /* 12K for Rx, 20K for Tx */
2821		break;
2822	case e1000_82574:
2823	case e1000_82583:
2824			pba = E1000_PBA_20K; /* 20K for Rx, 20K for Tx */
2825		break;
2826	case e1000_ich8lan:
2827		pba = E1000_PBA_8K;
2828		break;
2829	case e1000_ich9lan:
2830	case e1000_ich10lan:
2831		/* Boost Receive side for jumbo frames */
2832		if (adapter->max_frame_size > 4096)
2833			pba = E1000_PBA_14K;
2834		else
2835			pba = E1000_PBA_10K;
2836		break;
2837	case e1000_pchlan:
2838	case e1000_pch2lan:
2839		pba = E1000_PBA_26K;
2840		break;
2841	default:
2842		if (adapter->max_frame_size > 8192)
2843			pba = E1000_PBA_40K; /* 40K for Rx, 24K for Tx */
2844		else
2845			pba = E1000_PBA_48K; /* 48K for Rx, 16K for Tx */
2846	}
2847	E1000_WRITE_REG(&adapter->hw, E1000_PBA, pba);
2848
2849	/*
2850	 * These parameters control the automatic generation (Tx) and
2851	 * response (Rx) to Ethernet PAUSE frames.
2852	 * - High water mark should allow for at least two frames to be
2853	 *   received after sending an XOFF.
2854	 * - Low water mark works best when it is very near the high water mark.
2855	 *   This allows the receiver to restart by sending XON when it has
2856	 *   drained a bit. Here we use an arbitary value of 1500 which will
2857	 *   restart after one full frame is pulled from the buffer. There
2858	 *   could be several smaller frames in the buffer and if so they will
2859	 *   not trigger the XON until their total number reduces the buffer
2860	 *   by 1500.
2861	 * - The pause time is fairly large at 1000 x 512ns = 512 usec.
2862	 */
2863	rx_buffer_size = ((E1000_READ_REG(hw, E1000_PBA) & 0xffff) << 10 );
2864	hw->fc.high_water = rx_buffer_size -
2865	    roundup2(adapter->max_frame_size, 1024);
2866	hw->fc.low_water = hw->fc.high_water - 1500;
2867
2868	if (adapter->fc) /* locally set flow control value? */
2869		hw->fc.requested_mode = adapter->fc;
2870	else
2871		hw->fc.requested_mode = e1000_fc_full;
2872
2873	if (hw->mac.type == e1000_80003es2lan)
2874		hw->fc.pause_time = 0xFFFF;
2875	else
2876		hw->fc.pause_time = EM_FC_PAUSE_TIME;
2877
2878	hw->fc.send_xon = TRUE;
2879
2880	/* Device specific overrides/settings */
2881	switch (hw->mac.type) {
2882	case e1000_pchlan:
2883		/* Workaround: no TX flow ctrl for PCH */
2884                hw->fc.requested_mode = e1000_fc_rx_pause;
2885		hw->fc.pause_time = 0xFFFF; /* override */
2886		if (ifp->if_mtu > ETHERMTU) {
2887			hw->fc.high_water = 0x3500;
2888			hw->fc.low_water = 0x1500;
2889		} else {
2890			hw->fc.high_water = 0x5000;
2891			hw->fc.low_water = 0x3000;
2892		}
2893		hw->fc.refresh_time = 0x1000;
2894		break;
2895	case e1000_pch2lan:
2896		hw->fc.high_water = 0x5C20;
2897		hw->fc.low_water = 0x5048;
2898		hw->fc.pause_time = 0x0650;
2899		hw->fc.refresh_time = 0x0400;
2900		/* Jumbos need adjusted PBA */
2901		if (ifp->if_mtu > ETHERMTU)
2902			E1000_WRITE_REG(hw, E1000_PBA, 12);
2903		else
2904			E1000_WRITE_REG(hw, E1000_PBA, 26);
2905		break;
2906        case e1000_ich9lan:
2907        case e1000_ich10lan:
2908		if (ifp->if_mtu > ETHERMTU) {
2909			hw->fc.high_water = 0x2800;
2910			hw->fc.low_water = hw->fc.high_water - 8;
2911			break;
2912		}
2913		/* else fall thru */
2914	default:
2915		if (hw->mac.type == e1000_80003es2lan)
2916			hw->fc.pause_time = 0xFFFF;
2917		break;
2918	}
2919
2920	/* Issue a global reset */
2921	e1000_reset_hw(hw);
2922	E1000_WRITE_REG(hw, E1000_WUC, 0);
2923	em_disable_aspm(adapter);
2924	/* and a re-init */
2925	if (e1000_init_hw(hw) < 0) {
2926		device_printf(dev, "Hardware Initialization Failed\n");
2927		return;
2928	}
2929
2930	E1000_WRITE_REG(hw, E1000_VET, ETHERTYPE_VLAN);
2931	e1000_get_phy_info(hw);
2932	e1000_check_for_link(hw);
2933	return;
2934}
2935
2936/*********************************************************************
2937 *
2938 *  Setup networking device structure and register an interface.
2939 *
2940 **********************************************************************/
2941static int
2942em_setup_interface(device_t dev, struct adapter *adapter)
2943{
2944	struct ifnet   *ifp;
2945
2946	INIT_DEBUGOUT("em_setup_interface: begin");
2947
2948	ifp = adapter->ifp = if_alloc(IFT_ETHER);
2949	if (ifp == NULL) {
2950		device_printf(dev, "can not allocate ifnet structure\n");
2951		return (-1);
2952	}
2953	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
2954	ifp->if_init =  em_init;
2955	ifp->if_softc = adapter;
2956	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
2957	ifp->if_ioctl = em_ioctl;
2958#ifdef EM_MULTIQUEUE
2959	/* Multiqueue stack interface */
2960	ifp->if_transmit = em_mq_start;
2961	ifp->if_qflush = em_qflush;
2962#else
2963	ifp->if_start = em_start;
2964	IFQ_SET_MAXLEN(&ifp->if_snd, adapter->num_tx_desc - 1);
2965	ifp->if_snd.ifq_drv_maxlen = adapter->num_tx_desc - 1;
2966	IFQ_SET_READY(&ifp->if_snd);
2967#endif
2968
2969	ether_ifattach(ifp, adapter->hw.mac.addr);
2970
2971	ifp->if_capabilities = ifp->if_capenable = 0;
2972
2973
2974	ifp->if_capabilities |= IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM;
2975	ifp->if_capabilities |= IFCAP_TSO4;
2976	/*
2977	 * Tell the upper layer(s) we
2978	 * support full VLAN capability
2979	 */
2980	ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);
2981	ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING
2982			     |  IFCAP_VLAN_HWTSO
2983			     |  IFCAP_VLAN_MTU;
2984	ifp->if_capenable = ifp->if_capabilities;
2985
2986	/*
2987	** Don't turn this on by default, if vlans are
2988	** created on another pseudo device (eg. lagg)
2989	** then vlan events are not passed thru, breaking
2990	** operation, but with HW FILTER off it works. If
2991	** using vlans directly on the em driver you can
2992	** enable this and get full hardware tag filtering.
2993	*/
2994	ifp->if_capabilities |= IFCAP_VLAN_HWFILTER;
2995
2996#ifdef DEVICE_POLLING
2997	ifp->if_capabilities |= IFCAP_POLLING;
2998#endif
2999
3000	/* Enable only WOL MAGIC by default */
3001	if (adapter->wol) {
3002		ifp->if_capabilities |= IFCAP_WOL;
3003		ifp->if_capenable |= IFCAP_WOL_MAGIC;
3004	}
3005
3006	/*
3007	 * Specify the media types supported by this adapter and register
3008	 * callbacks to update media and link information
3009	 */
3010	ifmedia_init(&adapter->media, IFM_IMASK,
3011	    em_media_change, em_media_status);
3012	if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
3013	    (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
3014		u_char fiber_type = IFM_1000_SX;	/* default type */
3015
3016		ifmedia_add(&adapter->media, IFM_ETHER | fiber_type | IFM_FDX,
3017			    0, NULL);
3018		ifmedia_add(&adapter->media, IFM_ETHER | fiber_type, 0, NULL);
3019	} else {
3020		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T, 0, NULL);
3021		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T | IFM_FDX,
3022			    0, NULL);
3023		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX,
3024			    0, NULL);
3025		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
3026			    0, NULL);
3027		if (adapter->hw.phy.type != e1000_phy_ife) {
3028			ifmedia_add(&adapter->media,
3029				IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
3030			ifmedia_add(&adapter->media,
3031				IFM_ETHER | IFM_1000_T, 0, NULL);
3032		}
3033	}
3034	ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
3035	ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
3036	return (0);
3037}
3038
3039
3040/*
3041 * Manage DMA'able memory.
3042 */
3043static void
3044em_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
3045{
3046	if (error)
3047		return;
3048	*(bus_addr_t *) arg = segs[0].ds_addr;
3049}
3050
3051static int
3052em_dma_malloc(struct adapter *adapter, bus_size_t size,
3053        struct em_dma_alloc *dma, int mapflags)
3054{
3055	int error;
3056
3057	error = bus_dma_tag_create(bus_get_dma_tag(adapter->dev), /* parent */
3058				EM_DBA_ALIGN, 0,	/* alignment, bounds */
3059				BUS_SPACE_MAXADDR,	/* lowaddr */
3060				BUS_SPACE_MAXADDR,	/* highaddr */
3061				NULL, NULL,		/* filter, filterarg */
3062				size,			/* maxsize */
3063				1,			/* nsegments */
3064				size,			/* maxsegsize */
3065				0,			/* flags */
3066				NULL,			/* lockfunc */
3067				NULL,			/* lockarg */
3068				&dma->dma_tag);
3069	if (error) {
3070		device_printf(adapter->dev,
3071		    "%s: bus_dma_tag_create failed: %d\n",
3072		    __func__, error);
3073		goto fail_0;
3074	}
3075
3076	error = bus_dmamem_alloc(dma->dma_tag, (void**) &dma->dma_vaddr,
3077	    BUS_DMA_NOWAIT | BUS_DMA_COHERENT, &dma->dma_map);
3078	if (error) {
3079		device_printf(adapter->dev,
3080		    "%s: bus_dmamem_alloc(%ju) failed: %d\n",
3081		    __func__, (uintmax_t)size, error);
3082		goto fail_2;
3083	}
3084
3085	dma->dma_paddr = 0;
3086	error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
3087	    size, em_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT);
3088	if (error || dma->dma_paddr == 0) {
3089		device_printf(adapter->dev,
3090		    "%s: bus_dmamap_load failed: %d\n",
3091		    __func__, error);
3092		goto fail_3;
3093	}
3094
3095	return (0);
3096
3097fail_3:
3098	bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3099fail_2:
3100	bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
3101	bus_dma_tag_destroy(dma->dma_tag);
3102fail_0:
3103	dma->dma_map = NULL;
3104	dma->dma_tag = NULL;
3105
3106	return (error);
3107}
3108
3109static void
3110em_dma_free(struct adapter *adapter, struct em_dma_alloc *dma)
3111{
3112	if (dma->dma_tag == NULL)
3113		return;
3114	if (dma->dma_map != NULL) {
3115		bus_dmamap_sync(dma->dma_tag, dma->dma_map,
3116		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
3117		bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3118		bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
3119		dma->dma_map = NULL;
3120	}
3121	bus_dma_tag_destroy(dma->dma_tag);
3122	dma->dma_tag = NULL;
3123}
3124
3125
3126/*********************************************************************
3127 *
3128 *  Allocate memory for the transmit and receive rings, and then
3129 *  the descriptors associated with each, called only once at attach.
3130 *
3131 **********************************************************************/
3132static int
3133em_allocate_queues(struct adapter *adapter)
3134{
3135	int i;
3136	device_t		dev = adapter->dev;
3137	struct tx_ring		*txr = NULL;
3138	struct rx_ring		*rxr = NULL;
3139	int rsize, tsize, error = E1000_SUCCESS;
3140	int txconf = 0, rxconf = 0;
3141
3142
3143	/* Allocate the TX ring struct memory */
3144	if (!(adapter->tx_rings =
3145	    (struct tx_ring *) malloc(sizeof(struct tx_ring) *
3146	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3147		device_printf(dev, "Unable to allocate TX ring memory\n");
3148		error = ENOMEM;
3149		goto fail;
3150	}
3151
3152	/* Now allocate the RX */
3153	if (!(adapter->rx_rings =
3154	    (struct rx_ring *) malloc(sizeof(struct rx_ring) *
3155	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3156		device_printf(dev, "Unable to allocate RX ring memory\n");
3157		error = ENOMEM;
3158		goto rx_fail;
3159	}
3160
3161	tsize = roundup2(adapter->num_tx_desc *
3162	    sizeof(struct e1000_tx_desc), EM_DBA_ALIGN);
3163	/*
3164	 * Now set up the TX queues, txconf is needed to handle the
3165	 * possibility that things fail midcourse and we need to
3166	 * undo memory gracefully
3167	 */
3168	for (i = 0; i < adapter->num_queues; i++, txconf++) {
3169		/* Set up some basics */
3170		txr = &adapter->tx_rings[i];
3171		txr->adapter = adapter;
3172		txr->me = i;
3173
3174		/* Initialize the TX lock */
3175		snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
3176		    device_get_nameunit(dev), txr->me);
3177		mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF);
3178
3179		if (em_dma_malloc(adapter, tsize,
3180			&txr->txdma, BUS_DMA_NOWAIT)) {
3181			device_printf(dev,
3182			    "Unable to allocate TX Descriptor memory\n");
3183			error = ENOMEM;
3184			goto err_tx_desc;
3185		}
3186		txr->tx_base = (struct e1000_tx_desc *)txr->txdma.dma_vaddr;
3187		bzero((void *)txr->tx_base, tsize);
3188
3189        	if (em_allocate_transmit_buffers(txr)) {
3190			device_printf(dev,
3191			    "Critical Failure setting up transmit buffers\n");
3192			error = ENOMEM;
3193			goto err_tx_desc;
3194        	}
3195#ifndef __HAIKU__
3196#if __FreeBSD_version >= 800000
3197		/* Allocate a buf ring */
3198		txr->br = buf_ring_alloc(4096, M_DEVBUF,
3199		    M_WAITOK, &txr->tx_mtx);
3200#endif
3201#endif
3202	}
3203
3204	/*
3205	 * Next the RX queues...
3206	 */
3207	rsize = roundup2(adapter->num_rx_desc *
3208	    sizeof(struct e1000_rx_desc), EM_DBA_ALIGN);
3209	for (i = 0; i < adapter->num_queues; i++, rxconf++) {
3210		rxr = &adapter->rx_rings[i];
3211		rxr->adapter = adapter;
3212		rxr->me = i;
3213
3214		/* Initialize the RX lock */
3215		snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
3216		    device_get_nameunit(dev), txr->me);
3217		mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF);
3218
3219		if (em_dma_malloc(adapter, rsize,
3220			&rxr->rxdma, BUS_DMA_NOWAIT)) {
3221			device_printf(dev,
3222			    "Unable to allocate RxDescriptor memory\n");
3223			error = ENOMEM;
3224			goto err_rx_desc;
3225		}
3226		rxr->rx_base = (struct e1000_rx_desc *)rxr->rxdma.dma_vaddr;
3227		bzero((void *)rxr->rx_base, rsize);
3228
3229        	/* Allocate receive buffers for the ring*/
3230		if (em_allocate_receive_buffers(rxr)) {
3231			device_printf(dev,
3232			    "Critical Failure setting up receive buffers\n");
3233			error = ENOMEM;
3234			goto err_rx_desc;
3235		}
3236	}
3237
3238	return (0);
3239
3240err_rx_desc:
3241	for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
3242		em_dma_free(adapter, &rxr->rxdma);
3243err_tx_desc:
3244	for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
3245		em_dma_free(adapter, &txr->txdma);
3246	free(adapter->rx_rings, M_DEVBUF);
3247rx_fail:
3248#ifndef __HAIKU__
3249#if __FreeBSD_version >= 800000
3250	buf_ring_free(txr->br, M_DEVBUF);
3251#endif
3252#endif
3253	free(adapter->tx_rings, M_DEVBUF);
3254fail:
3255	return (error);
3256}
3257
3258
3259/*********************************************************************
3260 *
3261 *  Allocate memory for tx_buffer structures. The tx_buffer stores all
3262 *  the information needed to transmit a packet on the wire. This is
3263 *  called only once at attach, setup is done every reset.
3264 *
3265 **********************************************************************/
3266static int
3267em_allocate_transmit_buffers(struct tx_ring *txr)
3268{
3269	struct adapter *adapter = txr->adapter;
3270	device_t dev = adapter->dev;
3271	struct em_buffer *txbuf;
3272	int error, i;
3273
3274	/*
3275	 * Setup DMA descriptor areas.
3276	 */
3277	if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
3278			       1, 0,			/* alignment, bounds */
3279			       BUS_SPACE_MAXADDR,	/* lowaddr */
3280			       BUS_SPACE_MAXADDR,	/* highaddr */
3281			       NULL, NULL,		/* filter, filterarg */
3282			       EM_TSO_SIZE,		/* maxsize */
3283			       EM_MAX_SCATTER,		/* nsegments */
3284			       PAGE_SIZE,		/* maxsegsize */
3285			       0,			/* flags */
3286			       NULL,			/* lockfunc */
3287			       NULL,			/* lockfuncarg */
3288			       &txr->txtag))) {
3289		device_printf(dev,"Unable to allocate TX DMA tag\n");
3290		goto fail;
3291	}
3292
3293	if (!(txr->tx_buffers =
3294	    (struct em_buffer *) malloc(sizeof(struct em_buffer) *
3295	    adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3296		device_printf(dev, "Unable to allocate tx_buffer memory\n");
3297		error = ENOMEM;
3298		goto fail;
3299	}
3300
3301        /* Create the descriptor buffer dma maps */
3302	txbuf = txr->tx_buffers;
3303	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3304		error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
3305		if (error != 0) {
3306			device_printf(dev, "Unable to create TX DMA map\n");
3307			goto fail;
3308		}
3309	}
3310
3311	return 0;
3312fail:
3313	/* We free all, it handles case where we are in the middle */
3314	em_free_transmit_structures(adapter);
3315	return (error);
3316}
3317
3318/*********************************************************************
3319 *
3320 *  Initialize a transmit ring.
3321 *
3322 **********************************************************************/
3323static void
3324em_setup_transmit_ring(struct tx_ring *txr)
3325{
3326	struct adapter *adapter = txr->adapter;
3327	struct em_buffer *txbuf;
3328	int i;
3329#ifdef DEV_NETMAP
3330	struct netmap_adapter *na = NA(adapter->ifp);
3331	struct netmap_slot *slot;
3332#endif /* DEV_NETMAP */
3333
3334	/* Clear the old descriptor contents */
3335	EM_TX_LOCK(txr);
3336#ifdef DEV_NETMAP
3337	slot = netmap_reset(na, NR_TX, txr->me, 0);
3338#endif /* DEV_NETMAP */
3339
3340	bzero((void *)txr->tx_base,
3341	      (sizeof(struct e1000_tx_desc)) * adapter->num_tx_desc);
3342	/* Reset indices */
3343	txr->next_avail_desc = 0;
3344	txr->next_to_clean = 0;
3345
3346	/* Free any existing tx buffers. */
3347        txbuf = txr->tx_buffers;
3348	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3349		if (txbuf->m_head != NULL) {
3350			bus_dmamap_sync(txr->txtag, txbuf->map,
3351			    BUS_DMASYNC_POSTWRITE);
3352			bus_dmamap_unload(txr->txtag, txbuf->map);
3353			m_freem(txbuf->m_head);
3354			txbuf->m_head = NULL;
3355		}
3356#ifdef DEV_NETMAP
3357		if (slot) {
3358			int si = netmap_idx_n2k(&na->tx_rings[txr->me], i);
3359			uint64_t paddr;
3360			void *addr;
3361
3362			addr = PNMB(slot + si, &paddr);
3363			txr->tx_base[i].buffer_addr = htole64(paddr);
3364			/* reload the map for netmap mode */
3365			netmap_load_map(txr->txtag, txbuf->map, addr);
3366		}
3367#endif /* DEV_NETMAP */
3368
3369		/* clear the watch index */
3370		txbuf->next_eop = -1;
3371        }
3372
3373	/* Set number of descriptors available */
3374	txr->tx_avail = adapter->num_tx_desc;
3375	txr->queue_status = EM_QUEUE_IDLE;
3376
3377	/* Clear checksum offload context. */
3378	txr->last_hw_offload = 0;
3379	txr->last_hw_ipcss = 0;
3380	txr->last_hw_ipcso = 0;
3381	txr->last_hw_tucss = 0;
3382	txr->last_hw_tucso = 0;
3383
3384	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3385	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3386	EM_TX_UNLOCK(txr);
3387}
3388
3389/*********************************************************************
3390 *
3391 *  Initialize all transmit rings.
3392 *
3393 **********************************************************************/
3394static void
3395em_setup_transmit_structures(struct adapter *adapter)
3396{
3397	int i;
3398	struct tx_ring *txr = adapter->tx_rings;
3399
3400	for (i = 0; i < adapter->num_queues; i++, txr++)
3401		em_setup_transmit_ring(txr);
3402
3403	return;
3404}
3405
3406/*********************************************************************
3407 *
3408 *  Enable transmit unit.
3409 *
3410 **********************************************************************/
3411static void
3412em_initialize_transmit_unit(struct adapter *adapter)
3413{
3414	int i;
3415	struct tx_ring	*txr = adapter->tx_rings;
3416	struct e1000_hw	*hw = &adapter->hw;
3417	u32	tctl, tarc, tipg = 0;
3418
3419	 INIT_DEBUGOUT("em_initialize_transmit_unit: begin");
3420
3421	for (i = 0; i < adapter->num_queues; i++, txr++) {
3422		u64 bus_addr = txr->txdma.dma_paddr;
3423		/* Base and Len of TX Ring */
3424		E1000_WRITE_REG(hw, E1000_TDLEN(i),
3425	    	    adapter->num_tx_desc * sizeof(struct e1000_tx_desc));
3426		E1000_WRITE_REG(hw, E1000_TDBAH(i),
3427	    	    (u32)(bus_addr >> 32));
3428		E1000_WRITE_REG(hw, E1000_TDBAL(i),
3429	    	    (u32)bus_addr);
3430		/* Init the HEAD/TAIL indices */
3431		E1000_WRITE_REG(hw, E1000_TDT(i), 0);
3432		E1000_WRITE_REG(hw, E1000_TDH(i), 0);
3433
3434		HW_DEBUGOUT2("Base = %x, Length = %x\n",
3435		    E1000_READ_REG(&adapter->hw, E1000_TDBAL(i)),
3436		    E1000_READ_REG(&adapter->hw, E1000_TDLEN(i)));
3437
3438		txr->queue_status = EM_QUEUE_IDLE;
3439	}
3440
3441	/* Set the default values for the Tx Inter Packet Gap timer */
3442	switch (adapter->hw.mac.type) {
3443	case e1000_80003es2lan:
3444		tipg = DEFAULT_82543_TIPG_IPGR1;
3445		tipg |= DEFAULT_80003ES2LAN_TIPG_IPGR2 <<
3446		    E1000_TIPG_IPGR2_SHIFT;
3447		break;
3448	default:
3449		if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
3450		    (adapter->hw.phy.media_type ==
3451		    e1000_media_type_internal_serdes))
3452			tipg = DEFAULT_82543_TIPG_IPGT_FIBER;
3453		else
3454			tipg = DEFAULT_82543_TIPG_IPGT_COPPER;
3455		tipg |= DEFAULT_82543_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT;
3456		tipg |= DEFAULT_82543_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT;
3457	}
3458
3459	E1000_WRITE_REG(&adapter->hw, E1000_TIPG, tipg);
3460	E1000_WRITE_REG(&adapter->hw, E1000_TIDV, adapter->tx_int_delay.value);
3461
3462	if(adapter->hw.mac.type >= e1000_82540)
3463		E1000_WRITE_REG(&adapter->hw, E1000_TADV,
3464		    adapter->tx_abs_int_delay.value);
3465
3466	if ((adapter->hw.mac.type == e1000_82571) ||
3467	    (adapter->hw.mac.type == e1000_82572)) {
3468		tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
3469		tarc |= SPEED_MODE_BIT;
3470		E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3471	} else if (adapter->hw.mac.type == e1000_80003es2lan) {
3472		tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
3473		tarc |= 1;
3474		E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3475		tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(1));
3476		tarc |= 1;
3477		E1000_WRITE_REG(&adapter->hw, E1000_TARC(1), tarc);
3478	}
3479
3480	adapter->txd_cmd = E1000_TXD_CMD_IFCS;
3481	if (adapter->tx_int_delay.value > 0)
3482		adapter->txd_cmd |= E1000_TXD_CMD_IDE;
3483
3484	/* Program the Transmit Control Register */
3485	tctl = E1000_READ_REG(&adapter->hw, E1000_TCTL);
3486	tctl &= ~E1000_TCTL_CT;
3487	tctl |= (E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN |
3488		   (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT));
3489
3490	if (adapter->hw.mac.type >= e1000_82571)
3491		tctl |= E1000_TCTL_MULR;
3492
3493	/* This write will effectively turn on the transmit unit. */
3494	E1000_WRITE_REG(&adapter->hw, E1000_TCTL, tctl);
3495
3496}
3497
3498
3499/*********************************************************************
3500 *
3501 *  Free all transmit rings.
3502 *
3503 **********************************************************************/
3504static void
3505em_free_transmit_structures(struct adapter *adapter)
3506{
3507	int i;
3508	struct tx_ring *txr = adapter->tx_rings;
3509
3510	for (i = 0; i < adapter->num_queues; i++, txr++) {
3511		EM_TX_LOCK(txr);
3512		em_free_transmit_buffers(txr);
3513		em_dma_free(adapter, &txr->txdma);
3514		EM_TX_UNLOCK(txr);
3515		EM_TX_LOCK_DESTROY(txr);
3516	}
3517
3518	free(adapter->tx_rings, M_DEVBUF);
3519}
3520
3521/*********************************************************************
3522 *
3523 *  Free transmit ring related data structures.
3524 *
3525 **********************************************************************/
3526static void
3527em_free_transmit_buffers(struct tx_ring *txr)
3528{
3529	int i;
3530	struct adapter		*adapter = txr->adapter;
3531	struct em_buffer	*txbuf;
3532
3533	INIT_DEBUGOUT("free_transmit_ring: begin");
3534
3535	if (txr->tx_buffers == NULL)
3536		return;
3537
3538	for (i = 0; i < adapter->num_tx_desc; i++) {
3539		txbuf = &txr->tx_buffers[i];
3540		if (txbuf->m_head != NULL) {
3541			bus_dmamap_sync(txr->txtag, txbuf->map,
3542			    BUS_DMASYNC_POSTWRITE);
3543			bus_dmamap_unload(txr->txtag,
3544			    txbuf->map);
3545			m_freem(txbuf->m_head);
3546			txbuf->m_head = NULL;
3547			if (txbuf->map != NULL) {
3548				bus_dmamap_destroy(txr->txtag,
3549				    txbuf->map);
3550				txbuf->map = NULL;
3551			}
3552		} else if (txbuf->map != NULL) {
3553			bus_dmamap_unload(txr->txtag,
3554			    txbuf->map);
3555			bus_dmamap_destroy(txr->txtag,
3556			    txbuf->map);
3557			txbuf->map = NULL;
3558		}
3559	}
3560#ifndef __HAIKU__
3561#if __FreeBSD_version >= 800000
3562	if (txr->br != NULL)
3563		buf_ring_free(txr->br, M_DEVBUF);
3564#endif
3565#endif
3566	if (txr->tx_buffers != NULL) {
3567		free(txr->tx_buffers, M_DEVBUF);
3568		txr->tx_buffers = NULL;
3569	}
3570	if (txr->txtag != NULL) {
3571		bus_dma_tag_destroy(txr->txtag);
3572		txr->txtag = NULL;
3573	}
3574	return;
3575}
3576
3577
3578/*********************************************************************
3579 *  The offload context is protocol specific (TCP/UDP) and thus
3580 *  only needs to be set when the protocol changes. The occasion
3581 *  of a context change can be a performance detriment, and
3582 *  might be better just disabled. The reason arises in the way
3583 *  in which the controller supports pipelined requests from the
3584 *  Tx data DMA. Up to four requests can be pipelined, and they may
3585 *  belong to the same packet or to multiple packets. However all
3586 *  requests for one packet are issued before a request is issued
3587 *  for a subsequent packet and if a request for the next packet
3588 *  requires a context change, that request will be stalled
3589 *  until the previous request completes. This means setting up
3590 *  a new context effectively disables pipelined Tx data DMA which
3591 *  in turn greatly slow down performance to send small sized
3592 *  frames.
3593 **********************************************************************/
3594static void
3595em_transmit_checksum_setup(struct tx_ring *txr, struct mbuf *mp, int ip_off,
3596    struct ip *ip, u32 *txd_upper, u32 *txd_lower)
3597{
3598	struct adapter			*adapter = txr->adapter;
3599	struct e1000_context_desc	*TXD = NULL;
3600	struct em_buffer		*tx_buffer;
3601	int				cur, hdr_len;
3602	u32				cmd = 0;
3603	u16				offload = 0;
3604	u8				ipcso, ipcss, tucso, tucss;
3605
3606	ipcss = ipcso = tucss = tucso = 0;
3607	hdr_len = ip_off + (ip->ip_hl << 2);
3608	cur = txr->next_avail_desc;
3609
3610	/* Setup of IP header checksum. */
3611	if (mp->m_pkthdr.csum_flags & CSUM_IP) {
3612		*txd_upper |= E1000_TXD_POPTS_IXSM << 8;
3613		offload |= CSUM_IP;
3614		ipcss = ip_off;
3615		ipcso = ip_off + offsetof(struct ip, ip_sum);
3616		/*
3617		 * Start offset for header checksum calculation.
3618		 * End offset for header checksum calculation.
3619		 * Offset of place to put the checksum.
3620		 */
3621		TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
3622		TXD->lower_setup.ip_fields.ipcss = ipcss;
3623		TXD->lower_setup.ip_fields.ipcse = htole16(hdr_len);
3624		TXD->lower_setup.ip_fields.ipcso = ipcso;
3625		cmd |= E1000_TXD_CMD_IP;
3626	}
3627
3628	if (mp->m_pkthdr.csum_flags & CSUM_TCP) {
3629 		*txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
3630 		*txd_upper |= E1000_TXD_POPTS_TXSM << 8;
3631 		offload |= CSUM_TCP;
3632 		tucss = hdr_len;
3633 		tucso = hdr_len + offsetof(struct tcphdr, th_sum);
3634 		/*
3635 		 * Setting up new checksum offload context for every frames
3636 		 * takes a lot of processing time for hardware. This also
3637 		 * reduces performance a lot for small sized frames so avoid
3638 		 * it if driver can use previously configured checksum
3639 		 * offload context.
3640 		 */
3641 		if (txr->last_hw_offload == offload) {
3642 			if (offload & CSUM_IP) {
3643 				if (txr->last_hw_ipcss == ipcss &&
3644 				    txr->last_hw_ipcso == ipcso &&
3645 				    txr->last_hw_tucss == tucss &&
3646 				    txr->last_hw_tucso == tucso)
3647 					return;
3648 			} else {
3649 				if (txr->last_hw_tucss == tucss &&
3650 				    txr->last_hw_tucso == tucso)
3651 					return;
3652 			}
3653  		}
3654 		txr->last_hw_offload = offload;
3655 		txr->last_hw_tucss = tucss;
3656 		txr->last_hw_tucso = tucso;
3657 		/*
3658 		 * Start offset for payload checksum calculation.
3659 		 * End offset for payload checksum calculation.
3660 		 * Offset of place to put the checksum.
3661 		 */
3662		TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
3663 		TXD->upper_setup.tcp_fields.tucss = hdr_len;
3664 		TXD->upper_setup.tcp_fields.tucse = htole16(0);
3665 		TXD->upper_setup.tcp_fields.tucso = tucso;
3666 		cmd |= E1000_TXD_CMD_TCP;
3667 	} else if (mp->m_pkthdr.csum_flags & CSUM_UDP) {
3668 		*txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
3669 		*txd_upper |= E1000_TXD_POPTS_TXSM << 8;
3670 		tucss = hdr_len;
3671 		tucso = hdr_len + offsetof(struct udphdr, uh_sum);
3672 		/*
3673 		 * Setting up new checksum offload context for every frames
3674 		 * takes a lot of processing time for hardware. This also
3675 		 * reduces performance a lot for small sized frames so avoid
3676 		 * it if driver can use previously configured checksum
3677 		 * offload context.
3678 		 */
3679 		if (txr->last_hw_offload == offload) {
3680 			if (offload & CSUM_IP) {
3681 				if (txr->last_hw_ipcss == ipcss &&
3682 				    txr->last_hw_ipcso == ipcso &&
3683 				    txr->last_hw_tucss == tucss &&
3684 				    txr->last_hw_tucso == tucso)
3685 					return;
3686 			} else {
3687 				if (txr->last_hw_tucss == tucss &&
3688 				    txr->last_hw_tucso == tucso)
3689 					return;
3690 			}
3691 		}
3692 		txr->last_hw_offload = offload;
3693 		txr->last_hw_tucss = tucss;
3694 		txr->last_hw_tucso = tucso;
3695 		/*
3696 		 * Start offset for header checksum calculation.
3697 		 * End offset for header checksum calculation.
3698 		 * Offset of place to put the checksum.
3699 		 */
3700		TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
3701 		TXD->upper_setup.tcp_fields.tucss = tucss;
3702 		TXD->upper_setup.tcp_fields.tucse = htole16(0);
3703 		TXD->upper_setup.tcp_fields.tucso = tucso;
3704  	}
3705
3706 	if (offload & CSUM_IP) {
3707 		txr->last_hw_ipcss = ipcss;
3708 		txr->last_hw_ipcso = ipcso;
3709  	}
3710
3711	TXD->tcp_seg_setup.data = htole32(0);
3712	TXD->cmd_and_length =
3713	    htole32(adapter->txd_cmd | E1000_TXD_CMD_DEXT | cmd);
3714	tx_buffer = &txr->tx_buffers[cur];
3715	tx_buffer->m_head = NULL;
3716	tx_buffer->next_eop = -1;
3717
3718	if (++cur == adapter->num_tx_desc)
3719		cur = 0;
3720
3721	txr->tx_avail--;
3722	txr->next_avail_desc = cur;
3723}
3724
3725
3726/**********************************************************************
3727 *
3728 *  Setup work for hardware segmentation offload (TSO)
3729 *
3730 **********************************************************************/
3731static void
3732em_tso_setup(struct tx_ring *txr, struct mbuf *mp, int ip_off,
3733    struct ip *ip, struct tcphdr *tp, u32 *txd_upper, u32 *txd_lower)
3734{
3735	struct adapter			*adapter = txr->adapter;
3736	struct e1000_context_desc	*TXD;
3737	struct em_buffer		*tx_buffer;
3738	int cur, hdr_len;
3739
3740	/*
3741	 * In theory we can use the same TSO context if and only if
3742	 * frame is the same type(IP/TCP) and the same MSS. However
3743	 * checking whether a frame has the same IP/TCP structure is
3744	 * hard thing so just ignore that and always restablish a
3745	 * new TSO context.
3746	 */
3747	hdr_len = ip_off + (ip->ip_hl << 2) + (tp->th_off << 2);
3748	*txd_lower = (E1000_TXD_CMD_DEXT |	/* Extended descr type */
3749		      E1000_TXD_DTYP_D |	/* Data descr type */
3750		      E1000_TXD_CMD_TSE);	/* Do TSE on this packet */
3751
3752	/* IP and/or TCP header checksum calculation and insertion. */
3753	*txd_upper = (E1000_TXD_POPTS_IXSM | E1000_TXD_POPTS_TXSM) << 8;
3754
3755	cur = txr->next_avail_desc;
3756	tx_buffer = &txr->tx_buffers[cur];
3757	TXD = (struct e1000_context_desc *) &txr->tx_base[cur];
3758
3759	/*
3760	 * Start offset for header checksum calculation.
3761	 * End offset for header checksum calculation.
3762	 * Offset of place put the checksum.
3763	 */
3764	TXD->lower_setup.ip_fields.ipcss = ip_off;
3765	TXD->lower_setup.ip_fields.ipcse =
3766	    htole16(ip_off + (ip->ip_hl << 2) - 1);
3767	TXD->lower_setup.ip_fields.ipcso = ip_off + offsetof(struct ip, ip_sum);
3768	/*
3769	 * Start offset for payload checksum calculation.
3770	 * End offset for payload checksum calculation.
3771	 * Offset of place to put the checksum.
3772	 */
3773	TXD->upper_setup.tcp_fields.tucss = ip_off + (ip->ip_hl << 2);
3774	TXD->upper_setup.tcp_fields.tucse = 0;
3775	TXD->upper_setup.tcp_fields.tucso =
3776	    ip_off + (ip->ip_hl << 2) + offsetof(struct tcphdr, th_sum);
3777	/*
3778	 * Payload size per packet w/o any headers.
3779	 * Length of all headers up to payload.
3780	 */
3781	TXD->tcp_seg_setup.fields.mss = htole16(mp->m_pkthdr.tso_segsz);
3782	TXD->tcp_seg_setup.fields.hdr_len = hdr_len;
3783
3784	TXD->cmd_and_length = htole32(adapter->txd_cmd |
3785				E1000_TXD_CMD_DEXT |	/* Extended descr */
3786				E1000_TXD_CMD_TSE |	/* TSE context */
3787				E1000_TXD_CMD_IP |	/* Do IP csum */
3788				E1000_TXD_CMD_TCP |	/* Do TCP checksum */
3789				(mp->m_pkthdr.len - (hdr_len))); /* Total len */
3790
3791	tx_buffer->m_head = NULL;
3792	tx_buffer->next_eop = -1;
3793
3794	if (++cur == adapter->num_tx_desc)
3795		cur = 0;
3796
3797	txr->tx_avail--;
3798	txr->next_avail_desc = cur;
3799	txr->tx_tso = TRUE;
3800}
3801
3802
3803/**********************************************************************
3804 *
3805 *  Examine each tx_buffer in the used queue. If the hardware is done
3806 *  processing the packet then free associated resources. The
3807 *  tx_buffer is put back on the free queue.
3808 *
3809 **********************************************************************/
3810static void
3811em_txeof(struct tx_ring *txr)
3812{
3813	struct adapter	*adapter = txr->adapter;
3814        int first, last, done, processed;
3815        struct em_buffer *tx_buffer;
3816        struct e1000_tx_desc   *tx_desc, *eop_desc;
3817	struct ifnet   *ifp = adapter->ifp;
3818
3819	EM_TX_LOCK_ASSERT(txr);
3820#ifdef DEV_NETMAP
3821	if (ifp->if_capenable & IFCAP_NETMAP) {
3822		struct netmap_adapter *na = NA(ifp);
3823
3824		selwakeuppri(&na->tx_rings[txr->me].si, PI_NET);
3825		EM_TX_UNLOCK(txr);
3826		EM_CORE_LOCK(adapter);
3827		selwakeuppri(&na->tx_si, PI_NET);
3828		EM_CORE_UNLOCK(adapter);
3829		EM_TX_LOCK(txr);
3830		return;
3831	}
3832#endif /* DEV_NETMAP */
3833
3834	/* No work, make sure watchdog is off */
3835        if (txr->tx_avail == adapter->num_tx_desc) {
3836		txr->queue_status = EM_QUEUE_IDLE;
3837                return;
3838	}
3839
3840	processed = 0;
3841        first = txr->next_to_clean;
3842        tx_desc = &txr->tx_base[first];
3843        tx_buffer = &txr->tx_buffers[first];
3844	last = tx_buffer->next_eop;
3845        eop_desc = &txr->tx_base[last];
3846
3847	/*
3848	 * What this does is get the index of the
3849	 * first descriptor AFTER the EOP of the
3850	 * first packet, that way we can do the
3851	 * simple comparison on the inner while loop.
3852	 */
3853	if (++last == adapter->num_tx_desc)
3854 		last = 0;
3855	done = last;
3856
3857        bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3858            BUS_DMASYNC_POSTREAD);
3859
3860        while (eop_desc->upper.fields.status & E1000_TXD_STAT_DD) {
3861		/* We clean the range of the packet */
3862		while (first != done) {
3863                	tx_desc->upper.data = 0;
3864                	tx_desc->lower.data = 0;
3865                	tx_desc->buffer_addr = 0;
3866                	++txr->tx_avail;
3867			++processed;
3868
3869			if (tx_buffer->m_head) {
3870				bus_dmamap_sync(txr->txtag,
3871				    tx_buffer->map,
3872				    BUS_DMASYNC_POSTWRITE);
3873				bus_dmamap_unload(txr->txtag,
3874				    tx_buffer->map);
3875                        	m_freem(tx_buffer->m_head);
3876                        	tx_buffer->m_head = NULL;
3877                	}
3878			tx_buffer->next_eop = -1;
3879			txr->watchdog_time = ticks;
3880
3881	                if (++first == adapter->num_tx_desc)
3882				first = 0;
3883
3884	                tx_buffer = &txr->tx_buffers[first];
3885			tx_desc = &txr->tx_base[first];
3886		}
3887		++ifp->if_opackets;
3888		/* See if we can continue to the next packet */
3889		last = tx_buffer->next_eop;
3890		if (last != -1) {
3891        		eop_desc = &txr->tx_base[last];
3892			/* Get new done point */
3893			if (++last == adapter->num_tx_desc) last = 0;
3894			done = last;
3895		} else
3896			break;
3897        }
3898        bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3899            BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3900
3901        txr->next_to_clean = first;
3902
3903	/*
3904	** Watchdog calculation, we know there's
3905	** work outstanding or the first return
3906	** would have been taken, so none processed
3907	** for too long indicates a hang. local timer
3908	** will examine this and do a reset if needed.
3909	*/
3910	if ((!processed) && ((ticks - txr->watchdog_time) > EM_WATCHDOG))
3911		txr->queue_status = EM_QUEUE_HUNG;
3912
3913        /*
3914         * If we have a minimum free, clear IFF_DRV_OACTIVE
3915         * to tell the stack that it is OK to send packets.
3916	 * Notice that all writes of OACTIVE happen under the
3917	 * TX lock which, with a single queue, guarantees
3918	 * sanity.
3919         */
3920        if (txr->tx_avail >= EM_MAX_SCATTER)
3921		ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
3922
3923	/* Disable watchdog if all clean */
3924	if (txr->tx_avail == adapter->num_tx_desc) {
3925		txr->queue_status = EM_QUEUE_IDLE;
3926	}
3927}
3928
3929
3930/*********************************************************************
3931 *
3932 *  Refresh RX descriptor mbufs from system mbuf buffer pool.
3933 *
3934 **********************************************************************/
3935static void
3936em_refresh_mbufs(struct rx_ring *rxr, int limit)
3937{
3938	struct adapter		*adapter = rxr->adapter;
3939	struct mbuf		*m;
3940	bus_dma_segment_t	segs[1];
3941	struct em_buffer	*rxbuf;
3942	int			i, j, error, nsegs;
3943	bool			cleaned = FALSE;
3944
3945	i = j = rxr->next_to_refresh;
3946	/*
3947	** Get one descriptor beyond
3948	** our work mark to control
3949	** the loop.
3950	*/
3951	if (++j == adapter->num_rx_desc)
3952		j = 0;
3953
3954	while (j != limit) {
3955		rxbuf = &rxr->rx_buffers[i];
3956		if (rxbuf->m_head == NULL) {
3957			m = m_getjcl(M_DONTWAIT, MT_DATA,
3958			    M_PKTHDR, adapter->rx_mbuf_sz);
3959			/*
3960			** If we have a temporary resource shortage
3961			** that causes a failure, just abort refresh
3962			** for now, we will return to this point when
3963			** reinvoked from em_rxeof.
3964			*/
3965			if (m == NULL)
3966				goto update;
3967		} else
3968			m = rxbuf->m_head;
3969
3970		m->m_len = m->m_pkthdr.len = adapter->rx_mbuf_sz;
3971		m->m_flags |= M_PKTHDR;
3972		m->m_data = m->m_ext.ext_buf;
3973
3974		/* Use bus_dma machinery to setup the memory mapping  */
3975		error = bus_dmamap_load_mbuf_sg(rxr->rxtag, rxbuf->map,
3976		    m, segs, &nsegs, BUS_DMA_NOWAIT);
3977		if (error != 0) {
3978			printf("Refresh mbufs: hdr dmamap load"
3979			    " failure - %d\n", error);
3980			m_free(m);
3981			rxbuf->m_head = NULL;
3982			goto update;
3983		}
3984		rxbuf->m_head = m;
3985		bus_dmamap_sync(rxr->rxtag,
3986		    rxbuf->map, BUS_DMASYNC_PREREAD);
3987		rxr->rx_base[i].buffer_addr = htole64(segs[0].ds_addr);
3988		cleaned = TRUE;
3989
3990		i = j; /* Next is precalulated for us */
3991		rxr->next_to_refresh = i;
3992		/* Calculate next controlling index */
3993		if (++j == adapter->num_rx_desc)
3994			j = 0;
3995	}
3996update:
3997	/*
3998	** Update the tail pointer only if,
3999	** and as far as we have refreshed.
4000	*/
4001	if (cleaned)
4002		E1000_WRITE_REG(&adapter->hw,
4003		    E1000_RDT(rxr->me), rxr->next_to_refresh);
4004
4005	return;
4006}
4007
4008
4009/*********************************************************************
4010 *
4011 *  Allocate memory for rx_buffer structures. Since we use one
4012 *  rx_buffer per received packet, the maximum number of rx_buffer's
4013 *  that we'll need is equal to the number of receive descriptors
4014 *  that we've allocated.
4015 *
4016 **********************************************************************/
4017static int
4018em_allocate_receive_buffers(struct rx_ring *rxr)
4019{
4020	int i;
4021	struct adapter		*adapter = rxr->adapter;
4022	device_t		dev = adapter->dev;
4023	struct em_buffer	*rxbuf;
4024	int			error;
4025
4026	rxr->rx_buffers = malloc(sizeof(struct em_buffer) *
4027	    adapter->num_rx_desc, M_DEVBUF, M_NOWAIT | M_ZERO);
4028	if (rxr->rx_buffers == NULL) {
4029		device_printf(dev, "Unable to allocate rx_buffer memory\n");
4030		return (ENOMEM);
4031	}
4032
4033	error = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */
4034				1, 0,			/* alignment, bounds */
4035				BUS_SPACE_MAXADDR,	/* lowaddr */
4036				BUS_SPACE_MAXADDR,	/* highaddr */
4037				NULL, NULL,		/* filter, filterarg */
4038				MJUM9BYTES,		/* maxsize */
4039				1,			/* nsegments */
4040				MJUM9BYTES,		/* maxsegsize */
4041				0,			/* flags */
4042				NULL,			/* lockfunc */
4043				NULL,			/* lockarg */
4044				&rxr->rxtag);
4045	if (error) {
4046		device_printf(dev, "%s: bus_dma_tag_create failed %d\n",
4047		    __func__, error);
4048		goto fail;
4049	}
4050
4051	rxbuf = rxr->rx_buffers;
4052	for (i = 0; i < adapter->num_rx_desc; i++, rxbuf++) {
4053		rxbuf = &rxr->rx_buffers[i];
4054		error = bus_dmamap_create(rxr->rxtag, BUS_DMA_NOWAIT,
4055		    &rxbuf->map);
4056		if (error) {
4057			device_printf(dev, "%s: bus_dmamap_create failed: %d\n",
4058			    __func__, error);
4059			goto fail;
4060		}
4061	}
4062
4063	return (0);
4064
4065fail:
4066	em_free_receive_structures(adapter);
4067	return (error);
4068}
4069
4070
4071/*********************************************************************
4072 *
4073 *  Initialize a receive ring and its buffers.
4074 *
4075 **********************************************************************/
4076static int
4077em_setup_receive_ring(struct rx_ring *rxr)
4078{
4079	int i, j;
4080	struct	adapter 	*adapter = rxr->adapter;
4081	struct em_buffer	*rxbuf;
4082	bus_dma_segment_t	seg[1];
4083	int			rsize, nsegs, error = 0;
4084#ifdef DEV_NETMAP
4085	struct netmap_adapter *na = NA(adapter->ifp);
4086	struct netmap_slot *slot;
4087#endif
4088
4089
4090	/* Clear the ring contents */
4091	EM_RX_LOCK(rxr);
4092	rsize = roundup2(adapter->num_rx_desc *
4093	    sizeof(struct e1000_rx_desc), EM_DBA_ALIGN);
4094	bzero((void *)rxr->rx_base, rsize);
4095#ifdef DEV_NETMAP
4096	slot = netmap_reset(na, NR_RX, 0, 0);
4097#endif
4098
4099	/*
4100	** Free current RX buffer structs and their mbufs
4101	*/
4102	for (i = 0; i < adapter->num_rx_desc; i++) {
4103		rxbuf = &rxr->rx_buffers[i];
4104		if (rxbuf->m_head != NULL) {
4105			bus_dmamap_sync(rxr->rxtag, rxbuf->map,
4106			    BUS_DMASYNC_POSTREAD);
4107			bus_dmamap_unload(rxr->rxtag, rxbuf->map);
4108			m_freem(rxbuf->m_head);
4109			rxbuf->m_head = NULL; /* mark as freed */
4110		}
4111	}
4112
4113	/* Now replenish the mbufs */
4114        for (j = 0; j != adapter->num_rx_desc; ++j) {
4115		rxbuf = &rxr->rx_buffers[j];
4116#ifdef DEV_NETMAP
4117		if (slot) {
4118			int si = netmap_idx_n2k(&na->rx_rings[rxr->me], j);
4119			uint64_t paddr;
4120			void *addr;
4121
4122			addr = PNMB(slot + si, &paddr);
4123			netmap_load_map(rxr->rxtag, rxbuf->map, addr);
4124			/* Update descriptor */
4125			rxr->rx_base[j].buffer_addr = htole64(paddr);
4126			continue;
4127		}
4128#endif /* DEV_NETMAP */
4129		rxbuf->m_head = m_getjcl(M_DONTWAIT, MT_DATA,
4130		    M_PKTHDR, adapter->rx_mbuf_sz);
4131		if (rxbuf->m_head == NULL) {
4132			error = ENOBUFS;
4133			goto fail;
4134		}
4135		rxbuf->m_head->m_len = adapter->rx_mbuf_sz;
4136		rxbuf->m_head->m_flags &= ~M_HASFCS; /* we strip it */
4137		rxbuf->m_head->m_pkthdr.len = adapter->rx_mbuf_sz;
4138
4139		/* Get the memory mapping */
4140		error = bus_dmamap_load_mbuf_sg(rxr->rxtag,
4141		    rxbuf->map, rxbuf->m_head, seg,
4142		    &nsegs, BUS_DMA_NOWAIT);
4143		if (error != 0) {
4144			m_freem(rxbuf->m_head);
4145			rxbuf->m_head = NULL;
4146			goto fail;
4147		}
4148		bus_dmamap_sync(rxr->rxtag,
4149		    rxbuf->map, BUS_DMASYNC_PREREAD);
4150
4151		/* Update descriptor */
4152		rxr->rx_base[j].buffer_addr = htole64(seg[0].ds_addr);
4153	}
4154	rxr->next_to_check = 0;
4155	rxr->next_to_refresh = 0;
4156	bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4157	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4158
4159fail:
4160	EM_RX_UNLOCK(rxr);
4161	return (error);
4162}
4163
4164/*********************************************************************
4165 *
4166 *  Initialize all receive rings.
4167 *
4168 **********************************************************************/
4169static int
4170em_setup_receive_structures(struct adapter *adapter)
4171{
4172	int i, n;
4173	struct rx_ring *rxr = adapter->rx_rings;
4174	int q;
4175
4176	for (q = 0; q < adapter->num_queues; q++, rxr++)
4177		if (em_setup_receive_ring(rxr))
4178			goto fail;
4179
4180	return (0);
4181fail:
4182	/*
4183	 * Free RX buffers allocated so far, we will only handle
4184	 * the rings that completed, the failing case will have
4185	 * cleaned up for itself. 'q' failed, so its the terminus.
4186	 */
4187	for (i = 0; i < q; ++i) {
4188		rxr = &adapter->rx_rings[i];
4189		for (n = 0; n < adapter->num_rx_desc; n++) {
4190			struct em_buffer *rxbuf;
4191			rxbuf = &rxr->rx_buffers[n];
4192			if (rxbuf->m_head != NULL) {
4193				bus_dmamap_sync(rxr->rxtag, rxbuf->map,
4194			  	  BUS_DMASYNC_POSTREAD);
4195				bus_dmamap_unload(rxr->rxtag, rxbuf->map);
4196				m_freem(rxbuf->m_head);
4197				rxbuf->m_head = NULL;
4198			}
4199		}
4200		rxr->next_to_check = 0;
4201		rxr->next_to_refresh = 0;
4202	}
4203
4204	return (ENOBUFS);
4205}
4206
4207/*********************************************************************
4208 *
4209 *  Free all receive rings.
4210 *
4211 **********************************************************************/
4212static void
4213em_free_receive_structures(struct adapter *adapter)
4214{
4215	int i;
4216	struct rx_ring *rxr = adapter->rx_rings;
4217
4218	for (i = 0; i < adapter->num_queues; i++, rxr++) {
4219		em_free_receive_buffers(rxr);
4220		/* Free the ring memory as well */
4221		em_dma_free(adapter, &rxr->rxdma);
4222		EM_RX_LOCK_DESTROY(rxr);
4223	}
4224
4225	free(adapter->rx_rings, M_DEVBUF);
4226}
4227
4228
4229/*********************************************************************
4230 *
4231 *  Free receive ring data structures
4232 *
4233 **********************************************************************/
4234static void
4235em_free_receive_buffers(struct rx_ring *rxr)
4236{
4237	struct adapter		*adapter = rxr->adapter;
4238	struct em_buffer	*rxbuf = NULL;
4239
4240	INIT_DEBUGOUT("free_receive_buffers: begin");
4241
4242	if (rxr->rx_buffers != NULL) {
4243		int i;
4244		for (i = 0; i < adapter->num_rx_desc; i++) {
4245			rxbuf = &rxr->rx_buffers[i];
4246			if (rxbuf->map != NULL) {
4247				bus_dmamap_sync(rxr->rxtag, rxbuf->map,
4248				    BUS_DMASYNC_POSTREAD);
4249				bus_dmamap_unload(rxr->rxtag, rxbuf->map);
4250				bus_dmamap_destroy(rxr->rxtag, rxbuf->map);
4251			}
4252			if (rxbuf->m_head != NULL) {
4253				m_freem(rxbuf->m_head);
4254				rxbuf->m_head = NULL;
4255			}
4256		}
4257		free(rxr->rx_buffers, M_DEVBUF);
4258		rxr->rx_buffers = NULL;
4259		rxr->next_to_check = 0;
4260		rxr->next_to_refresh = 0;
4261	}
4262
4263	if (rxr->rxtag != NULL) {
4264		bus_dma_tag_destroy(rxr->rxtag);
4265		rxr->rxtag = NULL;
4266	}
4267
4268	return;
4269}
4270
4271
4272/*********************************************************************
4273 *
4274 *  Enable receive unit.
4275 *
4276 **********************************************************************/
4277#define MAX_INTS_PER_SEC	8000
4278#define DEFAULT_ITR	     1000000000/(MAX_INTS_PER_SEC * 256)
4279
4280static void
4281em_initialize_receive_unit(struct adapter *adapter)
4282{
4283	int i;
4284	struct rx_ring	*rxr = adapter->rx_rings;
4285	struct ifnet	*ifp = adapter->ifp;
4286	struct e1000_hw	*hw = &adapter->hw;
4287	u64	bus_addr;
4288	u32	rctl, rxcsum;
4289
4290	INIT_DEBUGOUT("em_initialize_receive_units: begin");
4291
4292	/*
4293	 * Make sure receives are disabled while setting
4294	 * up the descriptor ring
4295	 */
4296	rctl = E1000_READ_REG(hw, E1000_RCTL);
4297	/* Do not disable if ever enabled on this hardware */
4298	if ((hw->mac.type != e1000_82574) && (hw->mac.type != e1000_82583))
4299		E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
4300
4301	E1000_WRITE_REG(&adapter->hw, E1000_RADV,
4302	    adapter->rx_abs_int_delay.value);
4303	/*
4304	 * Set the interrupt throttling rate. Value is calculated
4305	 * as DEFAULT_ITR = 1/(MAX_INTS_PER_SEC * 256ns)
4306	 */
4307	E1000_WRITE_REG(hw, E1000_ITR, DEFAULT_ITR);
4308
4309	/*
4310	** When using MSIX interrupts we need to throttle
4311	** using the EITR register (82574 only)
4312	*/
4313	if (hw->mac.type == e1000_82574) {
4314		int i;
4315		for (i = 0; i < 4; i++)
4316			E1000_WRITE_REG(hw, E1000_EITR_82574(i),
4317			    DEFAULT_ITR);
4318		/* Disable accelerated acknowledge */
4319		E1000_WRITE_REG(hw, E1000_RFCTL, E1000_RFCTL_ACK_DIS);
4320	}
4321
4322	if (ifp->if_capenable & IFCAP_RXCSUM) {
4323		rxcsum = E1000_READ_REG(hw, E1000_RXCSUM);
4324		rxcsum |= (E1000_RXCSUM_IPOFL | E1000_RXCSUM_TUOFL);
4325		E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum);
4326	}
4327
4328	/*
4329	** XXX TEMPORARY WORKAROUND: on some systems with 82573
4330	** long latencies are observed, like Lenovo X60. This
4331	** change eliminates the problem, but since having positive
4332	** values in RDTR is a known source of problems on other
4333	** platforms another solution is being sought.
4334	*/
4335	if (hw->mac.type == e1000_82573)
4336		E1000_WRITE_REG(hw, E1000_RDTR, 0x20);
4337
4338	for (i = 0; i < adapter->num_queues; i++, rxr++) {
4339		/* Setup the Base and Length of the Rx Descriptor Ring */
4340		bus_addr = rxr->rxdma.dma_paddr;
4341		E1000_WRITE_REG(hw, E1000_RDLEN(i),
4342		    adapter->num_rx_desc * sizeof(struct e1000_rx_desc));
4343		E1000_WRITE_REG(hw, E1000_RDBAH(i), (u32)(bus_addr >> 32));
4344		E1000_WRITE_REG(hw, E1000_RDBAL(i), (u32)bus_addr);
4345		/* Setup the Head and Tail Descriptor Pointers */
4346		E1000_WRITE_REG(hw, E1000_RDH(i), 0);
4347#ifdef DEV_NETMAP
4348		/*
4349		 * an init() while a netmap client is active must
4350		 * preserve the rx buffers passed to userspace.
4351		 * In this driver it means we adjust RDT to
4352		 * something different from na->num_rx_desc - 1.
4353		 */
4354		if (ifp->if_capenable & IFCAP_NETMAP) {
4355			struct netmap_adapter *na = NA(adapter->ifp);
4356			struct netmap_kring *kring = &na->rx_rings[i];
4357			int t = na->num_rx_desc - 1 - kring->nr_hwavail;
4358
4359			E1000_WRITE_REG(hw, E1000_RDT(i), t);
4360		} else
4361#endif /* DEV_NETMAP */
4362		E1000_WRITE_REG(hw, E1000_RDT(i), adapter->num_rx_desc - 1);
4363	}
4364
4365	/* Set PTHRESH for improved jumbo performance */
4366	if (((adapter->hw.mac.type == e1000_ich9lan) ||
4367	    (adapter->hw.mac.type == e1000_pch2lan) ||
4368	    (adapter->hw.mac.type == e1000_ich10lan)) &&
4369	    (ifp->if_mtu > ETHERMTU)) {
4370		u32 rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(0));
4371		E1000_WRITE_REG(hw, E1000_RXDCTL(0), rxdctl | 3);
4372	}
4373
4374	if (adapter->hw.mac.type == e1000_pch2lan) {
4375		if (ifp->if_mtu > ETHERMTU)
4376			e1000_lv_jumbo_workaround_ich8lan(hw, TRUE);
4377		else
4378			e1000_lv_jumbo_workaround_ich8lan(hw, FALSE);
4379	}
4380
4381	/* Setup the Receive Control Register */
4382	rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
4383	rctl |= E1000_RCTL_EN | E1000_RCTL_BAM |
4384	    E1000_RCTL_LBM_NO | E1000_RCTL_RDMTS_HALF |
4385	    (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
4386
4387        /* Strip the CRC */
4388        rctl |= E1000_RCTL_SECRC;
4389
4390        /* Make sure VLAN Filters are off */
4391        rctl &= ~E1000_RCTL_VFE;
4392	rctl &= ~E1000_RCTL_SBP;
4393
4394	if (adapter->rx_mbuf_sz == MCLBYTES)
4395		rctl |= E1000_RCTL_SZ_2048;
4396	else if (adapter->rx_mbuf_sz == MJUMPAGESIZE)
4397		rctl |= E1000_RCTL_SZ_4096 | E1000_RCTL_BSEX;
4398	else if (adapter->rx_mbuf_sz > MJUMPAGESIZE)
4399		rctl |= E1000_RCTL_SZ_8192 | E1000_RCTL_BSEX;
4400
4401	if (ifp->if_mtu > ETHERMTU)
4402		rctl |= E1000_RCTL_LPE;
4403	else
4404		rctl &= ~E1000_RCTL_LPE;
4405
4406	/* Write out the settings */
4407	E1000_WRITE_REG(hw, E1000_RCTL, rctl);
4408
4409	return;
4410}
4411
4412
4413/*********************************************************************
4414 *
4415 *  This routine executes in interrupt context. It replenishes
4416 *  the mbufs in the descriptor and sends data which has been
4417 *  dma'ed into host memory to upper layer.
4418 *
4419 *  We loop at most count times if count is > 0, or until done if
4420 *  count < 0.
4421 *
4422 *  For polling we also now return the number of cleaned packets
4423 *********************************************************************/
4424static bool
4425em_rxeof(struct rx_ring *rxr, int count, int *done)
4426{
4427	struct adapter		*adapter = rxr->adapter;
4428	struct ifnet		*ifp = adapter->ifp;
4429	struct mbuf		*mp, *sendmp;
4430	u8			status = 0;
4431	u16 			len;
4432	int			i, processed, rxdone = 0;
4433	bool			eop;
4434	struct e1000_rx_desc	*cur;
4435
4436	EM_RX_LOCK(rxr);
4437
4438#ifdef DEV_NETMAP
4439	if (ifp->if_capenable & IFCAP_NETMAP) {
4440		struct netmap_adapter *na = NA(ifp);
4441
4442		na->rx_rings[rxr->me].nr_kflags |= NKR_PENDINTR;
4443		selwakeuppri(&na->rx_rings[rxr->me].si, PI_NET);
4444		EM_RX_UNLOCK(rxr);
4445		EM_CORE_LOCK(adapter);
4446		selwakeuppri(&na->rx_si, PI_NET);
4447		EM_CORE_UNLOCK(adapter);
4448		return (0);
4449	}
4450#endif /* DEV_NETMAP */
4451
4452	for (i = rxr->next_to_check, processed = 0; count != 0;) {
4453
4454		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
4455			break;
4456
4457		bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4458		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
4459
4460		cur = &rxr->rx_base[i];
4461		status = cur->status;
4462		mp = sendmp = NULL;
4463
4464		if ((status & E1000_RXD_STAT_DD) == 0)
4465			break;
4466
4467		len = le16toh(cur->length);
4468		eop = (status & E1000_RXD_STAT_EOP) != 0;
4469
4470		if ((cur->errors & E1000_RXD_ERR_FRAME_ERR_MASK) ||
4471		    (rxr->discard == TRUE)) {
4472			ifp->if_ierrors++;
4473			++rxr->rx_discarded;
4474			if (!eop) /* Catch subsequent segs */
4475				rxr->discard = TRUE;
4476			else
4477				rxr->discard = FALSE;
4478			em_rx_discard(rxr, i);
4479			goto next_desc;
4480		}
4481
4482		/* Assign correct length to the current fragment */
4483		mp = rxr->rx_buffers[i].m_head;
4484		mp->m_len = len;
4485
4486		/* Trigger for refresh */
4487		rxr->rx_buffers[i].m_head = NULL;
4488
4489		/* First segment? */
4490		if (rxr->fmp == NULL) {
4491			mp->m_pkthdr.len = len;
4492			rxr->fmp = rxr->lmp = mp;
4493		} else {
4494			/* Chain mbuf's together */
4495			mp->m_flags &= ~M_PKTHDR;
4496			rxr->lmp->m_next = mp;
4497			rxr->lmp = mp;
4498			rxr->fmp->m_pkthdr.len += len;
4499		}
4500
4501		if (eop) {
4502			--count;
4503			sendmp = rxr->fmp;
4504			sendmp->m_pkthdr.rcvif = ifp;
4505			ifp->if_ipackets++;
4506			em_receive_checksum(cur, sendmp);
4507#ifndef __NO_STRICT_ALIGNMENT
4508			if (adapter->max_frame_size >
4509			    (MCLBYTES - ETHER_ALIGN) &&
4510			    em_fixup_rx(rxr) != 0)
4511				goto skip;
4512#endif
4513			if (status & E1000_RXD_STAT_VP) {
4514				sendmp->m_pkthdr.ether_vtag =
4515				    le16toh(cur->special);
4516				sendmp->m_flags |= M_VLANTAG;
4517			}
4518#ifndef __NO_STRICT_ALIGNMENT
4519skip:
4520#endif
4521			rxr->fmp = rxr->lmp = NULL;
4522		}
4523next_desc:
4524		/* Zero out the receive descriptors status. */
4525		cur->status = 0;
4526		++rxdone;	/* cumulative for POLL */
4527		++processed;
4528
4529		/* Advance our pointers to the next descriptor. */
4530		if (++i == adapter->num_rx_desc)
4531			i = 0;
4532
4533		/* Send to the stack */
4534		if (sendmp != NULL) {
4535			rxr->next_to_check = i;
4536			EM_RX_UNLOCK(rxr);
4537			(*ifp->if_input)(ifp, sendmp);
4538			EM_RX_LOCK(rxr);
4539			i = rxr->next_to_check;
4540		}
4541
4542		/* Only refresh mbufs every 8 descriptors */
4543		if (processed == 8) {
4544			em_refresh_mbufs(rxr, i);
4545			processed = 0;
4546		}
4547	}
4548
4549	/* Catch any remaining refresh work */
4550	if (e1000_rx_unrefreshed(rxr))
4551		em_refresh_mbufs(rxr, i);
4552
4553	rxr->next_to_check = i;
4554	if (done != NULL)
4555		*done = rxdone;
4556	EM_RX_UNLOCK(rxr);
4557
4558	return ((status & E1000_RXD_STAT_DD) ? TRUE : FALSE);
4559}
4560
4561static __inline void
4562em_rx_discard(struct rx_ring *rxr, int i)
4563{
4564	struct em_buffer	*rbuf;
4565
4566	rbuf = &rxr->rx_buffers[i];
4567	/* Free any previous pieces */
4568	if (rxr->fmp != NULL) {
4569		rxr->fmp->m_flags |= M_PKTHDR;
4570		m_freem(rxr->fmp);
4571		rxr->fmp = NULL;
4572		rxr->lmp = NULL;
4573	}
4574	/*
4575	** Free buffer and allow em_refresh_mbufs()
4576	** to clean up and recharge buffer.
4577	*/
4578	if (rbuf->m_head) {
4579		m_free(rbuf->m_head);
4580		rbuf->m_head = NULL;
4581	}
4582	return;
4583}
4584
4585#ifndef __NO_STRICT_ALIGNMENT
4586/*
4587 * When jumbo frames are enabled we should realign entire payload on
4588 * architecures with strict alignment. This is serious design mistake of 8254x
4589 * as it nullifies DMA operations. 8254x just allows RX buffer size to be
4590 * 2048/4096/8192/16384. What we really want is 2048 - ETHER_ALIGN to align its
4591 * payload. On architecures without strict alignment restrictions 8254x still
4592 * performs unaligned memory access which would reduce the performance too.
4593 * To avoid copying over an entire frame to align, we allocate a new mbuf and
4594 * copy ethernet header to the new mbuf. The new mbuf is prepended into the
4595 * existing mbuf chain.
4596 *
4597 * Be aware, best performance of the 8254x is achived only when jumbo frame is
4598 * not used at all on architectures with strict alignment.
4599 */
4600static int
4601em_fixup_rx(struct rx_ring *rxr)
4602{
4603	struct adapter *adapter = rxr->adapter;
4604	struct mbuf *m, *n;
4605	int error;
4606
4607	error = 0;
4608	m = rxr->fmp;
4609	if (m->m_len <= (MCLBYTES - ETHER_HDR_LEN)) {
4610		bcopy(m->m_data, m->m_data + ETHER_HDR_LEN, m->m_len);
4611		m->m_data += ETHER_HDR_LEN;
4612	} else {
4613		MGETHDR(n, M_DONTWAIT, MT_DATA);
4614		if (n != NULL) {
4615			bcopy(m->m_data, n->m_data, ETHER_HDR_LEN);
4616			m->m_data += ETHER_HDR_LEN;
4617			m->m_len -= ETHER_HDR_LEN;
4618			n->m_len = ETHER_HDR_LEN;
4619			M_MOVE_PKTHDR(n, m);
4620			n->m_next = m;
4621			rxr->fmp = n;
4622		} else {
4623			adapter->dropped_pkts++;
4624			m_freem(rxr->fmp);
4625			rxr->fmp = NULL;
4626			error = ENOMEM;
4627		}
4628	}
4629
4630	return (error);
4631}
4632#endif
4633
4634/*********************************************************************
4635 *
4636 *  Verify that the hardware indicated that the checksum is valid.
4637 *  Inform the stack about the status of checksum so that stack
4638 *  doesn't spend time verifying the checksum.
4639 *
4640 *********************************************************************/
4641static void
4642em_receive_checksum(struct e1000_rx_desc *rx_desc, struct mbuf *mp)
4643{
4644	/* Ignore Checksum bit is set */
4645	if (rx_desc->status & E1000_RXD_STAT_IXSM) {
4646		mp->m_pkthdr.csum_flags = 0;
4647		return;
4648	}
4649
4650	if (rx_desc->status & E1000_RXD_STAT_IPCS) {
4651		/* Did it pass? */
4652		if (!(rx_desc->errors & E1000_RXD_ERR_IPE)) {
4653			/* IP Checksum Good */
4654			mp->m_pkthdr.csum_flags = CSUM_IP_CHECKED;
4655			mp->m_pkthdr.csum_flags |= CSUM_IP_VALID;
4656
4657		} else {
4658			mp->m_pkthdr.csum_flags = 0;
4659		}
4660	}
4661
4662	if (rx_desc->status & E1000_RXD_STAT_TCPCS) {
4663		/* Did it pass? */
4664		if (!(rx_desc->errors & E1000_RXD_ERR_TCPE)) {
4665			mp->m_pkthdr.csum_flags |=
4666			(CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
4667			mp->m_pkthdr.csum_data = htons(0xffff);
4668		}
4669	}
4670}
4671
4672/*
4673 * This routine is run via an vlan
4674 * config EVENT
4675 */
4676static void
4677em_register_vlan(void *arg, struct ifnet *ifp, u16 vtag)
4678{
4679	struct adapter	*adapter = ifp->if_softc;
4680	u32		index, bit;
4681
4682	if (ifp->if_softc !=  arg)   /* Not our event */
4683		return;
4684
4685	if ((vtag == 0) || (vtag > 4095))       /* Invalid ID */
4686                return;
4687
4688	EM_CORE_LOCK(adapter);
4689	index = (vtag >> 5) & 0x7F;
4690	bit = vtag & 0x1F;
4691	adapter->shadow_vfta[index] |= (1 << bit);
4692	++adapter->num_vlans;
4693	/* Re-init to load the changes */
4694	if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
4695		em_init_locked(adapter);
4696	EM_CORE_UNLOCK(adapter);
4697}
4698
4699/*
4700 * This routine is run via an vlan
4701 * unconfig EVENT
4702 */
4703static void
4704em_unregister_vlan(void *arg, struct ifnet *ifp, u16 vtag)
4705{
4706	struct adapter	*adapter = ifp->if_softc;
4707	u32		index, bit;
4708
4709	if (ifp->if_softc !=  arg)
4710		return;
4711
4712	if ((vtag == 0) || (vtag > 4095))       /* Invalid */
4713                return;
4714
4715	EM_CORE_LOCK(adapter);
4716	index = (vtag >> 5) & 0x7F;
4717	bit = vtag & 0x1F;
4718	adapter->shadow_vfta[index] &= ~(1 << bit);
4719	--adapter->num_vlans;
4720	/* Re-init to load the changes */
4721	if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
4722		em_init_locked(adapter);
4723	EM_CORE_UNLOCK(adapter);
4724}
4725
4726static void
4727em_setup_vlan_hw_support(struct adapter *adapter)
4728{
4729	int i;
4730	struct e1000_hw *hw = &adapter->hw;
4731	u32             reg;
4732
4733	/*
4734	** We get here thru init_locked, meaning
4735	** a soft reset, this has already cleared
4736	** the VFTA and other state, so if there
4737	** have been no vlan's registered do nothing.
4738	*/
4739	if (adapter->num_vlans == 0)
4740                return;
4741
4742	/*
4743	** A soft reset zero's out the VFTA, so
4744	** we need to repopulate it now.
4745	*/
4746	for (i = 0; i < EM_VFTA_SIZE; i++)
4747                if (adapter->shadow_vfta[i] != 0)
4748			E1000_WRITE_REG_ARRAY(hw, E1000_VFTA,
4749                            i, adapter->shadow_vfta[i]);
4750
4751	reg = E1000_READ_REG(hw, E1000_CTRL);
4752	reg |= E1000_CTRL_VME;
4753	E1000_WRITE_REG(hw, E1000_CTRL, reg);
4754
4755	/* Enable the Filter Table */
4756	reg = E1000_READ_REG(hw, E1000_RCTL);
4757	reg &= ~E1000_RCTL_CFIEN;
4758	reg |= E1000_RCTL_VFE;
4759	E1000_WRITE_REG(hw, E1000_RCTL, reg);
4760}
4761
4762static void
4763em_enable_intr(struct adapter *adapter)
4764{
4765	struct e1000_hw *hw = &adapter->hw;
4766	u32 ims_mask = IMS_ENABLE_MASK;
4767
4768	if (hw->mac.type == e1000_82574) {
4769		E1000_WRITE_REG(hw, EM_EIAC, EM_MSIX_MASK);
4770		ims_mask |= EM_MSIX_MASK;
4771	}
4772	E1000_WRITE_REG(hw, E1000_IMS, ims_mask);
4773}
4774
4775static void
4776em_disable_intr(struct adapter *adapter)
4777{
4778	struct e1000_hw *hw = &adapter->hw;
4779
4780	if (hw->mac.type == e1000_82574)
4781		E1000_WRITE_REG(hw, EM_EIAC, 0);
4782	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
4783}
4784
4785/*
4786 * Bit of a misnomer, what this really means is
4787 * to enable OS management of the system... aka
4788 * to disable special hardware management features
4789 */
4790static void
4791em_init_manageability(struct adapter *adapter)
4792{
4793	/* A shared code workaround */
4794#define E1000_82542_MANC2H E1000_MANC2H
4795	if (adapter->has_manage) {
4796		int manc2h = E1000_READ_REG(&adapter->hw, E1000_MANC2H);
4797		int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
4798
4799		/* disable hardware interception of ARP */
4800		manc &= ~(E1000_MANC_ARP_EN);
4801
4802                /* enable receiving management packets to the host */
4803		manc |= E1000_MANC_EN_MNG2HOST;
4804#define E1000_MNG2HOST_PORT_623 (1 << 5)
4805#define E1000_MNG2HOST_PORT_664 (1 << 6)
4806		manc2h |= E1000_MNG2HOST_PORT_623;
4807		manc2h |= E1000_MNG2HOST_PORT_664;
4808		E1000_WRITE_REG(&adapter->hw, E1000_MANC2H, manc2h);
4809		E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
4810	}
4811}
4812
4813/*
4814 * Give control back to hardware management
4815 * controller if there is one.
4816 */
4817static void
4818em_release_manageability(struct adapter *adapter)
4819{
4820	if (adapter->has_manage) {
4821		int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
4822
4823		/* re-enable hardware interception of ARP */
4824		manc |= E1000_MANC_ARP_EN;
4825		manc &= ~E1000_MANC_EN_MNG2HOST;
4826
4827		E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
4828	}
4829}
4830
4831/*
4832 * em_get_hw_control sets the {CTRL_EXT|FWSM}:DRV_LOAD bit.
4833 * For ASF and Pass Through versions of f/w this means
4834 * that the driver is loaded. For AMT version type f/w
4835 * this means that the network i/f is open.
4836 */
4837static void
4838em_get_hw_control(struct adapter *adapter)
4839{
4840	u32 ctrl_ext, swsm;
4841
4842	if (adapter->hw.mac.type == e1000_82573) {
4843		swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM);
4844		E1000_WRITE_REG(&adapter->hw, E1000_SWSM,
4845		    swsm | E1000_SWSM_DRV_LOAD);
4846		return;
4847	}
4848	/* else */
4849	ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4850	E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
4851	    ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
4852	return;
4853}
4854
4855/*
4856 * em_release_hw_control resets {CTRL_EXT|FWSM}:DRV_LOAD bit.
4857 * For ASF and Pass Through versions of f/w this means that
4858 * the driver is no longer loaded. For AMT versions of the
4859 * f/w this means that the network i/f is closed.
4860 */
4861static void
4862em_release_hw_control(struct adapter *adapter)
4863{
4864	u32 ctrl_ext, swsm;
4865
4866	if (!adapter->has_manage)
4867		return;
4868
4869	if (adapter->hw.mac.type == e1000_82573) {
4870		swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM);
4871		E1000_WRITE_REG(&adapter->hw, E1000_SWSM,
4872		    swsm & ~E1000_SWSM_DRV_LOAD);
4873		return;
4874	}
4875	/* else */
4876	ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4877	E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
4878	    ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
4879	return;
4880}
4881
4882static int
4883em_is_valid_ether_addr(u8 *addr)
4884{
4885	char zero_addr[6] = { 0, 0, 0, 0, 0, 0 };
4886
4887	if ((addr[0] & 1) || (!bcmp(addr, zero_addr, ETHER_ADDR_LEN))) {
4888		return (FALSE);
4889	}
4890
4891	return (TRUE);
4892}
4893
4894/*
4895** Parse the interface capabilities with regard
4896** to both system management and wake-on-lan for
4897** later use.
4898*/
4899static void
4900em_get_wakeup(device_t dev)
4901{
4902	struct adapter	*adapter = device_get_softc(dev);
4903	u16		eeprom_data = 0, device_id, apme_mask;
4904
4905	adapter->has_manage = e1000_enable_mng_pass_thru(&adapter->hw);
4906	apme_mask = EM_EEPROM_APME;
4907
4908	switch (adapter->hw.mac.type) {
4909	case e1000_82573:
4910	case e1000_82583:
4911		adapter->has_amt = TRUE;
4912		/* Falls thru */
4913	case e1000_82571:
4914	case e1000_82572:
4915	case e1000_80003es2lan:
4916		if (adapter->hw.bus.func == 1) {
4917			e1000_read_nvm(&adapter->hw,
4918			    NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data);
4919			break;
4920		} else
4921			e1000_read_nvm(&adapter->hw,
4922			    NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
4923		break;
4924	case e1000_ich8lan:
4925	case e1000_ich9lan:
4926	case e1000_ich10lan:
4927	case e1000_pchlan:
4928	case e1000_pch2lan:
4929		apme_mask = E1000_WUC_APME;
4930		adapter->has_amt = TRUE;
4931		eeprom_data = E1000_READ_REG(&adapter->hw, E1000_WUC);
4932		break;
4933	default:
4934		e1000_read_nvm(&adapter->hw,
4935		    NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
4936		break;
4937	}
4938	if (eeprom_data & apme_mask)
4939		adapter->wol = (E1000_WUFC_MAG | E1000_WUFC_MC);
4940	/*
4941         * We have the eeprom settings, now apply the special cases
4942         * where the eeprom may be wrong or the board won't support
4943         * wake on lan on a particular port
4944	 */
4945	device_id = pci_get_device(dev);
4946        switch (device_id) {
4947	case E1000_DEV_ID_82571EB_FIBER:
4948		/* Wake events only supported on port A for dual fiber
4949		 * regardless of eeprom setting */
4950		if (E1000_READ_REG(&adapter->hw, E1000_STATUS) &
4951		    E1000_STATUS_FUNC_1)
4952			adapter->wol = 0;
4953		break;
4954	case E1000_DEV_ID_82571EB_QUAD_COPPER:
4955	case E1000_DEV_ID_82571EB_QUAD_FIBER:
4956	case E1000_DEV_ID_82571EB_QUAD_COPPER_LP:
4957                /* if quad port adapter, disable WoL on all but port A */
4958		if (global_quad_port_a != 0)
4959			adapter->wol = 0;
4960		/* Reset for multiple quad port adapters */
4961		if (++global_quad_port_a == 4)
4962			global_quad_port_a = 0;
4963                break;
4964	}
4965	return;
4966}
4967
4968
4969/*
4970 * Enable PCI Wake On Lan capability
4971 */
4972static void
4973em_enable_wakeup(device_t dev)
4974{
4975	struct adapter	*adapter = device_get_softc(dev);
4976	struct ifnet	*ifp = adapter->ifp;
4977	u32		pmc, ctrl, ctrl_ext, rctl;
4978	u16     	status;
4979
4980	if ((pci_find_cap(dev, PCIY_PMG, &pmc) != 0))
4981		return;
4982
4983	/* Advertise the wakeup capability */
4984	ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
4985	ctrl |= (E1000_CTRL_SWDPIN2 | E1000_CTRL_SWDPIN3);
4986	E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
4987	E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
4988
4989	if ((adapter->hw.mac.type == e1000_ich8lan) ||
4990	    (adapter->hw.mac.type == e1000_pchlan) ||
4991	    (adapter->hw.mac.type == e1000_ich9lan) ||
4992	    (adapter->hw.mac.type == e1000_ich10lan))
4993		e1000_suspend_workarounds_ich8lan(&adapter->hw);
4994
4995	/* Keep the laser running on Fiber adapters */
4996	if (adapter->hw.phy.media_type == e1000_media_type_fiber ||
4997	    adapter->hw.phy.media_type == e1000_media_type_internal_serdes) {
4998		ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4999		ctrl_ext |= E1000_CTRL_EXT_SDP3_DATA;
5000		E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, ctrl_ext);
5001	}
5002
5003	/*
5004	** Determine type of Wakeup: note that wol
5005	** is set with all bits on by default.
5006	*/
5007	if ((ifp->if_capenable & IFCAP_WOL_MAGIC) == 0)
5008		adapter->wol &= ~E1000_WUFC_MAG;
5009
5010	if ((ifp->if_capenable & IFCAP_WOL_MCAST) == 0)
5011		adapter->wol &= ~E1000_WUFC_MC;
5012	else {
5013		rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
5014		rctl |= E1000_RCTL_MPE;
5015		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, rctl);
5016	}
5017
5018	if ((adapter->hw.mac.type == e1000_pchlan) ||
5019	    (adapter->hw.mac.type == e1000_pch2lan)) {
5020		if (em_enable_phy_wakeup(adapter))
5021			return;
5022	} else {
5023		E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
5024		E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
5025	}
5026
5027	if (adapter->hw.phy.type == e1000_phy_igp_3)
5028		e1000_igp3_phy_powerdown_workaround_ich8lan(&adapter->hw);
5029
5030        /* Request PME */
5031        status = pci_read_config(dev, pmc + PCIR_POWER_STATUS, 2);
5032	status &= ~(PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE);
5033	if (ifp->if_capenable & IFCAP_WOL)
5034		status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE;
5035        pci_write_config(dev, pmc + PCIR_POWER_STATUS, status, 2);
5036
5037	return;
5038}
5039
5040/*
5041** WOL in the newer chipset interfaces (pchlan)
5042** require thing to be copied into the phy
5043*/
5044static int
5045em_enable_phy_wakeup(struct adapter *adapter)
5046{
5047	int i;
5048	struct e1000_hw *hw = &adapter->hw;
5049	u32 mreg, ret = 0;
5050	u16 preg;
5051
5052	/* copy MAC RARs to PHY RARs */
5053	e1000_copy_rx_addrs_to_phy_ich8lan(hw);
5054
5055	/* copy MAC MTA to PHY MTA */
5056	for (i = 0; i < adapter->hw.mac.mta_reg_count; i++) {
5057		mreg = E1000_READ_REG_ARRAY(hw, E1000_MTA, i);
5058		e1000_write_phy_reg(hw, BM_MTA(i), (u16)(mreg & 0xFFFF));
5059		e1000_write_phy_reg(hw, BM_MTA(i) + 1,
5060		    (u16)((mreg >> 16) & 0xFFFF));
5061	}
5062
5063	/* configure PHY Rx Control register */
5064	e1000_read_phy_reg(&adapter->hw, BM_RCTL, &preg);
5065	mreg = E1000_READ_REG(hw, E1000_RCTL);
5066	if (mreg & E1000_RCTL_UPE)
5067		preg |= BM_RCTL_UPE;
5068	if (mreg & E1000_RCTL_MPE)
5069		preg |= BM_RCTL_MPE;
5070	preg &= ~(BM_RCTL_MO_MASK);
5071	if (mreg & E1000_RCTL_MO_3)
5072		preg |= (((mreg & E1000_RCTL_MO_3) >> E1000_RCTL_MO_SHIFT)
5073				<< BM_RCTL_MO_SHIFT);
5074	if (mreg & E1000_RCTL_BAM)
5075		preg |= BM_RCTL_BAM;
5076	if (mreg & E1000_RCTL_PMCF)
5077		preg |= BM_RCTL_PMCF;
5078	mreg = E1000_READ_REG(hw, E1000_CTRL);
5079	if (mreg & E1000_CTRL_RFCE)
5080		preg |= BM_RCTL_RFCE;
5081	e1000_write_phy_reg(&adapter->hw, BM_RCTL, preg);
5082
5083	/* enable PHY wakeup in MAC register */
5084	E1000_WRITE_REG(hw, E1000_WUC,
5085	    E1000_WUC_PHY_WAKE | E1000_WUC_PME_EN);
5086	E1000_WRITE_REG(hw, E1000_WUFC, adapter->wol);
5087
5088	/* configure and enable PHY wakeup in PHY registers */
5089	e1000_write_phy_reg(&adapter->hw, BM_WUFC, adapter->wol);
5090	e1000_write_phy_reg(&adapter->hw, BM_WUC, E1000_WUC_PME_EN);
5091
5092	/* activate PHY wakeup */
5093	ret = hw->phy.ops.acquire(hw);
5094	if (ret) {
5095		printf("Could not acquire PHY\n");
5096		return ret;
5097	}
5098	e1000_write_phy_reg_mdic(hw, IGP01E1000_PHY_PAGE_SELECT,
5099	                         (BM_WUC_ENABLE_PAGE << IGP_PAGE_SHIFT));
5100	ret = e1000_read_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, &preg);
5101	if (ret) {
5102		printf("Could not read PHY page 769\n");
5103		goto out;
5104	}
5105	preg |= BM_WUC_ENABLE_BIT | BM_WUC_HOST_WU_BIT;
5106	ret = e1000_write_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, preg);
5107	if (ret)
5108		printf("Could not set PHY Host Wakeup bit\n");
5109out:
5110	hw->phy.ops.release(hw);
5111
5112	return ret;
5113}
5114
5115static void
5116em_led_func(void *arg, int onoff)
5117{
5118	struct adapter	*adapter = arg;
5119
5120	EM_CORE_LOCK(adapter);
5121	if (onoff) {
5122		e1000_setup_led(&adapter->hw);
5123		e1000_led_on(&adapter->hw);
5124	} else {
5125		e1000_led_off(&adapter->hw);
5126		e1000_cleanup_led(&adapter->hw);
5127	}
5128	EM_CORE_UNLOCK(adapter);
5129}
5130
5131/*
5132** Disable the L0S and L1 LINK states
5133*/
5134static void
5135em_disable_aspm(struct adapter *adapter)
5136{
5137	int		base, reg;
5138	u16		link_cap,link_ctrl;
5139	device_t	dev = adapter->dev;
5140
5141	switch (adapter->hw.mac.type) {
5142		case e1000_82573:
5143		case e1000_82574:
5144		case e1000_82583:
5145			break;
5146		default:
5147			return;
5148	}
5149	if (pci_find_cap(dev, PCIY_EXPRESS, &base) != 0)
5150		return;
5151	reg = base + PCIR_EXPRESS_LINK_CAP;
5152	link_cap = pci_read_config(dev, reg, 2);
5153	if ((link_cap & PCIM_LINK_CAP_ASPM) == 0)
5154		return;
5155	reg = base + PCIR_EXPRESS_LINK_CTL;
5156	link_ctrl = pci_read_config(dev, reg, 2);
5157	link_ctrl &= 0xFFFC; /* turn off bit 1 and 2 */
5158	pci_write_config(dev, reg, link_ctrl, 2);
5159	return;
5160}
5161
5162/**********************************************************************
5163 *
5164 *  Update the board statistics counters.
5165 *
5166 **********************************************************************/
5167static void
5168em_update_stats_counters(struct adapter *adapter)
5169{
5170	struct ifnet   *ifp;
5171
5172	if(adapter->hw.phy.media_type == e1000_media_type_copper ||
5173	   (E1000_READ_REG(&adapter->hw, E1000_STATUS) & E1000_STATUS_LU)) {
5174		adapter->stats.symerrs += E1000_READ_REG(&adapter->hw, E1000_SYMERRS);
5175		adapter->stats.sec += E1000_READ_REG(&adapter->hw, E1000_SEC);
5176	}
5177	adapter->stats.crcerrs += E1000_READ_REG(&adapter->hw, E1000_CRCERRS);
5178	adapter->stats.mpc += E1000_READ_REG(&adapter->hw, E1000_MPC);
5179	adapter->stats.scc += E1000_READ_REG(&adapter->hw, E1000_SCC);
5180	adapter->stats.ecol += E1000_READ_REG(&adapter->hw, E1000_ECOL);
5181
5182	adapter->stats.mcc += E1000_READ_REG(&adapter->hw, E1000_MCC);
5183	adapter->stats.latecol += E1000_READ_REG(&adapter->hw, E1000_LATECOL);
5184	adapter->stats.colc += E1000_READ_REG(&adapter->hw, E1000_COLC);
5185	adapter->stats.dc += E1000_READ_REG(&adapter->hw, E1000_DC);
5186	adapter->stats.rlec += E1000_READ_REG(&adapter->hw, E1000_RLEC);
5187	adapter->stats.xonrxc += E1000_READ_REG(&adapter->hw, E1000_XONRXC);
5188	adapter->stats.xontxc += E1000_READ_REG(&adapter->hw, E1000_XONTXC);
5189	/*
5190	** For watchdog management we need to know if we have been
5191	** paused during the last interval, so capture that here.
5192	*/
5193	adapter->pause_frames = E1000_READ_REG(&adapter->hw, E1000_XOFFRXC);
5194	adapter->stats.xoffrxc += adapter->pause_frames;
5195	adapter->stats.xofftxc += E1000_READ_REG(&adapter->hw, E1000_XOFFTXC);
5196	adapter->stats.fcruc += E1000_READ_REG(&adapter->hw, E1000_FCRUC);
5197	adapter->stats.prc64 += E1000_READ_REG(&adapter->hw, E1000_PRC64);
5198	adapter->stats.prc127 += E1000_READ_REG(&adapter->hw, E1000_PRC127);
5199	adapter->stats.prc255 += E1000_READ_REG(&adapter->hw, E1000_PRC255);
5200	adapter->stats.prc511 += E1000_READ_REG(&adapter->hw, E1000_PRC511);
5201	adapter->stats.prc1023 += E1000_READ_REG(&adapter->hw, E1000_PRC1023);
5202	adapter->stats.prc1522 += E1000_READ_REG(&adapter->hw, E1000_PRC1522);
5203	adapter->stats.gprc += E1000_READ_REG(&adapter->hw, E1000_GPRC);
5204	adapter->stats.bprc += E1000_READ_REG(&adapter->hw, E1000_BPRC);
5205	adapter->stats.mprc += E1000_READ_REG(&adapter->hw, E1000_MPRC);
5206	adapter->stats.gptc += E1000_READ_REG(&adapter->hw, E1000_GPTC);
5207
5208	/* For the 64-bit byte counters the low dword must be read first. */
5209	/* Both registers clear on the read of the high dword */
5210
5211	adapter->stats.gorc += E1000_READ_REG(&adapter->hw, E1000_GORCL) +
5212	    ((u64)E1000_READ_REG(&adapter->hw, E1000_GORCH) << 32);
5213	adapter->stats.gotc += E1000_READ_REG(&adapter->hw, E1000_GOTCL) +
5214	    ((u64)E1000_READ_REG(&adapter->hw, E1000_GOTCH) << 32);
5215
5216	adapter->stats.rnbc += E1000_READ_REG(&adapter->hw, E1000_RNBC);
5217	adapter->stats.ruc += E1000_READ_REG(&adapter->hw, E1000_RUC);
5218	adapter->stats.rfc += E1000_READ_REG(&adapter->hw, E1000_RFC);
5219	adapter->stats.roc += E1000_READ_REG(&adapter->hw, E1000_ROC);
5220	adapter->stats.rjc += E1000_READ_REG(&adapter->hw, E1000_RJC);
5221
5222	adapter->stats.tor += E1000_READ_REG(&adapter->hw, E1000_TORH);
5223	adapter->stats.tot += E1000_READ_REG(&adapter->hw, E1000_TOTH);
5224
5225	adapter->stats.tpr += E1000_READ_REG(&adapter->hw, E1000_TPR);
5226	adapter->stats.tpt += E1000_READ_REG(&adapter->hw, E1000_TPT);
5227	adapter->stats.ptc64 += E1000_READ_REG(&adapter->hw, E1000_PTC64);
5228	adapter->stats.ptc127 += E1000_READ_REG(&adapter->hw, E1000_PTC127);
5229	adapter->stats.ptc255 += E1000_READ_REG(&adapter->hw, E1000_PTC255);
5230	adapter->stats.ptc511 += E1000_READ_REG(&adapter->hw, E1000_PTC511);
5231	adapter->stats.ptc1023 += E1000_READ_REG(&adapter->hw, E1000_PTC1023);
5232	adapter->stats.ptc1522 += E1000_READ_REG(&adapter->hw, E1000_PTC1522);
5233	adapter->stats.mptc += E1000_READ_REG(&adapter->hw, E1000_MPTC);
5234	adapter->stats.bptc += E1000_READ_REG(&adapter->hw, E1000_BPTC);
5235
5236	/* Interrupt Counts */
5237
5238	adapter->stats.iac += E1000_READ_REG(&adapter->hw, E1000_IAC);
5239	adapter->stats.icrxptc += E1000_READ_REG(&adapter->hw, E1000_ICRXPTC);
5240	adapter->stats.icrxatc += E1000_READ_REG(&adapter->hw, E1000_ICRXATC);
5241	adapter->stats.ictxptc += E1000_READ_REG(&adapter->hw, E1000_ICTXPTC);
5242	adapter->stats.ictxatc += E1000_READ_REG(&adapter->hw, E1000_ICTXATC);
5243	adapter->stats.ictxqec += E1000_READ_REG(&adapter->hw, E1000_ICTXQEC);
5244	adapter->stats.ictxqmtc += E1000_READ_REG(&adapter->hw, E1000_ICTXQMTC);
5245	adapter->stats.icrxdmtc += E1000_READ_REG(&adapter->hw, E1000_ICRXDMTC);
5246	adapter->stats.icrxoc += E1000_READ_REG(&adapter->hw, E1000_ICRXOC);
5247
5248	if (adapter->hw.mac.type >= e1000_82543) {
5249		adapter->stats.algnerrc +=
5250		E1000_READ_REG(&adapter->hw, E1000_ALGNERRC);
5251		adapter->stats.rxerrc +=
5252		E1000_READ_REG(&adapter->hw, E1000_RXERRC);
5253		adapter->stats.tncrs +=
5254		E1000_READ_REG(&adapter->hw, E1000_TNCRS);
5255		adapter->stats.cexterr +=
5256		E1000_READ_REG(&adapter->hw, E1000_CEXTERR);
5257		adapter->stats.tsctc +=
5258		E1000_READ_REG(&adapter->hw, E1000_TSCTC);
5259		adapter->stats.tsctfc +=
5260		E1000_READ_REG(&adapter->hw, E1000_TSCTFC);
5261	}
5262	ifp = adapter->ifp;
5263
5264	ifp->if_collisions = adapter->stats.colc;
5265
5266	/* Rx Errors */
5267	ifp->if_ierrors = adapter->dropped_pkts + adapter->stats.rxerrc +
5268	    adapter->stats.crcerrs + adapter->stats.algnerrc +
5269	    adapter->stats.ruc + adapter->stats.roc +
5270	    adapter->stats.mpc + adapter->stats.cexterr;
5271
5272	/* Tx Errors */
5273	ifp->if_oerrors = adapter->stats.ecol +
5274	    adapter->stats.latecol + adapter->watchdog_events;
5275}
5276
5277/* Export a single 32-bit register via a read-only sysctl. */
5278static int
5279em_sysctl_reg_handler(SYSCTL_HANDLER_ARGS)
5280{
5281	struct adapter *adapter;
5282	u_int val;
5283
5284#ifndef __HAIKU__
5285	adapter = oidp->oid_arg1;
5286	val = E1000_READ_REG(&adapter->hw, oidp->oid_arg2);
5287#endif
5288	return (sysctl_handle_int(oidp, &val, 0, req));
5289}
5290
5291/*
5292 * Add sysctl variables, one per statistic, to the system.
5293 */
5294static void
5295em_add_hw_stats(struct adapter *adapter)
5296{
5297	int i;
5298	device_t dev = adapter->dev;
5299
5300	struct tx_ring *txr = adapter->tx_rings;
5301	struct rx_ring *rxr = adapter->rx_rings;
5302
5303	struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev);
5304	struct sysctl_oid *tree = device_get_sysctl_tree(dev);
5305	struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree);
5306	struct e1000_hw_stats *stats = &adapter->stats;
5307
5308	struct sysctl_oid *stat_node, *queue_node, *int_node;
5309	struct sysctl_oid_list *stat_list, *queue_list, *int_list;
5310
5311#define QUEUE_NAME_LEN 32
5312	char namebuf[QUEUE_NAME_LEN];
5313
5314	/* Driver Statistics */
5315	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "link_irq",
5316			CTLFLAG_RD, &adapter->link_irq,
5317			"Link MSIX IRQ Handled");
5318	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "mbuf_alloc_fail",
5319			 CTLFLAG_RD, &adapter->mbuf_alloc_failed,
5320			 "Std mbuf failed");
5321	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "cluster_alloc_fail",
5322			 CTLFLAG_RD, &adapter->mbuf_cluster_failed,
5323			 "Std mbuf cluster failed");
5324	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "dropped",
5325			CTLFLAG_RD, &adapter->dropped_pkts,
5326			"Driver dropped packets");
5327	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_dma_fail",
5328			CTLFLAG_RD, &adapter->no_tx_dma_setup,
5329			"Driver tx dma failure in xmit");
5330	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_overruns",
5331			CTLFLAG_RD, &adapter->rx_overruns,
5332			"RX overruns");
5333	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "watchdog_timeouts",
5334			CTLFLAG_RD, &adapter->watchdog_events,
5335			"Watchdog timeouts");
5336
5337	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "device_control",
5338			CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_CTRL,
5339			em_sysctl_reg_handler, "IU",
5340			"Device Control Register");
5341	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rx_control",
5342			CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_RCTL,
5343			em_sysctl_reg_handler, "IU",
5344			"Receiver Control Register");
5345	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_high_water",
5346			CTLFLAG_RD, &adapter->hw.fc.high_water, 0,
5347			"Flow Control High Watermark");
5348	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_low_water",
5349			CTLFLAG_RD, &adapter->hw.fc.low_water, 0,
5350			"Flow Control Low Watermark");
5351
5352	for (i = 0; i < adapter->num_queues; i++, rxr++, txr++) {
5353		snprintf(namebuf, QUEUE_NAME_LEN, "queue%d", i);
5354		queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
5355					    CTLFLAG_RD, NULL, "Queue Name");
5356		queue_list = SYSCTL_CHILDREN(queue_node);
5357
5358		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_head",
5359				CTLTYPE_UINT | CTLFLAG_RD, adapter,
5360				E1000_TDH(txr->me),
5361				em_sysctl_reg_handler, "IU",
5362 				"Transmit Descriptor Head");
5363		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_tail",
5364				CTLTYPE_UINT | CTLFLAG_RD, adapter,
5365				E1000_TDT(txr->me),
5366				em_sysctl_reg_handler, "IU",
5367 				"Transmit Descriptor Tail");
5368		SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "tx_irq",
5369				CTLFLAG_RD, &txr->tx_irq,
5370				"Queue MSI-X Transmit Interrupts");
5371		SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "no_desc_avail",
5372				CTLFLAG_RD, &txr->no_desc_avail,
5373				"Queue No Descriptor Available");
5374
5375		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_head",
5376				CTLTYPE_UINT | CTLFLAG_RD, adapter,
5377				E1000_RDH(rxr->me),
5378				em_sysctl_reg_handler, "IU",
5379				"Receive Descriptor Head");
5380		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_tail",
5381				CTLTYPE_UINT | CTLFLAG_RD, adapter,
5382				E1000_RDT(rxr->me),
5383				em_sysctl_reg_handler, "IU",
5384				"Receive Descriptor Tail");
5385		SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "rx_irq",
5386				CTLFLAG_RD, &rxr->rx_irq,
5387				"Queue MSI-X Receive Interrupts");
5388	}
5389
5390	/* MAC stats get their own sub node */
5391
5392	stat_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "mac_stats",
5393				    CTLFLAG_RD, NULL, "Statistics");
5394	stat_list = SYSCTL_CHILDREN(stat_node);
5395
5396	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "excess_coll",
5397			CTLFLAG_RD, &stats->ecol,
5398			"Excessive collisions");
5399	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "single_coll",
5400			CTLFLAG_RD, &stats->scc,
5401			"Single collisions");
5402	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "multiple_coll",
5403			CTLFLAG_RD, &stats->mcc,
5404			"Multiple collisions");
5405	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "late_coll",
5406			CTLFLAG_RD, &stats->latecol,
5407			"Late collisions");
5408	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "collision_count",
5409			CTLFLAG_RD, &stats->colc,
5410			"Collision Count");
5411	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "symbol_errors",
5412			CTLFLAG_RD, &adapter->stats.symerrs,
5413			"Symbol Errors");
5414	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "sequence_errors",
5415			CTLFLAG_RD, &adapter->stats.sec,
5416			"Sequence Errors");
5417	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "defer_count",
5418			CTLFLAG_RD, &adapter->stats.dc,
5419			"Defer Count");
5420	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "missed_packets",
5421			CTLFLAG_RD, &adapter->stats.mpc,
5422			"Missed Packets");
5423	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_no_buff",
5424			CTLFLAG_RD, &adapter->stats.rnbc,
5425			"Receive No Buffers");
5426	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_undersize",
5427			CTLFLAG_RD, &adapter->stats.ruc,
5428			"Receive Undersize");
5429	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_fragmented",
5430			CTLFLAG_RD, &adapter->stats.rfc,
5431			"Fragmented Packets Received ");
5432	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_oversize",
5433			CTLFLAG_RD, &adapter->stats.roc,
5434			"Oversized Packets Received");
5435	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_jabber",
5436			CTLFLAG_RD, &adapter->stats.rjc,
5437			"Recevied Jabber");
5438	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_errs",
5439			CTLFLAG_RD, &adapter->stats.rxerrc,
5440			"Receive Errors");
5441	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "crc_errs",
5442			CTLFLAG_RD, &adapter->stats.crcerrs,
5443			"CRC errors");
5444	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "alignment_errs",
5445			CTLFLAG_RD, &adapter->stats.algnerrc,
5446			"Alignment Errors");
5447	/* On 82575 these are collision counts */
5448	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "coll_ext_errs",
5449			CTLFLAG_RD, &adapter->stats.cexterr,
5450			"Collision/Carrier extension errors");
5451	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_recvd",
5452			CTLFLAG_RD, &adapter->stats.xonrxc,
5453			"XON Received");
5454	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_txd",
5455			CTLFLAG_RD, &adapter->stats.xontxc,
5456			"XON Transmitted");
5457	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_recvd",
5458			CTLFLAG_RD, &adapter->stats.xoffrxc,
5459			"XOFF Received");
5460	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_txd",
5461			CTLFLAG_RD, &adapter->stats.xofftxc,
5462			"XOFF Transmitted");
5463
5464	/* Packet Reception Stats */
5465	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_recvd",
5466			CTLFLAG_RD, &adapter->stats.tpr,
5467			"Total Packets Received ");
5468	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd",
5469			CTLFLAG_RD, &adapter->stats.gprc,
5470			"Good Packets Received");
5471	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_recvd",
5472			CTLFLAG_RD, &adapter->stats.bprc,
5473			"Broadcast Packets Received");
5474	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd",
5475			CTLFLAG_RD, &adapter->stats.mprc,
5476			"Multicast Packets Received");
5477	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_64",
5478			CTLFLAG_RD, &adapter->stats.prc64,
5479			"64 byte frames received ");
5480	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_65_127",
5481			CTLFLAG_RD, &adapter->stats.prc127,
5482			"65-127 byte frames received");
5483	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_128_255",
5484			CTLFLAG_RD, &adapter->stats.prc255,
5485			"128-255 byte frames received");
5486	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_256_511",
5487			CTLFLAG_RD, &adapter->stats.prc511,
5488			"256-511 byte frames received");
5489	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_512_1023",
5490			CTLFLAG_RD, &adapter->stats.prc1023,
5491			"512-1023 byte frames received");
5492	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_1024_1522",
5493			CTLFLAG_RD, &adapter->stats.prc1522,
5494			"1023-1522 byte frames received");
5495 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd",
5496 			CTLFLAG_RD, &adapter->stats.gorc,
5497 			"Good Octets Received");
5498
5499	/* Packet Transmission Stats */
5500 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_txd",
5501 			CTLFLAG_RD, &adapter->stats.gotc,
5502 			"Good Octets Transmitted");
5503	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_txd",
5504			CTLFLAG_RD, &adapter->stats.tpt,
5505			"Total Packets Transmitted");
5506	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
5507			CTLFLAG_RD, &adapter->stats.gptc,
5508			"Good Packets Transmitted");
5509	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_txd",
5510			CTLFLAG_RD, &adapter->stats.bptc,
5511			"Broadcast Packets Transmitted");
5512	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_txd",
5513			CTLFLAG_RD, &adapter->stats.mptc,
5514			"Multicast Packets Transmitted");
5515	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_64",
5516			CTLFLAG_RD, &adapter->stats.ptc64,
5517			"64 byte frames transmitted ");
5518	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_65_127",
5519			CTLFLAG_RD, &adapter->stats.ptc127,
5520			"65-127 byte frames transmitted");
5521	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_128_255",
5522			CTLFLAG_RD, &adapter->stats.ptc255,
5523			"128-255 byte frames transmitted");
5524	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_256_511",
5525			CTLFLAG_RD, &adapter->stats.ptc511,
5526			"256-511 byte frames transmitted");
5527	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_512_1023",
5528			CTLFLAG_RD, &adapter->stats.ptc1023,
5529			"512-1023 byte frames transmitted");
5530	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_1024_1522",
5531			CTLFLAG_RD, &adapter->stats.ptc1522,
5532			"1024-1522 byte frames transmitted");
5533	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tso_txd",
5534			CTLFLAG_RD, &adapter->stats.tsctc,
5535			"TSO Contexts Transmitted");
5536	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tso_ctx_fail",
5537			CTLFLAG_RD, &adapter->stats.tsctfc,
5538			"TSO Contexts Failed");
5539
5540
5541	/* Interrupt Stats */
5542
5543	int_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "interrupts",
5544				    CTLFLAG_RD, NULL, "Interrupt Statistics");
5545	int_list = SYSCTL_CHILDREN(int_node);
5546
5547	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "asserts",
5548			CTLFLAG_RD, &adapter->stats.iac,
5549			"Interrupt Assertion Count");
5550
5551	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_pkt_timer",
5552			CTLFLAG_RD, &adapter->stats.icrxptc,
5553			"Interrupt Cause Rx Pkt Timer Expire Count");
5554
5555	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_abs_timer",
5556			CTLFLAG_RD, &adapter->stats.icrxatc,
5557			"Interrupt Cause Rx Abs Timer Expire Count");
5558
5559	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_pkt_timer",
5560			CTLFLAG_RD, &adapter->stats.ictxptc,
5561			"Interrupt Cause Tx Pkt Timer Expire Count");
5562
5563	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_abs_timer",
5564			CTLFLAG_RD, &adapter->stats.ictxatc,
5565			"Interrupt Cause Tx Abs Timer Expire Count");
5566
5567	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_queue_empty",
5568			CTLFLAG_RD, &adapter->stats.ictxqec,
5569			"Interrupt Cause Tx Queue Empty Count");
5570
5571	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_queue_min_thresh",
5572			CTLFLAG_RD, &adapter->stats.ictxqmtc,
5573			"Interrupt Cause Tx Queue Min Thresh Count");
5574
5575	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_desc_min_thresh",
5576			CTLFLAG_RD, &adapter->stats.icrxdmtc,
5577			"Interrupt Cause Rx Desc Min Thresh Count");
5578
5579	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_overrun",
5580			CTLFLAG_RD, &adapter->stats.icrxoc,
5581			"Interrupt Cause Receiver Overrun Count");
5582}
5583
5584/**********************************************************************
5585 *
5586 *  This routine provides a way to dump out the adapter eeprom,
5587 *  often a useful debug/service tool. This only dumps the first
5588 *  32 words, stuff that matters is in that extent.
5589 *
5590 **********************************************************************/
5591static int
5592em_sysctl_nvm_info(SYSCTL_HANDLER_ARGS)
5593{
5594	struct adapter *adapter = (struct adapter *)arg1;
5595	int error;
5596	int result;
5597
5598	result = -1;
5599	error = sysctl_handle_int(oidp, &result, 0, req);
5600
5601	if (error || !req->newptr)
5602		return (error);
5603
5604	/*
5605	 * This value will cause a hex dump of the
5606	 * first 32 16-bit words of the EEPROM to
5607	 * the screen.
5608	 */
5609	if (result == 1)
5610		em_print_nvm_info(adapter);
5611
5612	return (error);
5613}
5614
5615static void
5616em_print_nvm_info(struct adapter *adapter)
5617{
5618	u16	eeprom_data;
5619	int	i, j, row = 0;
5620
5621	/* Its a bit crude, but it gets the job done */
5622	printf("\nInterface EEPROM Dump:\n");
5623	printf("Offset\n0x0000  ");
5624	for (i = 0, j = 0; i < 32; i++, j++) {
5625		if (j == 8) { /* Make the offset block */
5626			j = 0; ++row;
5627			printf("\n0x00%x0  ",row);
5628		}
5629		e1000_read_nvm(&adapter->hw, i, 1, &eeprom_data);
5630		printf("%04x ", eeprom_data);
5631	}
5632	printf("\n");
5633}
5634
5635static int
5636em_sysctl_int_delay(SYSCTL_HANDLER_ARGS)
5637{
5638	struct em_int_delay_info *info;
5639	struct adapter *adapter;
5640	u32 regval;
5641	int error, usecs, ticks;
5642
5643	info = (struct em_int_delay_info *)arg1;
5644	usecs = info->value;
5645	error = sysctl_handle_int(oidp, &usecs, 0, req);
5646	if (error != 0 || req->newptr == NULL)
5647		return (error);
5648	if (usecs < 0 || usecs > EM_TICKS_TO_USECS(65535))
5649		return (EINVAL);
5650	info->value = usecs;
5651	ticks = EM_USECS_TO_TICKS(usecs);
5652
5653	adapter = info->adapter;
5654
5655	EM_CORE_LOCK(adapter);
5656	regval = E1000_READ_OFFSET(&adapter->hw, info->offset);
5657	regval = (regval & ~0xffff) | (ticks & 0xffff);
5658	/* Handle a few special cases. */
5659	switch (info->offset) {
5660	case E1000_RDTR:
5661		break;
5662	case E1000_TIDV:
5663		if (ticks == 0) {
5664			adapter->txd_cmd &= ~E1000_TXD_CMD_IDE;
5665			/* Don't write 0 into the TIDV register. */
5666			regval++;
5667		} else
5668			adapter->txd_cmd |= E1000_TXD_CMD_IDE;
5669		break;
5670	}
5671	E1000_WRITE_OFFSET(&adapter->hw, info->offset, regval);
5672	EM_CORE_UNLOCK(adapter);
5673	return (0);
5674}
5675
5676static void
5677em_add_int_delay_sysctl(struct adapter *adapter, const char *name,
5678	const char *description, struct em_int_delay_info *info,
5679	int offset, int value)
5680{
5681	info->adapter = adapter;
5682	info->offset = offset;
5683	info->value = value;
5684	SYSCTL_ADD_PROC(device_get_sysctl_ctx(adapter->dev),
5685	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
5686	    OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW,
5687	    info, 0, em_sysctl_int_delay, "I", description);
5688}
5689
5690static void
5691em_set_sysctl_value(struct adapter *adapter, const char *name,
5692	const char *description, int *limit, int value)
5693{
5694	*limit = value;
5695	SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
5696	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
5697	    OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW, limit, value, description);
5698}
5699
5700
5701/*
5702** Set flow control using sysctl:
5703** Flow control values:
5704**      0 - off
5705**      1 - rx pause
5706**      2 - tx pause
5707**      3 - full
5708*/
5709static int
5710em_set_flowcntl(SYSCTL_HANDLER_ARGS)
5711{
5712        int		error;
5713	static int	input = 3; /* default is full */
5714        struct adapter	*adapter = (struct adapter *) arg1;
5715
5716        error = sysctl_handle_int(oidp, &input, 0, req);
5717
5718        if ((error) || (req->newptr == NULL))
5719                return (error);
5720
5721	if (input == adapter->fc) /* no change? */
5722		return (error);
5723
5724        switch (input) {
5725                case e1000_fc_rx_pause:
5726                case e1000_fc_tx_pause:
5727                case e1000_fc_full:
5728                case e1000_fc_none:
5729                        adapter->hw.fc.requested_mode = input;
5730			adapter->fc = input;
5731                        break;
5732                default:
5733			/* Do nothing */
5734			return (error);
5735        }
5736
5737        adapter->hw.fc.current_mode = adapter->hw.fc.requested_mode;
5738        e1000_force_mac_fc(&adapter->hw);
5739        return (error);
5740}
5741
5742/*
5743** Manage Energy Efficient Ethernet:
5744** Control values:
5745**     0/1 - enabled/disabled
5746*/
5747static int
5748em_sysctl_eee(SYSCTL_HANDLER_ARGS)
5749{
5750       struct adapter *adapter = (struct adapter *) arg1;
5751       int             error, value;
5752
5753       value = adapter->hw.dev_spec.ich8lan.eee_disable;
5754       error = sysctl_handle_int(oidp, &value, 0, req);
5755       if (error || req->newptr == NULL)
5756               return (error);
5757       EM_CORE_LOCK(adapter);
5758       adapter->hw.dev_spec.ich8lan.eee_disable = (value != 0);
5759       em_init_locked(adapter);
5760       EM_CORE_UNLOCK(adapter);
5761       return (0);
5762}
5763
5764static int
5765em_sysctl_debug_info(SYSCTL_HANDLER_ARGS)
5766{
5767	struct adapter *adapter;
5768	int error;
5769	int result;
5770
5771	result = -1;
5772	error = sysctl_handle_int(oidp, &result, 0, req);
5773
5774	if (error || !req->newptr)
5775		return (error);
5776
5777	if (result == 1) {
5778		adapter = (struct adapter *)arg1;
5779		em_print_debug_info(adapter);
5780        }
5781
5782	return (error);
5783}
5784
5785/*
5786** This routine is meant to be fluid, add whatever is
5787** needed for debugging a problem.  -jfv
5788*/
5789static void
5790em_print_debug_info(struct adapter *adapter)
5791{
5792	device_t dev = adapter->dev;
5793	struct tx_ring *txr = adapter->tx_rings;
5794	struct rx_ring *rxr = adapter->rx_rings;
5795
5796	if (adapter->ifp->if_drv_flags & IFF_DRV_RUNNING)
5797		printf("Interface is RUNNING ");
5798	else
5799		printf("Interface is NOT RUNNING\n");
5800
5801	if (adapter->ifp->if_drv_flags & IFF_DRV_OACTIVE)
5802		printf("and INACTIVE\n");
5803	else
5804		printf("and ACTIVE\n");
5805
5806	device_printf(dev, "hw tdh = %d, hw tdt = %d\n",
5807	    E1000_READ_REG(&adapter->hw, E1000_TDH(0)),
5808	    E1000_READ_REG(&adapter->hw, E1000_TDT(0)));
5809	device_printf(dev, "hw rdh = %d, hw rdt = %d\n",
5810	    E1000_READ_REG(&adapter->hw, E1000_RDH(0)),
5811	    E1000_READ_REG(&adapter->hw, E1000_RDT(0)));
5812	device_printf(dev, "Tx Queue Status = %d\n", txr->queue_status);
5813	device_printf(dev, "TX descriptors avail = %d\n",
5814	    txr->tx_avail);
5815	device_printf(dev, "Tx Descriptors avail failure = %ld\n",
5816	    txr->no_desc_avail);
5817	device_printf(dev, "RX discarded packets = %ld\n",
5818	    rxr->rx_discarded);
5819	device_printf(dev, "RX Next to Check = %d\n", rxr->next_to_check);
5820	device_printf(dev, "RX Next to Refresh = %d\n", rxr->next_to_refresh);
5821}
5822