/******************************************************************************

  Copyright (c) 2001-2010, Intel Corporation
  All rights reserved.

  Redistribution and use in source and binary forms, with or without
  modification, are permitted provided that the following conditions are met:

   1. Redistributions of source code must retain the above copyright notice,
      this list of conditions and the following disclaimer.

   2. Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.

   3. Neither the name of the Intel Corporation nor the names of its
      contributors may be used to endorse or promote products derived from
      this software without specific prior written permission.

  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  POSSIBILITY OF SUCH DAMAGE.

******************************************************************************/
/*$FreeBSD: head/sys/dev/e1000/if_em.c 214441 2010-10-28 00:16:54Z jfv $*/

#ifdef HAVE_KERNEL_OPTION_HEADERS
#include "opt_device_polling.h"
#include "opt_inet.h"
#endif

#include <sys/param.h>
#include <sys/systm.h>
#if __FreeBSD_version >= 800000
#include <sys/buf_ring.h>
#endif
#include <sys/bus.h>
#include <sys/endian.h>
#include <sys/kernel.h>
#include <sys/kthread.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/module.h>
#include <sys/rman.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/sysctl.h>
#include <sys/taskqueue.h>
#include <sys/eventhandler.h>
#include <machine/bus.h>
#include <machine/resource.h>

#include <net/bpf.h>
#include <net/ethernet.h>
#include <net/if.h>
#include <net/if_arp.h>
#include <net/if_dl.h>
#include <net/if_media.h>

#include <net/if_types.h>
#include <net/if_vlan_var.h>

#include <netinet/in_systm.h>
#include <netinet/in.h>
#include <netinet/if_ether.h>
#include <netinet/ip.h>
#include <netinet/ip6.h>
#include <netinet/tcp.h>
#include <netinet/udp.h>

#include <machine/in_cksum.h>
#include <dev/led/led.h>
#include <dev/pci/pcivar.h>
#include <dev/pci/pcireg.h>

#include "e1000_api.h"
#include "e1000_82571.h"
#include "if_em.h"

/*********************************************************************
 *  Set this to one to display debug statistics
 *********************************************************************/
int	em_display_debug_stats = 0;

/*********************************************************************
 *  Driver version:
 *********************************************************************/
char em_driver_version[] = "7.1.7";

/*********************************************************************
 *  PCI Device ID Table
 *
 *  Used by probe to select devices to load on
 *  Last field stores an index into e1000_strings
 *  Last entry must be all 0s
 *
 *  { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
 *********************************************************************/

static em_vendor_info_t em_vendor_info_array[] =
{
	/* Intel(R) PRO/1000 Network Connection */
	{ 0x8086, E1000_DEV_ID_82571EB_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_SERDES_DUAL,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_SERDES_QUAD,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER_LP,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_FIBER,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571PT_QUAD_COPPER,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82572EI_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82572EI_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82572EI_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82572EI,		PCI_ANY_ID, PCI_ANY_ID, 0},

	{ 0x8086, E1000_DEV_ID_82573E,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82573E_IAMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82573L,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82583V,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_SPT,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_SPT,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_DPT,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_DPT,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IGP_M_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IGP_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IGP_C,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IFE,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IFE_GT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IFE_G,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IGP_M,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_82567V_3,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IGP_M_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IGP_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IGP_C,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IGP_M,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IGP_M_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IFE,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IFE_GT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IFE_G,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_BM,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82574L,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82574LA,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_R_BM_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_R_BM_LF,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_R_BM_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_D_BM_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_D_BM_LF,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_D_BM_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_M_HV_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_M_HV_LC,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_D_HV_DM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_D_HV_DC,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH2_LV_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH2_LV_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
	/* required last entry */
	{ 0, 0, 0, 0, 0}
};

/*********************************************************************
 *  Table of branding strings for all supported NICs.
 *********************************************************************/

static char *em_strings[] = {
	"Intel(R) PRO/1000 Network Connection"
};

/*********************************************************************
 *  Function prototypes
 *********************************************************************/
static int	em_probe(device_t);
static int	em_attach(device_t);
static int	em_detach(device_t);
static int	em_shutdown(device_t);
static int	em_suspend(device_t);
static int	em_resume(device_t);
static void	em_start(struct ifnet *);
static void	em_start_locked(struct ifnet *, struct tx_ring *);
#ifdef EM_MULTIQUEUE
static int	em_mq_start(struct ifnet *, struct mbuf *);
static int	em_mq_start_locked(struct ifnet *,
		    struct tx_ring *, struct mbuf *);
static void	em_qflush(struct ifnet *);
#endif
static int	em_ioctl(struct ifnet *, u_long, caddr_t);
static void	em_init(void *);
static void	em_init_locked(struct adapter *);
static void	em_stop(void *);
static void	em_media_status(struct ifnet *, struct ifmediareq *);
static int	em_media_change(struct ifnet *);
static void	em_identify_hardware(struct adapter *);
static int	em_allocate_pci_resources(struct adapter *);
static int	em_allocate_legacy(struct adapter *);
static int	em_allocate_msix(struct adapter *);
static int	em_allocate_queues(struct adapter *);
static int	em_setup_msix(struct adapter *);
static void	em_free_pci_resources(struct adapter *);
static void	em_local_timer(void *);
static void	em_reset(struct adapter *);
static int	em_setup_interface(device_t, struct adapter *);

static void	em_setup_transmit_structures(struct adapter *);
static void	em_initialize_transmit_unit(struct adapter *);
static int	em_allocate_transmit_buffers(struct tx_ring *);
static void	em_free_transmit_structures(struct adapter *);
static void	em_free_transmit_buffers(struct tx_ring *);

static int	em_setup_receive_structures(struct adapter *);
static int	em_allocate_receive_buffers(struct rx_ring *);
static void	em_initialize_receive_unit(struct adapter *);
static void	em_free_receive_structures(struct adapter *);
static void	em_free_receive_buffers(struct rx_ring *);

static void	em_enable_intr(struct adapter *);
static void	em_disable_intr(struct adapter *);
static void	em_update_stats_counters(struct adapter *);
static void	em_add_hw_stats(struct adapter *adapter);
static bool	em_txeof(struct tx_ring *);
static bool	em_rxeof(struct rx_ring *, int, int *);
#ifndef __NO_STRICT_ALIGNMENT
static int	em_fixup_rx(struct rx_ring *);
#endif
static void	em_receive_checksum(struct e1000_rx_desc *, struct mbuf *);
static void	em_transmit_checksum_setup(struct tx_ring *, struct mbuf *, int,
		    struct ip *, u32 *, u32 *);
static void	em_tso_setup(struct tx_ring *, struct mbuf *, int, struct ip *,
		    struct tcphdr *, u32 *, u32 *);
static void	em_set_promisc(struct adapter *);
static void	em_disable_promisc(struct adapter *);
static void	em_set_multi(struct adapter *);
static void	em_update_link_status(struct adapter *);
static void	em_refresh_mbufs(struct rx_ring *, int);
static void	em_register_vlan(void *, struct ifnet *, u16);
static void	em_unregister_vlan(void *, struct ifnet *, u16);
static void	em_setup_vlan_hw_support(struct adapter *);
static int	em_xmit(struct tx_ring *, struct mbuf **);
static int	em_dma_malloc(struct adapter *, bus_size_t,
		    struct em_dma_alloc *, int);
static void	em_dma_free(struct adapter *, struct em_dma_alloc *);
static int	em_sysctl_nvm_info(SYSCTL_HANDLER_ARGS);
static void	em_print_nvm_info(struct adapter *);
static int	em_sysctl_debug_info(SYSCTL_HANDLER_ARGS);
static void	em_print_debug_info(struct adapter *);
static int	em_is_valid_ether_addr(u8 *);
static int	em_sysctl_int_delay(SYSCTL_HANDLER_ARGS);
static void	em_add_int_delay_sysctl(struct adapter *, const char *,
		    const char *, struct em_int_delay_info *, int, int);
/* Management and WOL Support */
static void	em_init_manageability(struct adapter *);
static void	em_release_manageability(struct adapter *);
static void	em_get_hw_control(struct adapter *);
static void	em_release_hw_control(struct adapter *);
static void	em_get_wakeup(device_t);
static void	em_enable_wakeup(device_t);
static int	em_enable_phy_wakeup(struct adapter *);
static void	em_led_func(void *, int);

static int	em_irq_fast(void *);

/* MSIX handlers */
static void	em_msix_tx(void *);
static void	em_msix_rx(void *);
static void	em_msix_link(void *);
static void	em_handle_tx(void *context, int pending);
static void	em_handle_rx(void *context, int pending);
static void	em_handle_link(void *context, int pending);

static void	em_add_rx_process_limit(struct adapter *, const char *,
		    const char *, int *, int);
static void	em_set_flow_cntrl(struct adapter *, const char *,
		    const char *, int *, int);

static __inline void em_rx_discard(struct rx_ring *, int);

#ifdef DEVICE_POLLING
static poll_handler_t em_poll;
#endif /* DEVICE_POLLING */
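
/*
 * DEVICE_POLLING is a kernel-config option ("options DEVICE_POLLING");
 * with a kernel built that way, polling is then toggled per interface,
 * e.g. "ifconfig em0 polling" (see polling(4)).  The command shown is
 * illustrative usage, not taken from this file.
 */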

/*********************************************************************
 *  FreeBSD Device Interface Entry Points
 *********************************************************************/

static device_method_t em_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe, em_probe),
	DEVMETHOD(device_attach, em_attach),
	DEVMETHOD(device_detach, em_detach),
	DEVMETHOD(device_shutdown, em_shutdown),
	DEVMETHOD(device_suspend, em_suspend),
	DEVMETHOD(device_resume, em_resume),
	{0, 0}
};

static driver_t em_driver = {
	"em", em_methods, sizeof(struct adapter),
};

devclass_t em_devclass;
DRIVER_MODULE(em, pci, em_driver, em_devclass, 0, 0);
MODULE_DEPEND(em, pci, 1, 1, 1);
MODULE_DEPEND(em, ether, 1, 1, 1);

/*********************************************************************
 *  Tunable default values.
 *********************************************************************/

#define EM_TICKS_TO_USECS(ticks)	((1024 * (ticks) + 500) / 1000)
#define EM_USECS_TO_TICKS(usecs)	((1000 * (usecs) + 512) / 1024)
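
/*
 * The interrupt-delay registers count in 1.024 usec (1024 ns) units,
 * hence the scale-and-round in the conversions above.  Worked example,
 * assuming EM_TIDV is 64 ticks:
 * EM_TICKS_TO_USECS(64) = (1024 * 64 + 500) / 1000 = 66 usec.
 */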
#define M_TSO_LEN			66

/* Allow common code without TSO */
#ifndef CSUM_TSO
#define CSUM_TSO	0
#endif

static int em_tx_int_delay_dflt = EM_TICKS_TO_USECS(EM_TIDV);
static int em_rx_int_delay_dflt = EM_TICKS_TO_USECS(EM_RDTR);
TUNABLE_INT("hw.em.tx_int_delay", &em_tx_int_delay_dflt);
TUNABLE_INT("hw.em.rx_int_delay", &em_rx_int_delay_dflt);

static int em_tx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_TADV);
static int em_rx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_RADV);
TUNABLE_INT("hw.em.tx_abs_int_delay", &em_tx_abs_int_delay_dflt);
TUNABLE_INT("hw.em.rx_abs_int_delay", &em_rx_abs_int_delay_dflt);

static int em_rxd = EM_DEFAULT_RXD;
static int em_txd = EM_DEFAULT_TXD;
TUNABLE_INT("hw.em.rxd", &em_rxd);
TUNABLE_INT("hw.em.txd", &em_txd);

static int em_smart_pwr_down = FALSE;
TUNABLE_INT("hw.em.smart_pwr_down", &em_smart_pwr_down);

/* Controls whether promiscuous also shows bad packets */
static int em_debug_sbp = FALSE;
TUNABLE_INT("hw.em.sbp", &em_debug_sbp);

static int em_enable_msix = TRUE;
TUNABLE_INT("hw.em.enable_msix", &em_enable_msix);

/* How many packets rxeof tries to clean at a time */
static int em_rx_process_limit = 100;
TUNABLE_INT("hw.em.rx_process_limit", &em_rx_process_limit);

/* Flow control setting - default to FULL */
static int em_fc_setting = e1000_fc_full;
TUNABLE_INT("hw.em.fc_setting", &em_fc_setting);
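
/*
 * All of the hw.em.* knobs above are boot-time tunables.  A sketch of
 * loader.conf(5) usage (values are illustrative, not recommendations):
 *
 *	hw.em.txd="1024"
 *	hw.em.rxd="1024"
 *	hw.em.rx_process_limit="200"
 */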

/* Global used in WOL setup with multiport cards */
static int global_quad_port_a = 0;

/*********************************************************************
 *  Device identification routine
 *
 *  em_probe determines whether the driver should be loaded on an
 *  adapter, based on the PCI vendor/device ID of that adapter.
 *
 *  return BUS_PROBE_DEFAULT on success, positive on failure
 *********************************************************************/

static int
em_probe(device_t dev)
{
	char		adapter_name[60];
	u16		pci_vendor_id = 0;
	u16		pci_device_id = 0;
	u16		pci_subvendor_id = 0;
	u16		pci_subdevice_id = 0;
	em_vendor_info_t *ent;

	INIT_DEBUGOUT("em_probe: begin");

	pci_vendor_id = pci_get_vendor(dev);
	if (pci_vendor_id != EM_VENDOR_ID)
		return (ENXIO);

	pci_device_id = pci_get_device(dev);
	pci_subvendor_id = pci_get_subvendor(dev);
	pci_subdevice_id = pci_get_subdevice(dev);

	ent = em_vendor_info_array;
	while (ent->vendor_id != 0) {
		if ((pci_vendor_id == ent->vendor_id) &&
		    (pci_device_id == ent->device_id) &&

		    ((pci_subvendor_id == ent->subvendor_id) ||
		    (ent->subvendor_id == PCI_ANY_ID)) &&

		    ((pci_subdevice_id == ent->subdevice_id) ||
		    (ent->subdevice_id == PCI_ANY_ID))) {
			sprintf(adapter_name, "%s %s",
				em_strings[ent->index],
				em_driver_version);
			device_set_desc_copy(dev, adapter_name);
			return (BUS_PROBE_DEFAULT);
		}
		ent++;
	}

	return (ENXIO);
}

/*********************************************************************
 *  Device initialization routine
 *
 *  The attach entry point is called when the driver is being loaded.
 *  This routine identifies the type of hardware, allocates all resources
 *  and initializes the hardware.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/

static int
em_attach(device_t dev)
{
	struct adapter	*adapter;
	int		error = 0;

	INIT_DEBUGOUT("em_attach: begin");

	adapter = device_get_softc(dev);
	adapter->dev = adapter->osdep.dev = dev;
	EM_CORE_LOCK_INIT(adapter, device_get_nameunit(dev));

	/* SYSCTL stuff */
	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
	    OID_AUTO, "nvm", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
	    em_sysctl_nvm_info, "I", "NVM Information");

	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
	    OID_AUTO, "debug", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
	    em_sysctl_debug_info, "I", "Debug Information");

	callout_init_mtx(&adapter->timer, &adapter->core_mtx, 0);

	/* Determine hardware and mac info */
	em_identify_hardware(adapter);

	/* Setup PCI resources */
	if (em_allocate_pci_resources(adapter)) {
		device_printf(dev, "Allocation of PCI resources failed\n");
		error = ENXIO;
		goto err_pci;
	}

	/*
	** For ICH8 and family we need to
	** map the flash memory, and this
	** must happen after the MAC is
	** identified
	*/
	if ((adapter->hw.mac.type == e1000_ich8lan) ||
	    (adapter->hw.mac.type == e1000_ich9lan) ||
	    (adapter->hw.mac.type == e1000_ich10lan) ||
	    (adapter->hw.mac.type == e1000_pchlan) ||
	    (adapter->hw.mac.type == e1000_pch2lan)) {
		int rid = EM_BAR_TYPE_FLASH;
		adapter->flash = bus_alloc_resource_any(dev,
		    SYS_RES_MEMORY, &rid, RF_ACTIVE);
		if (adapter->flash == NULL) {
			device_printf(dev, "Mapping of Flash failed\n");
			error = ENXIO;
			goto err_pci;
		}
		/* This is used in the shared code */
		adapter->hw.flash_address = (u8 *)adapter->flash;
		adapter->osdep.flash_bus_space_tag =
		    rman_get_bustag(adapter->flash);
		adapter->osdep.flash_bus_space_handle =
		    rman_get_bushandle(adapter->flash);
	}

	/* Do Shared Code initialization */
	if (e1000_setup_init_funcs(&adapter->hw, TRUE)) {
		device_printf(dev, "Setup of Shared code failed\n");
		error = ENXIO;
		goto err_pci;
	}

	e1000_get_bus_info(&adapter->hw);

	/* Set up some sysctls for the tunable interrupt delays */
	em_add_int_delay_sysctl(adapter, "rx_int_delay",
	    "receive interrupt delay in usecs", &adapter->rx_int_delay,
	    E1000_REGISTER(&adapter->hw, E1000_RDTR), em_rx_int_delay_dflt);
	em_add_int_delay_sysctl(adapter, "tx_int_delay",
	    "transmit interrupt delay in usecs", &adapter->tx_int_delay,
	    E1000_REGISTER(&adapter->hw, E1000_TIDV), em_tx_int_delay_dflt);
	em_add_int_delay_sysctl(adapter, "rx_abs_int_delay",
	    "receive interrupt delay limit in usecs",
	    &adapter->rx_abs_int_delay,
	    E1000_REGISTER(&adapter->hw, E1000_RADV),
	    em_rx_abs_int_delay_dflt);
	em_add_int_delay_sysctl(adapter, "tx_abs_int_delay",
	    "transmit interrupt delay limit in usecs",
	    &adapter->tx_abs_int_delay,
	    E1000_REGISTER(&adapter->hw, E1000_TADV),
	    em_tx_abs_int_delay_dflt);
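
	/*
	 * These hang off the device's sysctl tree, so they surface as
	 * dev.em.<unit>.rx_int_delay and friends and may be tuned at
	 * runtime, e.g. (illustrative): sysctl dev.em.0.rx_int_delay=32
	 */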

	/* Sysctl for limiting the amount of work done in the taskqueue */
	em_add_rx_process_limit(adapter, "rx_processing_limit",
	    "max number of rx packets to process", &adapter->rx_process_limit,
	    em_rx_process_limit);

	/* Sysctl for setting the interface flow control */
	em_set_flow_cntrl(adapter, "flow_control",
	    "flow control setting",
	    &adapter->fc_setting, em_fc_setting);

	/*
	 * Validate number of transmit and receive descriptors. It
	 * must not exceed hardware maximum, and must be a multiple
	 * of EM_DBA_ALIGN.
	 */
	if (((em_txd * sizeof(struct e1000_tx_desc)) % EM_DBA_ALIGN) != 0 ||
	    (em_txd > EM_MAX_TXD) || (em_txd < EM_MIN_TXD)) {
		device_printf(dev, "Using %d TX descriptors instead of %d!\n",
		    EM_DEFAULT_TXD, em_txd);
		adapter->num_tx_desc = EM_DEFAULT_TXD;
	} else
		adapter->num_tx_desc = em_txd;

	if (((em_rxd * sizeof(struct e1000_rx_desc)) % EM_DBA_ALIGN) != 0 ||
	    (em_rxd > EM_MAX_RXD) || (em_rxd < EM_MIN_RXD)) {
		device_printf(dev, "Using %d RX descriptors instead of %d!\n",
		    EM_DEFAULT_RXD, em_rxd);
		adapter->num_rx_desc = EM_DEFAULT_RXD;
	} else
		adapter->num_rx_desc = em_rxd;

	adapter->hw.mac.autoneg = DO_AUTO_NEG;
	adapter->hw.phy.autoneg_wait_to_complete = FALSE;
	adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;

	/* Copper options */
	if (adapter->hw.phy.media_type == e1000_media_type_copper) {
		adapter->hw.phy.mdix = AUTO_ALL_MODES;
		adapter->hw.phy.disable_polarity_correction = FALSE;
		adapter->hw.phy.ms_type = EM_MASTER_SLAVE;
	}

	/*
	 * Set the frame limits assuming
	 * standard ethernet sized frames.
	 */
	adapter->max_frame_size = ETHERMTU + ETHER_HDR_LEN + ETHERNET_FCS_SIZE;
	adapter->min_frame_size = ETH_ZLEN + ETHERNET_FCS_SIZE;

	/*
	 * This controls when hardware reports transmit completion
	 * status.
	 */
	adapter->hw.mac.report_tx_early = 1;

	/*
	** Get queue/ring memory
	*/
	if (em_allocate_queues(adapter)) {
		error = ENOMEM;
		goto err_pci;
	}

	/* Allocate multicast array memory. */
	adapter->mta = malloc(sizeof(u8) * ETH_ADDR_LEN *
	    MAX_NUM_MULTICAST_ADDRESSES, M_DEVBUF, M_NOWAIT);
	if (adapter->mta == NULL) {
		device_printf(dev, "Can not allocate multicast setup array\n");
		error = ENOMEM;
		goto err_late;
	}

	/* Check SOL/IDER usage */
	if (e1000_check_reset_block(&adapter->hw))
		device_printf(dev, "PHY reset is blocked"
		    " due to SOL/IDER session.\n");

	/*
	** Start from a known state; this is
	** important for reading the NVM and
	** MAC address out of it.
	*/
	e1000_reset_hw(&adapter->hw);

	/* Make sure we have a good EEPROM before we read from it */
	if (e1000_validate_nvm_checksum(&adapter->hw) < 0) {
		/*
		** Some PCI-E parts fail the first check due to
		** the link being in sleep state; call it again,
		** and if it fails a second time it's a real issue.
		*/
		if (e1000_validate_nvm_checksum(&adapter->hw) < 0) {
			device_printf(dev,
			    "The EEPROM Checksum Is Not Valid\n");
			error = EIO;
			goto err_late;
		}
	}

	/* Copy the permanent MAC address out of the EEPROM */
	if (e1000_read_mac_addr(&adapter->hw) < 0) {
		device_printf(dev, "EEPROM read error while reading MAC"
		    " address\n");
		error = EIO;
		goto err_late;
	}

	if (!em_is_valid_ether_addr(adapter->hw.mac.addr)) {
		device_printf(dev, "Invalid MAC address\n");
		error = EIO;
		goto err_late;
	}

	/*
	**  Do interrupt configuration
	*/
	if (adapter->msix > 1) /* Do MSIX */
		error = em_allocate_msix(adapter);
	else  /* MSI or Legacy */
		error = em_allocate_legacy(adapter);
	if (error)
		goto err_late;

	/*
	 * Get Wake-on-Lan and Management info for later use
	 */
	em_get_wakeup(dev);

	/* Setup OS specific network interface */
	if (em_setup_interface(dev, adapter) != 0)
		goto err_late;

	em_reset(adapter);

	/* Initialize statistics */
	em_update_stats_counters(adapter);

	adapter->hw.mac.get_link_status = 1;
	em_update_link_status(adapter);

	/* Register for VLAN events */
	adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
	    em_register_vlan, adapter, EVENTHANDLER_PRI_FIRST);
	adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
	    em_unregister_vlan, adapter, EVENTHANDLER_PRI_FIRST);

	em_add_hw_stats(adapter);

	/* Non-AMT based hardware can now take control from firmware */
	if (adapter->has_manage && !adapter->has_amt)
		em_get_hw_control(adapter);

	/* Tell the stack that the interface is not active */
	adapter->ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);

	adapter->led_dev = led_create(em_led_func, adapter,
	    device_get_nameunit(dev));

	INIT_DEBUGOUT("em_attach: end");

	return (0);

err_late:
	em_free_transmit_structures(adapter);
	em_free_receive_structures(adapter);
	em_release_hw_control(adapter);
	if (adapter->ifp != NULL)
		if_free(adapter->ifp);
err_pci:
	em_free_pci_resources(adapter);
	free(adapter->mta, M_DEVBUF);
	EM_CORE_LOCK_DESTROY(adapter);

	return (error);
}

/*********************************************************************
 *  Device removal routine
 *
 *  The detach entry point is called when the driver is being removed.
 *  This routine stops the adapter and deallocates all the resources
 *  that were allocated for driver operation.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/

static int
em_detach(device_t dev)
{
	struct adapter	*adapter = device_get_softc(dev);
	struct ifnet	*ifp = adapter->ifp;

	INIT_DEBUGOUT("em_detach: begin");

	/* Make sure VLANS are not using driver */
	if (adapter->ifp->if_vlantrunk != NULL) {
		device_printf(dev, "Vlan in use, detach first\n");
		return (EBUSY);
	}

#ifdef DEVICE_POLLING
	if (ifp->if_capenable & IFCAP_POLLING)
		ether_poll_deregister(ifp);
#endif

	if (adapter->led_dev != NULL)
		led_destroy(adapter->led_dev);

	EM_CORE_LOCK(adapter);
	adapter->in_detach = 1;
	em_stop(adapter);
	EM_CORE_UNLOCK(adapter);
	EM_CORE_LOCK_DESTROY(adapter);

	e1000_phy_hw_reset(&adapter->hw);

	em_release_manageability(adapter);
	em_release_hw_control(adapter);

	/* Unregister VLAN events */
	if (adapter->vlan_attach != NULL)
		EVENTHANDLER_DEREGISTER(vlan_config, adapter->vlan_attach);
	if (adapter->vlan_detach != NULL)
		EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach);

	ether_ifdetach(adapter->ifp);
	callout_drain(&adapter->timer);

	em_free_pci_resources(adapter);
	bus_generic_detach(dev);
	if_free(ifp);

	em_free_transmit_structures(adapter);
	em_free_receive_structures(adapter);

	em_release_hw_control(adapter);
	free(adapter->mta, M_DEVBUF);

	return (0);
}

/*********************************************************************
 *
 *  Shutdown entry point
 *
 **********************************************************************/

static int
em_shutdown(device_t dev)
{
	return em_suspend(dev);
}

/*
 * Suspend/resume device methods.
 */
static int
em_suspend(device_t dev)
{
	struct adapter *adapter = device_get_softc(dev);

	EM_CORE_LOCK(adapter);

	em_release_manageability(adapter);
	em_release_hw_control(adapter);
	em_enable_wakeup(dev);

	EM_CORE_UNLOCK(adapter);

	return bus_generic_suspend(dev);
}

static int
em_resume(device_t dev)
{
	struct adapter *adapter = device_get_softc(dev);
	struct ifnet *ifp = adapter->ifp;

	EM_CORE_LOCK(adapter);
	em_init_locked(adapter);
	em_init_manageability(adapter);
	EM_CORE_UNLOCK(adapter);
	em_start(ifp);

	return bus_generic_resume(dev);
}


/*********************************************************************
 *  Transmit entry point
 *
 *  em_start is called by the stack to initiate a transmit.
 *  The driver will remain in this routine as long as there are
 *  packets to transmit and transmit resources are available.
 *  If resources are not available, the stack is notified and
 *  the packet is requeued.
 **********************************************************************/

#ifdef EM_MULTIQUEUE
static int
em_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr, struct mbuf *m)
{
	struct adapter	*adapter = txr->adapter;
	struct mbuf	*next;
	int		err = 0, enq = 0;

	if ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) !=
	    IFF_DRV_RUNNING || adapter->link_active == 0) {
		if (m != NULL)
			err = drbr_enqueue(ifp, txr->br, m);
		return (err);
	}

	/* Call cleanup if number of TX descriptors low */
	if (txr->tx_avail <= EM_TX_CLEANUP_THRESHOLD)
		em_txeof(txr);

	enq = 0;
	if (m == NULL) {
		next = drbr_dequeue(ifp, txr->br);
	} else if (drbr_needs_enqueue(ifp, txr->br)) {
		if ((err = drbr_enqueue(ifp, txr->br, m)) != 0)
			return (err);
		next = drbr_dequeue(ifp, txr->br);
	} else
		next = m;

	/* Process the queue */
	while (next != NULL) {
		if ((err = em_xmit(txr, &next)) != 0) {
			if (next != NULL)
				err = drbr_enqueue(ifp, txr->br, next);
			break;
		}
		enq++;
		drbr_stats_update(ifp, next->m_pkthdr.len, next->m_flags);
		ETHER_BPF_MTAP(ifp, next);
		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
			break;
		if (txr->tx_avail < EM_MAX_SCATTER) {
			ifp->if_drv_flags |= IFF_DRV_OACTIVE;
			break;
		}
		next = drbr_dequeue(ifp, txr->br);
	}

	if (enq > 0) {
		/* Set the watchdog */
		txr->queue_status = EM_QUEUE_WORKING;
		txr->watchdog_time = ticks;
	}
	return (err);
}

/*
** Multiqueue capable stack interface
*/
static int
em_mq_start(struct ifnet *ifp, struct mbuf *m)
{
	struct adapter	*adapter = ifp->if_softc;
	struct tx_ring	*txr = adapter->tx_rings;
	int		error;

	if (EM_TX_TRYLOCK(txr)) {
		error = em_mq_start_locked(ifp, txr, m);
		EM_TX_UNLOCK(txr);
	} else
		error = drbr_enqueue(ifp, txr->br, m);

	return (error);
}

/*
** Flush all ring buffers
*/
static void
em_qflush(struct ifnet *ifp)
{
	struct adapter	*adapter = ifp->if_softc;
	struct tx_ring	*txr = adapter->tx_rings;
	struct mbuf	*m;

	for (int i = 0; i < adapter->num_queues; i++, txr++) {
		EM_TX_LOCK(txr);
		while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
			m_freem(m);
		EM_TX_UNLOCK(txr);
	}
	if_qflush(ifp);
}

#endif /* EM_MULTIQUEUE */
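
/*
 * EM_MULTIQUEUE is a compile-time knob, not a runtime tunable.  One way
 * to enable it (a sketch, assuming your build glue passes the flag
 * through) is in the module Makefile:
 *
 *	CFLAGS += -DEM_MULTIQUEUE
 */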

static void
em_start_locked(struct ifnet *ifp, struct tx_ring *txr)
{
	struct adapter	*adapter = ifp->if_softc;
	struct mbuf	*m_head;

	EM_TX_LOCK_ASSERT(txr);

	if ((ifp->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
	    IFF_DRV_RUNNING)
		return;

	if (!adapter->link_active)
		return;

	/* Call cleanup if number of TX descriptors low */
	if (txr->tx_avail <= EM_TX_CLEANUP_THRESHOLD)
		em_txeof(txr);

	while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
		if (txr->tx_avail < EM_MAX_SCATTER) {
			ifp->if_drv_flags |= IFF_DRV_OACTIVE;
			break;
		}
		IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
		if (m_head == NULL)
			break;
		/*
		 *  Encapsulation can modify our pointer, and/or make it
		 *  NULL on failure.  In that event, we can't requeue.
		 */
		if (em_xmit(txr, &m_head)) {
			if (m_head == NULL)
				break;
			ifp->if_drv_flags |= IFF_DRV_OACTIVE;
			IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
			break;
		}

		/* Send a copy of the frame to the BPF listener */
		ETHER_BPF_MTAP(ifp, m_head);

		/* Set timeout in case hardware has problems transmitting. */
		txr->watchdog_time = ticks;
		txr->queue_status = EM_QUEUE_WORKING;
	}

	return;
}

static void
em_start(struct ifnet *ifp)
{
	struct adapter	*adapter = ifp->if_softc;
	struct tx_ring	*txr = adapter->tx_rings;

	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
		EM_TX_LOCK(txr);
		em_start_locked(ifp, txr);
		EM_TX_UNLOCK(txr);
	}
	return;
}

/*********************************************************************
 *  Ioctl entry point
 *
 *  em_ioctl is called when the user wants to configure the
 *  interface.
 *
 *  return 0 on success, positive on failure
 **********************************************************************/

static int
em_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
{
	struct adapter	*adapter = ifp->if_softc;
	struct ifreq *ifr = (struct ifreq *)data;
#ifdef INET
	struct ifaddr *ifa = (struct ifaddr *)data;
#endif
	int error = 0;

	if (adapter->in_detach)
		return (error);

	switch (command) {
	case SIOCSIFADDR:
#ifdef INET
		if (ifa->ifa_addr->sa_family == AF_INET) {
			/*
			 * XXX
			 * Since resetting hardware takes a very long time
			 * and results in link renegotiation, we only
			 * initialize the hardware when it is absolutely
			 * required.
			 */
			ifp->if_flags |= IFF_UP;
			if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) {
				EM_CORE_LOCK(adapter);
				em_init_locked(adapter);
				EM_CORE_UNLOCK(adapter);
			}
			arp_ifinit(ifp, ifa);
		} else
#endif
			error = ether_ioctl(ifp, command, data);
		break;
	case SIOCSIFMTU:
	    {
		int max_frame_size;

		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)");

		EM_CORE_LOCK(adapter);
		switch (adapter->hw.mac.type) {
		case e1000_82571:
		case e1000_82572:
		case e1000_ich9lan:
		case e1000_ich10lan:
		case e1000_pch2lan:
		case e1000_82574:
		case e1000_80003es2lan:	/* 9K Jumbo Frame size */
			max_frame_size = 9234;
			break;
		case e1000_pchlan:
			max_frame_size = 4096;
			break;
			/* Adapters that do not support jumbo frames */
		case e1000_82583:
		case e1000_ich8lan:
			max_frame_size = ETHER_MAX_LEN;
			break;
		default:
			max_frame_size = MAX_JUMBO_FRAME_SIZE;
		}
		if (ifr->ifr_mtu > max_frame_size - ETHER_HDR_LEN -
		    ETHER_CRC_LEN) {
			EM_CORE_UNLOCK(adapter);
			error = EINVAL;
			break;
		}

		ifp->if_mtu = ifr->ifr_mtu;
		adapter->max_frame_size =
		    ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
		em_init_locked(adapter);
		EM_CORE_UNLOCK(adapter);
		break;
	    }
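	/*
	 * Worked example of the check above: on a 9234-byte part the
	 * largest accepted MTU is 9234 - ETHER_HDR_LEN (14) -
	 * ETHER_CRC_LEN (4) = 9216, so "ifconfig em0 mtu 9216" would be
	 * the jumbo-frame ceiling there (command shown for illustration).
	 */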
	case SIOCSIFFLAGS:
		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFFLAGS (Set Interface Flags)");
		EM_CORE_LOCK(adapter);
		if (ifp->if_flags & IFF_UP) {
			if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) {
				if ((ifp->if_flags ^ adapter->if_flags) &
				    (IFF_PROMISC | IFF_ALLMULTI)) {
					em_disable_promisc(adapter);
					em_set_promisc(adapter);
				}
			} else
				em_init_locked(adapter);
		} else
			if (ifp->if_drv_flags & IFF_DRV_RUNNING)
				em_stop(adapter);
		adapter->if_flags = ifp->if_flags;
		EM_CORE_UNLOCK(adapter);
		break;
	case SIOCADDMULTI:
	case SIOCDELMULTI:
		IOCTL_DEBUGOUT("ioctl rcv'd: SIOC(ADD|DEL)MULTI");
		if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
			EM_CORE_LOCK(adapter);
			em_disable_intr(adapter);
			em_set_multi(adapter);
#ifdef DEVICE_POLLING
			if (!(ifp->if_capenable & IFCAP_POLLING))
#endif
				em_enable_intr(adapter);
			EM_CORE_UNLOCK(adapter);
		}
		break;
	case SIOCSIFMEDIA:
		/*
		** As the speed/duplex settings are being
		** changed, we need to reset the PHY.
		*/
		adapter->hw.phy.reset_disable = FALSE;
		/* Check SOL/IDER usage */
		EM_CORE_LOCK(adapter);
		if (e1000_check_reset_block(&adapter->hw)) {
			EM_CORE_UNLOCK(adapter);
			device_printf(adapter->dev, "Media change is"
			    " blocked due to SOL/IDER session.\n");
			break;
		}
		EM_CORE_UNLOCK(adapter);
		/* falls thru */
	case SIOCGIFMEDIA:
		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCxIFMEDIA (Get/Set Interface Media)");
		error = ifmedia_ioctl(ifp, ifr, &adapter->media, command);
		break;
	case SIOCSIFCAP:
	    {
		int mask, reinit;

		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFCAP (Set Capabilities)");
		reinit = 0;
		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
#ifdef DEVICE_POLLING
		if (mask & IFCAP_POLLING) {
			if (ifr->ifr_reqcap & IFCAP_POLLING) {
				error = ether_poll_register(em_poll, ifp);
				if (error)
					return (error);
				EM_CORE_LOCK(adapter);
				em_disable_intr(adapter);
				ifp->if_capenable |= IFCAP_POLLING;
				EM_CORE_UNLOCK(adapter);
			} else {
				error = ether_poll_deregister(ifp);
				/* Enable interrupt even in error case */
				EM_CORE_LOCK(adapter);
				em_enable_intr(adapter);
				ifp->if_capenable &= ~IFCAP_POLLING;
				EM_CORE_UNLOCK(adapter);
			}
		}
#endif
		if (mask & IFCAP_HWCSUM) {
			ifp->if_capenable ^= IFCAP_HWCSUM;
			reinit = 1;
		}
		if (mask & IFCAP_TSO4) {
			ifp->if_capenable ^= IFCAP_TSO4;
			reinit = 1;
		}
		if (mask & IFCAP_VLAN_HWTAGGING) {
			ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
			reinit = 1;
		}
		if (mask & IFCAP_VLAN_HWFILTER) {
			ifp->if_capenable ^= IFCAP_VLAN_HWFILTER;
			reinit = 1;
		}
		if ((mask & IFCAP_WOL) &&
		    (ifp->if_capabilities & IFCAP_WOL) != 0) {
			if (mask & IFCAP_WOL_MCAST)
				ifp->if_capenable ^= IFCAP_WOL_MCAST;
			if (mask & IFCAP_WOL_MAGIC)
				ifp->if_capenable ^= IFCAP_WOL_MAGIC;
		}
		if (reinit && (ifp->if_drv_flags & IFF_DRV_RUNNING))
			em_init(adapter);
		VLAN_CAPABILITIES(ifp);
		break;
	    }

	default:
		error = ether_ioctl(ifp, command, data);
		break;
	}

	return (error);
}


/*********************************************************************
 *  Init entry point
 *
 *  This routine is used in two ways. It is used by the stack as
 *  init entry point in network interface structure. It is also used
 *  by the driver as a hw/sw initialization routine to get to a
 *  consistent state.
 *
 *  return 0 on success, positive on failure
 **********************************************************************/

static void
em_init_locked(struct adapter *adapter)
{
	struct ifnet	*ifp = adapter->ifp;
	device_t	dev = adapter->dev;
	u32		pba;

	INIT_DEBUGOUT("em_init: begin");

	EM_CORE_LOCK_ASSERT(adapter);

	em_disable_intr(adapter);
	callout_stop(&adapter->timer);

	/*
	 * Packet Buffer Allocation (PBA)
	 * Writing PBA sets the receive portion of the buffer;
	 * the remainder is used for the transmit buffer.
	 */
	switch (adapter->hw.mac.type) {
	/* Total Packet Buffer on these is 48K */
	case e1000_82571:
	case e1000_82572:
	case e1000_80003es2lan:
		pba = E1000_PBA_32K; /* 32K for Rx, 16K for Tx */
		break;
	case e1000_82573: /* 82573: Total Packet Buffer is 32K */
		pba = E1000_PBA_12K; /* 12K for Rx, 20K for Tx */
		break;
	case e1000_82574:
	case e1000_82583:
		pba = E1000_PBA_20K; /* 20K for Rx, 20K for Tx */
		break;
	case e1000_ich8lan:
		pba = E1000_PBA_8K;
		break;
	case e1000_ich9lan:
	case e1000_ich10lan:
	case e1000_pchlan:
		pba = E1000_PBA_10K;
		break;
	case e1000_pch2lan:
		pba = E1000_PBA_26K;
		break;
	default:
		if (adapter->max_frame_size > 8192)
			pba = E1000_PBA_40K; /* 40K for Rx, 24K for Tx */
		else
			pba = E1000_PBA_48K; /* 48K for Rx, 16K for Tx */
	}
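
	/*
	 * Example of the Rx/Tx split this produces: on the 48K parts
	 * above, E1000_PBA_32K assigns 32K to receive and leaves the
	 * remaining 48K - 32K = 16K for transmit.
	 */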

	INIT_DEBUGOUT1("em_init: pba=%dK", pba);
	E1000_WRITE_REG(&adapter->hw, E1000_PBA, pba);

	/* Get the latest mac address, user can use a LAA */
	bcopy(IF_LLADDR(adapter->ifp), adapter->hw.mac.addr,
	    ETHER_ADDR_LEN);

	/* Put the address into the Receive Address Array */
	e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);

	/*
	 * With the 82571 adapter, RAR[0] may be overwritten
	 * when the other port is reset.  We make a duplicate
	 * in RAR[14] for that eventuality; this assures
	 * the interface continues to function.
	 */
	if (adapter->hw.mac.type == e1000_82571) {
		e1000_set_laa_state_82571(&adapter->hw, TRUE);
		e1000_rar_set(&adapter->hw, adapter->hw.mac.addr,
		    E1000_RAR_ENTRIES - 1);
	}

	/* Initialize the hardware */
	em_reset(adapter);
	em_update_link_status(adapter);

	/* Setup VLAN support, basic and offload if available */
	E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);

	/* Set hardware offload abilities */
	ifp->if_hwassist = 0;
	if (ifp->if_capenable & IFCAP_TXCSUM)
		ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
	if (ifp->if_capenable & IFCAP_TSO4)
		ifp->if_hwassist |= CSUM_TSO;

	/* Configure for OS presence */
	em_init_manageability(adapter);

	/* Prepare transmit descriptors and buffers */
	em_setup_transmit_structures(adapter);
	em_initialize_transmit_unit(adapter);

	/* Setup Multicast table */
	em_set_multi(adapter);

	/*
	** Figure out the desired mbuf
	** pool for doing jumbos
	*/
	if (adapter->max_frame_size <= 2048)
		adapter->rx_mbuf_sz = MCLBYTES;
	else if (adapter->max_frame_size <= 4096)
		adapter->rx_mbuf_sz = MJUMPAGESIZE;
	else
		adapter->rx_mbuf_sz = MJUM9BYTES;

	/* Prepare receive descriptors and buffers */
	if (em_setup_receive_structures(adapter)) {
		device_printf(dev, "Could not setup receive structures\n");
		em_stop(adapter);
		return;
	}
	em_initialize_receive_unit(adapter);

	/* Use real VLAN Filter support? */
	if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) {
		if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
			/* Use real VLAN Filter support */
			em_setup_vlan_hw_support(adapter);
		else {
			u32 ctrl;
			ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
			ctrl |= E1000_CTRL_VME;
			E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
		}
	}

	/* Don't lose promiscuous settings */
	em_set_promisc(adapter);

	ifp->if_drv_flags |= IFF_DRV_RUNNING;
	ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;

	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
	e1000_clear_hw_cntrs_base_generic(&adapter->hw);

	/* MSI/X configuration for 82574 */
	if (adapter->hw.mac.type == e1000_82574) {
		int tmp;
		tmp = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
		tmp |= E1000_CTRL_EXT_PBA_CLR;
		E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, tmp);
		/* Set the IVAR - interrupt vector routing. */
		E1000_WRITE_REG(&adapter->hw, E1000_IVAR, adapter->ivars);
	}

#ifdef DEVICE_POLLING
	/*
	 * Only enable interrupts if we are not polling, make sure
	 * they are off otherwise.
	 */
	if (ifp->if_capenable & IFCAP_POLLING)
		em_disable_intr(adapter);
	else
#endif /* DEVICE_POLLING */
		em_enable_intr(adapter);

	/* AMT based hardware can now take control from firmware */
	if (adapter->has_manage && adapter->has_amt)
		em_get_hw_control(adapter);

	/* Don't reset the phy next time init gets called */
	adapter->hw.phy.reset_disable = TRUE;
}

static void
em_init(void *arg)
{
	struct adapter *adapter = arg;

	EM_CORE_LOCK(adapter);
	em_init_locked(adapter);
	EM_CORE_UNLOCK(adapter);
}


#ifdef DEVICE_POLLING
/*********************************************************************
 *
 *  Legacy polling routine: note this only works with single queue
 *
 *********************************************************************/
static int
em_poll(struct ifnet *ifp, enum poll_cmd cmd, int count)
{
	struct adapter *adapter = ifp->if_softc;
	struct tx_ring	*txr = adapter->tx_rings;
	struct rx_ring	*rxr = adapter->rx_rings;
	u32		reg_icr;
	int		rx_done;

	EM_CORE_LOCK(adapter);
	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
		EM_CORE_UNLOCK(adapter);
		return (0);
	}

	if (cmd == POLL_AND_CHECK_STATUS) {
		reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
		if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
			callout_stop(&adapter->timer);
			adapter->hw.mac.get_link_status = 1;
			em_update_link_status(adapter);
			callout_reset(&adapter->timer, hz,
			    em_local_timer, adapter);
		}
	}
	EM_CORE_UNLOCK(adapter);

	em_rxeof(rxr, count, &rx_done);

	EM_TX_LOCK(txr);
	em_txeof(txr);
#ifdef EM_MULTIQUEUE
	if (!drbr_empty(ifp, txr->br))
		em_mq_start_locked(ifp, txr, NULL);
#else
	if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
		em_start_locked(ifp, txr);
#endif
	EM_TX_UNLOCK(txr);

	return (rx_done);
}
#endif /* DEVICE_POLLING */


/*********************************************************************
 *
 *  Fast Legacy/MSI Combined Interrupt Service routine
 *
 *********************************************************************/
static int
em_irq_fast(void *arg)
{
	struct adapter	*adapter = arg;
	struct ifnet	*ifp;
	u32		reg_icr;

	ifp = adapter->ifp;

	reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
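	/* ICR is read-to-clear on these MACs, so this read also acks
	   the asserted interrupt causes. */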

	/* Hot eject?  */
	if (reg_icr == 0xffffffff)
		return FILTER_STRAY;

	/* Definitely not our interrupt.  */
	if (reg_icr == 0x0)
		return FILTER_STRAY;

	/*
	 * Starting with the 82571 chip, bit 31 should be used to
	 * determine whether the interrupt belongs to us.
	 */
	if (adapter->hw.mac.type >= e1000_82571 &&
	    (reg_icr & E1000_ICR_INT_ASSERTED) == 0)
		return FILTER_STRAY;

	em_disable_intr(adapter);
	taskqueue_enqueue(adapter->tq, &adapter->que_task);

	/* Link status change */
	if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
		adapter->hw.mac.get_link_status = 1;
		taskqueue_enqueue(taskqueue_fast, &adapter->link_task);
	}

	if (reg_icr & E1000_ICR_RXO)
		adapter->rx_overruns++;
	return FILTER_HANDLED;
}

/* Combined RX/TX handler, used by Legacy and MSI */
static void
em_handle_que(void *context, int pending)
{
	struct adapter	*adapter = context;
	struct ifnet	*ifp = adapter->ifp;
	struct tx_ring	*txr = adapter->tx_rings;
	struct rx_ring	*rxr = adapter->rx_rings;
	bool		more;

	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
		more = em_rxeof(rxr, adapter->rx_process_limit, NULL);

		EM_TX_LOCK(txr);
		em_txeof(txr);
#ifdef EM_MULTIQUEUE
		if (!drbr_empty(ifp, txr->br))
			em_mq_start_locked(ifp, txr, NULL);
#else
		if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
			em_start_locked(ifp, txr);
#endif
		em_txeof(txr);
		EM_TX_UNLOCK(txr);
		if (more) {
			taskqueue_enqueue(adapter->tq, &adapter->que_task);
			return;
		}
	}

	em_enable_intr(adapter);
	return;
}


/*********************************************************************
 *
 *  MSIX Interrupt Service Routines
 *
 **********************************************************************/
static void
em_msix_tx(void *arg)
{
	struct tx_ring *txr = arg;
	struct adapter *adapter = txr->adapter;
	bool		more;

	++txr->tx_irq;
	EM_TX_LOCK(txr);
	more = em_txeof(txr);
	EM_TX_UNLOCK(txr);
	if (more)
		taskqueue_enqueue(txr->tq, &txr->tx_task);
	else
		/* Reenable this interrupt */
		E1000_WRITE_REG(&adapter->hw, E1000_IMS, txr->ims);
	return;
}

/*********************************************************************
 *
 *  MSIX RX Interrupt Service routine
 *
 **********************************************************************/

static void
em_msix_rx(void *arg)
{
	struct rx_ring	*rxr = arg;
	struct adapter	*adapter = rxr->adapter;
	bool		more;

	++rxr->rx_irq;
	more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
	if (more)
		taskqueue_enqueue(rxr->tq, &rxr->rx_task);
	else
		/* Reenable this interrupt */
		E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxr->ims);
	return;
}

/*********************************************************************
 *
 *  MSIX Link Fast Interrupt Service routine
 *
 **********************************************************************/
static void
em_msix_link(void *arg)
{
	struct adapter	*adapter = arg;
	u32		reg_icr;

	++adapter->link_irq;
	reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);

	if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
		adapter->hw.mac.get_link_status = 1;
		em_handle_link(adapter, 0);
	} else
		E1000_WRITE_REG(&adapter->hw, E1000_IMS,
		    EM_MSIX_LINK | E1000_IMS_LSC);
	return;
}

static void
em_handle_rx(void *context, int pending)
{
	struct rx_ring	*rxr = context;
	struct adapter	*adapter = rxr->adapter;
	bool		more;

	more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
	if (more)
		taskqueue_enqueue(rxr->tq, &rxr->rx_task);
	else
		/* Reenable this interrupt */
		E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxr->ims);
}

static void
em_handle_tx(void *context, int pending)
{
	struct tx_ring	*txr = context;
	struct adapter	*adapter = txr->adapter;
	struct ifnet	*ifp = adapter->ifp;

	EM_TX_LOCK(txr);
	em_txeof(txr);
#ifdef EM_MULTIQUEUE
	if (!drbr_empty(ifp, txr->br))
		em_mq_start_locked(ifp, txr, NULL);
#else
	if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
		em_start_locked(ifp, txr);
#endif
	em_txeof(txr);
	E1000_WRITE_REG(&adapter->hw, E1000_IMS, txr->ims);
	EM_TX_UNLOCK(txr);
}

static void
em_handle_link(void *context, int pending)
{
	struct adapter	*adapter = context;
	struct ifnet *ifp = adapter->ifp;

	if (!(ifp->if_drv_flags & IFF_DRV_RUNNING))
		return;

	EM_CORE_LOCK(adapter);
	callout_stop(&adapter->timer);
	em_update_link_status(adapter);
	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
	E1000_WRITE_REG(&adapter->hw, E1000_IMS,
	    EM_MSIX_LINK | E1000_IMS_LSC);
	EM_CORE_UNLOCK(adapter);
}


/*********************************************************************
 *
 *  Media Ioctl callback
 *
 *  This routine is called whenever the user queries the status of
 *  the interface using ifconfig.
 *
 **********************************************************************/
static void
em_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
{
	struct adapter *adapter = ifp->if_softc;
	u_char fiber_type = IFM_1000_SX;

	INIT_DEBUGOUT("em_media_status: begin");

	EM_CORE_LOCK(adapter);
	em_update_link_status(adapter);

	ifmr->ifm_status = IFM_AVALID;
	ifmr->ifm_active = IFM_ETHER;

	if (!adapter->link_active) {
		EM_CORE_UNLOCK(adapter);
		return;
	}

	ifmr->ifm_status |= IFM_ACTIVE;

	if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
	    (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
		ifmr->ifm_active |= fiber_type | IFM_FDX;
	} else {
		switch (adapter->link_speed) {
		case 10:
			ifmr->ifm_active |= IFM_10_T;
			break;
		case 100:
			ifmr->ifm_active |= IFM_100_TX;
			break;
		case 1000:
			ifmr->ifm_active |= IFM_1000_T;
			break;
		}
		if (adapter->link_duplex == FULL_DUPLEX)
			ifmr->ifm_active |= IFM_FDX;
		else
			ifmr->ifm_active |= IFM_HDX;
	}
	EM_CORE_UNLOCK(adapter);
}

/*********************************************************************
 *
 *  Media Ioctl callback
 *
 *  This routine is called when the user changes speed/duplex using
 *  the media/mediaopt options with ifconfig.
 *
 **********************************************************************/
static int
em_media_change(struct ifnet *ifp)
{
	struct adapter *adapter = ifp->if_softc;
	struct ifmedia  *ifm = &adapter->media;

	INIT_DEBUGOUT("em_media_change: begin");

	if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
		return (EINVAL);

	EM_CORE_LOCK(adapter);
	switch (IFM_SUBTYPE(ifm->ifm_media)) {
	case IFM_AUTO:
		adapter->hw.mac.autoneg = DO_AUTO_NEG;
		adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
		break;
	case IFM_1000_LX:
	case IFM_1000_SX:
	case IFM_1000_T:
		adapter->hw.mac.autoneg = DO_AUTO_NEG;
		adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
		break;
	case IFM_100_TX:
		adapter->hw.mac.autoneg = FALSE;
		adapter->hw.phy.autoneg_advertised = 0;
		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
			adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_FULL;
		else
			adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_HALF;
		break;
	case IFM_10_T:
		adapter->hw.mac.autoneg = FALSE;
		adapter->hw.phy.autoneg_advertised = 0;
		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
			adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_FULL;
		else
			adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_HALF;
		break;
	default:
		device_printf(adapter->dev, "Unsupported media type\n");
	}

	em_init_locked(adapter);
	EM_CORE_UNLOCK(adapter);

	return (0);
}

/*********************************************************************
 *
 *  This routine maps the mbufs to tx descriptors.
 *
 *  return 0 on success, positive on failure
 **********************************************************************/

static int
em_xmit(struct tx_ring *txr, struct mbuf **m_headp)
{
	struct adapter		*adapter = txr->adapter;
	bus_dma_segment_t	segs[EM_MAX_SCATTER];
	bus_dmamap_t		map;
	struct em_buffer	*tx_buffer, *tx_buffer_mapped;
	struct e1000_tx_desc	*ctxd = NULL;
	struct mbuf		*m_head;
	struct ether_header	*eh;
	struct ip		*ip = NULL;
	struct tcphdr		*tp = NULL;
	u32			txd_upper, txd_lower, txd_used, txd_saved;
	int			ip_off, poff;
	int			nsegs, i, j, first, last = 0;
	int			error, do_tso, tso_desc = 0;

	m_head = *m_headp;
	txd_upper = txd_lower = txd_used = txd_saved = 0;
	do_tso = ((m_head->m_pkthdr.csum_flags & CSUM_TSO) != 0);
	ip_off = poff = 0;

1768	/*
1769	** When doing checksum offload, it is critical to
1770	** make sure the first mbuf holds more than just the header,
1771	** because the offload setup routine expects data to be present.
1772	*/
1773	if ((m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD) &&
1774	    (m_head->m_len < ETHER_HDR_LEN + sizeof(struct ip))) {
1775		m_head = m_pullup(m_head, ETHER_HDR_LEN + sizeof(struct ip));
1776		*m_headp = m_head;
1777		if (m_head == NULL)
1778			return (ENOBUFS);
1779	}
1780
1781	/*
1782	 * Intel recommends entire IP/TCP header length reside in a single
1783	 * buffer. If multiple descriptors are used to describe the IP and
1784	 * TCP header, each descriptor should describe one or more
1785	 * complete headers; descriptors referencing only parts of headers
1786	 * are not supported. If all layer headers are not coalesced into
1787	 * a single buffer, each buffer should not cross a 4KB boundary,
1788	 * or be larger than the maximum read request size.
1789	 * The controller also requires the IP/TCP header to be modified to
1790	 * make TSO work, so we first get a writable mbuf chain and then
1791	 * coalesce the ethernet/IP/TCP headers into a single buffer to meet
1792	 * the controller's requirement. This also simplifies IP/TCP/UDP
1793	 * checksum offloading, which has similar restrictions.
1794	 */
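	/*
	 * The m_pullup() sequence below therefore walks ethernet header ->
	 * (optional vlan header) -> IP header -> TCP/UDP header, growing
	 * the contiguous region at the front of the chain one layer at a
	 * time.
	 */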
1795	if (do_tso || m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD) {
1796		if (do_tso || (m_head->m_next != NULL &&
1797		    m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD)) {
1798			if (M_WRITABLE(*m_headp) == 0) {
1799				m_head = m_dup(*m_headp, M_DONTWAIT);
1800				m_freem(*m_headp);
1801				if (m_head == NULL) {
1802					*m_headp = NULL;
1803					return (ENOBUFS);
1804				}
1805				*m_headp = m_head;
1806			}
1807		}
1808		/*
1809		 * XXX
1810		 * Assume IPv4, we don't have TSO/checksum offload support
1811		 * for IPv6 yet.
1812		 */
1813		ip_off = sizeof(struct ether_header);
1814		m_head = m_pullup(m_head, ip_off);
1815		if (m_head == NULL) {
1816			*m_headp = NULL;
1817			return (ENOBUFS);
1818		}
1819		eh = mtod(m_head, struct ether_header *);
1820		if (eh->ether_type == htons(ETHERTYPE_VLAN)) {
1821			ip_off = sizeof(struct ether_vlan_header);
1822			m_head = m_pullup(m_head, ip_off);
1823			if (m_head == NULL) {
1824				*m_headp = NULL;
1825				return (ENOBUFS);
1826			}
1827		}
1828		m_head = m_pullup(m_head, ip_off + sizeof(struct ip));
1829		if (m_head == NULL) {
1830			*m_headp = NULL;
1831			return (ENOBUFS);
1832		}
1833		ip = (struct ip *)(mtod(m_head, char *) + ip_off);
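		/* ip_hl counts 32-bit words, so shifting left by 2 yields bytes */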
1834		poff = ip_off + (ip->ip_hl << 2);
1835		m_head = m_pullup(m_head, poff + sizeof(struct tcphdr));
1836		if (m_head == NULL) {
1837			*m_headp = NULL;
1838			return (ENOBUFS);
1839		}
1840		if (do_tso) {
1841			tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1842			/*
1843			 * TSO workaround:
1844			 *   pull 4 more bytes of payload data into the first mbuf.
1845			 */
1846			m_head = m_pullup(m_head, poff + (tp->th_off << 2) + 4);
1847			if (m_head == NULL) {
1848				*m_headp = NULL;
1849				return (ENOBUFS);
1850			}
1851			ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1852			ip->ip_len = 0;
1853			ip->ip_sum = 0;
1854			/*
1855			 * The TCP pseudo header checksum must not include the
1856			 * payload length, so the driver recomputes the checksum
1857			 * here to match what the hardware expects, as required
1858			 * by Microsoft's Large Send specification.
1859			 */
1860			tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1861			tp->th_sum = in_pseudo(ip->ip_src.s_addr,
1862			    ip->ip_dst.s_addr, htons(IPPROTO_TCP));
1863		} else if (m_head->m_pkthdr.csum_flags & CSUM_TCP) {
1864			tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1865			m_head = m_pullup(m_head, poff + (tp->th_off << 2));
1866			if (m_head == NULL) {
1867				*m_headp = NULL;
1868				return (ENOBUFS);
1869			}
1870			ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1871			tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1872		} else if (m_head->m_pkthdr.csum_flags & CSUM_UDP) {
1873			m_head = m_pullup(m_head, poff + sizeof(struct udphdr));
1874			if (m_head == NULL) {
1875				*m_headp = NULL;
1876				return (ENOBUFS);
1877			}
1878			ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1879		}
1880		*m_headp = m_head;
1881	}
1882
1883	/*
1884	 * Map the packet for DMA
1885	 *
1886	 * Capture the first descriptor index,
1887	 * this descriptor will have the index
1888	 * of the EOP which is the only one that
1889	 * now gets a DONE bit writeback.
1890	 */
1891	first = txr->next_avail_desc;
1892	tx_buffer = &txr->tx_buffers[first];
1893	tx_buffer_mapped = tx_buffer;
1894	map = tx_buffer->map;
1895
1896	error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
1897	    *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
1898
1899	/*
1900	 * There are two types of errors we can (try) to handle:
1901	 * - EFBIG means the mbuf chain was too long and bus_dma ran
1902	 *   out of segments.  Defragment the mbuf chain and try again.
1903	 * - ENOMEM means bus_dma could not obtain enough bounce buffers
1904	 *   at this point in time.  Defer sending and try again later.
1905	 * All other errors, in particular EINVAL, are fatal and prevent the
1906	 * mbuf chain from ever going through.  Drop it and report error.
1907	 */
1908	if (error == EFBIG) {
1909		struct mbuf *m;
1910
1911		m = m_defrag(*m_headp, M_DONTWAIT);
1912		if (m == NULL) {
1913			adapter->mbuf_alloc_failed++;
1914			m_freem(*m_headp);
1915			*m_headp = NULL;
1916			return (ENOBUFS);
1917		}
1918		*m_headp = m;
1919
1920		/* Try it again */
1921		error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
1922		    *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
1923
1924		if (error) {
1925			adapter->no_tx_dma_setup++;
1926			m_freem(*m_headp);
1927			*m_headp = NULL;
1928			return (error);
1929		}
1930	} else if (error != 0) {
1931		adapter->no_tx_dma_setup++;
1932		return (error);
1933	}
1934
1935	/*
1936	 * TSO Hardware workaround, if this packet is not
1937	 * TSO, and is only a single descriptor long, and
1938	 * it follows a TSO burst, then we need to add a
1939	 * sentinel descriptor to prevent premature writeback.
1940	 */
1941	if ((do_tso == 0) && (txr->tx_tso == TRUE)) {
1942		if (nsegs == 1)
1943			tso_desc = TRUE;
1944		txr->tx_tso = FALSE;
1945	}
1946
1947	if (nsegs > (txr->tx_avail - 2)) {
1948		txr->no_desc_avail++;
1949		bus_dmamap_unload(txr->txtag, map);
1950		return (ENOBUFS);
1951	}
1952	m_head = *m_headp;
1953
1954	/* Do hardware assists */
1955	if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
1956		em_tso_setup(txr, m_head, ip_off, ip, tp,
1957		    &txd_upper, &txd_lower);
1958		/* we need to make a final sentinel transmit desc */
1959		tso_desc = TRUE;
1960	} else if (m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD)
1961		em_transmit_checksum_setup(txr, m_head,
1962		    ip_off, ip, &txd_upper, &txd_lower);
1963
1964	i = txr->next_avail_desc;
1965
1966	/* Set up our transmit descriptors */
1967	for (j = 0; j < nsegs; j++) {
1968		bus_size_t seg_len;
1969		bus_addr_t seg_addr;
1970
1971		tx_buffer = &txr->tx_buffers[i];
1972		ctxd = &txr->tx_base[i];
1973		seg_addr = segs[j].ds_addr;
1974		seg_len  = segs[j].ds_len;
1975		/*
1976		** TSO Workaround:
1977		** If this is the last descriptor, we want to
1978		** split it so we have a small final sentinel
1979		*/
1980		if (tso_desc && (j == (nsegs -1)) && (seg_len > 8)) {
1981			seg_len -= 4;
1982			ctxd->buffer_addr = htole64(seg_addr);
1983			ctxd->lower.data = htole32(
1984			    adapter->txd_cmd | txd_lower | seg_len);
1985			ctxd->upper.data =
1986			    htole32(txd_upper);
1987			if (++i == adapter->num_tx_desc)
1988				i = 0;
1989			/* Now make the sentinel */
1990			++txd_used; /* using an extra txd */
1991			ctxd = &txr->tx_base[i];
1992			tx_buffer = &txr->tx_buffers[i];
1993			ctxd->buffer_addr =
1994			    htole64(seg_addr + seg_len);
1995			ctxd->lower.data = htole32(
1996			    adapter->txd_cmd | txd_lower | 4);
1997			ctxd->upper.data =
1998			    htole32(txd_upper);
1999			last = i;
2000			if (++i == adapter->num_tx_desc)
2001				i = 0;
2002		} else {
2003			ctxd->buffer_addr = htole64(seg_addr);
2004			ctxd->lower.data = htole32(
2005			    adapter->txd_cmd | txd_lower | seg_len);
2006			ctxd->upper.data =
2007			    htole32(txd_upper);
2008			last = i;
2009			if (++i == adapter->num_tx_desc)
2010				i = 0;
2011		}
2012		tx_buffer->m_head = NULL;
2013		tx_buffer->next_eop = -1;
2014	}
2015
2016	txr->next_avail_desc = i;
2017	txr->tx_avail -= nsegs;
2018	if (tso_desc) /* TSO used an extra for sentinel */
2019		txr->tx_avail -= txd_used;
2020
2021	if (m_head->m_flags & M_VLANTAG) {
2022		/* Set the vlan id. */
2023		ctxd->upper.fields.special =
2024		    htole16(m_head->m_pkthdr.ether_vtag);
2025		/* Tell hardware to add tag */
2026		ctxd->lower.data |= htole32(E1000_TXD_CMD_VLE);
2027	}
2028
2029	tx_buffer->m_head = m_head;
2030	tx_buffer_mapped->map = tx_buffer->map;
2031	tx_buffer->map = map;
2032	bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);
2033
2034	/*
2035	 * Last Descriptor of Packet
2036	 * needs End Of Packet (EOP)
2037	 * and Report Status (RS)
2038	 */
2039	ctxd->lower.data |=
2040	    htole32(E1000_TXD_CMD_EOP | E1000_TXD_CMD_RS);
2041	/*
2042	 * Keep track in the first buffer which
2043	 * descriptor will be written back
2044	 */
2045	tx_buffer = &txr->tx_buffers[first];
2046	tx_buffer->next_eop = last;
2047	/* Update the watchdog time early and often */
2048	txr->watchdog_time = ticks;
2049
2050	/*
2051	 * Advance the Transmit Descriptor Tail (TDT), this tells the E1000
2052	 * that this frame is available to transmit.
2053	 */
2054	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
2055	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
2056	E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), i);
2057
2058	return (0);
2059}
2060
2061static void
2062em_set_promisc(struct adapter *adapter)
2063{
2064	struct ifnet	*ifp = adapter->ifp;
2065	u32		reg_rctl;
2066
2067	reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2068
2069	if (ifp->if_flags & IFF_PROMISC) {
2070		reg_rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
2071		/* Turn this on if you want to see bad packets */
2072		if (em_debug_sbp)
2073			reg_rctl |= E1000_RCTL_SBP;
2074		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2075	} else if (ifp->if_flags & IFF_ALLMULTI) {
2076		reg_rctl |= E1000_RCTL_MPE;
2077		reg_rctl &= ~E1000_RCTL_UPE;
2078		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2079	}
2080}
2081
2082static void
2083em_disable_promisc(struct adapter *adapter)
2084{
2085	u32	reg_rctl;
2086
2087	reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2088
2089	reg_rctl &= ~E1000_RCTL_UPE;
2090	reg_rctl &= ~E1000_RCTL_MPE;
2091	reg_rctl &= ~E1000_RCTL_SBP;
2092	E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2093}
2094
2095
2096/*********************************************************************
2097 *  Multicast Update
2098 *
2099 *  This routine is called whenever multicast address list is updated.
2100 *
2101 **********************************************************************/
2102
2103static void
2104em_set_multi(struct adapter *adapter)
2105{
2106	struct ifnet	*ifp = adapter->ifp;
2107	struct ifmultiaddr *ifma;
2108	u32 reg_rctl = 0;
2109	u8  *mta; /* Multicast array memory */
2110	int mcnt = 0;
2111
2112	IOCTL_DEBUGOUT("em_set_multi: begin");
2113
2114	mta = adapter->mta;
2115	bzero(mta, sizeof(u8) * ETH_ADDR_LEN * MAX_NUM_MULTICAST_ADDRESSES);
2116
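	/*
	 * The 82542 rev 2.0 requires the receiver to be held in reset
	 * (RCTL.RST) while the multicast table array is rewritten; MWI
	 * must also be disabled around that reset, and both are restored
	 * below once the update completes.
	 */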
2117	if (adapter->hw.mac.type == e1000_82542 &&
2118	    adapter->hw.revision_id == E1000_REVISION_2) {
2119		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2120		if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
2121			e1000_pci_clear_mwi(&adapter->hw);
2122		reg_rctl |= E1000_RCTL_RST;
2123		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2124		msec_delay(5);
2125	}
2126
2127#if __FreeBSD_version < 800000
2128	IF_ADDR_LOCK(ifp);
2129#else
2130	if_maddr_rlock(ifp);
2131#endif
2132	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
2133		if (ifma->ifma_addr->sa_family != AF_LINK)
2134			continue;
2135
2136		if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
2137			break;
2138
2139		bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
2140		    &mta[mcnt * ETH_ADDR_LEN], ETH_ADDR_LEN);
2141		mcnt++;
2142	}
2143#if __FreeBSD_version < 800000
2144	IF_ADDR_UNLOCK(ifp);
2145#else
2146	if_maddr_runlock(ifp);
2147#endif
2148	if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) {
2149		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2150		reg_rctl |= E1000_RCTL_MPE;
2151		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2152	} else
2153		e1000_update_mc_addr_list(&adapter->hw, mta, mcnt);
2154
2155	if (adapter->hw.mac.type == e1000_82542 &&
2156	    adapter->hw.revision_id == E1000_REVISION_2) {
2157		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2158		reg_rctl &= ~E1000_RCTL_RST;
2159		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2160		msec_delay(5);
2161		if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
2162			e1000_pci_set_mwi(&adapter->hw);
2163	}
2164}
2165
2166
2167/*********************************************************************
2168 *  Timer routine
2169 *
2170 *  This routine checks for link status and updates statistics.
2171 *
2172 **********************************************************************/
2173
2174static void
2175em_local_timer(void *arg)
2176{
2177	struct adapter	*adapter = arg;
2178	struct ifnet	*ifp = adapter->ifp;
2179	struct tx_ring	*txr = adapter->tx_rings;
2180
2181	EM_CORE_LOCK_ASSERT(adapter);
2182
2183	em_update_link_status(adapter);
2184	em_update_stats_counters(adapter);
2185
2186	/* Reset LAA into RAR[0] on 82571 */
2187	if ((adapter->hw.mac.type == e1000_82571) &&
2188	    e1000_get_laa_state_82571(&adapter->hw))
2189		e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);
2190
2191	/*
2192	** Don't do TX watchdog check if we've been paused
2193	*/
2194	if (adapter->pause_frames) {
2195		adapter->pause_frames = 0;
2196		goto out;
2197	}
2198	/*
2199	** Check on the state of the TX queue(s); this
2200	** can be done without the lock because it's read-only
2201	** and the HUNG state will be static if set.
2202	*/
2203	for (int i = 0; i < adapter->num_queues; i++, txr++)
2204		if (txr->queue_status == EM_QUEUE_HUNG)
2205			goto hung;
2206out:
2207	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
2208	return;
2209hung:
2210	/* Looks like we're hung */
2211	device_printf(adapter->dev, "Watchdog timeout -- resetting\n");
2212	device_printf(adapter->dev,
2213	    "Queue(%d) tdh = %d, hw tdt = %d\n", txr->me,
2214	    E1000_READ_REG(&adapter->hw, E1000_TDH(txr->me)),
2215	    E1000_READ_REG(&adapter->hw, E1000_TDT(txr->me)));
2216	device_printf(adapter->dev, "TX(%d) desc avail = %d, "
2217	    "Next TX to Clean = %d\n",
2218	    txr->me, txr->tx_avail, txr->next_to_clean);
2219	ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2220	adapter->watchdog_events++;
2221	EM_TX_UNLOCK(txr);
2222	em_init_locked(adapter);
2223}
2224
2225
2226static void
2227em_update_link_status(struct adapter *adapter)
2228{
2229	struct e1000_hw *hw = &adapter->hw;
2230	struct ifnet *ifp = adapter->ifp;
2231	device_t dev = adapter->dev;
2232	struct tx_ring *txr = adapter->tx_rings;
2233	u32 link_check = 0;
2234
2235	/* Get the cached link value or read phy for real */
2236	switch (hw->phy.media_type) {
2237	case e1000_media_type_copper:
2238		if (hw->mac.get_link_status) {
2239			/* Do the work to read phy */
2240			e1000_check_for_link(hw);
2241			link_check = !hw->mac.get_link_status;
2242			if (link_check) /* ESB2 fix */
2243				e1000_cfg_on_link_up(hw);
2244		} else
2245			link_check = TRUE;
2246		break;
2247	case e1000_media_type_fiber:
2248		e1000_check_for_link(hw);
2249		link_check = (E1000_READ_REG(hw, E1000_STATUS) &
2250		    E1000_STATUS_LU);
2251		break;
2252	case e1000_media_type_internal_serdes:
2253		e1000_check_for_link(hw);
2254		link_check = adapter->hw.mac.serdes_has_link;
2255		break;
2256	default:
2257	case e1000_media_type_unknown:
2258		break;
2259	}
2260
2261	/* Now check for a transition */
2262	if (link_check && (adapter->link_active == 0)) {
2263		e1000_get_speed_and_duplex(hw, &adapter->link_speed,
2264		    &adapter->link_duplex);
2265		/* Check if we must disable SPEED_MODE bit on PCI-E */
2266		if ((adapter->link_speed != SPEED_1000) &&
2267		    ((hw->mac.type == e1000_82571) ||
2268		    (hw->mac.type == e1000_82572))) {
2269			int tarc0;
2270			tarc0 = E1000_READ_REG(hw, E1000_TARC(0));
2271			tarc0 &= ~SPEED_MODE_BIT;
2272			E1000_WRITE_REG(hw, E1000_TARC(0), tarc0);
2273		}
2274		if (bootverbose)
2275			device_printf(dev, "Link is up %d Mbps %s\n",
2276			    adapter->link_speed,
2277			    ((adapter->link_duplex == FULL_DUPLEX) ?
2278			    "Full Duplex" : "Half Duplex"));
2279		adapter->link_active = 1;
2280		adapter->smartspeed = 0;
2281		ifp->if_baudrate = adapter->link_speed * 1000000;
2282		if_link_state_change(ifp, LINK_STATE_UP);
2283	} else if (!link_check && (adapter->link_active == 1)) {
2284		ifp->if_baudrate = adapter->link_speed = 0;
2285		adapter->link_duplex = 0;
2286		if (bootverbose)
2287			device_printf(dev, "Link is Down\n");
2288		adapter->link_active = 0;
2289		/* Link down, disable watchdog */
2290		for (int i = 0; i < adapter->num_queues; i++, txr++)
2291			txr->queue_status = EM_QUEUE_IDLE;
2292		if_link_state_change(ifp, LINK_STATE_DOWN);
2293	}
2294}
2295
2296/*********************************************************************
2297 *
2298 *  This routine disables all traffic on the adapter by issuing a
2299 *  global reset on the MAC and deallocates TX/RX buffers.
2300 *
2301 *  This routine should always be called with BOTH the CORE
2302 *  and TX locks.
2303 **********************************************************************/
2304
2305static void
2306em_stop(void *arg)
2307{
2308	struct adapter	*adapter = arg;
2309	struct ifnet	*ifp = adapter->ifp;
2310	struct tx_ring	*txr = adapter->tx_rings;
2311
2312	EM_CORE_LOCK_ASSERT(adapter);
2313
2314	INIT_DEBUGOUT("em_stop: begin");
2315
2316	em_disable_intr(adapter);
2317	callout_stop(&adapter->timer);
2318
2319	/* Tell the stack that the interface is no longer active */
2320	ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
2321
2322	/* Disarm watchdog timer. */
2323	for (int i = 0; i < adapter->num_queues; i++, txr++) {
2324		EM_TX_LOCK(txr);
2325		txr->queue_status = EM_QUEUE_IDLE;
2326		EM_TX_UNLOCK(txr);
2327	}
2328
2329	e1000_reset_hw(&adapter->hw);
2330	E1000_WRITE_REG(&adapter->hw, E1000_WUC, 0);
2331
2332	e1000_led_off(&adapter->hw);
2333	e1000_cleanup_led(&adapter->hw);
2334}
2335
2336
2337/*********************************************************************
2338 *
2339 *  Determine hardware revision.
2340 *
2341 **********************************************************************/
2342static void
2343em_identify_hardware(struct adapter *adapter)
2344{
2345	device_t dev = adapter->dev;
2346
2347	/* Make sure our PCI config space has the necessary stuff set */
2348	adapter->hw.bus.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2);
2349	if (!((adapter->hw.bus.pci_cmd_word & PCIM_CMD_BUSMASTEREN) &&
2350	    (adapter->hw.bus.pci_cmd_word & PCIM_CMD_MEMEN))) {
2351		device_printf(dev, "Memory Access and/or Bus Master bits "
2352		    "were not set!\n");
2353		adapter->hw.bus.pci_cmd_word |=
2354		(PCIM_CMD_BUSMASTEREN | PCIM_CMD_MEMEN);
2355		pci_write_config(dev, PCIR_COMMAND,
2356		    adapter->hw.bus.pci_cmd_word, 2);
2357	}
2358
2359	/* Save off the information about this board */
2360	adapter->hw.vendor_id = pci_get_vendor(dev);
2361	adapter->hw.device_id = pci_get_device(dev);
2362	adapter->hw.revision_id = pci_read_config(dev, PCIR_REVID, 1);
2363	adapter->hw.subsystem_vendor_id =
2364	    pci_read_config(dev, PCIR_SUBVEND_0, 2);
2365	adapter->hw.subsystem_device_id =
2366	    pci_read_config(dev, PCIR_SUBDEV_0, 2);
2367
2368	/* Do Shared Code Init and Setup */
2369	if (e1000_set_mac_type(&adapter->hw)) {
2370		device_printf(dev, "Setup init failure\n");
2371		return;
2372	}
2373}
2374
2375static int
2376em_allocate_pci_resources(struct adapter *adapter)
2377{
2378	device_t	dev = adapter->dev;
2379	int		rid;
2380
2381	rid = PCIR_BAR(0);
2382	adapter->memory = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
2383	    &rid, RF_ACTIVE);
2384	if (adapter->memory == NULL) {
2385		device_printf(dev, "Unable to allocate bus resource: memory\n");
2386		return (ENXIO);
2387	}
2388	adapter->osdep.mem_bus_space_tag =
2389	    rman_get_bustag(adapter->memory);
2390	adapter->osdep.mem_bus_space_handle =
2391	    rman_get_bushandle(adapter->memory);
2392	adapter->hw.hw_addr = (u8 *)&adapter->osdep.mem_bus_space_handle;
2393
2394	/* Default to a single queue */
2395	adapter->num_queues = 1;
2396
2397	/*
2398	 * Setup MSI/X or MSI if PCI Express
2399	 */
2400	adapter->msix = em_setup_msix(adapter);
2401
2402	adapter->hw.back = &adapter->osdep;
2403
2404	return (0);
2405}
2406
2407/*********************************************************************
2408 *
2409 *  Setup the Legacy or MSI Interrupt handler
2410 *
2411 **********************************************************************/
2412int
2413em_allocate_legacy(struct adapter *adapter)
2414{
2415	device_t dev = adapter->dev;
2416	int error, rid = 0;
2417
2418	/* Manually turn off all interrupts */
2419	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2420
2421	if (adapter->msix == 1) /* using MSI */
2422		rid = 1;
2423	/* We allocate a single interrupt resource */
2424	adapter->res = bus_alloc_resource_any(dev,
2425	    SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2426	if (adapter->res == NULL) {
2427		device_printf(dev, "Unable to allocate bus resource: "
2428		    "interrupt\n");
2429		return (ENXIO);
2430	}
2431
2432	/*
2433	 * Allocate a fast interrupt and the associated
2434	 * deferred processing contexts.
2435	 */
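	/*
	 * em_irq_fast() runs as a filter in interrupt context: it only
	 * acknowledges the hardware and enqueues que_task/link_task, so
	 * the real work happens in the taskqueue thread started here.
	 */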
2436	TASK_INIT(&adapter->que_task, 0, em_handle_que, adapter);
2437	TASK_INIT(&adapter->link_task, 0, em_handle_link, adapter);
2438	adapter->tq = taskqueue_create_fast("em_taskq", M_NOWAIT,
2439	    taskqueue_thread_enqueue, &adapter->tq);
2440	taskqueue_start_threads(&adapter->tq, 1, PI_NET, "%s taskq",
2441	    device_get_nameunit(adapter->dev));
2442	if ((error = bus_setup_intr(dev, adapter->res, INTR_TYPE_NET,
2443	    em_irq_fast, NULL, adapter, &adapter->tag)) != 0) {
2444		device_printf(dev, "Failed to register fast interrupt "
2445			    "handler: %d\n", error);
2446		taskqueue_free(adapter->tq);
2447		adapter->tq = NULL;
2448		return (error);
2449	}
2450
2451	return (0);
2452}
2453
2454/*********************************************************************
2455 *
2456 *  Setup the MSIX Interrupt handlers
2457 *   This is not really multiqueue; rather,
2458 *   it's just multiple interrupt vectors.
2459 *
2460 **********************************************************************/
2461int
2462em_allocate_msix(struct adapter *adapter)
2463{
2464	device_t	dev = adapter->dev;
2465	struct		tx_ring *txr = adapter->tx_rings;
2466	struct		rx_ring *rxr = adapter->rx_rings;
2467	int		error, rid, vector = 0;
2468
2469
2470	/* Make sure all interrupts are disabled */
2471	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2472
2473	/* First set up ring resources */
2474	for (int i = 0; i < adapter->num_queues; i++, txr++, rxr++) {
2475
2476		/* RX ring */
2477		rid = vector + 1;
2478
2479		rxr->res = bus_alloc_resource_any(dev,
2480		    SYS_RES_IRQ, &rid, RF_ACTIVE);
2481		if (rxr->res == NULL) {
2482			device_printf(dev,
2483			    "Unable to allocate bus resource: "
2484			    "RX MSIX Interrupt %d\n", i);
2485			return (ENXIO);
2486		}
2487		if ((error = bus_setup_intr(dev, rxr->res,
2488		    INTR_TYPE_NET | INTR_MPSAFE, NULL, em_msix_rx,
2489		    rxr, &rxr->tag)) != 0) {
2490			device_printf(dev, "Failed to register RX handler\n");
2491			return (error);
2492		}
2493#if __FreeBSD_version >= 800504
2494		bus_describe_intr(dev, rxr->res, rxr->tag, "rx %d", i);
2495#endif
2496		rxr->msix = vector++; /* NOTE increment vector for TX */
2497		TASK_INIT(&rxr->rx_task, 0, em_handle_rx, rxr);
2498		rxr->tq = taskqueue_create_fast("em_rxq", M_NOWAIT,
2499		    taskqueue_thread_enqueue, &rxr->tq);
2500		taskqueue_start_threads(&rxr->tq, 1, PI_NET, "%s rxq",
2501		    device_get_nameunit(adapter->dev));
2502		/*
2503		** Set the bit to enable interrupt
2504		** in E1000_IMS -- bits 20 and 21
2505		** are for RX0 and RX1, note this has
2506		** NOTHING to do with the MSIX vector
2507		*/
2508		rxr->ims = 1 << (20 + i);
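		/*
		** On the 82574 each 4-bit IVAR field holds the vector
		** number in bits 2:0, with bit 3 (the 8 below) acting
		** as the valid bit for that entry.
		*/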
2509		adapter->ivars |= (8 | rxr->msix) << (i * 4);
2510
2511		/* TX ring */
2512		rid = vector + 1;
2513		txr->res = bus_alloc_resource_any(dev,
2514		    SYS_RES_IRQ, &rid, RF_ACTIVE);
2515		if (txr->res == NULL) {
2516			device_printf(dev,
2517			    "Unable to allocate bus resource: "
2518			    "TX MSIX Interrupt %d\n", i);
2519			return (ENXIO);
2520		}
2521		if ((error = bus_setup_intr(dev, txr->res,
2522		    INTR_TYPE_NET | INTR_MPSAFE, NULL, em_msix_tx,
2523		    txr, &txr->tag)) != 0) {
2524			device_printf(dev, "Failed to register TX handler\n");
2525			return (error);
2526		}
2527#if __FreeBSD_version >= 800504
2528		bus_describe_intr(dev, txr->res, txr->tag, "tx %d", i);
2529#endif
2530		txr->msix = vector++; /* Increment vector for next pass */
2531		TASK_INIT(&txr->tx_task, 0, em_handle_tx, txr);
2532		txr->tq = taskqueue_create_fast("em_txq", M_NOWAIT,
2533		    taskqueue_thread_enqueue, &txr->tq);
2534		taskqueue_start_threads(&txr->tq, 1, PI_NET, "%s txq",
2535		    device_get_nameunit(adapter->dev));
2536		/*
2537		** Set the bit to enable interrupt
2538		** in E1000_IMS -- bits 22 and 23
2539		** are for TX0 and TX1, note this has
2540		** NOTHING to do with the MSIX vector
2541		*/
2542		txr->ims = 1 << (22 + i);
2543		adapter->ivars |= (8 | txr->msix) << (8 + (i * 4));
2544	}
2545
2546	/* Link interrupt */
2547	++rid;
2548	adapter->res = bus_alloc_resource_any(dev,
2549	    SYS_RES_IRQ, &rid, RF_ACTIVE);
2550	if (!adapter->res) {
2551		device_printf(dev, "Unable to allocate "
2552		    "bus resource: Link interrupt [%d]\n", rid);
2553		return (ENXIO);
2554	}
2555	/* Set the link handler function */
2556	error = bus_setup_intr(dev, adapter->res,
2557	    INTR_TYPE_NET | INTR_MPSAFE, NULL,
2558	    em_msix_link, adapter, &adapter->tag);
2559	if (error) {
2560		adapter->res = NULL;
2561		device_printf(dev, "Failed to register LINK handler\n");
2562		return (error);
2563	}
2564#if __FreeBSD_version >= 800504
2565	bus_describe_intr(dev, adapter->res, adapter->tag, "link");
2566#endif
2567	adapter->linkvec = vector;
2568	adapter->ivars |=  (8 | vector) << 16;
2569	adapter->ivars |= 0x80000000;
2570
2571	return (0);
2572}
2573
2574
2575static void
2576em_free_pci_resources(struct adapter *adapter)
2577{
2578	device_t	dev = adapter->dev;
2579	struct tx_ring	*txr;
2580	struct rx_ring	*rxr;
2581	int		rid;
2582
2583
2584	/*
2585	** Release all the queue interrupt resources:
2586	*/
2587	for (int i = 0; i < adapter->num_queues; i++) {
2588		txr = &adapter->tx_rings[i];
2589		rxr = &adapter->rx_rings[i];
2590		/* an early abort? */
2591		if ((txr == NULL) || (rxr == NULL))
2592			break;
2593		rid = txr->msix + 1;
2594		if (txr->tag != NULL) {
2595			bus_teardown_intr(dev, txr->res, txr->tag);
2596			txr->tag = NULL;
2597		}
2598		if (txr->res != NULL)
2599			bus_release_resource(dev, SYS_RES_IRQ,
2600			    rid, txr->res);
2601		rid = rxr->msix + 1;
2602		if (rxr->tag != NULL) {
2603			bus_teardown_intr(dev, rxr->res, rxr->tag);
2604			rxr->tag = NULL;
2605		}
2606		if (rxr->res != NULL)
2607			bus_release_resource(dev, SYS_RES_IRQ,
2608			    rid, rxr->res);
2609	}
2610
2611	if (adapter->linkvec) /* we are doing MSIX */
2612		rid = adapter->linkvec + 1;
2613	else
2614		rid = (adapter->msix != 0) ? 1 : 0;
2615
2616	if (adapter->tag != NULL) {
2617		bus_teardown_intr(dev, adapter->res, adapter->tag);
2618		adapter->tag = NULL;
2619	}
2620
2621	if (adapter->res != NULL)
2622		bus_release_resource(dev, SYS_RES_IRQ, rid, adapter->res);
2623
2624
2625	if (adapter->msix)
2626		pci_release_msi(dev);
2627
2628	if (adapter->msix_mem != NULL)
2629		bus_release_resource(dev, SYS_RES_MEMORY,
2630		    PCIR_BAR(EM_MSIX_BAR), adapter->msix_mem);
2631
2632	if (adapter->memory != NULL)
2633		bus_release_resource(dev, SYS_RES_MEMORY,
2634		    PCIR_BAR(0), adapter->memory);
2635
2636	if (adapter->flash != NULL)
2637		bus_release_resource(dev, SYS_RES_MEMORY,
2638		    EM_FLASH, adapter->flash);
2639}
2640
2641/*
2642 * Setup MSI or MSI/X
2643 */
2644static int
2645em_setup_msix(struct adapter *adapter)
2646{
2647	device_t dev = adapter->dev;
2648	int val = 0;
2649
2650
2651	/*
2652	** Setup MSI/X for Hartwell: tests have shown
2653	** use of two queues to be unstable, and to
2654	** provide no great gain anyway, so we simply
2655	** separate the interrupts and use a single queue.
2656	*/
2657	if ((adapter->hw.mac.type == e1000_82574) &&
2658	    (em_enable_msix == TRUE)) {
2659		/* Map the MSIX BAR */
2660		int rid = PCIR_BAR(EM_MSIX_BAR);
2661		adapter->msix_mem = bus_alloc_resource_any(dev,
2662		    SYS_RES_MEMORY, &rid, RF_ACTIVE);
2663		if (!adapter->msix_mem) {
2664			/* May not be enabled */
2665			device_printf(adapter->dev,
2666			    "Unable to map MSIX table\n");
2667			goto msi;
2668		}
2669		val = pci_msix_count(dev);
2670		if (val < 3) {
2671			bus_release_resource(dev, SYS_RES_MEMORY,
2672			    PCIR_BAR(EM_MSIX_BAR), adapter->msix_mem);
2673			adapter->msix_mem = NULL;
2674			device_printf(adapter->dev,
2675			    "MSIX: insufficient vectors, using MSI\n");
2676			goto msi;
2677		}
2678		val = 3;
2679		adapter->num_queues = 1;
2680		if (pci_alloc_msix(dev, &val) == 0) {
2681			device_printf(adapter->dev,
2682			    "Using MSIX interrupts "
2683			    "with %d vectors\n", val);
2684		}
2685
2686		return (val);
2687	}
2688msi:
2689	val = pci_msi_count(dev);
2690	if (val == 1 && pci_alloc_msi(dev, &val) == 0) {
2691		adapter->msix = 1;
2692		device_printf(adapter->dev, "Using an MSI interrupt\n");
2693		return (val);
2694	}
2695	/* Should only happen due to manual configuration */
2696	device_printf(adapter->dev, "No MSI/MSIX, using a Legacy IRQ\n");
2697	return (0);
2698}
2699
2700
2701/*********************************************************************
2702 *
2703 *  Initialize the hardware to a configuration
2704 *  as specified by the adapter structure.
2705 *
2706 **********************************************************************/
2707static void
2708em_reset(struct adapter *adapter)
2709{
2710	device_t	dev = adapter->dev;
2711	struct ifnet	*ifp = adapter->ifp;
2712	struct e1000_hw	*hw = &adapter->hw;
2713	u16		rx_buffer_size;
2714
2715	INIT_DEBUGOUT("em_reset: begin");
2716
2717	/* Set up smart power down as default off on newer adapters. */
2718	if (!em_smart_pwr_down && (hw->mac.type == e1000_82571 ||
2719	    hw->mac.type == e1000_82572)) {
2720		u16 phy_tmp = 0;
2721
2722		/* Speed up time to link by disabling smart power down. */
2723		e1000_read_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, &phy_tmp);
2724		phy_tmp &= ~IGP02E1000_PM_SPD;
2725		e1000_write_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, phy_tmp);
2726	}
2727
2728	/*
2729	 * These parameters control the automatic generation (Tx) and
2730	 * response (Rx) to Ethernet PAUSE frames.
2731	 * - High water mark should allow for at least two frames to be
2732	 *   received after sending an XOFF.
2733	 * - Low water mark works best when it is very near the high water mark.
2734	 *   This allows the receiver to restart by sending XON when it has
2735	 *   drained a bit. Here we use an arbitrary value of 1500 which will
2736	 *   restart after one full frame is pulled from the buffer. There
2737	 *   could be several smaller frames in the buffer and if so they will
2738	 *   not trigger the XON until their total number reduces the buffer
2739	 *   by 1500.
2740	 * - The pause time is fairly large at 1000 x 512ns = 512 usec.
2741	 */
2742	rx_buffer_size = ((E1000_READ_REG(hw, E1000_PBA) & 0xffff) << 10 );
2743
2744	hw->fc.high_water = rx_buffer_size -
2745	    roundup2(adapter->max_frame_size, 1024);
2746	hw->fc.low_water = hw->fc.high_water - 1500;
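	/*
	 * Worked example (hypothetical values): a PBA of 0x18 yields
	 * rx_buffer_size = 0x18 << 10 = 24576 bytes; with a 1522 byte
	 * max frame, high_water = 24576 - roundup2(1522, 1024) = 22528
	 * and low_water = 22528 - 1500 = 21028.
	 */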
2747
2748	if (hw->mac.type == e1000_80003es2lan)
2749		hw->fc.pause_time = 0xFFFF;
2750	else
2751		hw->fc.pause_time = EM_FC_PAUSE_TIME;
2752
2753	hw->fc.send_xon = TRUE;
2754
2755	/* Set flow control; use the tunable value if sane */
2756	hw->fc.requested_mode = adapter->fc_setting;
2757
2758	/* Workaround: no TX flow ctrl for PCH */
2759	if (hw->mac.type == e1000_pchlan)
2760		hw->fc.requested_mode = e1000_fc_rx_pause;
2761
2762	/* Override settings for PCH2LAN; yes, it's magic :) */
2763	if (hw->mac.type == e1000_pch2lan) {
2764		hw->fc.high_water = 0x5C20;
2765		hw->fc.low_water = 0x5048;
2766		hw->fc.pause_time = 0x0650;
2767		hw->fc.refresh_time = 0x0400;
2768		/* Jumbos need adjusted PBA */
2769		if (ifp->if_mtu > ETHERMTU)
2770			E1000_WRITE_REG(hw, E1000_PBA, 12);
2771		else
2772			E1000_WRITE_REG(hw, E1000_PBA, 26);
2773	}
2774
2775	/* Issue a global reset */
2776	e1000_reset_hw(hw);
2777	E1000_WRITE_REG(hw, E1000_WUC, 0);
2778
2779	if (e1000_init_hw(hw) < 0) {
2780		device_printf(dev, "Hardware Initialization Failed\n");
2781		return;
2782	}
2783
2784	E1000_WRITE_REG(hw, E1000_VET, ETHERTYPE_VLAN);
2785	e1000_get_phy_info(hw);
2786	e1000_check_for_link(hw);
2787	return;
2788}
2789
2790/*********************************************************************
2791 *
2792 *  Setup networking device structure and register an interface.
2793 *
2794 **********************************************************************/
2795static int
2796em_setup_interface(device_t dev, struct adapter *adapter)
2797{
2798	struct ifnet   *ifp;
2799
2800	INIT_DEBUGOUT("em_setup_interface: begin");
2801
2802	ifp = adapter->ifp = if_alloc(IFT_ETHER);
2803	if (ifp == NULL) {
2804		device_printf(dev, "can not allocate ifnet structure\n");
2805		return (-1);
2806	}
2807	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
2808	ifp->if_mtu = ETHERMTU;
2809	ifp->if_init = em_init;
2810	ifp->if_softc = adapter;
2811	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
2812	ifp->if_ioctl = em_ioctl;
2813	ifp->if_start = em_start;
2814	IFQ_SET_MAXLEN(&ifp->if_snd, adapter->num_tx_desc - 1);
2815	ifp->if_snd.ifq_drv_maxlen = adapter->num_tx_desc - 1;
2816	IFQ_SET_READY(&ifp->if_snd);
2817
2818	ether_ifattach(ifp, adapter->hw.mac.addr);
2819
2820	ifp->if_capabilities = ifp->if_capenable = 0;
2821
2822#ifdef EM_MULTIQUEUE
2823	/* Multiqueue tx functions */
2824	ifp->if_transmit = em_mq_start;
2825	ifp->if_qflush = em_qflush;
2826#endif
2827
2828	ifp->if_capabilities |= IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM;
2829	ifp->if_capenable |= IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM;
2830
2831	/* Enable TSO by default, can disable with ifconfig */
2832	ifp->if_capabilities |= IFCAP_TSO4;
2833	ifp->if_capenable |= IFCAP_TSO4;
2834
2835	/*
2836	 * Tell the upper layer(s) we
2837	 * support full VLAN capability
2838	 */
2839	ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);
2840	ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU;
2841	ifp->if_capenable |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU;
2842
2843	/*
2844	** Don't turn this on by default: if vlans are
2845	** created on another pseudo device (e.g. lagg),
2846	** then vlan events are not passed through, breaking
2847	** operation, but with HW FILTER off it works. If
2848	** you use vlans directly on the em driver you can
2849	** enable this and get full hardware tag filtering.
2850	*/
2851	ifp->if_capabilities |= IFCAP_VLAN_HWFILTER;
2852
2853#ifdef DEVICE_POLLING
2854	ifp->if_capabilities |= IFCAP_POLLING;
2855#endif
2856
2857	/* Enable only WOL MAGIC by default */
2858	if (adapter->wol) {
2859		ifp->if_capabilities |= IFCAP_WOL;
2860		ifp->if_capenable |= IFCAP_WOL_MAGIC;
2861	}
2862
2863	/*
2864	 * Specify the media types supported by this adapter and register
2865	 * callbacks to update media and link information
2866	 */
2867	ifmedia_init(&adapter->media, IFM_IMASK,
2868	    em_media_change, em_media_status);
2869	if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
2870	    (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
2871		u_char fiber_type = IFM_1000_SX;	/* default type */
2872
2873		ifmedia_add(&adapter->media, IFM_ETHER | fiber_type | IFM_FDX,
2874			    0, NULL);
2875		ifmedia_add(&adapter->media, IFM_ETHER | fiber_type, 0, NULL);
2876	} else {
2877		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T, 0, NULL);
2878		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T | IFM_FDX,
2879			    0, NULL);
2880		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX,
2881			    0, NULL);
2882		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
2883			    0, NULL);
2884		if (adapter->hw.phy.type != e1000_phy_ife) {
2885			ifmedia_add(&adapter->media,
2886				IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
2887			ifmedia_add(&adapter->media,
2888				IFM_ETHER | IFM_1000_T, 0, NULL);
2889		}
2890	}
2891	ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
2892	ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
2893	return (0);
2894}
2895
2896
2897/*
2898 * Manage DMA'able memory.
2899 */
2900static void
2901em_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
2902{
2903	if (error)
2904		return;
2905	*(bus_addr_t *) arg = segs[0].ds_addr;
2906}
2907
2908static int
2909em_dma_malloc(struct adapter *adapter, bus_size_t size,
2910        struct em_dma_alloc *dma, int mapflags)
2911{
2912	int error;
2913
2914	error = bus_dma_tag_create(bus_get_dma_tag(adapter->dev), /* parent */
2915				EM_DBA_ALIGN, 0,	/* alignment, bounds */
2916				BUS_SPACE_MAXADDR,	/* lowaddr */
2917				BUS_SPACE_MAXADDR,	/* highaddr */
2918				NULL, NULL,		/* filter, filterarg */
2919				size,			/* maxsize */
2920				1,			/* nsegments */
2921				size,			/* maxsegsize */
2922				0,			/* flags */
2923				NULL,			/* lockfunc */
2924				NULL,			/* lockarg */
2925				&dma->dma_tag);
2926	if (error) {
2927		device_printf(adapter->dev,
2928		    "%s: bus_dma_tag_create failed: %d\n",
2929		    __func__, error);
2930		goto fail_0;
2931	}
2932
2933	error = bus_dmamem_alloc(dma->dma_tag, (void**) &dma->dma_vaddr,
2934	    BUS_DMA_NOWAIT | BUS_DMA_COHERENT, &dma->dma_map);
2935	if (error) {
2936		device_printf(adapter->dev,
2937		    "%s: bus_dmamem_alloc(%ju) failed: %d\n",
2938		    __func__, (uintmax_t)size, error);
2939		goto fail_1;	/* memory was never allocated; skip bus_dmamem_free() */
2940	}
2941
2942	dma->dma_paddr = 0;
2943	error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
2944	    size, em_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT);
2945	if (error || dma->dma_paddr == 0) {
2946		device_printf(adapter->dev,
2947		    "%s: bus_dmamap_load failed: %d\n",
2948		    __func__, error);
2949		goto fail_3;
2950	}
2951
2952	return (0);
2953
2954fail_3:
2955	bus_dmamap_unload(dma->dma_tag, dma->dma_map);
2956	bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
2957 fail_1:
2958	bus_dma_tag_destroy(dma->dma_tag);
2959fail_0:
2960	dma->dma_map = NULL;
2961	dma->dma_tag = NULL;
2962
2963	return (error);
2964}
2965
2966static void
2967em_dma_free(struct adapter *adapter, struct em_dma_alloc *dma)
2968{
2969	if (dma->dma_tag == NULL)
2970		return;
2971	if (dma->dma_map != NULL) {
2972		bus_dmamap_sync(dma->dma_tag, dma->dma_map,
2973		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
2974		bus_dmamap_unload(dma->dma_tag, dma->dma_map);
2975		bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
2976		dma->dma_map = NULL;
2977	}
2978	bus_dma_tag_destroy(dma->dma_tag);
2979	dma->dma_tag = NULL;
2980}
2981
2982
2983/*********************************************************************
2984 *
2985 *  Allocate memory for the transmit and receive rings, and then
2986 *  the descriptors associated with each, called only once at attach.
2987 *
2988 **********************************************************************/
2989static int
2990em_allocate_queues(struct adapter *adapter)
2991{
2992	device_t		dev = adapter->dev;
2993	struct tx_ring		*txr = NULL;
2994	struct rx_ring		*rxr = NULL;
2995	int rsize, tsize, error = E1000_SUCCESS;
2996	int txconf = 0, rxconf = 0;
2997
2998
2999	/* Allocate the TX ring struct memory */
3000	if (!(adapter->tx_rings =
3001	    (struct tx_ring *) malloc(sizeof(struct tx_ring) *
3002	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3003		device_printf(dev, "Unable to allocate TX ring memory\n");
3004		error = ENOMEM;
3005		goto fail;
3006	}
3007
3008	/* Now allocate the RX */
3009	if (!(adapter->rx_rings =
3010	    (struct rx_ring *) malloc(sizeof(struct rx_ring) *
3011	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3012		device_printf(dev, "Unable to allocate RX ring memory\n");
3013		error = ENOMEM;
3014		goto rx_fail;
3015	}
3016
3017	tsize = roundup2(adapter->num_tx_desc *
3018	    sizeof(struct e1000_tx_desc), EM_DBA_ALIGN);
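	/*
	 * Example (hypothetical ring size): 1024 descriptors * 16 bytes
	 * per e1000_tx_desc = 16384 bytes, which roundup2() leaves as-is
	 * since it is already a multiple of EM_DBA_ALIGN (128).
	 */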
3019	/*
3020	 * Now set up the TX queues, txconf is needed to handle the
3021	 * possibility that things fail midcourse and we need to
3022	 * undo memory gracefully
3023	 */
3024	for (int i = 0; i < adapter->num_queues; i++, txconf++) {
3025		/* Set up some basics */
3026		txr = &adapter->tx_rings[i];
3027		txr->adapter = adapter;
3028		txr->me = i;
3029
3030		/* Initialize the TX lock */
3031		snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
3032		    device_get_nameunit(dev), txr->me);
3033		mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF);
3034
3035		if (em_dma_malloc(adapter, tsize,
3036			&txr->txdma, BUS_DMA_NOWAIT)) {
3037			device_printf(dev,
3038			    "Unable to allocate TX Descriptor memory\n");
3039			error = ENOMEM;
3040			goto err_tx_desc;
3041		}
3042		txr->tx_base = (struct e1000_tx_desc *)txr->txdma.dma_vaddr;
3043		bzero((void *)txr->tx_base, tsize);
3044
3045		if (em_allocate_transmit_buffers(txr)) {
3046			device_printf(dev,
3047			    "Critical Failure setting up transmit buffers\n");
3048			error = ENOMEM;
3049			goto err_tx_desc;
3050		}
3051#if __FreeBSD_version >= 800000
3052		/* Allocate a buf ring */
3053		txr->br = buf_ring_alloc(4096, M_DEVBUF,
3054		    M_WAITOK, &txr->tx_mtx);
3055#endif
3056	}
3057
3058	/*
3059	 * Next the RX queues...
3060	 */
3061	rsize = roundup2(adapter->num_rx_desc *
3062	    sizeof(struct e1000_rx_desc), EM_DBA_ALIGN);
3063	for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
3064		rxr = &adapter->rx_rings[i];
3065		rxr->adapter = adapter;
3066		rxr->me = i;
3067
3068		/* Initialize the RX lock */
3069		snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
3070		    device_get_nameunit(dev), rxr->me);
3071		mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF);
3072
3073		if (em_dma_malloc(adapter, rsize,
3074			&rxr->rxdma, BUS_DMA_NOWAIT)) {
3075			device_printf(dev,
3076			    "Unable to allocate RxDescriptor memory\n");
3077			error = ENOMEM;
3078			goto err_rx_desc;
3079		}
3080		rxr->rx_base = (struct e1000_rx_desc *)rxr->rxdma.dma_vaddr;
3081		bzero((void *)rxr->rx_base, rsize);
3082
3083		/* Allocate receive buffers for the ring */
3084		if (em_allocate_receive_buffers(rxr)) {
3085			device_printf(dev,
3086			    "Critical Failure setting up receive buffers\n");
3087			error = ENOMEM;
3088			goto err_rx_desc;
3089		}
3090	}
3091
3092	return (0);
3093
3094err_rx_desc:
3095	for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
3096		em_dma_free(adapter, &rxr->rxdma);
3097err_tx_desc:
3098	for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
3099		em_dma_free(adapter, &txr->txdma);
3100	free(adapter->rx_rings, M_DEVBUF);
3101rx_fail:
3102#if __FreeBSD_version >= 800000
3103	if (txr != NULL)	/* NULL if the RX ring malloc failed first */
		buf_ring_free(txr->br, M_DEVBUF);
3104#endif
3105	free(adapter->tx_rings, M_DEVBUF);
3106fail:
3107	return (error);
3108}
3109
3110
3111/*********************************************************************
3112 *
3113 *  Allocate memory for tx_buffer structures. The tx_buffer stores all
3114 *  the information needed to transmit a packet on the wire. This is
3115 *  called only once at attach, setup is done every reset.
3116 *
3117 **********************************************************************/
3118static int
3119em_allocate_transmit_buffers(struct tx_ring *txr)
3120{
3121	struct adapter *adapter = txr->adapter;
3122	device_t dev = adapter->dev;
3123	struct em_buffer *txbuf;
3124	int error, i;
3125
3126	/*
3127	 * Setup DMA descriptor areas.
3128	 */
3129	if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
3130			       1, 0,			/* alignment, bounds */
3131			       BUS_SPACE_MAXADDR,	/* lowaddr */
3132			       BUS_SPACE_MAXADDR,	/* highaddr */
3133			       NULL, NULL,		/* filter, filterarg */
3134			       EM_TSO_SIZE,		/* maxsize */
3135			       EM_MAX_SCATTER,		/* nsegments */
3136			       PAGE_SIZE,		/* maxsegsize */
3137			       0,			/* flags */
3138			       NULL,			/* lockfunc */
3139			       NULL,			/* lockfuncarg */
3140			       &txr->txtag))) {
3141		device_printf(dev,"Unable to allocate TX DMA tag\n");
3142		goto fail;
3143	}
3144
3145	if (!(txr->tx_buffers =
3146	    (struct em_buffer *) malloc(sizeof(struct em_buffer) *
3147	    adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3148		device_printf(dev, "Unable to allocate tx_buffer memory\n");
3149		error = ENOMEM;
3150		goto fail;
3151	}
3152
3153	/* Create the descriptor buffer dma maps */
3154	txbuf = txr->tx_buffers;
3155	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3156		error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
3157		if (error != 0) {
3158			device_printf(dev, "Unable to create TX DMA map\n");
3159			goto fail;
3160		}
3161	}
3162
3163	return (0);
3164fail:
3165	/* We free all, it handles case where we are in the middle */
3166	em_free_transmit_structures(adapter);
3167	return (error);
3168}
3169
3170/*********************************************************************
3171 *
3172 *  Initialize a transmit ring.
3173 *
3174 **********************************************************************/
3175static void
3176em_setup_transmit_ring(struct tx_ring *txr)
3177{
3178	struct adapter *adapter = txr->adapter;
3179	struct em_buffer *txbuf;
3180	int i;
3181
3182	/* Clear the old descriptor contents */
3183	EM_TX_LOCK(txr);
3184	bzero((void *)txr->tx_base,
3185	      (sizeof(struct e1000_tx_desc)) * adapter->num_tx_desc);
3186	/* Reset indices */
3187	txr->next_avail_desc = 0;
3188	txr->next_to_clean = 0;
3189
3190	/* Free any existing tx buffers. */
3191	txbuf = txr->tx_buffers;
3192	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3193		if (txbuf->m_head != NULL) {
3194			bus_dmamap_sync(txr->txtag, txbuf->map,
3195			    BUS_DMASYNC_POSTWRITE);
3196			bus_dmamap_unload(txr->txtag, txbuf->map);
3197			m_freem(txbuf->m_head);
3198			txbuf->m_head = NULL;
3199		}
3200		/* clear the watch index */
3201		txbuf->next_eop = -1;
3202	}
3203
3204	/* Set number of descriptors available */
3205	txr->tx_avail = adapter->num_tx_desc;
3206	txr->queue_status = EM_QUEUE_IDLE;
3207
3208	/* Clear checksum offload context. */
3209	txr->last_hw_offload = 0;
3210	txr->last_hw_ipcss = 0;
3211	txr->last_hw_ipcso = 0;
3212	txr->last_hw_tucss = 0;
3213	txr->last_hw_tucso = 0;
3214
3215	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3216	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3217	EM_TX_UNLOCK(txr);
3218}
3219
3220/*********************************************************************
3221 *
3222 *  Initialize all transmit rings.
3223 *
3224 **********************************************************************/
3225static void
3226em_setup_transmit_structures(struct adapter *adapter)
3227{
3228	struct tx_ring *txr = adapter->tx_rings;
3229
3230	for (int i = 0; i < adapter->num_queues; i++, txr++)
3231		em_setup_transmit_ring(txr);
3232
3233	return;
3234}
3235
3236/*********************************************************************
3237 *
3238 *  Enable transmit unit.
3239 *
3240 **********************************************************************/
3241static void
3242em_initialize_transmit_unit(struct adapter *adapter)
3243{
3244	struct tx_ring	*txr = adapter->tx_rings;
3245	struct e1000_hw	*hw = &adapter->hw;
3246	u32	tctl, tarc, tipg = 0;
3247
3248	INIT_DEBUGOUT("em_initialize_transmit_unit: begin");
3249
3250	for (int i = 0; i < adapter->num_queues; i++, txr++) {
3251		u64 bus_addr = txr->txdma.dma_paddr;
3252		/* Base and Len of TX Ring */
3253		E1000_WRITE_REG(hw, E1000_TDLEN(i),
3254		    adapter->num_tx_desc * sizeof(struct e1000_tx_desc));
3255		E1000_WRITE_REG(hw, E1000_TDBAH(i),
3256		    (u32)(bus_addr >> 32));
3257		E1000_WRITE_REG(hw, E1000_TDBAL(i),
3258		    (u32)bus_addr);
3259		/* Init the HEAD/TAIL indices */
3260		E1000_WRITE_REG(hw, E1000_TDT(i), 0);
3261		E1000_WRITE_REG(hw, E1000_TDH(i), 0);
3262
3263		HW_DEBUGOUT2("Base = %x, Length = %x\n",
3264		    E1000_READ_REG(&adapter->hw, E1000_TDBAL(i)),
3265		    E1000_READ_REG(&adapter->hw, E1000_TDLEN(i)));
3266
3267		txr->queue_status = EM_QUEUE_IDLE;
3268	}
3269
3270	/* Set the default values for the Tx Inter Packet Gap timer */
3271	switch (adapter->hw.mac.type) {
3272	case e1000_82542:
3273		tipg = DEFAULT_82542_TIPG_IPGT;
3274		tipg |= DEFAULT_82542_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT;
3275		tipg |= DEFAULT_82542_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT;
3276		break;
3277	case e1000_80003es2lan:
3278		tipg = DEFAULT_82543_TIPG_IPGR1;
3279		tipg |= DEFAULT_80003ES2LAN_TIPG_IPGR2 <<
3280		    E1000_TIPG_IPGR2_SHIFT;
3281		break;
3282	default:
3283		if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
3284		    (adapter->hw.phy.media_type ==
3285		    e1000_media_type_internal_serdes))
3286			tipg = DEFAULT_82543_TIPG_IPGT_FIBER;
3287		else
3288			tipg = DEFAULT_82543_TIPG_IPGT_COPPER;
3289		tipg |= DEFAULT_82543_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT;
3290		tipg |= DEFAULT_82543_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT;
3291	}
3292
3293	E1000_WRITE_REG(&adapter->hw, E1000_TIPG, tipg);
3294	E1000_WRITE_REG(&adapter->hw, E1000_TIDV, adapter->tx_int_delay.value);
3295
3296	if (adapter->hw.mac.type >= e1000_82540)
3297		E1000_WRITE_REG(&adapter->hw, E1000_TADV,
3298		    adapter->tx_abs_int_delay.value);
3299
3300	if ((adapter->hw.mac.type == e1000_82571) ||
3301	    (adapter->hw.mac.type == e1000_82572)) {
3302		tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
3303		tarc |= SPEED_MODE_BIT;
3304		E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3305	} else if (adapter->hw.mac.type == e1000_80003es2lan) {
3306		tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
3307		tarc |= 1;
3308		E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3309		tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(1));
3310		tarc |= 1;
3311		E1000_WRITE_REG(&adapter->hw, E1000_TARC(1), tarc);
3312	}
3313
3314	adapter->txd_cmd = E1000_TXD_CMD_IFCS;
3315	if (adapter->tx_int_delay.value > 0)
3316		adapter->txd_cmd |= E1000_TXD_CMD_IDE;
3317
3318	/* Program the Transmit Control Register */
3319	tctl = E1000_READ_REG(&adapter->hw, E1000_TCTL);
3320	tctl &= ~E1000_TCTL_CT;
3321	tctl |= (E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN |
3322		   (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT));
3323
3324	if (adapter->hw.mac.type >= e1000_82571)
3325		tctl |= E1000_TCTL_MULR;
3326
3327	/* This write will effectively turn on the transmit unit. */
3328	E1000_WRITE_REG(&adapter->hw, E1000_TCTL, tctl);
3329
3330}
3331
3332
3333/*********************************************************************
3334 *
3335 *  Free all transmit rings.
3336 *
3337 **********************************************************************/
3338static void
3339em_free_transmit_structures(struct adapter *adapter)
3340{
3341	struct tx_ring *txr = adapter->tx_rings;
3342
3343	for (int i = 0; i < adapter->num_queues; i++, txr++) {
3344		EM_TX_LOCK(txr);
3345		em_free_transmit_buffers(txr);
3346		em_dma_free(adapter, &txr->txdma);
3347		EM_TX_UNLOCK(txr);
3348		EM_TX_LOCK_DESTROY(txr);
3349	}
3350
3351	free(adapter->tx_rings, M_DEVBUF);
3352}
3353
3354/*********************************************************************
3355 *
3356 *  Free transmit ring related data structures.
3357 *
3358 **********************************************************************/
3359static void
3360em_free_transmit_buffers(struct tx_ring *txr)
3361{
3362	struct adapter		*adapter = txr->adapter;
3363	struct em_buffer	*txbuf;
3364
3365	INIT_DEBUGOUT("em_free_transmit_buffers: begin");
3366
3367	if (txr->tx_buffers == NULL)
3368		return;
3369
3370	for (int i = 0; i < adapter->num_tx_desc; i++) {
3371		txbuf = &txr->tx_buffers[i];
3372		if (txbuf->m_head != NULL) {
3373			bus_dmamap_sync(txr->txtag, txbuf->map,
3374			    BUS_DMASYNC_POSTWRITE);
3375			bus_dmamap_unload(txr->txtag,
3376			    txbuf->map);
3377			m_freem(txbuf->m_head);
3378			txbuf->m_head = NULL;
3379			if (txbuf->map != NULL) {
3380				bus_dmamap_destroy(txr->txtag,
3381				    txbuf->map);
3382				txbuf->map = NULL;
3383			}
3384		} else if (txbuf->map != NULL) {
3385			bus_dmamap_unload(txr->txtag,
3386			    txbuf->map);
3387			bus_dmamap_destroy(txr->txtag,
3388			    txbuf->map);
3389			txbuf->map = NULL;
3390		}
3391	}
3392#if __FreeBSD_version >= 800000
3393	if (txr->br != NULL)
3394		buf_ring_free(txr->br, M_DEVBUF);
3395#endif
3396	if (txr->tx_buffers != NULL) {
3397		free(txr->tx_buffers, M_DEVBUF);
3398		txr->tx_buffers = NULL;
3399	}
3400	if (txr->txtag != NULL) {
3401		bus_dma_tag_destroy(txr->txtag);
3402		txr->txtag = NULL;
3403	}
3404	return;
3405}
3406
3407
3408/*********************************************************************
3409 *  The offload context is protocol specific (TCP/UDP) and thus
3410 *  only needs to be set when the protocol changes. A context
3411 *  change is a potential performance detriment, however, and
3412 *  might be better just disabled. The reason arises in the way
3413 *  the controller pipelines requests from the Tx data DMA: up to
3414 *  four requests can be pipelined, and they may belong to the
3415 *  same packet or to multiple packets. However, all requests for
3416 *  one packet are issued before a request is issued for a
3417 *  subsequent packet, and if a request for the next packet
3418 *  requires a context change, that request will be stalled
3419 *  until the previous request completes. Setting up a new
3420 *  context thus effectively disables pipelined Tx data DMA,
3421 *  which in turn greatly slows down the sending of small
3422 *  frames.
3423 **********************************************************************/
3424static void
3425em_transmit_checksum_setup(struct tx_ring *txr, struct mbuf *mp, int ip_off,
3426    struct ip *ip, u32 *txd_upper, u32 *txd_lower)
3427{
3428	struct adapter			*adapter = txr->adapter;
3429	struct e1000_context_desc	*TXD = NULL;
3430	struct em_buffer		*tx_buffer;
3431	int				cur, hdr_len;
3432	u32				cmd = 0;
3433	u16				offload = 0;
3434	u8				ipcso, ipcss, tucso, tucss;
3435
3436	ipcss = ipcso = tucss = tucso = 0;
3437	hdr_len = ip_off + (ip->ip_hl << 2);
3438	cur = txr->next_avail_desc;
3439
3440	/* Setup of IP header checksum. */
3441	if (mp->m_pkthdr.csum_flags & CSUM_IP) {
3442		*txd_upper |= E1000_TXD_POPTS_IXSM << 8;
3443		offload |= CSUM_IP;
3444		ipcss = ip_off;
3445		ipcso = ip_off + offsetof(struct ip, ip_sum);
3446		/*
3447		 * Start offset for header checksum calculation.
3448		 * End offset for header checksum calculation.
3449		 * Offset of place to put the checksum.
3450		 */
3451		TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
3452		TXD->lower_setup.ip_fields.ipcss = ipcss;
3453		TXD->lower_setup.ip_fields.ipcse = htole16(hdr_len);
3454		TXD->lower_setup.ip_fields.ipcso = ipcso;
3455		cmd |= E1000_TXD_CMD_IP;
3456	}
3457
3458	if (mp->m_pkthdr.csum_flags & CSUM_TCP) {
3459 		*txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
3460 		*txd_upper |= E1000_TXD_POPTS_TXSM << 8;
3461 		offload |= CSUM_TCP;
3462 		tucss = hdr_len;
3463 		tucso = hdr_len + offsetof(struct tcphdr, th_sum);
3464 		/*
3465 		 * Setting up a new checksum offload context for every frame
3466 		 * takes a lot of processing time for the hardware. This also
3467 		 * reduces performance a lot for small frames, so avoid it
3468 		 * if the driver can reuse a previously configured checksum
3469 		 * offload context.
3470 		 */
3471 		if (txr->last_hw_offload == offload) {
3472 			if (offload & CSUM_IP) {
3473 				if (txr->last_hw_ipcss == ipcss &&
3474 				    txr->last_hw_ipcso == ipcso &&
3475 				    txr->last_hw_tucss == tucss &&
3476 				    txr->last_hw_tucso == tucso)
3477 					return;
3478 			} else {
3479 				if (txr->last_hw_tucss == tucss &&
3480 				    txr->last_hw_tucso == tucso)
3481 					return;
3482 			}
3483  		}
3484 		txr->last_hw_offload = offload;
3485 		txr->last_hw_tucss = tucss;
3486 		txr->last_hw_tucso = tucso;
3487 		/*
3488 		 * Start offset for payload checksum calculation.
3489 		 * End offset for payload checksum calculation.
3490 		 * Offset of place to put the checksum.
3491 		 */
3492		TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
3493 		TXD->upper_setup.tcp_fields.tucss = hdr_len;
3494 		TXD->upper_setup.tcp_fields.tucse = htole16(0);
3495 		TXD->upper_setup.tcp_fields.tucso = tucso;
3496 		cmd |= E1000_TXD_CMD_TCP;
3497 	} else if (mp->m_pkthdr.csum_flags & CSUM_UDP) {
3498 		*txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
3499 		*txd_upper |= E1000_TXD_POPTS_TXSM << 8;
3500 		tucss = hdr_len;
3501 		tucso = hdr_len + offsetof(struct udphdr, uh_sum);
3502 		/*
3503 		 * Setting up a new checksum offload context for every frame
3504 		 * takes a lot of processing time in hardware. It also
3505 		 * hurts performance for small frames, so avoid it if the
3506 		 * driver can reuse the previously configured checksum
3507 		 * offload context.
3508 		 */
3509 		if (txr->last_hw_offload == offload) {
3510 			if (offload & CSUM_IP) {
3511 				if (txr->last_hw_ipcss == ipcss &&
3512 				    txr->last_hw_ipcso == ipcso &&
3513 				    txr->last_hw_tucss == tucss &&
3514 				    txr->last_hw_tucso == tucso)
3515 					return;
3516 			} else {
3517 				if (txr->last_hw_tucss == tucss &&
3518 				    txr->last_hw_tucso == tucso)
3519 					return;
3520 			}
3521 		}
3522 		txr->last_hw_offload = offload;
3523 		txr->last_hw_tucss = tucss;
3524 		txr->last_hw_tucso = tucso;
3525 		/*
3526 		 * Start offset for header checksum calculation.
3527 		 * End offset for header checksum calculation.
3528 		 * Offset of place to put the checksum.
3529 		 */
3530		TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
3531 		TXD->upper_setup.tcp_fields.tucss = tucss;
3532 		TXD->upper_setup.tcp_fields.tucse = htole16(0);
3533 		TXD->upper_setup.tcp_fields.tucso = tucso;
3534  	}
3535
3536 	if (offload & CSUM_IP) {
3537 		txr->last_hw_ipcss = ipcss;
3538 		txr->last_hw_ipcso = ipcso;
3539  	}
3540
3541	TXD->tcp_seg_setup.data = htole32(0);
3542	TXD->cmd_and_length =
3543	    htole32(adapter->txd_cmd | E1000_TXD_CMD_DEXT | cmd);
3544	tx_buffer = &txr->tx_buffers[cur];
3545	tx_buffer->m_head = NULL;
3546	tx_buffer->next_eop = -1;
3547
3548	if (++cur == adapter->num_tx_desc)
3549		cur = 0;
3550
3551	txr->tx_avail--;
3552	txr->next_avail_desc = cur;
3553}
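
/*
 * Illustrative caller-side sketch (an assumption-laden example, not a
 * verbatim excerpt of this driver's transmit path): the transmit routine
 * derives ip_off from the link-layer header and only calls
 * em_transmit_checksum_setup() when a checksum offload flag is present.
 */
#if 0	/* example only */
	u32 txd_upper = 0, txd_lower = 0;

	if (mp->m_pkthdr.csum_flags & (CSUM_IP | CSUM_TCP | CSUM_UDP)) {
		int ip_off = ETHER_HDR_LEN; /* + ETHER_VLAN_ENCAP_LEN if tagged */
		struct ip *ip = (struct ip *)(mtod(mp, caddr_t) + ip_off);

		em_transmit_checksum_setup(txr, mp, ip_off, ip,
		    &txd_upper, &txd_lower);
	}
#endif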
3554
3555
3556/**********************************************************************
3557 *
3558 *  Setup work for hardware segmentation offload (TSO)
3559 *
3560 **********************************************************************/
3561static void
3562em_tso_setup(struct tx_ring *txr, struct mbuf *mp, int ip_off,
3563    struct ip *ip, struct tcphdr *tp, u32 *txd_upper, u32 *txd_lower)
3564{
3565	struct adapter			*adapter = txr->adapter;
3566	struct e1000_context_desc	*TXD;
3567	struct em_buffer		*tx_buffer;
3568	int cur, hdr_len;
3569
3570	/*
3571	 * In theory we can use the same TSO context if and only if
3572	 * the frame is the same type (IP/TCP) and has the same MSS. However,
3573	 * checking whether a frame has the same IP/TCP structure is
3574	 * difficult, so just ignore that and always establish a
3575	 * new TSO context.
3576	 */
3577	hdr_len = ip_off + (ip->ip_hl << 2) + (tp->th_off << 2);
3578	*txd_lower = (E1000_TXD_CMD_DEXT |	/* Extended descr type */
3579		      E1000_TXD_DTYP_D |	/* Data descr type */
3580		      E1000_TXD_CMD_TSE);	/* Do TSE on this packet */
3581
3582	/* IP and/or TCP header checksum calculation and insertion. */
3583	*txd_upper = (E1000_TXD_POPTS_IXSM | E1000_TXD_POPTS_TXSM) << 8;
3584
3585	cur = txr->next_avail_desc;
3586	tx_buffer = &txr->tx_buffers[cur];
3587	TXD = (struct e1000_context_desc *) &txr->tx_base[cur];
3588
3589	/*
3590	 * Start offset for header checksum calculation.
3591	 * End offset for header checksum calculation.
3592	 * Offset of place to put the checksum.
3593	 */
3594	TXD->lower_setup.ip_fields.ipcss = ip_off;
3595	TXD->lower_setup.ip_fields.ipcse =
3596	    htole16(ip_off + (ip->ip_hl << 2) - 1);
3597	TXD->lower_setup.ip_fields.ipcso = ip_off + offsetof(struct ip, ip_sum);
3598	/*
3599	 * Start offset for payload checksum calculation.
3600	 * End offset for payload checksum calculation.
3601	 * Offset of place to put the checksum.
3602	 */
3603	TXD->upper_setup.tcp_fields.tucss = ip_off + (ip->ip_hl << 2);
3604	TXD->upper_setup.tcp_fields.tucse = 0;
3605	TXD->upper_setup.tcp_fields.tucso =
3606	    ip_off + (ip->ip_hl << 2) + offsetof(struct tcphdr, th_sum);
3607	/*
3608	 * Payload size per packet w/o any headers.
3609	 * Length of all headers up to payload.
3610	 */
3611	TXD->tcp_seg_setup.fields.mss = htole16(mp->m_pkthdr.tso_segsz);
3612	TXD->tcp_seg_setup.fields.hdr_len = hdr_len;
3613
3614	TXD->cmd_and_length = htole32(adapter->txd_cmd |
3615				E1000_TXD_CMD_DEXT |	/* Extended descr */
3616				E1000_TXD_CMD_TSE |	/* TSE context */
3617				E1000_TXD_CMD_IP |	/* Do IP csum */
3618				E1000_TXD_CMD_TCP |	/* Do TCP checksum */
3619				(mp->m_pkthdr.len - (hdr_len))); /* Total len */
3620
3621	tx_buffer->m_head = NULL;
3622	tx_buffer->next_eop = -1;
3623
3624	if (++cur == adapter->num_tx_desc)
3625		cur = 0;
3626
3627	txr->tx_avail--;
3628	txr->next_avail_desc = cur;
3629	txr->tx_tso = TRUE;
3630}
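
/*
 * Worked example of the arithmetic above (illustrative numbers): with
 * ip_off = 14, a 20-byte IP header (ip_hl = 5) and a 20-byte TCP header
 * (th_off = 5), hdr_len = 14 + 20 + 20 = 54.  For a frame of
 * m_pkthdr.len = 9054 with tso_segsz (MSS) = 1448, the context describes
 * 9000 payload bytes, which the hardware emits as howmany(9000, 1448) = 7
 * wire segments, replicating the 54-byte header and fixing up the IP/TCP
 * checksums on each.
 */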
3631
3632
3633/**********************************************************************
3634 *
3635 *  Examine each tx_buffer in the used queue. If the hardware is done
3636 *  processing the packet then free associated resources. The
3637 *  tx_buffer is put back on the free queue.
3638 *
3639 **********************************************************************/
3640static bool
3641em_txeof(struct tx_ring *txr)
3642{
3643	struct adapter	*adapter = txr->adapter;
3644        int first, last, done, processed;
3645        struct em_buffer *tx_buffer;
3646        struct e1000_tx_desc   *tx_desc, *eop_desc;
3647	struct ifnet   *ifp = adapter->ifp;
3648
3649	EM_TX_LOCK_ASSERT(txr);
3650
3651	/* No work, make sure watchdog is off */
3652        if (txr->tx_avail == adapter->num_tx_desc) {
3653		txr->queue_status = EM_QUEUE_IDLE;
3654                return (FALSE);
3655	}
3656
3657	processed = 0;
3658        first = txr->next_to_clean;
3659        tx_desc = &txr->tx_base[first];
3660        tx_buffer = &txr->tx_buffers[first];
3661	last = tx_buffer->next_eop;
3662        eop_desc = &txr->tx_base[last];
3663
3664	/*
3665	 * What this does is get the index of the
3666	 * first descriptor AFTER the EOP of the
3667	 * first packet, that way we can do the
3668	 * simple comparison on the inner while loop.
3669	 */
3670	if (++last == adapter->num_tx_desc)
3671 		last = 0;
3672	done = last;
3673
3674        bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3675            BUS_DMASYNC_POSTREAD);
3676
3677        while (eop_desc->upper.fields.status & E1000_TXD_STAT_DD) {
3678		/* We clean the range of the packet */
3679		while (first != done) {
3680                	tx_desc->upper.data = 0;
3681                	tx_desc->lower.data = 0;
3682                	tx_desc->buffer_addr = 0;
3683                	++txr->tx_avail;
3684			++processed;
3685
3686			if (tx_buffer->m_head) {
3687				bus_dmamap_sync(txr->txtag,
3688				    tx_buffer->map,
3689				    BUS_DMASYNC_POSTWRITE);
3690				bus_dmamap_unload(txr->txtag,
3691				    tx_buffer->map);
3692                        	m_freem(tx_buffer->m_head);
3693                        	tx_buffer->m_head = NULL;
3694                	}
3695			tx_buffer->next_eop = -1;
3696			txr->watchdog_time = ticks;
3697
3698	                if (++first == adapter->num_tx_desc)
3699				first = 0;
3700
3701	                tx_buffer = &txr->tx_buffers[first];
3702			tx_desc = &txr->tx_base[first];
3703		}
3704		++ifp->if_opackets;
3705		/* See if we can continue to the next packet */
3706		last = tx_buffer->next_eop;
3707		if (last != -1) {
3708        		eop_desc = &txr->tx_base[last];
3709			/* Get new done point */
3710			if (++last == adapter->num_tx_desc) last = 0;
3711			done = last;
3712		} else
3713			break;
3714        }
3715        bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3716            BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3717
3718        txr->next_to_clean = first;
3719
3720	/*
3721	** Watchdog calculation: we know there's
3722	** work outstanding or the first return
3723	** would have been taken, so nothing processed
3724	** for too long indicates a hang. The local timer
3725	** will examine this and do a reset if needed.
3726	*/
3727	if ((!processed) && ((ticks - txr->watchdog_time) > EM_WATCHDOG))
3728		txr->queue_status = EM_QUEUE_HUNG;
3729
3730        /*
3731         * If we have enough room, clear IFF_DRV_OACTIVE
3732         * to tell the stack that it is OK to send packets.
3733         */
3734        if (txr->tx_avail > EM_TX_CLEANUP_THRESHOLD) {
3735                ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
3736		/* Disable watchdog if all clean */
3737                if (txr->tx_avail == adapter->num_tx_desc) {
3738			txr->queue_status = EM_QUEUE_IDLE;
3739			return (FALSE);
3740		}
3741        }
3742
3743	return (TRUE);
3744}
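
/*
 * A minimal sketch of the cleanup pattern above, reduced to its core
 * (hypothetical helper, illustration only): hardware sets the DD bit in
 * the EOP descriptor of each finished packet, so the driver frees whole
 * packets at a time and then follows next_eop forward to the next one.
 */
#if 0	/* example only; release_desc() is a hypothetical helper */
	while (eop_desc->upper.fields.status & E1000_TXD_STAT_DD) {
		while (first != done)		/* free the whole packet */
			first = release_desc(txr, first);
		if ((last = txr->tx_buffers[first].next_eop) == -1)
			break;			/* no further packet queued */
		eop_desc = &txr->tx_base[last];
		done = (last + 1) % adapter->num_tx_desc;
	}
#endif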
3745
3746
3747/*********************************************************************
3748 *
3749 *  Refresh RX descriptor mbufs from system mbuf buffer pool.
3750 *
3751 **********************************************************************/
3752static void
3753em_refresh_mbufs(struct rx_ring *rxr, int limit)
3754{
3755	struct adapter		*adapter = rxr->adapter;
3756	struct mbuf		*m;
3757	bus_dma_segment_t	segs[1];
3758	struct em_buffer	*rxbuf;
3759	int			i, error, nsegs, cleaned;
3760
3761	i = rxr->next_to_refresh;
3762	cleaned = -1;
3763	while (i != limit) {
3764		rxbuf = &rxr->rx_buffers[i];
3765		/*
3766		** Just skip entries with a buffer;
3767		** they can only be due to an error
3768		** and are to be reused.
3769		*/
3770		if (rxbuf->m_head != NULL)
3771			goto reuse;
3772		m = m_getjcl(M_DONTWAIT, MT_DATA,
3773		    M_PKTHDR, adapter->rx_mbuf_sz);
3774		/*
3775		** If we have a temporary resource shortage
3776		** that causes a failure, just abort refresh
3777		** for now, we will return to this point when
3778		** reinvoked from em_rxeof.
3779		*/
3780		if (m == NULL)
3781			goto update;
3782		m->m_len = m->m_pkthdr.len = adapter->rx_mbuf_sz;
3783
3784		/* Use bus_dma machinery to setup the memory mapping  */
3785		error = bus_dmamap_load_mbuf_sg(rxr->rxtag, rxbuf->map,
3786		    m, segs, &nsegs, BUS_DMA_NOWAIT);
3787		if (error != 0) {
3788			m_free(m);
3789			goto update;
3790		}
3791
3792		/* If nsegs is wrong then the stack is corrupt. */
3793		KASSERT(nsegs == 1, ("Too many segments returned!"));
3794
3795		bus_dmamap_sync(rxr->rxtag,
3796		    rxbuf->map, BUS_DMASYNC_PREREAD);
3797		rxbuf->m_head = m;
3798		rxr->rx_base[i].buffer_addr = htole64(segs[0].ds_addr);
3799reuse:
3800		cleaned = i;
3801		/* Calculate next index */
3802		if (++i == adapter->num_rx_desc)
3803			i = 0;
3804		/* This is the work marker for refresh */
3805		rxr->next_to_refresh = i;
3806	}
3807update:
3808	/*
3809	** Update the tail pointer only if,
3810	** and only as far as, we have refreshed.
3811	*/
3812	if (cleaned != -1) /* Update tail index */
3813		E1000_WRITE_REG(&adapter->hw,
3814		    E1000_RDT(rxr->me), cleaned);
3815
3816	return;
3817}
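
/*
 * Note on the RDT write above: the tail tells the hardware the last
 * descriptor the driver has handed over, and the chip stops fetching when
 * head reaches tail.  Advancing RDT past descriptors that were not
 * actually refreshed would let the chip DMA into buffers we do not own,
 * which is why the tail only moves as far as 'cleaned'.
 */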
3818
3819
3820/*********************************************************************
3821 *
3822 *  Allocate memory for rx_buffer structures. Since we use one
3823 *  rx_buffer per received packet, the maximum number of rx_buffer's
3824 *  that we'll need is equal to the number of receive descriptors
3825 *  that we've allocated.
3826 *
3827 **********************************************************************/
3828static int
3829em_allocate_receive_buffers(struct rx_ring *rxr)
3830{
3831	struct adapter		*adapter = rxr->adapter;
3832	device_t		dev = adapter->dev;
3833	struct em_buffer	*rxbuf;
3834	int			error;
3835
3836	rxr->rx_buffers = malloc(sizeof(struct em_buffer) *
3837	    adapter->num_rx_desc, M_DEVBUF, M_NOWAIT | M_ZERO);
3838	if (rxr->rx_buffers == NULL) {
3839		device_printf(dev, "Unable to allocate rx_buffer memory\n");
3840		return (ENOMEM);
3841	}
3842
3843	error = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */
3844				1, 0,			/* alignment, bounds */
3845				BUS_SPACE_MAXADDR,	/* lowaddr */
3846				BUS_SPACE_MAXADDR,	/* highaddr */
3847				NULL, NULL,		/* filter, filterarg */
3848				MJUM9BYTES,		/* maxsize */
3849				1,			/* nsegments */
3850				MJUM9BYTES,		/* maxsegsize */
3851				0,			/* flags */
3852				NULL,			/* lockfunc */
3853				NULL,			/* lockarg */
3854				&rxr->rxtag);
3855	if (error) {
3856		device_printf(dev, "%s: bus_dma_tag_create failed %d\n",
3857		    __func__, error);
3858		goto fail;
3859	}
3860
3861	rxbuf = rxr->rx_buffers;
3862	for (int i = 0; i < adapter->num_rx_desc; i++, rxbuf++) {
3864		error = bus_dmamap_create(rxr->rxtag, BUS_DMA_NOWAIT,
3865		    &rxbuf->map);
3866		if (error) {
3867			device_printf(dev, "%s: bus_dmamap_create failed: %d\n",
3868			    __func__, error);
3869			goto fail;
3870		}
3871	}
3872
3873	return (0);
3874
3875fail:
3876	em_free_receive_structures(adapter);
3877	return (error);
3878}
3879
3880
3881/*********************************************************************
3882 *
3883 *  Initialize a receive ring and its buffers.
3884 *
3885 **********************************************************************/
3886static int
3887em_setup_receive_ring(struct rx_ring *rxr)
3888{
3889	struct	adapter 	*adapter = rxr->adapter;
3890	struct em_buffer	*rxbuf;
3891	bus_dma_segment_t	seg[1];
3892	int			rsize, nsegs, error;
3893
3894
3895	/* Clear the ring contents */
3896	EM_RX_LOCK(rxr);
3897	rsize = roundup2(adapter->num_rx_desc *
3898	    sizeof(struct e1000_rx_desc), EM_DBA_ALIGN);
3899	bzero((void *)rxr->rx_base, rsize);
3900
3901	/*
3902	** Free current RX buffer structs and their mbufs
3903	*/
3904	for (int i = 0; i < adapter->num_rx_desc; i++) {
3905		rxbuf = &rxr->rx_buffers[i];
3906		if (rxbuf->m_head != NULL) {
3907			bus_dmamap_sync(rxr->rxtag, rxbuf->map,
3908			    BUS_DMASYNC_POSTREAD);
3909			bus_dmamap_unload(rxr->rxtag, rxbuf->map);
3910			m_freem(rxbuf->m_head);
3911		}
3912	}
3913
3914	/* Now replenish the mbufs */
3915	for (int j = 0; j != adapter->num_rx_desc; ++j) {
3916
3917		rxbuf = &rxr->rx_buffers[j];
3918		rxbuf->m_head = m_getjcl(M_DONTWAIT, MT_DATA,
3919		    M_PKTHDR, adapter->rx_mbuf_sz);
3920		if (rxbuf->m_head == NULL) {
			EM_RX_UNLOCK(rxr);
3921			return (ENOBUFS);
		}
3922		rxbuf->m_head->m_len = adapter->rx_mbuf_sz;
3923		rxbuf->m_head->m_flags &= ~M_HASFCS; /* we strip it */
3924		rxbuf->m_head->m_pkthdr.len = adapter->rx_mbuf_sz;
3925
3926		/* Get the memory mapping */
3927		error = bus_dmamap_load_mbuf_sg(rxr->rxtag,
3928		    rxbuf->map, rxbuf->m_head, seg,
3929		    &nsegs, BUS_DMA_NOWAIT);
3930		if (error != 0) {
3931			m_freem(rxbuf->m_head);
3932			rxbuf->m_head = NULL;
			EM_RX_UNLOCK(rxr);
3933			return (error);
3934		}
3935		bus_dmamap_sync(rxr->rxtag,
3936		    rxbuf->map, BUS_DMASYNC_PREREAD);
3937
3938		/* Update descriptor */
3939		rxr->rx_base[j].buffer_addr = htole64(seg[0].ds_addr);
3940	}
3941
3942
3943	/* Setup our descriptor indices */
3944	rxr->next_to_check = 0;
3945	rxr->next_to_refresh = 0;
3946
3947	bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
3948	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3949
3950	EM_RX_UNLOCK(rxr);
3951	return (0);
3952}
3953
3954/*********************************************************************
3955 *
3956 *  Initialize all receive rings.
3957 *
3958 **********************************************************************/
3959static int
3960em_setup_receive_structures(struct adapter *adapter)
3961{
3962	struct rx_ring *rxr = adapter->rx_rings;
3963	int j;
3964
3965	for (j = 0; j < adapter->num_queues; j++, rxr++)
3966		if (em_setup_receive_ring(rxr))
3967			goto fail;
3968
3969	return (0);
3970fail:
3971	/*
3972	 * Free RX buffers allocated so far, we will only handle
3973	 * the rings that completed, the failing case will have
3974	 * cleaned up for itself. 'j' failed, so its the terminus.
3975	 */
3976	for (int i = 0; i < j; ++i) {
3977		rxr = &adapter->rx_rings[i];
3978		for (int n = 0; n < adapter->num_rx_desc; n++) {
3979			struct em_buffer *rxbuf;
3980			rxbuf = &rxr->rx_buffers[n];
3981			if (rxbuf->m_head != NULL) {
3982				bus_dmamap_sync(rxr->rxtag, rxbuf->map,
3983			  	  BUS_DMASYNC_POSTREAD);
3984				bus_dmamap_unload(rxr->rxtag, rxbuf->map);
3985				m_freem(rxbuf->m_head);
3986				rxbuf->m_head = NULL;
3987			}
3988		}
3989	}
3990
3991	return (ENOBUFS);
3992}
3993
3994/*********************************************************************
3995 *
3996 *  Free all receive rings.
3997 *
3998 **********************************************************************/
3999static void
4000em_free_receive_structures(struct adapter *adapter)
4001{
4002	struct rx_ring *rxr = adapter->rx_rings;
4003
4004	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4005		em_free_receive_buffers(rxr);
4006		/* Free the ring memory as well */
4007		em_dma_free(adapter, &rxr->rxdma);
4008		EM_RX_LOCK_DESTROY(rxr);
4009	}
4010
4011	free(adapter->rx_rings, M_DEVBUF);
4012}
4013
4014
4015/*********************************************************************
4016 *
4017 *  Free receive ring data structures
4018 *
4019 **********************************************************************/
4020static void
4021em_free_receive_buffers(struct rx_ring *rxr)
4022{
4023	struct adapter		*adapter = rxr->adapter;
4024	struct em_buffer	*rxbuf = NULL;
4025
4026	INIT_DEBUGOUT("free_receive_buffers: begin");
4027
4028	if (rxr->rx_buffers != NULL) {
4029		for (int i = 0; i < adapter->num_rx_desc; i++) {
4030			rxbuf = &rxr->rx_buffers[i];
4031			if (rxbuf->map != NULL) {
4032				bus_dmamap_sync(rxr->rxtag, rxbuf->map,
4033				    BUS_DMASYNC_POSTREAD);
4034				bus_dmamap_unload(rxr->rxtag, rxbuf->map);
4035				bus_dmamap_destroy(rxr->rxtag, rxbuf->map);
4036			}
4037			if (rxbuf->m_head != NULL) {
4038				m_freem(rxbuf->m_head);
4039				rxbuf->m_head = NULL;
4040			}
4041		}
4042		free(rxr->rx_buffers, M_DEVBUF);
4043		rxr->rx_buffers = NULL;
4044	}
4045
4046	if (rxr->rxtag != NULL) {
4047		bus_dma_tag_destroy(rxr->rxtag);
4048		rxr->rxtag = NULL;
4049	}
4050
4051	return;
4052}
4053
4054
4055/*********************************************************************
4056 *
4057 *  Enable receive unit.
4058 *
4059 **********************************************************************/
4060#define MAX_INTS_PER_SEC	8000
4061#define DEFAULT_ITR	     1000000000/(MAX_INTS_PER_SEC * 256)
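/*
 * The ITR register counts in 256ns units, so the value above works out to
 * 1000000000 / (8000 * 256) = 488 units, i.e. one interrupt every
 * 488 * 256ns ~= 125us, capping the rate at roughly MAX_INTS_PER_SEC.
 */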
4062
4063static void
4064em_initialize_receive_unit(struct adapter *adapter)
4065{
4066	struct rx_ring	*rxr = adapter->rx_rings;
4067	struct ifnet	*ifp = adapter->ifp;
4068	struct e1000_hw	*hw = &adapter->hw;
4069	u64	bus_addr;
4070	u32	rctl, rxcsum;
4071
4072	INIT_DEBUGOUT("em_initialize_receive_units: begin");
4073
4074	/*
4075	 * Make sure receives are disabled while setting
4076	 * up the descriptor ring
4077	 */
4078	rctl = E1000_READ_REG(hw, E1000_RCTL);
4079	E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
4080
4081	E1000_WRITE_REG(&adapter->hw, E1000_RADV,
4082	    adapter->rx_abs_int_delay.value);
4083	/*
4084	 * Set the interrupt throttling rate. Value is calculated
4085	 * as DEFAULT_ITR = 1/(MAX_INTS_PER_SEC * 256ns)
4086	 */
4087	E1000_WRITE_REG(hw, E1000_ITR, DEFAULT_ITR);
4088
4089	/*
4090	** When using MSIX interrupts we need to throttle
4091	** using the EITR register (82574 only)
4092	*/
4093	if (hw->mac.type == e1000_82574)
4094		for (int i = 0; i < 4; i++)
4095			E1000_WRITE_REG(hw, E1000_EITR_82574(i),
4096			    DEFAULT_ITR);
4097
4098	/* Disable accelerated acknowledgement */
4099	if (adapter->hw.mac.type == e1000_82574)
4100		E1000_WRITE_REG(hw, E1000_RFCTL, E1000_RFCTL_ACK_DIS);
4101
4102	if (ifp->if_capenable & IFCAP_RXCSUM) {
4103		rxcsum = E1000_READ_REG(hw, E1000_RXCSUM);
4104		rxcsum |= (E1000_RXCSUM_IPOFL | E1000_RXCSUM_TUOFL);
4105		E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum);
4106	}
4107
4108	/*
4109	** XXX TEMPORARY WORKAROUND: on some systems with 82573
4110	** long latencies are observed, like Lenovo X60. This
4111	** change eliminates the problem, but since having positive
4112	** values in RDTR is a known source of problems on other
4113	** platforms another solution is being sought.
4114	*/
4115	if (hw->mac.type == e1000_82573)
4116		E1000_WRITE_REG(hw, E1000_RDTR, 0x20);
4117
4118	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4119		/* Setup the Base and Length of the Rx Descriptor Ring */
4120		bus_addr = rxr->rxdma.dma_paddr;
4121		E1000_WRITE_REG(hw, E1000_RDLEN(i),
4122		    adapter->num_rx_desc * sizeof(struct e1000_rx_desc));
4123		E1000_WRITE_REG(hw, E1000_RDBAH(i), (u32)(bus_addr >> 32));
4124		E1000_WRITE_REG(hw, E1000_RDBAL(i), (u32)bus_addr);
4125		/* Setup the Head and Tail Descriptor Pointers */
4126		E1000_WRITE_REG(hw, E1000_RDH(i), 0);
4127		E1000_WRITE_REG(hw, E1000_RDT(i), adapter->num_rx_desc - 1);
4128	}
4129
4130	/* Set early receive threshold on appropriate hw */
4131	if (((adapter->hw.mac.type == e1000_ich9lan) ||
4132	    (adapter->hw.mac.type == e1000_pch2lan) ||
4133	    (adapter->hw.mac.type == e1000_ich10lan)) &&
4134	    (ifp->if_mtu > ETHERMTU)) {
4135		u32 rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(0));
4136		E1000_WRITE_REG(hw, E1000_RXDCTL(0), rxdctl | 3);
4137		E1000_WRITE_REG(hw, E1000_ERT, 0x100 | (1 << 13));
4138	}
4139
4140	if (adapter->hw.mac.type == e1000_pch2lan) {
4141		if (ifp->if_mtu > ETHERMTU)
4142			e1000_lv_jumbo_workaround_ich8lan(hw, TRUE);
4143		else
4144			e1000_lv_jumbo_workaround_ich8lan(hw, FALSE);
4145	}
4146
4147	/* Setup the Receive Control Register */
4148	rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
4149	rctl |= E1000_RCTL_EN | E1000_RCTL_BAM |
4150	    E1000_RCTL_LBM_NO | E1000_RCTL_RDMTS_HALF |
4151	    (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
4152
4153        /* Strip the CRC */
4154        rctl |= E1000_RCTL_SECRC;
4155
4156        /* Make sure VLAN Filters are off */
4157        rctl &= ~E1000_RCTL_VFE;
4158	rctl &= ~E1000_RCTL_SBP;
4159
4160	if (adapter->rx_mbuf_sz == MCLBYTES)
4161		rctl |= E1000_RCTL_SZ_2048;
4162	else if (adapter->rx_mbuf_sz == MJUMPAGESIZE)
4163		rctl |= E1000_RCTL_SZ_4096 | E1000_RCTL_BSEX;
4164	else if (adapter->rx_mbuf_sz > MJUMPAGESIZE)
4165		rctl |= E1000_RCTL_SZ_8192 | E1000_RCTL_BSEX;
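	/*
	 * Example of the mapping above (assuming the init path picked the
	 * cluster size from the MTU): a 9000-byte jumbo MTU presumably
	 * selects MJUM9BYTES clusters, which lands in the last case and
	 * programs an 8192-byte hardware buffer with BSEX set; larger
	 * frames then simply span multiple descriptors.
	 */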
4166
4167	if (ifp->if_mtu > ETHERMTU)
4168		rctl |= E1000_RCTL_LPE;
4169	else
4170		rctl &= ~E1000_RCTL_LPE;
4171
4172	/* Write out the settings */
4173	E1000_WRITE_REG(hw, E1000_RCTL, rctl);
4174
4175	return;
4176}
4177
4178
4179/*********************************************************************
4180 *
4181 *  This routine executes in interrupt context. It replenishes
4182 *  the mbufs in the descriptor ring and sends data which has been
4183 *  DMA'ed into host memory to the upper layer.
4184 *
4185 *  We loop at most count times if count is > 0, or until done if
4186 *  count < 0.
4187 *
4188 *  For polling we also now return the number of cleaned packets.
4189 *********************************************************************/
4190static bool
4191em_rxeof(struct rx_ring *rxr, int count, int *done)
4192{
4193	struct adapter		*adapter = rxr->adapter;
4194	struct ifnet		*ifp = adapter->ifp;
4195	struct mbuf		*mp, *sendmp;
4196	u8			status = 0;
4197	u16 			len;
4198	int			i, processed, rxdone = 0;
4199	bool			eop;
4200	struct e1000_rx_desc	*cur;
4201
4202	EM_RX_LOCK(rxr);
4203
4204	for (i = rxr->next_to_check, processed = 0; count != 0;) {
4205
4206		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
4207			break;
4208
4209		bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4210		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
4211
4212		cur = &rxr->rx_base[i];
4213		status = cur->status;
4214		mp = sendmp = NULL;
4215
4216		if ((status & E1000_RXD_STAT_DD) == 0)
4217			break;
4218
4219		len = le16toh(cur->length);
4220		eop = (status & E1000_RXD_STAT_EOP) != 0;
4221		count--;
4222
4223		if (((cur->errors & E1000_RXD_ERR_FRAME_ERR_MASK) == 0) &&
4224		    (rxr->discard == FALSE)) {
4225
4226			/* Assign correct length to the current fragment */
4227			mp = rxr->rx_buffers[i].m_head;
4228			mp->m_len = len;
4229
4230			/* Trigger for refresh */
4231			rxr->rx_buffers[i].m_head = NULL;
4232
4233			if (rxr->fmp == NULL) {
4234				mp->m_pkthdr.len = len;
4235				rxr->fmp = mp; /* Store the first mbuf */
4236				rxr->lmp = mp;
4237			} else {
4238				/* Chain mbuf's together */
4239				mp->m_flags &= ~M_PKTHDR;
4240				rxr->lmp->m_next = mp;
4241				rxr->lmp = rxr->lmp->m_next;
4242				rxr->fmp->m_pkthdr.len += len;
4243			}
4244
4245			if (eop) {
4246				rxr->fmp->m_pkthdr.rcvif = ifp;
4247				ifp->if_ipackets++;
4248				em_receive_checksum(cur, rxr->fmp);
4249#ifndef __NO_STRICT_ALIGNMENT
4250				if (adapter->max_frame_size >
4251				    (MCLBYTES - ETHER_ALIGN) &&
4252				    em_fixup_rx(rxr) != 0)
4253					goto skip;
4254#endif
4255				if (status & E1000_RXD_STAT_VP) {
4256					rxr->fmp->m_pkthdr.ether_vtag =
4257					    (le16toh(cur->special) &
4258					    E1000_RXD_SPC_VLAN_MASK);
4259					rxr->fmp->m_flags |= M_VLANTAG;
4260				}
4261#ifdef EM_MULTIQUEUE
4262				rxr->fmp->m_pkthdr.flowid = rxr->msix;
4263				rxr->fmp->m_flags |= M_FLOWID;
4264#endif
4265#ifndef __NO_STRICT_ALIGNMENT
4266skip:
4267#endif
4268				sendmp = rxr->fmp;
4269				rxr->fmp = NULL;
4270				rxr->lmp = NULL;
4271			}
4272		} else {
4273			ifp->if_ierrors++;
4274			++rxr->rx_discarded;
4275			if (!eop) /* Catch subsequent segs */
4276				rxr->discard = TRUE;
4277			else
4278				rxr->discard = FALSE;
4279			em_rx_discard(rxr, i);
4280			sendmp = NULL;
4281		}
4282
4283		/* Zero out the receive descriptors status. */
4284		cur->status = 0;
4285		++rxdone;	/* cumulative for POLL */
4286		++processed;
4287
4288		/* Advance our pointers to the next descriptor. */
4289		if (++i == adapter->num_rx_desc)
4290			i = 0;
4291
4292		/* Send to the stack */
4293		if (sendmp != NULL) {
4294			rxr->next_to_check = i;
4295			EM_RX_UNLOCK(rxr);
4296			(*ifp->if_input)(ifp, sendmp);
4297			EM_RX_LOCK(rxr);
4298			i = rxr->next_to_check;
4299		}
4300
4301		/* Only refresh mbufs every 8 descriptors */
4302		if (processed == 8) {
4303			em_refresh_mbufs(rxr, i);
4304			processed = 0;
4305		}
4306	}
4307
4308	/* Catch any remaining refresh work */
4309	if (processed != 0) {
4310		em_refresh_mbufs(rxr, i);
4311		processed = 0;
4312	}
4313
4314	rxr->next_to_check = i;
4315	if (done != NULL)
4316		*done = rxdone;
4317	EM_RX_UNLOCK(rxr);
4318
4319	return ((status & E1000_RXD_STAT_DD) ? TRUE : FALSE);
4320}
4321
4322static __inline void
4323em_rx_discard(struct rx_ring *rxr, int i)
4324{
4325	struct adapter		*adapter = rxr->adapter;
4326	struct em_buffer	*rbuf;
4327	struct mbuf		*m;
4328
4329	rbuf = &rxr->rx_buffers[i];
4330	/* Free any previous pieces */
4331	if (rxr->fmp != NULL) {
4332		rxr->fmp->m_flags |= M_PKTHDR;
4333		m_freem(rxr->fmp);
4334		rxr->fmp = NULL;
4335		rxr->lmp = NULL;
4336	}
4337
4338	/* Reset state, keep loaded DMA map and reuse */
4339	m = rbuf->m_head;
4340	m->m_len = m->m_pkthdr.len = adapter->rx_mbuf_sz;
4341	m->m_flags |= M_PKTHDR;
4342	m->m_data = m->m_ext.ext_buf;
4343	m->m_next = NULL;
4344
4345	return;
4346}
4347
4348#ifndef __NO_STRICT_ALIGNMENT
4349/*
4350 * When jumbo frames are enabled we must realign the entire payload on
4351 * architectures with strict alignment. This is a serious design mistake of
4352 * the 8254x as it nullifies the benefit of DMA. The 8254x only allows RX
4353 * buffer sizes of 2048/4096/8192/16384; what we really want is
4354 * 2048 - ETHER_ALIGN so the payload lands aligned. On architectures without
4355 * strict alignment restrictions the 8254x still performs unaligned memory
4356 * accesses, which reduces performance too. To avoid copying an entire frame
4357 * just to realign it, we allocate a new mbuf and copy the ethernet header
4358 * into it. The new mbuf is prepended to the existing mbuf chain.
4359 *
4360 * Be aware, best performance of the 8254x is achieved only when jumbo
4361 * frames are not used at all on architectures with strict alignment.
4362 */
4363static int
4364em_fixup_rx(struct rx_ring *rxr)
4365{
4366	struct adapter *adapter = rxr->adapter;
4367	struct mbuf *m, *n;
4368	int error;
4369
4370	error = 0;
4371	m = rxr->fmp;
4372	if (m->m_len <= (MCLBYTES - ETHER_HDR_LEN)) {
4373		bcopy(m->m_data, m->m_data + ETHER_HDR_LEN, m->m_len);
4374		m->m_data += ETHER_HDR_LEN;
4375	} else {
4376		MGETHDR(n, M_DONTWAIT, MT_DATA);
4377		if (n != NULL) {
4378			bcopy(m->m_data, n->m_data, ETHER_HDR_LEN);
4379			m->m_data += ETHER_HDR_LEN;
4380			m->m_len -= ETHER_HDR_LEN;
4381			n->m_len = ETHER_HDR_LEN;
4382			M_MOVE_PKTHDR(n, m);
4383			n->m_next = m;
4384			rxr->fmp = n;
4385		} else {
4386			adapter->dropped_pkts++;
4387			m_freem(rxr->fmp);
4388			rxr->fmp = NULL;
4389			error = ENOMEM;
4390		}
4391	}
4392
4393	return (error);
4394}
4395#endif
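
/*
 * The alignment arithmetic behind em_fixup_rx(): the Ethernet header is
 * ETHER_HDR_LEN = 14 bytes, so with the frame DMA'ed to an aligned buffer
 * the IP header starts at offset 14, which is not 4-byte aligned.  Sliding
 * the frame forward by another 14 bytes (or splitting the header into its
 * own mbuf) puts the IP header at offset 28, a multiple of 4, satisfying
 * strict-alignment architectures.
 */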
4396
4397/*********************************************************************
4398 *
4399 *  Verify that the hardware indicated that the checksum is valid.
4400 *  Inform the stack about the status of the checksum so that the
4401 *  stack doesn't spend time re-verifying it.
4402 *
4403 *********************************************************************/
4404static void
4405em_receive_checksum(struct e1000_rx_desc *rx_desc, struct mbuf *mp)
4406{
4407	/* Ignore Checksum bit is set */
4408	if (rx_desc->status & E1000_RXD_STAT_IXSM) {
4409		mp->m_pkthdr.csum_flags = 0;
4410		return;
4411	}
4412
4413	if (rx_desc->status & E1000_RXD_STAT_IPCS) {
4414		/* Did it pass? */
4415		if (!(rx_desc->errors & E1000_RXD_ERR_IPE)) {
4416			/* IP Checksum Good */
4417			mp->m_pkthdr.csum_flags = CSUM_IP_CHECKED;
4418			mp->m_pkthdr.csum_flags |= CSUM_IP_VALID;
4419
4420		} else {
4421			mp->m_pkthdr.csum_flags = 0;
4422		}
4423	}
4424
4425	if (rx_desc->status & E1000_RXD_STAT_TCPCS) {
4426		/* Did it pass? */
4427		if (!(rx_desc->errors & E1000_RXD_ERR_TCPE)) {
4428			mp->m_pkthdr.csum_flags |=
4429			(CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
4430			mp->m_pkthdr.csum_data = htons(0xffff);
4431		}
4432	}
4433}
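
/*
 * Minimal sketch of how an upper layer consumes the flags set above
 * (illustrative; the real checks live in the TCP/IP stack, not here):
 * CSUM_DATA_VALID | CSUM_PSEUDO_HDR with csum_data == 0xffff lets the
 * stack skip its software checksum entirely.
 */
#if 0	/* example only */
	if ((m->m_pkthdr.csum_flags &
	    (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) ==
	    (CSUM_DATA_VALID | CSUM_PSEUDO_HDR) &&
	    m->m_pkthdr.csum_data == 0xffff) {
		/* hardware verified the checksum; nothing more to do */
	} else {
		/* fall back to a software check, e.g. in_cksum() */
	}
#endif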
4434
4435/*
4436 * This routine is run via a vlan
4437 * config EVENT
4438 */
4439static void
4440em_register_vlan(void *arg, struct ifnet *ifp, u16 vtag)
4441{
4442	struct adapter	*adapter = ifp->if_softc;
4443	u32		index, bit;
4444
4445	if (ifp->if_softc !=  arg)   /* Not our event */
4446		return;
4447
4448	if ((vtag == 0) || (vtag > 4095))       /* Invalid ID */
4449                return;
4450
4451	EM_CORE_LOCK(adapter);
4452	index = (vtag >> 5) & 0x7F;
4453	bit = vtag & 0x1F;
4454	adapter->shadow_vfta[index] |= (1 << bit);
4455	++adapter->num_vlans;
4456	/* Re-init to load the changes */
4457	if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
4458		em_init_locked(adapter);
4459	EM_CORE_UNLOCK(adapter);
4460}
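
/*
 * Worked example of the VFTA indexing above: for vtag 1234,
 * index = (1234 >> 5) & 0x7F = 38 and bit = 1234 & 0x1F = 18, so the tag
 * becomes bit 18 of shadow_vfta[38]; 128 words of 32 bits cover all 4096
 * possible VLAN IDs.
 */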
4461
4462/*
4463 * This routine is run via a vlan
4464 * unconfig EVENT
4465 */
4466static void
4467em_unregister_vlan(void *arg, struct ifnet *ifp, u16 vtag)
4468{
4469	struct adapter	*adapter = ifp->if_softc;
4470	u32		index, bit;
4471
4472	if (ifp->if_softc !=  arg)
4473		return;
4474
4475	if ((vtag == 0) || (vtag > 4095))       /* Invalid */
4476                return;
4477
4478	EM_CORE_LOCK(adapter);
4479	index = (vtag >> 5) & 0x7F;
4480	bit = vtag & 0x1F;
4481	adapter->shadow_vfta[index] &= ~(1 << bit);
4482	--adapter->num_vlans;
4483	/* Re-init to load the changes */
4484	if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
4485		em_init_locked(adapter);
4486	EM_CORE_UNLOCK(adapter);
4487}
4488
4489static void
4490em_setup_vlan_hw_support(struct adapter *adapter)
4491{
4492	struct e1000_hw *hw = &adapter->hw;
4493	u32             reg;
4494
4495	/*
4496	** We get here thru init_locked, meaning
4497	** a soft reset; this has already cleared
4498	** the VFTA and other state, so if no
4499	** vlans have been registered, do nothing.
4500	*/
4501	if (adapter->num_vlans == 0)
4502                return;
4503
4504	/*
4505	** A soft reset zeroes out the VFTA, so
4506	** we need to repopulate it now.
4507	*/
4508	for (int i = 0; i < EM_VFTA_SIZE; i++)
4509                if (adapter->shadow_vfta[i] != 0)
4510			E1000_WRITE_REG_ARRAY(hw, E1000_VFTA,
4511                            i, adapter->shadow_vfta[i]);
4512
4513	reg = E1000_READ_REG(hw, E1000_CTRL);
4514	reg |= E1000_CTRL_VME;
4515	E1000_WRITE_REG(hw, E1000_CTRL, reg);
4516
4517	/* Enable the Filter Table */
4518	reg = E1000_READ_REG(hw, E1000_RCTL);
4519	reg &= ~E1000_RCTL_CFIEN;
4520	reg |= E1000_RCTL_VFE;
4521	E1000_WRITE_REG(hw, E1000_RCTL, reg);
4522}
4523
4524static void
4525em_enable_intr(struct adapter *adapter)
4526{
4527	struct e1000_hw *hw = &adapter->hw;
4528	u32 ims_mask = IMS_ENABLE_MASK;
4529
4530	if (hw->mac.type == e1000_82574) {
4531		E1000_WRITE_REG(hw, EM_EIAC, EM_MSIX_MASK);
4532		ims_mask |= EM_MSIX_MASK;
4533	}
4534	E1000_WRITE_REG(hw, E1000_IMS, ims_mask);
4535}
4536
4537static void
4538em_disable_intr(struct adapter *adapter)
4539{
4540	struct e1000_hw *hw = &adapter->hw;
4541
4542	if (hw->mac.type == e1000_82574)
4543		E1000_WRITE_REG(hw, EM_EIAC, 0);
4544	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
4545}
4546
4547/*
4548 * Bit of a misnomer: what this really means is
4549 * to enable OS management of the system... aka
4550 * to disable special hardware management features
4551 */
4552static void
4553em_init_manageability(struct adapter *adapter)
4554{
4555	/* A shared code workaround */
4556#define E1000_82542_MANC2H E1000_MANC2H
4557	if (adapter->has_manage) {
4558		int manc2h = E1000_READ_REG(&adapter->hw, E1000_MANC2H);
4559		int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
4560
4561		/* disable hardware interception of ARP */
4562		manc &= ~(E1000_MANC_ARP_EN);
4563
4564                /* enable receiving management packets to the host */
4565		manc |= E1000_MANC_EN_MNG2HOST;
4566#define E1000_MNG2HOST_PORT_623 (1 << 5)
4567#define E1000_MNG2HOST_PORT_664 (1 << 6)
4568		manc2h |= E1000_MNG2HOST_PORT_623;
4569		manc2h |= E1000_MNG2HOST_PORT_664;
4570		E1000_WRITE_REG(&adapter->hw, E1000_MANC2H, manc2h);
4571		E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
4572	}
4573}
4574
4575/*
4576 * Give control back to hardware management
4577 * controller if there is one.
4578 */
4579static void
4580em_release_manageability(struct adapter *adapter)
4581{
4582	if (adapter->has_manage) {
4583		int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
4584
4585		/* re-enable hardware interception of ARP */
4586		manc |= E1000_MANC_ARP_EN;
4587		manc &= ~E1000_MANC_EN_MNG2HOST;
4588
4589		E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
4590	}
4591}
4592
4593/*
4594 * em_get_hw_control sets the {CTRL_EXT|FWSM}:DRV_LOAD bit.
4595 * For ASF and Pass Through versions of f/w this means
4596 * that the driver is loaded. For AMT version type f/w
4597 * this means that the network i/f is open.
4598 */
4599static void
4600em_get_hw_control(struct adapter *adapter)
4601{
4602	u32 ctrl_ext, swsm;
4603
4604	if (adapter->hw.mac.type == e1000_82573) {
4605		swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM);
4606		E1000_WRITE_REG(&adapter->hw, E1000_SWSM,
4607		    swsm | E1000_SWSM_DRV_LOAD);
4608		return;
4609	}
4610	/* else */
4611	ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4612	E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
4613	    ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
4614	return;
4615}
4616
4617/*
4618 * em_release_hw_control resets {CTRL_EXT|FWSM}:DRV_LOAD bit.
4619 * For ASF and Pass Through versions of f/w this means that
4620 * the driver is no longer loaded. For AMT versions of the
4621 * f/w this means that the network i/f is closed.
4622 */
4623static void
4624em_release_hw_control(struct adapter *adapter)
4625{
4626	u32 ctrl_ext, swsm;
4627
4628	if (!adapter->has_manage)
4629		return;
4630
4631	if (adapter->hw.mac.type == e1000_82573) {
4632		swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM);
4633		E1000_WRITE_REG(&adapter->hw, E1000_SWSM,
4634		    swsm & ~E1000_SWSM_DRV_LOAD);
4635		return;
4636	}
4637	/* else */
4638	ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4639	E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
4640	    ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
4641	return;
4642}
4643
4644static int
4645em_is_valid_ether_addr(u8 *addr)
4646{
4647	char zero_addr[6] = { 0, 0, 0, 0, 0, 0 };
4648
4649	if ((addr[0] & 1) || (!bcmp(addr, zero_addr, ETHER_ADDR_LEN))) {
4650		return (FALSE);
4651	}
4652
4653	return (TRUE);
4654}
4655
4656/*
4657** Parse the interface capabilities with regard
4658** to both system management and wake-on-lan for
4659** later use.
4660*/
4661static void
4662em_get_wakeup(device_t dev)
4663{
4664	struct adapter	*adapter = device_get_softc(dev);
4665	u16		eeprom_data = 0, device_id, apme_mask;
4666
4667	adapter->has_manage = e1000_enable_mng_pass_thru(&adapter->hw);
4668	apme_mask = EM_EEPROM_APME;
4669
4670	switch (adapter->hw.mac.type) {
4671	case e1000_82573:
4672	case e1000_82583:
4673		adapter->has_amt = TRUE;
4674		/* Falls thru */
4675	case e1000_82571:
4676	case e1000_82572:
4677	case e1000_80003es2lan:
4678		if (adapter->hw.bus.func == 1) {
4679			e1000_read_nvm(&adapter->hw,
4680			    NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data);
4681			break;
4682		} else
4683			e1000_read_nvm(&adapter->hw,
4684			    NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
4685		break;
4686	case e1000_ich8lan:
4687	case e1000_ich9lan:
4688	case e1000_ich10lan:
4689	case e1000_pchlan:
4690	case e1000_pch2lan:
4691		apme_mask = E1000_WUC_APME;
4692		adapter->has_amt = TRUE;
4693		eeprom_data = E1000_READ_REG(&adapter->hw, E1000_WUC);
4694		break;
4695	default:
4696		e1000_read_nvm(&adapter->hw,
4697		    NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
4698		break;
4699	}
4700	if (eeprom_data & apme_mask)
4701		adapter->wol = (E1000_WUFC_MAG | E1000_WUFC_MC);
4702	/*
4703         * We have the eeprom settings, now apply the special cases
4704         * where the eeprom may be wrong or the board won't support
4705         * wake on lan on a particular port
4706	 */
4707	device_id = pci_get_device(dev);
4708        switch (device_id) {
4709	case E1000_DEV_ID_82571EB_FIBER:
4710		/* Wake events only supported on port A for dual fiber
4711		 * regardless of eeprom setting */
4712		if (E1000_READ_REG(&adapter->hw, E1000_STATUS) &
4713		    E1000_STATUS_FUNC_1)
4714			adapter->wol = 0;
4715		break;
4716	case E1000_DEV_ID_82571EB_QUAD_COPPER:
4717	case E1000_DEV_ID_82571EB_QUAD_FIBER:
4718	case E1000_DEV_ID_82571EB_QUAD_COPPER_LP:
4719                /* if quad port adapter, disable WoL on all but port A */
4720		if (global_quad_port_a != 0)
4721			adapter->wol = 0;
4722		/* Reset for multiple quad port adapters */
4723		if (++global_quad_port_a == 4)
4724			global_quad_port_a = 0;
4725                break;
4726	}
4727	return;
4728}
4729
4730
4731/*
4732 * Enable PCI Wake On Lan capability
4733 */
4734static void
4735em_enable_wakeup(device_t dev)
4736{
4737	struct adapter	*adapter = device_get_softc(dev);
4738	struct ifnet	*ifp = adapter->ifp;
4739	u32		pmc, ctrl, ctrl_ext, rctl;
4740	u16     	status;
4741
4742	if ((pci_find_extcap(dev, PCIY_PMG, &pmc) != 0))
4743		return;
4744
4745	/* Advertise the wakeup capability */
4746	ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
4747	ctrl |= (E1000_CTRL_SWDPIN2 | E1000_CTRL_SWDPIN3);
4748	E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
4749	E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
4750
4751	if ((adapter->hw.mac.type == e1000_ich8lan) ||
4752	    (adapter->hw.mac.type == e1000_pchlan) ||
4753	    (adapter->hw.mac.type == e1000_ich9lan) ||
4754	    (adapter->hw.mac.type == e1000_ich10lan)) {
4755		e1000_disable_gig_wol_ich8lan(&adapter->hw);
4756		e1000_hv_phy_powerdown_workaround_ich8lan(&adapter->hw);
4757	}
4758
4759	/* Keep the laser running on Fiber adapters */
4760	if (adapter->hw.phy.media_type == e1000_media_type_fiber ||
4761	    adapter->hw.phy.media_type == e1000_media_type_internal_serdes) {
4762		ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4763		ctrl_ext |= E1000_CTRL_EXT_SDP3_DATA;
4764		E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, ctrl_ext);
4765	}
4766
4767	/*
4768	** Determine type of Wakeup: note that wol
4769	** is set with all bits on by default.
4770	*/
4771	if ((ifp->if_capenable & IFCAP_WOL_MAGIC) == 0)
4772		adapter->wol &= ~E1000_WUFC_MAG;
4773
4774	if ((ifp->if_capenable & IFCAP_WOL_MCAST) == 0)
4775		adapter->wol &= ~E1000_WUFC_MC;
4776	else {
4777		rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
4778		rctl |= E1000_RCTL_MPE;
4779		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, rctl);
4780	}
4781
4782	if ((adapter->hw.mac.type == e1000_pchlan) ||
4783	    (adapter->hw.mac.type == e1000_pch2lan)) {
4784		if (em_enable_phy_wakeup(adapter))
4785			return;
4786	} else {
4787		E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
4788		E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
4789	}
4790
4791	if (adapter->hw.phy.type == e1000_phy_igp_3)
4792		e1000_igp3_phy_powerdown_workaround_ich8lan(&adapter->hw);
4793
4794        /* Request PME */
4795        status = pci_read_config(dev, pmc + PCIR_POWER_STATUS, 2);
4796	status &= ~(PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE);
4797	if (ifp->if_capenable & IFCAP_WOL)
4798		status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE;
4799        pci_write_config(dev, pmc + PCIR_POWER_STATUS, status, 2);
4800
4801	return;
4802}
4803
4804/*
4805** WOL in the newer chipset interfaces (pchlan)
4806** requires things to be copied into the PHY
4807*/
4808static int
4809em_enable_phy_wakeup(struct adapter *adapter)
4810{
4811	struct e1000_hw *hw = &adapter->hw;
4812	u32 mreg, ret = 0;
4813	u16 preg;
4814
4815	/* copy MAC RARs to PHY RARs */
4816	e1000_copy_rx_addrs_to_phy_ich8lan(hw);
4817
4818	/* copy MAC MTA to PHY MTA */
4819	for (int i = 0; i < adapter->hw.mac.mta_reg_count; i++) {
4820		mreg = E1000_READ_REG_ARRAY(hw, E1000_MTA, i);
4821		e1000_write_phy_reg(hw, BM_MTA(i), (u16)(mreg & 0xFFFF));
4822		e1000_write_phy_reg(hw, BM_MTA(i) + 1,
4823		    (u16)((mreg >> 16) & 0xFFFF));
4824	}
4825
4826	/* configure PHY Rx Control register */
4827	e1000_read_phy_reg(&adapter->hw, BM_RCTL, &preg);
4828	mreg = E1000_READ_REG(hw, E1000_RCTL);
4829	if (mreg & E1000_RCTL_UPE)
4830		preg |= BM_RCTL_UPE;
4831	if (mreg & E1000_RCTL_MPE)
4832		preg |= BM_RCTL_MPE;
4833	preg &= ~(BM_RCTL_MO_MASK);
4834	if (mreg & E1000_RCTL_MO_3)
4835		preg |= (((mreg & E1000_RCTL_MO_3) >> E1000_RCTL_MO_SHIFT)
4836				<< BM_RCTL_MO_SHIFT);
4837	if (mreg & E1000_RCTL_BAM)
4838		preg |= BM_RCTL_BAM;
4839	if (mreg & E1000_RCTL_PMCF)
4840		preg |= BM_RCTL_PMCF;
4841	mreg = E1000_READ_REG(hw, E1000_CTRL);
4842	if (mreg & E1000_CTRL_RFCE)
4843		preg |= BM_RCTL_RFCE;
4844	e1000_write_phy_reg(&adapter->hw, BM_RCTL, preg);
4845
4846	/* enable PHY wakeup in MAC register */
4847	E1000_WRITE_REG(hw, E1000_WUC,
4848	    E1000_WUC_PHY_WAKE | E1000_WUC_PME_EN);
4849	E1000_WRITE_REG(hw, E1000_WUFC, adapter->wol);
4850
4851	/* configure and enable PHY wakeup in PHY registers */
4852	e1000_write_phy_reg(&adapter->hw, BM_WUFC, adapter->wol);
4853	e1000_write_phy_reg(&adapter->hw, BM_WUC, E1000_WUC_PME_EN);
4854
4855	/* activate PHY wakeup */
4856	ret = hw->phy.ops.acquire(hw);
4857	if (ret) {
4858		printf("Could not acquire PHY\n");
4859		return ret;
4860	}
4861	e1000_write_phy_reg_mdic(hw, IGP01E1000_PHY_PAGE_SELECT,
4862	                         (BM_WUC_ENABLE_PAGE << IGP_PAGE_SHIFT));
4863	ret = e1000_read_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, &preg);
4864	if (ret) {
4865		printf("Could not read PHY page 769\n");
4866		goto out;
4867	}
4868	preg |= BM_WUC_ENABLE_BIT | BM_WUC_HOST_WU_BIT;
4869	ret = e1000_write_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, preg);
4870	if (ret)
4871		printf("Could not set PHY Host Wakeup bit\n");
4872out:
4873	hw->phy.ops.release(hw);
4874
4875	return ret;
4876}
4877
4878static void
4879em_led_func(void *arg, int onoff)
4880{
4881	struct adapter	*adapter = arg;
4882
4883	EM_CORE_LOCK(adapter);
4884	if (onoff) {
4885		e1000_setup_led(&adapter->hw);
4886		e1000_led_on(&adapter->hw);
4887	} else {
4888		e1000_led_off(&adapter->hw);
4889		e1000_cleanup_led(&adapter->hw);
4890	}
4891	EM_CORE_UNLOCK(adapter);
4892}
4893
4894/**********************************************************************
4895 *
4896 *  Update the board statistics counters.
4897 *
4898 **********************************************************************/
4899static void
4900em_update_stats_counters(struct adapter *adapter)
4901{
4902	struct ifnet   *ifp;
4903
4904	if(adapter->hw.phy.media_type == e1000_media_type_copper ||
4905	   (E1000_READ_REG(&adapter->hw, E1000_STATUS) & E1000_STATUS_LU)) {
4906		adapter->stats.symerrs += E1000_READ_REG(&adapter->hw, E1000_SYMERRS);
4907		adapter->stats.sec += E1000_READ_REG(&adapter->hw, E1000_SEC);
4908	}
4909	adapter->stats.crcerrs += E1000_READ_REG(&adapter->hw, E1000_CRCERRS);
4910	adapter->stats.mpc += E1000_READ_REG(&adapter->hw, E1000_MPC);
4911	adapter->stats.scc += E1000_READ_REG(&adapter->hw, E1000_SCC);
4912	adapter->stats.ecol += E1000_READ_REG(&adapter->hw, E1000_ECOL);
4913
4914	adapter->stats.mcc += E1000_READ_REG(&adapter->hw, E1000_MCC);
4915	adapter->stats.latecol += E1000_READ_REG(&adapter->hw, E1000_LATECOL);
4916	adapter->stats.colc += E1000_READ_REG(&adapter->hw, E1000_COLC);
4917	adapter->stats.dc += E1000_READ_REG(&adapter->hw, E1000_DC);
4918	adapter->stats.rlec += E1000_READ_REG(&adapter->hw, E1000_RLEC);
4919	adapter->stats.xonrxc += E1000_READ_REG(&adapter->hw, E1000_XONRXC);
4920	adapter->stats.xontxc += E1000_READ_REG(&adapter->hw, E1000_XONTXC);
4921	/*
4922	** For watchdog management we need to know if we have been
4923	** paused during the last interval, so capture that here.
4924	*/
4925	adapter->pause_frames = E1000_READ_REG(&adapter->hw, E1000_XOFFRXC);
4926	adapter->stats.xoffrxc += adapter->pause_frames;
4927	adapter->stats.xofftxc += E1000_READ_REG(&adapter->hw, E1000_XOFFTXC);
4928	adapter->stats.fcruc += E1000_READ_REG(&adapter->hw, E1000_FCRUC);
4929	adapter->stats.prc64 += E1000_READ_REG(&adapter->hw, E1000_PRC64);
4930	adapter->stats.prc127 += E1000_READ_REG(&adapter->hw, E1000_PRC127);
4931	adapter->stats.prc255 += E1000_READ_REG(&adapter->hw, E1000_PRC255);
4932	adapter->stats.prc511 += E1000_READ_REG(&adapter->hw, E1000_PRC511);
4933	adapter->stats.prc1023 += E1000_READ_REG(&adapter->hw, E1000_PRC1023);
4934	adapter->stats.prc1522 += E1000_READ_REG(&adapter->hw, E1000_PRC1522);
4935	adapter->stats.gprc += E1000_READ_REG(&adapter->hw, E1000_GPRC);
4936	adapter->stats.bprc += E1000_READ_REG(&adapter->hw, E1000_BPRC);
4937	adapter->stats.mprc += E1000_READ_REG(&adapter->hw, E1000_MPRC);
4938	adapter->stats.gptc += E1000_READ_REG(&adapter->hw, E1000_GPTC);
4939
4940	/* For the 64-bit byte counters the low dword must be read first. */
4941	/* Both registers clear on the read of the high dword */
4942
4943	adapter->stats.gorc += E1000_READ_REG(&adapter->hw, E1000_GORCL) +
4944	    ((u64)E1000_READ_REG(&adapter->hw, E1000_GORCH) << 32);
4945	adapter->stats.gotc += E1000_READ_REG(&adapter->hw, E1000_GOTCL) +
4946	    ((u64)E1000_READ_REG(&adapter->hw, E1000_GOTCH) << 32);
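	/*
	 * Example of the ordering hazard noted above: had GORCH been read
	 * before GORCL, the read of the high dword would have cleared both
	 * registers and the low 32 bits of this interval's octet count
	 * would have been lost.
	 */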
4947
4948	adapter->stats.rnbc += E1000_READ_REG(&adapter->hw, E1000_RNBC);
4949	adapter->stats.ruc += E1000_READ_REG(&adapter->hw, E1000_RUC);
4950	adapter->stats.rfc += E1000_READ_REG(&adapter->hw, E1000_RFC);
4951	adapter->stats.roc += E1000_READ_REG(&adapter->hw, E1000_ROC);
4952	adapter->stats.rjc += E1000_READ_REG(&adapter->hw, E1000_RJC);
4953
4954	adapter->stats.tor += E1000_READ_REG(&adapter->hw, E1000_TORH);
4955	adapter->stats.tot += E1000_READ_REG(&adapter->hw, E1000_TOTH);
4956
4957	adapter->stats.tpr += E1000_READ_REG(&adapter->hw, E1000_TPR);
4958	adapter->stats.tpt += E1000_READ_REG(&adapter->hw, E1000_TPT);
4959	adapter->stats.ptc64 += E1000_READ_REG(&adapter->hw, E1000_PTC64);
4960	adapter->stats.ptc127 += E1000_READ_REG(&adapter->hw, E1000_PTC127);
4961	adapter->stats.ptc255 += E1000_READ_REG(&adapter->hw, E1000_PTC255);
4962	adapter->stats.ptc511 += E1000_READ_REG(&adapter->hw, E1000_PTC511);
4963	adapter->stats.ptc1023 += E1000_READ_REG(&adapter->hw, E1000_PTC1023);
4964	adapter->stats.ptc1522 += E1000_READ_REG(&adapter->hw, E1000_PTC1522);
4965	adapter->stats.mptc += E1000_READ_REG(&adapter->hw, E1000_MPTC);
4966	adapter->stats.bptc += E1000_READ_REG(&adapter->hw, E1000_BPTC);
4967
4968	/* Interrupt Counts */
4969
4970	adapter->stats.iac += E1000_READ_REG(&adapter->hw, E1000_IAC);
4971	adapter->stats.icrxptc += E1000_READ_REG(&adapter->hw, E1000_ICRXPTC);
4972	adapter->stats.icrxatc += E1000_READ_REG(&adapter->hw, E1000_ICRXATC);
4973	adapter->stats.ictxptc += E1000_READ_REG(&adapter->hw, E1000_ICTXPTC);
4974	adapter->stats.ictxatc += E1000_READ_REG(&adapter->hw, E1000_ICTXATC);
4975	adapter->stats.ictxqec += E1000_READ_REG(&adapter->hw, E1000_ICTXQEC);
4976	adapter->stats.ictxqmtc += E1000_READ_REG(&adapter->hw, E1000_ICTXQMTC);
4977	adapter->stats.icrxdmtc += E1000_READ_REG(&adapter->hw, E1000_ICRXDMTC);
4978	adapter->stats.icrxoc += E1000_READ_REG(&adapter->hw, E1000_ICRXOC);
4979
4980	if (adapter->hw.mac.type >= e1000_82543) {
4981		adapter->stats.algnerrc +=
4982		E1000_READ_REG(&adapter->hw, E1000_ALGNERRC);
4983		adapter->stats.rxerrc +=
4984		E1000_READ_REG(&adapter->hw, E1000_RXERRC);
4985		adapter->stats.tncrs +=
4986		E1000_READ_REG(&adapter->hw, E1000_TNCRS);
4987		adapter->stats.cexterr +=
4988		E1000_READ_REG(&adapter->hw, E1000_CEXTERR);
4989		adapter->stats.tsctc +=
4990		E1000_READ_REG(&adapter->hw, E1000_TSCTC);
4991		adapter->stats.tsctfc +=
4992		E1000_READ_REG(&adapter->hw, E1000_TSCTFC);
4993	}
4994	ifp = adapter->ifp;
4995
4996	ifp->if_collisions = adapter->stats.colc;
4997
4998	/* Rx Errors */
4999	ifp->if_ierrors = adapter->dropped_pkts + adapter->stats.rxerrc +
5000	    adapter->stats.crcerrs + adapter->stats.algnerrc +
5001	    adapter->stats.ruc + adapter->stats.roc +
5002	    adapter->stats.mpc + adapter->stats.cexterr;
5003
5004	/* Tx Errors */
5005	ifp->if_oerrors = adapter->stats.ecol +
5006	    adapter->stats.latecol + adapter->watchdog_events;
5007}
5008
5009/* Export a single 32-bit register via a read-only sysctl. */
5010static int
5011em_sysctl_reg_handler(SYSCTL_HANDLER_ARGS)
5012{
5013	struct adapter *adapter;
5014	u_int val;
5015
5016	adapter = oidp->oid_arg1;
5017	val = E1000_READ_REG(&adapter->hw, oidp->oid_arg2);
5018	return (sysctl_handle_int(oidp, &val, 0, req));
5019}
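
/*
 * The handler above is wired up below via SYSCTL_ADD_PROC() with the
 * adapter as arg1 and a register offset (e.g. E1000_CTRL) as arg2, so
 * each read returns a fresh snapshot of the register.  A hypothetical
 * example from userland, assuming unit 0:
 *
 *	sysctl dev.em.0.device_control
 */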
5020
5021/*
5022 * Add sysctl variables, one per statistic, to the system.
5023 */
5024static void
5025em_add_hw_stats(struct adapter *adapter)
5026{
5027	device_t dev = adapter->dev;
5028
5029	struct tx_ring *txr = adapter->tx_rings;
5030	struct rx_ring *rxr = adapter->rx_rings;
5031
5032	struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev);
5033	struct sysctl_oid *tree = device_get_sysctl_tree(dev);
5034	struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree);
5035	struct e1000_hw_stats *stats = &adapter->stats;
5036
5037	struct sysctl_oid *stat_node, *queue_node, *int_node;
5038	struct sysctl_oid_list *stat_list, *queue_list, *int_list;
5039
5040#define QUEUE_NAME_LEN 32
5041	char namebuf[QUEUE_NAME_LEN];
5042
5043	/* Driver Statistics */
5044	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "link_irq",
5045			CTLFLAG_RD, &adapter->link_irq, 0,
5046			"Link MSIX IRQ Handled");
5047	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "mbuf_alloc_fail",
5048			 CTLFLAG_RD, &adapter->mbuf_alloc_failed,
5049			 "Std mbuf failed");
5050	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "cluster_alloc_fail",
5051			 CTLFLAG_RD, &adapter->mbuf_cluster_failed,
5052			 "Std mbuf cluster failed");
5053	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "dropped",
5054			CTLFLAG_RD, &adapter->dropped_pkts,
5055			"Driver dropped packets");
5056	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_dma_fail",
5057			CTLFLAG_RD, &adapter->no_tx_dma_setup,
5058			"Driver tx dma failure in xmit");
5059	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_overruns",
5060			CTLFLAG_RD, &adapter->rx_overruns,
5061			"RX overruns");
5062	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "watchdog_timeouts",
5063			CTLFLAG_RD, &adapter->watchdog_events,
5064			"Watchdog timeouts");
5065
5066	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "device_control",
5067			CTLFLAG_RD, adapter, E1000_CTRL,
5068			em_sysctl_reg_handler, "IU",
5069			"Device Control Register");
5070	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rx_control",
5071			CTLFLAG_RD, adapter, E1000_RCTL,
5072			em_sysctl_reg_handler, "IU",
5073			"Receiver Control Register");
5074	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_high_water",
5075			CTLFLAG_RD, &adapter->hw.fc.high_water, 0,
5076			"Flow Control High Watermark");
5077	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_low_water",
5078			CTLFLAG_RD, &adapter->hw.fc.low_water, 0,
5079			"Flow Control Low Watermark");
5080
5081	for (int i = 0; i < adapter->num_queues; i++, rxr++, txr++) {
5082		snprintf(namebuf, QUEUE_NAME_LEN, "queue%d", i);
5083		queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
5084					    CTLFLAG_RD, NULL, "Queue Name");
5085		queue_list = SYSCTL_CHILDREN(queue_node);
5086
5087		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_head",
5088				CTLFLAG_RD, adapter, E1000_TDH(txr->me),
5089				em_sysctl_reg_handler, "IU",
5090 				"Transmit Descriptor Head");
5091		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_tail",
5092				CTLFLAG_RD, adapter, E1000_TDT(txr->me),
5093				em_sysctl_reg_handler, "IU",
5094 				"Transmit Descriptor Tail");
5095		SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "tx_irq",
5096				CTLFLAG_RD, &txr->tx_irq,
5097				"Queue MSI-X Transmit Interrupts");
5098		SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "no_desc_avail",
5099				CTLFLAG_RD, &txr->no_desc_avail,
5100				"Queue No Descriptor Available");
5101
5102		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_head",
5103				CTLFLAG_RD, adapter, E1000_RDH(rxr->me),
5104				em_sysctl_reg_handler, "IU",
5105				"Receive Descriptor Head");
5106		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_tail",
5107				CTLFLAG_RD, adapter, E1000_RDT(rxr->me),
5108				em_sysctl_reg_handler, "IU",
5109				"Receive Descriptor Tail");
5110		SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "rx_irq",
5111				CTLFLAG_RD, &rxr->rx_irq,
5112				"Queue MSI-X Receive Interrupts");
5113	}
5114
5115	/* MAC stats get their own sub node */
5116
5117	stat_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "mac_stats",
5118				    CTLFLAG_RD, NULL, "Statistics");
5119	stat_list = SYSCTL_CHILDREN(stat_node);
5120
5121	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "excess_coll",
5122			CTLFLAG_RD, &stats->ecol,
5123			"Excessive collisions");
5124	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "single_coll",
5125			CTLFLAG_RD, &stats->scc,
5126			"Single collisions");
5127	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "multiple_coll",
5128			CTLFLAG_RD, &stats->mcc,
5129			"Multiple collisions");
5130	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "late_coll",
5131			CTLFLAG_RD, &stats->latecol,
5132			"Late collisions");
5133	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "collision_count",
5134			CTLFLAG_RD, &stats->colc,
5135			"Collision Count");
5136	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "symbol_errors",
5137			CTLFLAG_RD, &adapter->stats.symerrs,
5138			"Symbol Errors");
5139	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "sequence_errors",
5140			CTLFLAG_RD, &adapter->stats.sec,
5141			"Sequence Errors");
5142	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "defer_count",
5143			CTLFLAG_RD, &adapter->stats.dc,
5144			"Defer Count");
5145	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "missed_packets",
5146			CTLFLAG_RD, &adapter->stats.mpc,
5147			"Missed Packets");
5148	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_no_buff",
5149			CTLFLAG_RD, &adapter->stats.rnbc,
5150			"Receive No Buffers");
5151	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_undersize",
5152			CTLFLAG_RD, &adapter->stats.ruc,
5153			"Receive Undersize");
5154	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_fragmented",
5155			CTLFLAG_RD, &adapter->stats.rfc,
5156			"Fragmented Packets Received ");
5157	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_oversize",
5158			CTLFLAG_RD, &adapter->stats.roc,
5159			"Oversized Packets Received");
5160	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_jabber",
5161			CTLFLAG_RD, &adapter->stats.rjc,
5162			"Recevied Jabber");
5163	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_errs",
5164			CTLFLAG_RD, &adapter->stats.rxerrc,
5165			"Receive Errors");
5166	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "crc_errs",
5167			CTLFLAG_RD, &adapter->stats.crcerrs,
5168			"CRC errors");
5169	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "alignment_errs",
5170			CTLFLAG_RD, &adapter->stats.algnerrc,
5171			"Alignment Errors");
5172	/* On 82575 these are collision counts */
5173	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "coll_ext_errs",
5174			CTLFLAG_RD, &adapter->stats.cexterr,
5175			"Collision/Carrier extension errors");
5176	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xon_recvd",
5177			CTLFLAG_RD, &adapter->stats.xonrxc,
5178			"XON Received");
5179	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xon_txd",
5180			CTLFLAG_RD, &adapter->stats.xontxc,
5181			"XON Transmitted");
5182	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xoff_recvd",
5183			CTLFLAG_RD, &adapter->stats.xoffrxc,
5184			"XOFF Received");
5185	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xoff_txd",
5186			CTLFLAG_RD, &adapter->stats.xofftxc,
5187			"XOFF Transmitted");
5188
5189	/* Packet Reception Stats */
5190	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "total_pkts_recvd",
5191			CTLFLAG_RD, &adapter->stats.tpr,
5192			"Total Packets Received");
5193	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd",
5194			CTLFLAG_RD, &adapter->stats.gprc,
5195			"Good Packets Received");
5196	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_recvd",
5197			CTLFLAG_RD, &adapter->stats.bprc,
5198			"Broadcast Packets Received");
5199	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd",
5200			CTLFLAG_RD, &adapter->stats.mprc,
5201			"Multicast Packets Received");
5202	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_64",
5203			CTLFLAG_RD, &adapter->stats.prc64,
5204			"64 byte frames received");
5205	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_65_127",
5206			CTLFLAG_RD, &adapter->stats.prc127,
5207			"65-127 byte frames received");
5208	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_128_255",
5209			CTLFLAG_RD, &adapter->stats.prc255,
5210			"128-255 byte frames received");
5211	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_256_511",
5212			CTLFLAG_RD, &adapter->stats.prc511,
5213			"256-511 byte frames received");
5214	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_512_1023",
5215			CTLFLAG_RD, &adapter->stats.prc1023,
5216			"512-1023 byte frames received");
5217	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_1024_1522",
5218			CTLFLAG_RD, &adapter->stats.prc1522,
5219			"1024-1522 byte frames received");
5220	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd",
5221			CTLFLAG_RD, &adapter->stats.gorc,
5222			"Good Octets Received");
5223
5224	/* Packet Transmission Stats */
5225	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_txd",
5226			CTLFLAG_RD, &adapter->stats.gotc,
5227			"Good Octets Transmitted");
5228	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "total_pkts_txd",
5229			CTLFLAG_RD, &adapter->stats.tpt,
5230			"Total Packets Transmitted");
5231	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
5232			CTLFLAG_RD, &adapter->stats.gptc,
5233			"Good Packets Transmitted");
5234	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_txd",
5235			CTLFLAG_RD, &adapter->stats.bptc,
5236			"Broadcast Packets Transmitted");
5237	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_txd",
5238			CTLFLAG_RD, &adapter->stats.mptc,
5239			"Multicast Packets Transmitted");
5240	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_64",
5241			CTLFLAG_RD, &adapter->stats.ptc64,
5242			"64 byte frames transmitted");
5243	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_65_127",
5244			CTLFLAG_RD, &adapter->stats.ptc127,
5245			"65-127 byte frames transmitted");
5246	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_128_255",
5247			CTLFLAG_RD, &adapter->stats.ptc255,
5248			"128-255 byte frames transmitted");
5249	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_256_511",
5250			CTLFLAG_RD, &adapter->stats.ptc511,
5251			"256-511 byte frames transmitted");
5252	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_512_1023",
5253			CTLFLAG_RD, &adapter->stats.ptc1023,
5254			"512-1023 byte frames transmitted");
5255	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_1024_1522",
5256			CTLFLAG_RD, &adapter->stats.ptc1522,
5257			"1024-1522 byte frames transmitted");
5258	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tso_txd",
5259			CTLFLAG_RD, &adapter->stats.tsctc,
5260			"TSO Contexts Transmitted");
5261	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tso_ctx_fail",
5262			CTLFLAG_RD, &adapter->stats.tsctfc,
5263			"TSO Contexts Failed");
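	/*
	 * All of the counters above are 64-bit (QUAD) views into the
	 * driver's copy of the hardware statistics block; a userland
	 * read would look like (device unit illustrative):
	 *
	 *	# sysctl dev.em.0.mac_stats.good_pkts_recvd
	 */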
5264
5265
5266	/* Interrupt Stats */
5267
5268	int_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "interrupts",
5269				    CTLFLAG_RD, NULL, "Interrupt Statistics");
5270	int_list = SYSCTL_CHILDREN(int_node);
5271
5272	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "asserts",
5273			CTLFLAG_RD, &adapter->stats.iac,
5274			"Interrupt Assertion Count");
5275
5276	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_pkt_timer",
5277			CTLFLAG_RD, &adapter->stats.icrxptc,
5278			"Interrupt Cause Rx Pkt Timer Expire Count");
5279
5280	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_abs_timer",
5281			CTLFLAG_RD, &adapter->stats.icrxatc,
5282			"Interrupt Cause Rx Abs Timer Expire Count");
5283
5284	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_pkt_timer",
5285			CTLFLAG_RD, &adapter->stats.ictxptc,
5286			"Interrupt Cause Tx Pkt Timer Expire Count");
5287
5288	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_abs_timer",
5289			CTLFLAG_RD, &adapter->stats.ictxatc,
5290			"Interrupt Cause Tx Abs Timer Expire Count");
5291
5292	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_queue_empty",
5293			CTLFLAG_RD, &adapter->stats.ictxqec,
5294			"Interrupt Cause Tx Queue Empty Count");
5295
5296	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_queue_min_thresh",
5297			CTLFLAG_RD, &adapter->stats.ictxqmtc,
5298			"Interrupt Cause Tx Queue Min Thresh Count");
5299
5300	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_desc_min_thresh",
5301			CTLFLAG_RD, &adapter->stats.icrxdmtc,
5302			"Interrupt Cause Rx Desc Min Thresh Count");
5303
5304	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_overrun",
5305			CTLFLAG_RD, &adapter->stats.icrxoc,
5306			"Interrupt Cause Receiver Overrun Count");
5307}
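/*
 * For reference, a minimal sketch of the register handler that the
 * queue head/tail OIDs above point at; the real em_sysctl_reg_handler
 * is defined elsewhere in this file, with arg1 carrying the adapter
 * and arg2 the register offset:
 *
 *	static int
 *	em_sysctl_reg_handler(SYSCTL_HANDLER_ARGS)
 *	{
 *		struct adapter *adapter = (struct adapter *)arg1;
 *		u_int val;
 *
 *		val = E1000_READ_OFFSET(&adapter->hw, arg2);
 *		return (sysctl_handle_int(oidp, &val, 0, req));
 *	}
 */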
5308
5309/**********************************************************************
5310 *
5311 *  This routine provides a way to dump out the adapter EEPROM,
5312 *  often a useful debug/service tool. It dumps only the first
5313 *  32 words, since the fields that matter live in that range.
5314 *
5315 **********************************************************************/
5316static int
5317em_sysctl_nvm_info(SYSCTL_HANDLER_ARGS)
5318{
5319	struct adapter *adapter;
5320	int error;
5321	int result;
5322
5323	result = -1;
5324	error = sysctl_handle_int(oidp, &result, 0, req);
5325
5326	if (error || !req->newptr)
5327		return (error);
5328
5329	/*
5330	 * This value will cause a hex dump of the
5331	 * first 32 16-bit words of the EEPROM to
5332	 * the screen.
5333	 */
5334	if (result == 1) {
5335		adapter = (struct adapter *)arg1;
5336		em_print_nvm_info(adapter);
5337	}
5338
5339	return (error);
5340}
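/*
 * Writing 1 to the matching OID is what arms the dump; assuming the
 * handler is registered as "nvm" under the device node (as done in
 * em_attach), the trigger from userland would be:
 *
 *	# sysctl dev.em.0.nvm=1
 */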
5341
5342static void
5343em_print_nvm_info(struct adapter *adapter)
5344{
5345	u16	eeprom_data;
5346	int	i, j, row = 0;
5347
5348	/* It's a bit crude, but it gets the job done */
5349	printf("\nInterface EEPROM Dump:\n");
5350	printf("Offset\n0x0000  ");
5351	for (i = 0, j = 0; i < 32; i++, j++) {
5352		if (j == 8) { /* Make the offset block */
5353			j = 0; ++row;
5354			printf("\n0x00%x0  ", row);
5355		}
5356		e1000_read_nvm(&adapter->hw, i, 1, &eeprom_data);
5357		printf("%04x ", eeprom_data);
5358	}
5359	printf("\n");
5360}
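/*
 * The dump comes out as four rows of eight 16-bit words, with the row
 * labels being byte offsets (word 8 prints under 0x0010), along the
 * lines of:
 *
 *	Interface EEPROM Dump:
 *	Offset
 *	0x0000  xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx
 *	...
 *	0x0030  xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx
 */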
5361
5362static int
5363em_sysctl_int_delay(SYSCTL_HANDLER_ARGS)
5364{
5365	struct em_int_delay_info *info;
5366	struct adapter *adapter;
5367	u32 regval;
5368	int error, usecs, ticks;
5369
5370	info = (struct em_int_delay_info *)arg1;
5371	usecs = info->value;
5372	error = sysctl_handle_int(oidp, &usecs, 0, req);
5373	if (error != 0 || req->newptr == NULL)
5374		return (error);
5375	if (usecs < 0 || usecs > EM_TICKS_TO_USECS(65535))
5376		return (EINVAL);
5377	info->value = usecs;
5378	ticks = EM_USECS_TO_TICKS(usecs);
5379
5380	adapter = info->adapter;
5381
5382	EM_CORE_LOCK(adapter);
5383	regval = E1000_READ_OFFSET(&adapter->hw, info->offset);
5384	regval = (regval & ~0xffff) | (ticks & 0xffff);
5385	/* Handle a few special cases. */
5386	switch (info->offset) {
5387	case E1000_RDTR:
5388		break;
5389	case E1000_TIDV:
5390		if (ticks == 0) {
5391			adapter->txd_cmd &= ~E1000_TXD_CMD_IDE;
5392			/* Don't write 0 into the TIDV register. */
5393			regval++;
5394		} else
5395			adapter->txd_cmd |= E1000_TXD_CMD_IDE;
5396		break;
5397	}
5398	E1000_WRITE_OFFSET(&adapter->hw, info->offset, regval);
5399	EM_CORE_UNLOCK(adapter);
5400	return (0);
5401}
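/*
 * The usec/tick conversions above assume the usual if_em.h macros,
 * where the hardware delay timers count in 1.024 usec units:
 *
 *	EM_USECS_TO_TICKS(usecs)	((1000 * (usecs) + 500) / 1024)
 *	EM_TICKS_TO_USECS(ticks)	((1024 * (ticks) + 500) / 1000)
 *
 * so a requested delay of 100 usecs is written to the low 16 bits of
 * the register as (1000 * 100 + 500) / 1024 = 98 ticks.
 */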
5402
5403static void
5404em_add_int_delay_sysctl(struct adapter *adapter, const char *name,
5405	const char *description, struct em_int_delay_info *info,
5406	int offset, int value)
5407{
5408	info->adapter = adapter;
5409	info->offset = offset;
5410	info->value = value;
5411	SYSCTL_ADD_PROC(device_get_sysctl_ctx(adapter->dev),
5412	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
5413	    OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW,
5414	    info, 0, em_sysctl_int_delay, "I", description);
5415}
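/*
 * A typical registration, patterned on how em_attach() uses this
 * helper (default value names are from if_em.h):
 *
 *	em_add_int_delay_sysctl(adapter, "rx_int_delay",
 *	    "receive interrupt delay in usecs", &adapter->rx_int_delay,
 *	    E1000_REGISTER(&adapter->hw, E1000_RDTR),
 *	    em_rx_int_delay_dflt);
 */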
5416
5417static void
5418em_add_rx_process_limit(struct adapter *adapter, const char *name,
5419	const char *description, int *limit, int value)
5420{
5421	*limit = value;
5422	SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
5423	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
5424	    OID_AUTO, name, CTLFLAG_RW, limit, value, description);
5425}
5426
5427static void
5428em_set_flow_cntrl(struct adapter *adapter, const char *name,
5429	const char *description, int *limit, int value)
5430{
5431	*limit = value;
5432	SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
5433	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
5434	    OID_AUTO, name, CTLFLAG_RW, limit, value, description);
5435}
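/*
 * Note that em_add_rx_process_limit() and em_set_flow_cntrl() are
 * identical wrappers: each seeds an int and exposes it read/write.
 * A typical use, patterned on em_attach():
 *
 *	em_add_rx_process_limit(adapter, "rx_processing_limit",
 *	    "max number of rx packets to process",
 *	    &adapter->rx_process_limit, em_rx_process_limit);
 */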
5436
5437static int
5438em_sysctl_debug_info(SYSCTL_HANDLER_ARGS)
5439{
5440	struct adapter *adapter;
5441	int error;
5442	int result;
5443
5444	result = -1;
5445	error = sysctl_handle_int(oidp, &result, 0, req);
5446
5447	if (error || !req->newptr)
5448		return (error);
5449
5450	if (result == 1) {
5451		adapter = (struct adapter *)arg1;
5452		em_print_debug_info(adapter);
5453	}
5454
5455	return (error);
5456}
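/*
 * Like the NVM dump, this handler fires on a write of 1; assuming it
 * is registered as "debug" under the device node, the trigger would
 * be:
 *
 *	# sysctl dev.em.0.debug=1
 */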
5457
5458/*
5459** This routine is meant to be fluid; add whatever is
5460** needed for debugging a problem.  -jfv
5461*/
5462static void
5463em_print_debug_info(struct adapter *adapter)
5464{
5465	device_t dev = adapter->dev;
5466	struct tx_ring *txr = adapter->tx_rings;
5467	struct rx_ring *rxr = adapter->rx_rings;
5468
5469	if (adapter->ifp->if_drv_flags & IFF_DRV_RUNNING)
5470		printf("Interface is RUNNING ");
5471	else
5472		printf("Interface is NOT RUNNING ");
5473	if (adapter->ifp->if_drv_flags & IFF_DRV_OACTIVE)
5474		printf("and ACTIVE\n");
5475	else
5476		printf("and INACTIVE\n");
5477
5478	device_printf(dev, "hw tdh = %d, hw tdt = %d\n",
5479	    E1000_READ_REG(&adapter->hw, E1000_TDH(0)),
5480	    E1000_READ_REG(&adapter->hw, E1000_TDT(0)));
5481	device_printf(dev, "hw rdh = %d, hw rdt = %d\n",
5482	    E1000_READ_REG(&adapter->hw, E1000_RDH(0)),
5483	    E1000_READ_REG(&adapter->hw, E1000_RDT(0)));
5484	device_printf(dev, "Tx Queue Status = %d\n", txr->queue_status);
5485	device_printf(dev, "TX descriptors avail = %d\n",
5486	    txr->tx_avail);
5487	device_printf(dev, "Tx Descriptors avail failure = %lu\n",
5488	    txr->no_desc_avail);
5489	device_printf(dev, "RX discarded packets = %lu\n",
5490	    rxr->rx_discarded);
5491	device_printf(dev, "RX Next to Check = %d\n", rxr->next_to_check);
5492	device_printf(dev, "RX Next to Refresh = %d\n", rxr->next_to_refresh);
5493}
5494