/******************************************************************************

  Copyright (c) 2001-2010, Intel Corporation
  All rights reserved.

  Redistribution and use in source and binary forms, with or without
  modification, are permitted provided that the following conditions are met:

   1. Redistributions of source code must retain the above copyright notice,
      this list of conditions and the following disclaimer.

   2. Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.

   3. Neither the name of the Intel Corporation nor the names of its
      contributors may be used to endorse or promote products derived from
      this software without specific prior written permission.

  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  POSSIBILITY OF SUCH DAMAGE.

******************************************************************************/
/*$FreeBSD: head/sys/dev/e1000/if_em.c 215808 2010-11-24 22:24:07Z jfv $*/

#ifdef HAVE_KERNEL_OPTION_HEADERS
#include "opt_device_polling.h"
#include "opt_inet.h"
#endif

#include <sys/param.h>
#include <sys/systm.h>
#if __FreeBSD_version >= 800000
#include <sys/buf_ring.h>
#endif
#include <sys/bus.h>
#include <sys/endian.h>
#include <sys/kernel.h>
#include <sys/kthread.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/module.h>
#include <sys/rman.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/sysctl.h>
#include <sys/taskqueue.h>
#include <sys/eventhandler.h>
#include <machine/bus.h>
#include <machine/resource.h>

#include <net/bpf.h>
#include <net/ethernet.h>
#include <net/if.h>
#include <net/if_arp.h>
#include <net/if_dl.h>
#include <net/if_media.h>

#include <net/if_types.h>
#include <net/if_vlan_var.h>

#include <netinet/in_systm.h>
#include <netinet/in.h>
#include <netinet/if_ether.h>
#include <netinet/ip.h>
#include <netinet/ip6.h>
#include <netinet/tcp.h>
#include <netinet/udp.h>

#include <machine/in_cksum.h>
#include <dev/led/led.h>
#include <dev/pci/pcivar.h>
#include <dev/pci/pcireg.h>

#include "e1000_api.h"
#include "e1000_82571.h"
#include "if_em.h"

/*********************************************************************
 *  Set this to one to display debug statistics
 *********************************************************************/
int	em_display_debug_stats = 0;

/*********************************************************************
 *  Driver version:
 *********************************************************************/
char em_driver_version[] = "7.1.8";

99 *  PCI Device ID Table
100 *
101 *  Used by probe to select devices to load on
102 *  Last field stores an index into e1000_strings
103 *  Last entry must be all 0s
104 *
105 *  { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
106 *********************************************************************/
107

static em_vendor_info_t em_vendor_info_array[] =
{
	/* Intel(R) PRO/1000 Network Connection */
	{ 0x8086, E1000_DEV_ID_82571EB_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_SERDES_DUAL,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_SERDES_QUAD,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER_LP,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_FIBER,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571PT_QUAD_COPPER,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82572EI_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82572EI_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82572EI_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82572EI,		PCI_ANY_ID, PCI_ANY_ID, 0},

	{ 0x8086, E1000_DEV_ID_82573E,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82573E_IAMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82573L,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82583V,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_SPT,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_SPT,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_DPT,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_DPT,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IGP_M_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IGP_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IGP_C,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IFE,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IFE_GT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IFE_G,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IGP_M,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_82567V_3,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IGP_M_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IGP_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IGP_C,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IGP_M,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IGP_M_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IFE,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IFE_GT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IFE_G,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_BM,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82574L,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82574LA,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_R_BM_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_R_BM_LF,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_R_BM_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_D_BM_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_D_BM_LF,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_D_BM_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_M_HV_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_M_HV_LC,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_D_HV_DM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_D_HV_DC,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH2_LV_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH2_LV_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
	/* required last entry */
	{ 0, 0, 0, 0, 0}
};

/*********************************************************************
 *  Table of branding strings for all supported NICs.
 *********************************************************************/

static char *em_strings[] = {
	"Intel(R) PRO/1000 Network Connection"
};

/*********************************************************************
 *  Function prototypes
 *********************************************************************/
static int	em_probe(device_t);
static int	em_attach(device_t);
static int	em_detach(device_t);
static int	em_shutdown(device_t);
static int	em_suspend(device_t);
static int	em_resume(device_t);
static void	em_start(struct ifnet *);
static void	em_start_locked(struct ifnet *, struct tx_ring *);
#ifdef EM_MULTIQUEUE
static int	em_mq_start(struct ifnet *, struct mbuf *);
static int	em_mq_start_locked(struct ifnet *,
		    struct tx_ring *, struct mbuf *);
static void	em_qflush(struct ifnet *);
#endif
static int	em_ioctl(struct ifnet *, u_long, caddr_t);
static void	em_init(void *);
static void	em_init_locked(struct adapter *);
static void	em_stop(void *);
static void	em_media_status(struct ifnet *, struct ifmediareq *);
static int	em_media_change(struct ifnet *);
static void	em_identify_hardware(struct adapter *);
static int	em_allocate_pci_resources(struct adapter *);
static int	em_allocate_legacy(struct adapter *);
static int	em_allocate_msix(struct adapter *);
static int	em_allocate_queues(struct adapter *);
static int	em_setup_msix(struct adapter *);
static void	em_free_pci_resources(struct adapter *);
static void	em_local_timer(void *);
static void	em_reset(struct adapter *);
static int	em_setup_interface(device_t, struct adapter *);

static void	em_setup_transmit_structures(struct adapter *);
static void	em_initialize_transmit_unit(struct adapter *);
static int	em_allocate_transmit_buffers(struct tx_ring *);
static void	em_free_transmit_structures(struct adapter *);
static void	em_free_transmit_buffers(struct tx_ring *);

static int	em_setup_receive_structures(struct adapter *);
static int	em_allocate_receive_buffers(struct rx_ring *);
static void	em_initialize_receive_unit(struct adapter *);
static void	em_free_receive_structures(struct adapter *);
static void	em_free_receive_buffers(struct rx_ring *);

static void	em_enable_intr(struct adapter *);
static void	em_disable_intr(struct adapter *);
static void	em_update_stats_counters(struct adapter *);
static void	em_add_hw_stats(struct adapter *adapter);
static bool	em_txeof(struct tx_ring *);
static bool	em_rxeof(struct rx_ring *, int, int *);
#ifndef __NO_STRICT_ALIGNMENT
static int	em_fixup_rx(struct rx_ring *);
#endif
static void	em_receive_checksum(struct e1000_rx_desc *, struct mbuf *);
static void	em_transmit_checksum_setup(struct tx_ring *, struct mbuf *, int,
		    struct ip *, u32 *, u32 *);
static void	em_tso_setup(struct tx_ring *, struct mbuf *, int, struct ip *,
		    struct tcphdr *, u32 *, u32 *);
static void	em_set_promisc(struct adapter *);
static void	em_disable_promisc(struct adapter *);
static void	em_set_multi(struct adapter *);
static void	em_update_link_status(struct adapter *);
static void	em_refresh_mbufs(struct rx_ring *, int);
static void	em_register_vlan(void *, struct ifnet *, u16);
static void	em_unregister_vlan(void *, struct ifnet *, u16);
static void	em_setup_vlan_hw_support(struct adapter *);
static int	em_xmit(struct tx_ring *, struct mbuf **);
static int	em_dma_malloc(struct adapter *, bus_size_t,
		    struct em_dma_alloc *, int);
static void	em_dma_free(struct adapter *, struct em_dma_alloc *);
static int	em_sysctl_nvm_info(SYSCTL_HANDLER_ARGS);
static void	em_print_nvm_info(struct adapter *);
static int	em_sysctl_debug_info(SYSCTL_HANDLER_ARGS);
static void	em_print_debug_info(struct adapter *);
static int	em_is_valid_ether_addr(u8 *);
static int	em_sysctl_int_delay(SYSCTL_HANDLER_ARGS);
static void	em_add_int_delay_sysctl(struct adapter *, const char *,
		    const char *, struct em_int_delay_info *, int, int);
/* Management and WOL Support */
static void	em_init_manageability(struct adapter *);
static void	em_release_manageability(struct adapter *);
static void	em_get_hw_control(struct adapter *);
static void	em_release_hw_control(struct adapter *);
static void	em_get_wakeup(device_t);
static void	em_enable_wakeup(device_t);
static int	em_enable_phy_wakeup(struct adapter *);
static void	em_led_func(void *, int);
static void	em_disable_aspm(struct adapter *);

static int	em_irq_fast(void *);

/* MSIX handlers */
static void	em_msix_tx(void *);
static void	em_msix_rx(void *);
static void	em_msix_link(void *);
static void	em_handle_tx(void *context, int pending);
static void	em_handle_rx(void *context, int pending);
static void	em_handle_link(void *context, int pending);

static void	em_add_rx_process_limit(struct adapter *, const char *,
		    const char *, int *, int);
static void	em_set_flow_cntrl(struct adapter *, const char *,
		    const char *, int *, int);

static __inline void em_rx_discard(struct rx_ring *, int);

#ifdef DEVICE_POLLING
static poll_handler_t em_poll;
#endif /* DEVICE_POLLING */

/*********************************************************************
 *  FreeBSD Device Interface Entry Points
 *********************************************************************/

static device_method_t em_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe, em_probe),
	DEVMETHOD(device_attach, em_attach),
	DEVMETHOD(device_detach, em_detach),
	DEVMETHOD(device_shutdown, em_shutdown),
	DEVMETHOD(device_suspend, em_suspend),
	DEVMETHOD(device_resume, em_resume),
	{0, 0}
};

static driver_t em_driver = {
	"em", em_methods, sizeof(struct adapter),
};

devclass_t em_devclass;
DRIVER_MODULE(em, pci, em_driver, em_devclass, 0, 0);
MODULE_DEPEND(em, pci, 1, 1, 1);
MODULE_DEPEND(em, ether, 1, 1, 1);
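/*
 * Usage note (informative, editor's addition): when built as a module the
 * driver is loaded with "kldload if_em" or from /boot/loader.conf with
 * if_em_load="YES"; DRIVER_MODULE() above registers it with the PCI bus so
 * em_probe() runs against each matching device at bus enumeration time.
 */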

/*********************************************************************
 *  Tunable default values.
 *********************************************************************/

#define EM_TICKS_TO_USECS(ticks)	((1024 * (ticks) + 500) / 1000)
#define EM_USECS_TO_TICKS(usecs)	((1000 * (usecs) + 512) / 1024)
#define M_TSO_LEN			66
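/*
 * Worked example (informative, editor's addition): the interrupt delay
 * registers count in units of 1.024 usecs, so
 *   EM_TICKS_TO_USECS(32) = (1024*32 + 500)/1000 = 33 usecs, and
 *   EM_USECS_TO_TICKS(33) = (1000*33 + 512)/1024 = 32 ticks.
 * The +500 and +512 terms round to nearest rather than truncate.
 */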

/* Allow common code without TSO */
#ifndef CSUM_TSO
#define CSUM_TSO	0
#endif

static int em_tx_int_delay_dflt = EM_TICKS_TO_USECS(EM_TIDV);
static int em_rx_int_delay_dflt = EM_TICKS_TO_USECS(EM_RDTR);
TUNABLE_INT("hw.em.tx_int_delay", &em_tx_int_delay_dflt);
TUNABLE_INT("hw.em.rx_int_delay", &em_rx_int_delay_dflt);

static int em_tx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_TADV);
static int em_rx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_RADV);
TUNABLE_INT("hw.em.tx_abs_int_delay", &em_tx_abs_int_delay_dflt);
TUNABLE_INT("hw.em.rx_abs_int_delay", &em_rx_abs_int_delay_dflt);

static int em_rxd = EM_DEFAULT_RXD;
static int em_txd = EM_DEFAULT_TXD;
TUNABLE_INT("hw.em.rxd", &em_rxd);
TUNABLE_INT("hw.em.txd", &em_txd);

static int em_smart_pwr_down = FALSE;
TUNABLE_INT("hw.em.smart_pwr_down", &em_smart_pwr_down);

/* Controls whether promiscuous mode also shows bad packets */
static int em_debug_sbp = FALSE;
TUNABLE_INT("hw.em.sbp", &em_debug_sbp);

static int em_enable_msix = TRUE;
TUNABLE_INT("hw.em.enable_msix", &em_enable_msix);

/* How many packets rxeof tries to clean at a time */
static int em_rx_process_limit = 100;
TUNABLE_INT("hw.em.rx_process_limit", &em_rx_process_limit);

/* Flow control setting - default to FULL */
static int em_fc_setting = e1000_fc_full;
TUNABLE_INT("hw.em.fc_setting", &em_fc_setting);
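/*
 * Example (informative, editor's addition): these TUNABLE_INT() values are
 * read from the kernel environment at module load, typically set in
 * /boot/loader.conf, e.g.:
 *
 *	hw.em.rx_int_delay=32
 *	hw.em.rxd=2048
 *	hw.em.fc_setting=0
 */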

/* Global used in WOL setup with multiport cards */
static int global_quad_port_a = 0;

/*********************************************************************
 *  Device identification routine
 *
 *  em_probe determines if the driver should be loaded on the
 *  adapter, based on the adapter's PCI vendor/device ID.
 *
 *  return BUS_PROBE_DEFAULT on success, positive on failure
 *********************************************************************/

static int
em_probe(device_t dev)
{
	char		adapter_name[60];
	u16		pci_vendor_id = 0;
	u16		pci_device_id = 0;
	u16		pci_subvendor_id = 0;
	u16		pci_subdevice_id = 0;
	em_vendor_info_t *ent;

	INIT_DEBUGOUT("em_probe: begin");

	pci_vendor_id = pci_get_vendor(dev);
	if (pci_vendor_id != EM_VENDOR_ID)
		return (ENXIO);

	pci_device_id = pci_get_device(dev);
	pci_subvendor_id = pci_get_subvendor(dev);
	pci_subdevice_id = pci_get_subdevice(dev);

	ent = em_vendor_info_array;
	while (ent->vendor_id != 0) {
		if ((pci_vendor_id == ent->vendor_id) &&
		    (pci_device_id == ent->device_id) &&

		    ((pci_subvendor_id == ent->subvendor_id) ||
		    (ent->subvendor_id == PCI_ANY_ID)) &&

		    ((pci_subdevice_id == ent->subdevice_id) ||
		    (ent->subdevice_id == PCI_ANY_ID))) {
			sprintf(adapter_name, "%s %s",
				em_strings[ent->index],
				em_driver_version);
			device_set_desc_copy(dev, adapter_name);
			return (BUS_PROBE_DEFAULT);
		}
		ent++;
	}

	return (ENXIO);
}

/*********************************************************************
 *  Device initialization routine
 *
 *  The attach entry point is called when the driver is being loaded.
 *  This routine identifies the type of hardware, allocates all resources
 *  and initializes the hardware.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/

static int
em_attach(device_t dev)
{
	struct adapter	*adapter;
	int		error = 0;

	INIT_DEBUGOUT("em_attach: begin");

	adapter = device_get_softc(dev);
	adapter->dev = adapter->osdep.dev = dev;
	EM_CORE_LOCK_INIT(adapter, device_get_nameunit(dev));

	/* SYSCTL stuff */
	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
	    OID_AUTO, "nvm", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
	    em_sysctl_nvm_info, "I", "NVM Information");

	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
	    OID_AUTO, "debug", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
	    em_sysctl_debug_info, "I", "Debug Information");

	callout_init_mtx(&adapter->timer, &adapter->core_mtx, 0);

	/* Determine hardware and mac info */
	em_identify_hardware(adapter);

	/* Setup PCI resources */
	if (em_allocate_pci_resources(adapter)) {
		device_printf(dev, "Allocation of PCI resources failed\n");
		error = ENXIO;
		goto err_pci;
	}

	/*
	** For ICH8 and family we need to
	** map the flash memory, and this
	** must happen after the MAC is
	** identified
	*/
	if ((adapter->hw.mac.type == e1000_ich8lan) ||
	    (adapter->hw.mac.type == e1000_ich9lan) ||
	    (adapter->hw.mac.type == e1000_ich10lan) ||
	    (adapter->hw.mac.type == e1000_pchlan) ||
	    (adapter->hw.mac.type == e1000_pch2lan)) {
		int rid = EM_BAR_TYPE_FLASH;
		adapter->flash = bus_alloc_resource_any(dev,
		    SYS_RES_MEMORY, &rid, RF_ACTIVE);
		if (adapter->flash == NULL) {
			device_printf(dev, "Mapping of Flash failed\n");
			error = ENXIO;
			goto err_pci;
		}
		/* This is used in the shared code */
		adapter->hw.flash_address = (u8 *)adapter->flash;
		adapter->osdep.flash_bus_space_tag =
		    rman_get_bustag(adapter->flash);
		adapter->osdep.flash_bus_space_handle =
		    rman_get_bushandle(adapter->flash);
	}

	/* Do Shared Code initialization */
	if (e1000_setup_init_funcs(&adapter->hw, TRUE)) {
		device_printf(dev, "Setup of Shared code failed\n");
		error = ENXIO;
		goto err_pci;
	}

	e1000_get_bus_info(&adapter->hw);

	/* Set up some sysctls for the tunable interrupt delays */
	em_add_int_delay_sysctl(adapter, "rx_int_delay",
	    "receive interrupt delay in usecs", &adapter->rx_int_delay,
	    E1000_REGISTER(&adapter->hw, E1000_RDTR), em_rx_int_delay_dflt);
	em_add_int_delay_sysctl(adapter, "tx_int_delay",
	    "transmit interrupt delay in usecs", &adapter->tx_int_delay,
	    E1000_REGISTER(&adapter->hw, E1000_TIDV), em_tx_int_delay_dflt);
	em_add_int_delay_sysctl(adapter, "rx_abs_int_delay",
	    "receive interrupt delay limit in usecs",
	    &adapter->rx_abs_int_delay,
	    E1000_REGISTER(&adapter->hw, E1000_RADV),
	    em_rx_abs_int_delay_dflt);
	em_add_int_delay_sysctl(adapter, "tx_abs_int_delay",
	    "transmit interrupt delay limit in usecs",
	    &adapter->tx_abs_int_delay,
	    E1000_REGISTER(&adapter->hw, E1000_TADV),
	    em_tx_abs_int_delay_dflt);
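
	/*
	 * Example (informative, editor's addition): the sysctls registered
	 * above appear under the per-device tree and can be adjusted at
	 * runtime, e.g.:
	 *
	 *	sysctl dev.em.0.rx_int_delay=32
	 *
	 * (assumes unit 0; em_add_int_delay_sysctl() converts between usecs
	 * and the hardware tick units via the macros defined earlier)
	 */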

	/* Sysctl for limiting the amount of work done in the taskqueue */
	em_add_rx_process_limit(adapter, "rx_processing_limit",
	    "max number of rx packets to process", &adapter->rx_process_limit,
	    em_rx_process_limit);

	/* Sysctl for setting the interface flow control */
	em_set_flow_cntrl(adapter, "flow_control",
	    "interface flow control setting",
	    &adapter->fc_setting, em_fc_setting);

	/*
	 * Validate number of transmit and receive descriptors. It
	 * must not exceed the hardware maximum, and must be a
	 * multiple of EM_DBA_ALIGN.
	 */
	if (((em_txd * sizeof(struct e1000_tx_desc)) % EM_DBA_ALIGN) != 0 ||
	    (em_txd > EM_MAX_TXD) || (em_txd < EM_MIN_TXD)) {
		device_printf(dev, "Using %d TX descriptors instead of %d!\n",
		    EM_DEFAULT_TXD, em_txd);
		adapter->num_tx_desc = EM_DEFAULT_TXD;
	} else
		adapter->num_tx_desc = em_txd;

	if (((em_rxd * sizeof(struct e1000_rx_desc)) % EM_DBA_ALIGN) != 0 ||
	    (em_rxd > EM_MAX_RXD) || (em_rxd < EM_MIN_RXD)) {
		device_printf(dev, "Using %d RX descriptors instead of %d!\n",
		    EM_DEFAULT_RXD, em_rxd);
		adapter->num_rx_desc = EM_DEFAULT_RXD;
	} else
		adapter->num_rx_desc = em_rxd;

	adapter->hw.mac.autoneg = DO_AUTO_NEG;
	adapter->hw.phy.autoneg_wait_to_complete = FALSE;
	adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;

	/* Copper options */
	if (adapter->hw.phy.media_type == e1000_media_type_copper) {
		adapter->hw.phy.mdix = AUTO_ALL_MODES;
		adapter->hw.phy.disable_polarity_correction = FALSE;
		adapter->hw.phy.ms_type = EM_MASTER_SLAVE;
	}

	/*
	 * Set the frame limits assuming
	 * standard ethernet sized frames.
	 */
	adapter->max_frame_size = ETHERMTU + ETHER_HDR_LEN + ETHERNET_FCS_SIZE;
	adapter->min_frame_size = ETH_ZLEN + ETHERNET_FCS_SIZE;

	/*
	 * This controls when hardware reports transmit completion
	 * status.
	 */
	adapter->hw.mac.report_tx_early = 1;

	/*
	** Get queue/ring memory
	*/
	if (em_allocate_queues(adapter)) {
		error = ENOMEM;
		goto err_pci;
	}

	/* Allocate multicast array memory. */
	adapter->mta = malloc(sizeof(u8) * ETH_ADDR_LEN *
	    MAX_NUM_MULTICAST_ADDRESSES, M_DEVBUF, M_NOWAIT);
	if (adapter->mta == NULL) {
		device_printf(dev, "Can not allocate multicast setup array\n");
		error = ENOMEM;
		goto err_late;
	}

	/* Check SOL/IDER usage */
	if (e1000_check_reset_block(&adapter->hw))
		device_printf(dev, "PHY reset is blocked"
		    " due to SOL/IDER session.\n");

	/*
	** Start from a known state; this is
	** important for reading the NVM and
	** MAC address.
	*/
	e1000_reset_hw(&adapter->hw);

	/* Make sure we have a good EEPROM before we read from it */
	if (e1000_validate_nvm_checksum(&adapter->hw) < 0) {
		/*
		** Some PCI-E parts fail the first check due to
		** the link being in a sleep state; call it again,
		** and if it fails a second time it's a real issue.
		*/
		if (e1000_validate_nvm_checksum(&adapter->hw) < 0) {
			device_printf(dev,
			    "The EEPROM Checksum Is Not Valid\n");
			error = EIO;
			goto err_late;
		}
	}

	/* Copy the permanent MAC address out of the EEPROM */
	if (e1000_read_mac_addr(&adapter->hw) < 0) {
		device_printf(dev, "EEPROM read error while reading MAC"
		    " address\n");
		error = EIO;
		goto err_late;
	}

	if (!em_is_valid_ether_addr(adapter->hw.mac.addr)) {
		device_printf(dev, "Invalid MAC address\n");
		error = EIO;
		goto err_late;
	}

	/*
	**  Do interrupt configuration
	*/
	if (adapter->msix > 1) /* Do MSIX */
		error = em_allocate_msix(adapter);
	else  /* MSI or Legacy */
		error = em_allocate_legacy(adapter);
	if (error)
		goto err_late;

	/*
	 * Get Wake-on-Lan and Management info for later use
	 */
	em_get_wakeup(dev);

	/* Setup OS specific network interface */
	if (em_setup_interface(dev, adapter) != 0)
		goto err_late;

	em_reset(adapter);

	/* Initialize statistics */
	em_update_stats_counters(adapter);

	adapter->hw.mac.get_link_status = 1;
	em_update_link_status(adapter);

	/* Register for VLAN events */
	adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
	    em_register_vlan, adapter, EVENTHANDLER_PRI_FIRST);
	adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
	    em_unregister_vlan, adapter, EVENTHANDLER_PRI_FIRST);

	em_add_hw_stats(adapter);

	/* Non-AMT based hardware can now take control from firmware */
	if (adapter->has_manage && !adapter->has_amt)
		em_get_hw_control(adapter);

	/* Tell the stack that the interface is not active */
	adapter->ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);

	adapter->led_dev = led_create(em_led_func, adapter,
	    device_get_nameunit(dev));

	INIT_DEBUGOUT("em_attach: end");

	return (0);

err_late:
	em_free_transmit_structures(adapter);
	em_free_receive_structures(adapter);
	em_release_hw_control(adapter);
	if (adapter->ifp != NULL)
		if_free(adapter->ifp);
err_pci:
	em_free_pci_resources(adapter);
	free(adapter->mta, M_DEVBUF);
	EM_CORE_LOCK_DESTROY(adapter);

	return (error);
}

/*********************************************************************
 *  Device removal routine
 *
 *  The detach entry point is called when the driver is being removed.
 *  This routine stops the adapter and deallocates all the resources
 *  that were allocated for driver operation.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/

static int
em_detach(device_t dev)
{
	struct adapter	*adapter = device_get_softc(dev);
	struct ifnet	*ifp = adapter->ifp;

	INIT_DEBUGOUT("em_detach: begin");

	/* Make sure VLANS are not using driver */
	if (adapter->ifp->if_vlantrunk != NULL) {
		device_printf(dev, "Vlan in use, detach first\n");
		return (EBUSY);
	}

#ifdef DEVICE_POLLING
	if (ifp->if_capenable & IFCAP_POLLING)
		ether_poll_deregister(ifp);
#endif

	if (adapter->led_dev != NULL)
		led_destroy(adapter->led_dev);

	EM_CORE_LOCK(adapter);
	adapter->in_detach = 1;
	em_stop(adapter);
	EM_CORE_UNLOCK(adapter);
	EM_CORE_LOCK_DESTROY(adapter);

	e1000_phy_hw_reset(&adapter->hw);

	em_release_manageability(adapter);
	em_release_hw_control(adapter);

	/* Unregister VLAN events */
	if (adapter->vlan_attach != NULL)
		EVENTHANDLER_DEREGISTER(vlan_config, adapter->vlan_attach);
	if (adapter->vlan_detach != NULL)
		EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach);

	ether_ifdetach(adapter->ifp);
	callout_drain(&adapter->timer);

	em_free_pci_resources(adapter);
	bus_generic_detach(dev);
	if_free(ifp);

	em_free_transmit_structures(adapter);
	em_free_receive_structures(adapter);

	em_release_hw_control(adapter);
	free(adapter->mta, M_DEVBUF);

	return (0);
}

/*********************************************************************
 *
 *  Shutdown entry point
 *
 **********************************************************************/

static int
em_shutdown(device_t dev)
{
	return em_suspend(dev);
}

/*
 * Suspend/resume device methods.
 */
static int
em_suspend(device_t dev)
{
	struct adapter *adapter = device_get_softc(dev);

	EM_CORE_LOCK(adapter);

	em_release_manageability(adapter);
	em_release_hw_control(adapter);
	em_enable_wakeup(dev);

	EM_CORE_UNLOCK(adapter);

	return bus_generic_suspend(dev);
}

static int
em_resume(device_t dev)
{
	struct adapter *adapter = device_get_softc(dev);
	struct ifnet *ifp = adapter->ifp;

	EM_CORE_LOCK(adapter);
	em_init_locked(adapter);
	em_init_manageability(adapter);
	EM_CORE_UNLOCK(adapter);
	em_start(ifp);

	return bus_generic_resume(dev);
}


/*********************************************************************
 *  Transmit entry point
 *
 *  em_start is called by the stack to initiate a transmit.
 *  The driver will remain in this routine as long as there are
 *  packets to transmit and transmit resources are available.
 *  If transmit resources are not available, the stack is notified
 *  and the packet is requeued.
 **********************************************************************/

#ifdef EM_MULTIQUEUE
static int
em_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr, struct mbuf *m)
{
	struct adapter	*adapter = txr->adapter;
	struct mbuf	*next;
	int		err = 0, enq = 0;

	if ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) !=
	    IFF_DRV_RUNNING || adapter->link_active == 0) {
		if (m != NULL)
			err = drbr_enqueue(ifp, txr->br, m);
		return (err);
	}

	/* Call cleanup if number of TX descriptors low */
	if (txr->tx_avail <= EM_TX_CLEANUP_THRESHOLD)
		em_txeof(txr);

	enq = 0;
	if (m == NULL) {
		next = drbr_dequeue(ifp, txr->br);
	} else if (drbr_needs_enqueue(ifp, txr->br)) {
		if ((err = drbr_enqueue(ifp, txr->br, m)) != 0)
			return (err);
		next = drbr_dequeue(ifp, txr->br);
	} else
		next = m;

	/* Process the queue */
	while (next != NULL) {
		if ((err = em_xmit(txr, &next)) != 0) {
			if (next != NULL)
				err = drbr_enqueue(ifp, txr->br, next);
			break;
		}
		enq++;
		drbr_stats_update(ifp, next->m_pkthdr.len, next->m_flags);
		ETHER_BPF_MTAP(ifp, next);
		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
			break;
		if (txr->tx_avail < EM_MAX_SCATTER) {
			ifp->if_drv_flags |= IFF_DRV_OACTIVE;
			break;
		}
		next = drbr_dequeue(ifp, txr->br);
	}

	if (enq > 0) {
		/* Set the watchdog */
		txr->queue_status = EM_QUEUE_WORKING;
		txr->watchdog_time = ticks;
	}
	return (err);
}

/*
** Multiqueue capable stack interface
*/
static int
em_mq_start(struct ifnet *ifp, struct mbuf *m)
{
	struct adapter	*adapter = ifp->if_softc;
	struct tx_ring	*txr = adapter->tx_rings;
	int		error;

	if (EM_TX_TRYLOCK(txr)) {
		error = em_mq_start_locked(ifp, txr, m);
		EM_TX_UNLOCK(txr);
	} else
		error = drbr_enqueue(ifp, txr->br, m);

	return (error);
}
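/*
 * Design note (informative, editor's addition): em_mq_start() only trylocks
 * the TX ring; if the lock is contended the mbuf is parked on the buf_ring
 * instead, to be drained later by the current lock holder or the tx task,
 * so the stack's transmit path never blocks on the ring lock.
 */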

/*
** Flush all ring buffers
*/
static void
em_qflush(struct ifnet *ifp)
{
	struct adapter	*adapter = ifp->if_softc;
	struct tx_ring	*txr = adapter->tx_rings;
	struct mbuf	*m;

	for (int i = 0; i < adapter->num_queues; i++, txr++) {
		EM_TX_LOCK(txr);
		while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
			m_freem(m);
		EM_TX_UNLOCK(txr);
	}
	if_qflush(ifp);
}

#endif /* EM_MULTIQUEUE */

static void
em_start_locked(struct ifnet *ifp, struct tx_ring *txr)
{
	struct adapter	*adapter = ifp->if_softc;
	struct mbuf	*m_head;

	EM_TX_LOCK_ASSERT(txr);

	if ((ifp->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
	    IFF_DRV_RUNNING)
		return;

	if (!adapter->link_active)
		return;

	/* Call cleanup if number of TX descriptors low */
	if (txr->tx_avail <= EM_TX_CLEANUP_THRESHOLD)
		em_txeof(txr);

	while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
		if (txr->tx_avail < EM_MAX_SCATTER) {
			ifp->if_drv_flags |= IFF_DRV_OACTIVE;
			break;
		}
		IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
		if (m_head == NULL)
			break;
		/*
		 *  Encapsulation can modify our pointer, and/or make it
		 *  NULL on failure.  In that event, we can't requeue.
		 */
		if (em_xmit(txr, &m_head)) {
			if (m_head == NULL)
				break;
			ifp->if_drv_flags |= IFF_DRV_OACTIVE;
			IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
			break;
		}

		/* Send a copy of the frame to the BPF listener */
		ETHER_BPF_MTAP(ifp, m_head);

		/* Set timeout in case hardware has problems transmitting. */
		txr->watchdog_time = ticks;
		txr->queue_status = EM_QUEUE_WORKING;
	}

	return;
}

static void
em_start(struct ifnet *ifp)
{
	struct adapter	*adapter = ifp->if_softc;
	struct tx_ring	*txr = adapter->tx_rings;

	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
		EM_TX_LOCK(txr);
		em_start_locked(ifp, txr);
		EM_TX_UNLOCK(txr);
	}
	return;
}

/*********************************************************************
 *  Ioctl entry point
 *
 *  em_ioctl is called when the user wants to configure the
 *  interface.
 *
 *  return 0 on success, positive on failure
 **********************************************************************/

static int
em_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
{
	struct adapter	*adapter = ifp->if_softc;
	struct ifreq *ifr = (struct ifreq *)data;
#ifdef INET
	struct ifaddr *ifa = (struct ifaddr *)data;
#endif
	int error = 0;

	if (adapter->in_detach)
		return (error);

	switch (command) {
	case SIOCSIFADDR:
#ifdef INET
		if (ifa->ifa_addr->sa_family == AF_INET) {
			/*
			 * XXX
			 * Since resetting hardware takes a very long time
			 * and results in link renegotiation, we only
			 * initialize the hardware when it is absolutely
			 * required.
			 */
			ifp->if_flags |= IFF_UP;
			if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) {
				EM_CORE_LOCK(adapter);
				em_init_locked(adapter);
				EM_CORE_UNLOCK(adapter);
			}
			arp_ifinit(ifp, ifa);
		} else
#endif
			error = ether_ioctl(ifp, command, data);
		break;
	case SIOCSIFMTU:
	    {
		int max_frame_size;

		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)");

		EM_CORE_LOCK(adapter);
		switch (adapter->hw.mac.type) {
		case e1000_82571:
		case e1000_82572:
		case e1000_ich9lan:
		case e1000_ich10lan:
		case e1000_pch2lan:
		case e1000_82574:
		case e1000_80003es2lan:	/* 9K Jumbo Frame size */
			max_frame_size = 9234;
			break;
		case e1000_pchlan:
			max_frame_size = 4096;
			break;
			/* Adapters that do not support jumbo frames */
		case e1000_82583:
		case e1000_ich8lan:
			max_frame_size = ETHER_MAX_LEN;
			break;
		default:
			max_frame_size = MAX_JUMBO_FRAME_SIZE;
		}
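		/*
		 * Worked example (informative, editor's addition): with
		 * max_frame_size 9234, the largest MTU accepted by the
		 * check below is 9234 - ETHER_HDR_LEN (14) -
		 * ETHER_CRC_LEN (4) = 9216, i.e. "ifconfig em0 mtu 9216".
		 */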
		if (ifr->ifr_mtu > max_frame_size - ETHER_HDR_LEN -
		    ETHER_CRC_LEN) {
			EM_CORE_UNLOCK(adapter);
			error = EINVAL;
			break;
		}

		ifp->if_mtu = ifr->ifr_mtu;
		adapter->max_frame_size =
		    ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
		em_init_locked(adapter);
		EM_CORE_UNLOCK(adapter);
		break;
	    }
	case SIOCSIFFLAGS:
		IOCTL_DEBUGOUT("ioctl rcv'd:\
		    SIOCSIFFLAGS (Set Interface Flags)");
		EM_CORE_LOCK(adapter);
		if (ifp->if_flags & IFF_UP) {
			if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) {
				if ((ifp->if_flags ^ adapter->if_flags) &
				    (IFF_PROMISC | IFF_ALLMULTI)) {
					em_disable_promisc(adapter);
					em_set_promisc(adapter);
				}
			} else
				em_init_locked(adapter);
		} else
			if (ifp->if_drv_flags & IFF_DRV_RUNNING)
				em_stop(adapter);
		adapter->if_flags = ifp->if_flags;
		EM_CORE_UNLOCK(adapter);
		break;
	case SIOCADDMULTI:
	case SIOCDELMULTI:
		IOCTL_DEBUGOUT("ioctl rcv'd: SIOC(ADD|DEL)MULTI");
		if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
			EM_CORE_LOCK(adapter);
			em_disable_intr(adapter);
			em_set_multi(adapter);
#ifdef DEVICE_POLLING
			if (!(ifp->if_capenable & IFCAP_POLLING))
#endif
				em_enable_intr(adapter);
			EM_CORE_UNLOCK(adapter);
		}
		break;
	case SIOCSIFMEDIA:
		/*
		** As the speed/duplex settings are being
		** changed, we need to reset the PHY.
		*/
		adapter->hw.phy.reset_disable = FALSE;
		/* Check SOL/IDER usage */
		EM_CORE_LOCK(adapter);
		if (e1000_check_reset_block(&adapter->hw)) {
			EM_CORE_UNLOCK(adapter);
			device_printf(adapter->dev, "Media change is"
			    " blocked due to SOL/IDER session.\n");
			break;
		}
		EM_CORE_UNLOCK(adapter);
		/* falls thru */
	case SIOCGIFMEDIA:
		IOCTL_DEBUGOUT("ioctl rcv'd: \
		    SIOCxIFMEDIA (Get/Set Interface Media)");
		error = ifmedia_ioctl(ifp, ifr, &adapter->media, command);
		break;
	case SIOCSIFCAP:
	    {
		int mask, reinit;

		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFCAP (Set Capabilities)");
		reinit = 0;
		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
#ifdef DEVICE_POLLING
		if (mask & IFCAP_POLLING) {
			if (ifr->ifr_reqcap & IFCAP_POLLING) {
				error = ether_poll_register(em_poll, ifp);
				if (error)
					return (error);
				EM_CORE_LOCK(adapter);
				em_disable_intr(adapter);
				ifp->if_capenable |= IFCAP_POLLING;
				EM_CORE_UNLOCK(adapter);
			} else {
				error = ether_poll_deregister(ifp);
				/* Enable interrupt even in error case */
				EM_CORE_LOCK(adapter);
				em_enable_intr(adapter);
				ifp->if_capenable &= ~IFCAP_POLLING;
				EM_CORE_UNLOCK(adapter);
			}
		}
#endif
		if (mask & IFCAP_HWCSUM) {
			ifp->if_capenable ^= IFCAP_HWCSUM;
			reinit = 1;
		}
		if (mask & IFCAP_TSO4) {
			ifp->if_capenable ^= IFCAP_TSO4;
			reinit = 1;
		}
		if (mask & IFCAP_VLAN_HWTAGGING) {
			ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
			reinit = 1;
		}
		if (mask & IFCAP_VLAN_HWFILTER) {
			ifp->if_capenable ^= IFCAP_VLAN_HWFILTER;
			reinit = 1;
		}
		if ((mask & IFCAP_WOL) &&
		    (ifp->if_capabilities & IFCAP_WOL) != 0) {
			if (mask & IFCAP_WOL_MCAST)
				ifp->if_capenable ^= IFCAP_WOL_MCAST;
			if (mask & IFCAP_WOL_MAGIC)
				ifp->if_capenable ^= IFCAP_WOL_MAGIC;
		}
		if (reinit && (ifp->if_drv_flags & IFF_DRV_RUNNING))
			em_init(adapter);
		VLAN_CAPABILITIES(ifp);
		break;
	    }

	default:
		error = ether_ioctl(ifp, command, data);
		break;
	}

	return (error);
}


/*********************************************************************
 *  Init entry point
 *
 *  This routine is used in two ways. It is used by the stack as
 *  the init entry point in the network interface structure. It is
 *  also used by the driver as a hw/sw initialization routine to
 *  get to a consistent state.
 *
 **********************************************************************/

static void
em_init_locked(struct adapter *adapter)
{
	struct ifnet	*ifp = adapter->ifp;
	device_t	dev = adapter->dev;
	u32		pba;

	INIT_DEBUGOUT("em_init: begin");

	EM_CORE_LOCK_ASSERT(adapter);

	em_disable_intr(adapter);
	callout_stop(&adapter->timer);

	/*
	 * Packet Buffer Allocation (PBA)
	 * Writing PBA sets the receive portion of the buffer;
	 * the remainder is used for the transmit buffer.
	 */
	switch (adapter->hw.mac.type) {
	/* Total Packet Buffer on these is 48K */
	case e1000_82571:
	case e1000_82572:
	case e1000_80003es2lan:
		pba = E1000_PBA_32K; /* 32K for Rx, 16K for Tx */
		break;
	case e1000_82573: /* 82573: Total Packet Buffer is 32K */
		pba = E1000_PBA_12K; /* 12K for Rx, 20K for Tx */
		break;
	case e1000_82574:
	case e1000_82583:
		pba = E1000_PBA_20K; /* 20K for Rx, 20K for Tx */
		break;
	case e1000_ich8lan:
		pba = E1000_PBA_8K;
		break;
	case e1000_ich9lan:
	case e1000_ich10lan:
		pba = E1000_PBA_10K;
		break;
	case e1000_pchlan:
	case e1000_pch2lan:
		pba = E1000_PBA_26K;
		break;
	default:
		if (adapter->max_frame_size > 8192)
			pba = E1000_PBA_40K; /* 40K for Rx, 24K for Tx */
		else
			pba = E1000_PBA_48K; /* 48K for Rx, 16K for Tx */
	}

	INIT_DEBUGOUT1("em_init: pba=%dK", pba);
	E1000_WRITE_REG(&adapter->hw, E1000_PBA, pba);

	/* Get the latest mac address, User can use a LAA */
	bcopy(IF_LLADDR(adapter->ifp), adapter->hw.mac.addr,
	    ETHER_ADDR_LEN);

	/* Put the address into the Receive Address Array */
	e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);

	/*
	 * With the 82571 adapter, RAR[0] may be overwritten
	 * when the other port is reset; we make a duplicate
	 * in RAR[14] for that eventuality, which assures
	 * the interface continues to function.
	 */
	if (adapter->hw.mac.type == e1000_82571) {
		e1000_set_laa_state_82571(&adapter->hw, TRUE);
		e1000_rar_set(&adapter->hw, adapter->hw.mac.addr,
		    E1000_RAR_ENTRIES - 1);
	}

	/* Initialize the hardware */
	em_reset(adapter);
	em_update_link_status(adapter);

	/* Setup VLAN support, basic and offload if available */
	E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);

	/* Set hardware offload abilities */
	ifp->if_hwassist = 0;
	if (ifp->if_capenable & IFCAP_TXCSUM)
		ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
	if (ifp->if_capenable & IFCAP_TSO4)
		ifp->if_hwassist |= CSUM_TSO;

	/* Configure for OS presence */
	em_init_manageability(adapter);

	/* Prepare transmit descriptors and buffers */
	em_setup_transmit_structures(adapter);
	em_initialize_transmit_unit(adapter);

	/* Setup Multicast table */
	em_set_multi(adapter);

	/*
	** Figure out the desired mbuf
	** pool for doing jumbos
	*/
	if (adapter->max_frame_size <= 2048)
		adapter->rx_mbuf_sz = MCLBYTES;
	else if (adapter->max_frame_size <= 4096)
		adapter->rx_mbuf_sz = MJUMPAGESIZE;
	else
		adapter->rx_mbuf_sz = MJUM9BYTES;

	/* Prepare receive descriptors and buffers */
	if (em_setup_receive_structures(adapter)) {
		device_printf(dev, "Could not setup receive structures\n");
		em_stop(adapter);
		return;
	}
	em_initialize_receive_unit(adapter);

	/* Use real VLAN Filter support? */
	if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) {
		if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
			/* Use real VLAN Filter support */
			em_setup_vlan_hw_support(adapter);
		else {
			u32 ctrl;
			ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
			ctrl |= E1000_CTRL_VME;
			E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
		}
	}

	/* Don't lose promiscuous settings */
	em_set_promisc(adapter);

	ifp->if_drv_flags |= IFF_DRV_RUNNING;
	ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;

	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
	e1000_clear_hw_cntrs_base_generic(&adapter->hw);

	/* MSI/X configuration for 82574 */
	if (adapter->hw.mac.type == e1000_82574) {
		int tmp;
		tmp = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
		tmp |= E1000_CTRL_EXT_PBA_CLR;
		E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, tmp);
		/* Set the IVAR - interrupt vector routing. */
		E1000_WRITE_REG(&adapter->hw, E1000_IVAR, adapter->ivars);
	}

#ifdef DEVICE_POLLING
	/*
	 * Only enable interrupts if we are not polling; make sure
	 * they are off otherwise.
	 */
	if (ifp->if_capenable & IFCAP_POLLING)
		em_disable_intr(adapter);
	else
#endif /* DEVICE_POLLING */
		em_enable_intr(adapter);

	/* AMT based hardware can now take control from firmware */
	if (adapter->has_manage && adapter->has_amt)
		em_get_hw_control(adapter);

	/* Don't reset the phy next time init gets called */
	adapter->hw.phy.reset_disable = TRUE;
}

static void
em_init(void *arg)
{
	struct adapter *adapter = arg;

	EM_CORE_LOCK(adapter);
	em_init_locked(adapter);
	EM_CORE_UNLOCK(adapter);
}


#ifdef DEVICE_POLLING
/*********************************************************************
 *
 *  Legacy polling routine: note this only works with a single queue
 *
 *********************************************************************/
static int
em_poll(struct ifnet *ifp, enum poll_cmd cmd, int count)
{
	struct adapter *adapter = ifp->if_softc;
	struct tx_ring	*txr = adapter->tx_rings;
	struct rx_ring	*rxr = adapter->rx_rings;
	u32		reg_icr;
	int		rx_done;

	EM_CORE_LOCK(adapter);
	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
		EM_CORE_UNLOCK(adapter);
		return (0);
	}

	if (cmd == POLL_AND_CHECK_STATUS) {
		reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
		if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
			callout_stop(&adapter->timer);
			adapter->hw.mac.get_link_status = 1;
			em_update_link_status(adapter);
			callout_reset(&adapter->timer, hz,
			    em_local_timer, adapter);
		}
	}
	EM_CORE_UNLOCK(adapter);

	em_rxeof(rxr, count, &rx_done);

	EM_TX_LOCK(txr);
	em_txeof(txr);
#ifdef EM_MULTIQUEUE
	if (!drbr_empty(ifp, txr->br))
		em_mq_start_locked(ifp, txr, NULL);
#else
	if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
		em_start_locked(ifp, txr);
#endif
	EM_TX_UNLOCK(txr);

	return (rx_done);
}
#endif /* DEVICE_POLLING */
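/*
 * Usage note (informative, editor's addition): em_poll() is only compiled
 * in with "options DEVICE_POLLING" in the kernel config; polling is then
 * enabled per interface with "ifconfig em0 polling", which flips
 * IFCAP_POLLING via the SIOCSIFCAP path handled in em_ioctl() above.
 */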


/*********************************************************************
 *
 *  Fast Legacy/MSI Combined Interrupt Service routine
 *
 *********************************************************************/
static int
em_irq_fast(void *arg)
{
	struct adapter	*adapter = arg;
	struct ifnet	*ifp;
	u32		reg_icr;

	ifp = adapter->ifp;

	reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);

	/* Hot eject?  */
	if (reg_icr == 0xffffffff)
		return FILTER_STRAY;

	/* Definitely not our interrupt.  */
	if (reg_icr == 0x0)
		return FILTER_STRAY;

	/*
	 * Starting with the 82571 chip, bit 31 should be used to
	 * determine whether the interrupt belongs to us.
	 */
	if (adapter->hw.mac.type >= e1000_82571 &&
	    (reg_icr & E1000_ICR_INT_ASSERTED) == 0)
		return FILTER_STRAY;

	em_disable_intr(adapter);
	taskqueue_enqueue(adapter->tq, &adapter->que_task);

	/* Link status change */
	if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
		adapter->hw.mac.get_link_status = 1;
		taskqueue_enqueue(taskqueue_fast, &adapter->link_task);
	}

	if (reg_icr & E1000_ICR_RXO)
		adapter->rx_overruns++;
	return FILTER_HANDLED;
}

/* Combined RX/TX handler, used by Legacy and MSI */
static void
em_handle_que(void *context, int pending)
{
	struct adapter	*adapter = context;
	struct ifnet	*ifp = adapter->ifp;
	struct tx_ring	*txr = adapter->tx_rings;
	struct rx_ring	*rxr = adapter->rx_rings;
	bool		more;


	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
		more = em_rxeof(rxr, adapter->rx_process_limit, NULL);

		EM_TX_LOCK(txr);
		em_txeof(txr);
#ifdef EM_MULTIQUEUE
		if (!drbr_empty(ifp, txr->br))
			em_mq_start_locked(ifp, txr, NULL);
#else
		if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
			em_start_locked(ifp, txr);
#endif
		em_txeof(txr);
		EM_TX_UNLOCK(txr);
		if (more) {
			taskqueue_enqueue(adapter->tq, &adapter->que_task);
			return;
		}
	}

	em_enable_intr(adapter);
	return;
}


/*********************************************************************
 *
 *  MSIX Interrupt Service Routines
 *
 **********************************************************************/
static void
em_msix_tx(void *arg)
{
	struct tx_ring *txr = arg;
	struct adapter *adapter = txr->adapter;
	bool		more;

	++txr->tx_irq;
	EM_TX_LOCK(txr);
	more = em_txeof(txr);
	EM_TX_UNLOCK(txr);
	if (more)
		taskqueue_enqueue(txr->tq, &txr->tx_task);
	else
		/* Reenable this interrupt */
		E1000_WRITE_REG(&adapter->hw, E1000_IMS, txr->ims);
	return;
}

/*********************************************************************
 *
 *  MSIX RX Interrupt Service routine
 *
 **********************************************************************/

static void
em_msix_rx(void *arg)
{
	struct rx_ring	*rxr = arg;
	struct adapter	*adapter = rxr->adapter;
	bool		more;

	++rxr->rx_irq;
	more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
	if (more)
		taskqueue_enqueue(rxr->tq, &rxr->rx_task);
	else
		/* Reenable this interrupt */
		E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxr->ims);
	return;
}

/*********************************************************************
 *
 *  MSIX Link Fast Interrupt Service routine
 *
 **********************************************************************/
static void
em_msix_link(void *arg)
{
	struct adapter	*adapter = arg;
	u32		reg_icr;

	++adapter->link_irq;
	reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);

	if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
		adapter->hw.mac.get_link_status = 1;
		em_handle_link(adapter, 0);
	} else
		E1000_WRITE_REG(&adapter->hw, E1000_IMS,
		    EM_MSIX_LINK | E1000_IMS_LSC);
	return;
}

static void
em_handle_rx(void *context, int pending)
{
	struct rx_ring	*rxr = context;
	struct adapter	*adapter = rxr->adapter;
	bool		more;

	more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
	if (more)
		taskqueue_enqueue(rxr->tq, &rxr->rx_task);
	else
		/* Reenable this interrupt */
		E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxr->ims);
}

static void
em_handle_tx(void *context, int pending)
{
	struct tx_ring	*txr = context;
	struct adapter	*adapter = txr->adapter;
	struct ifnet	*ifp = adapter->ifp;

	EM_TX_LOCK(txr);
	em_txeof(txr);
#ifdef EM_MULTIQUEUE
	if (!drbr_empty(ifp, txr->br))
		em_mq_start_locked(ifp, txr, NULL);
#else
	if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
		em_start_locked(ifp, txr);
#endif
	em_txeof(txr);
	E1000_WRITE_REG(&adapter->hw, E1000_IMS, txr->ims);
	EM_TX_UNLOCK(txr);
}

static void
em_handle_link(void *context, int pending)
{
	struct adapter	*adapter = context;
	struct ifnet *ifp = adapter->ifp;

	if (!(ifp->if_drv_flags & IFF_DRV_RUNNING))
		return;

	EM_CORE_LOCK(adapter);
	callout_stop(&adapter->timer);
	em_update_link_status(adapter);
	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
	E1000_WRITE_REG(&adapter->hw, E1000_IMS,
	    EM_MSIX_LINK | E1000_IMS_LSC);
	EM_CORE_UNLOCK(adapter);
}


/*********************************************************************
 *
 *  Media Ioctl callback
 *
 *  This routine is called whenever the user queries the status of
 *  the interface using ifconfig.
 *
 **********************************************************************/
static void
em_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
{
	struct adapter *adapter = ifp->if_softc;
	u_char fiber_type = IFM_1000_SX;

	INIT_DEBUGOUT("em_media_status: begin");

	EM_CORE_LOCK(adapter);
	em_update_link_status(adapter);

	ifmr->ifm_status = IFM_AVALID;
	ifmr->ifm_active = IFM_ETHER;

	if (!adapter->link_active) {
		EM_CORE_UNLOCK(adapter);
		return;
	}

	ifmr->ifm_status |= IFM_ACTIVE;

	if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
	    (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
		ifmr->ifm_active |= fiber_type | IFM_FDX;
	} else {
		switch (adapter->link_speed) {
		case 10:
			ifmr->ifm_active |= IFM_10_T;
			break;
		case 100:
			ifmr->ifm_active |= IFM_100_TX;
			break;
		case 1000:
			ifmr->ifm_active |= IFM_1000_T;
			break;
		}
		if (adapter->link_duplex == FULL_DUPLEX)
			ifmr->ifm_active |= IFM_FDX;
		else
			ifmr->ifm_active |= IFM_HDX;
	}
	EM_CORE_UNLOCK(adapter);
}

/*********************************************************************
 *
 *  Media Ioctl callback
 *
 *  This routine is called when the user changes speed/duplex using
 *  the media/mediaopt options with ifconfig.
 *
 **********************************************************************/
static int
em_media_change(struct ifnet *ifp)
{
	struct adapter *adapter = ifp->if_softc;
	struct ifmedia  *ifm = &adapter->media;

	INIT_DEBUGOUT("em_media_change: begin");

	if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
		return (EINVAL);

	EM_CORE_LOCK(adapter);
	switch (IFM_SUBTYPE(ifm->ifm_media)) {
	case IFM_AUTO:
		adapter->hw.mac.autoneg = DO_AUTO_NEG;
		adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
		break;
	case IFM_1000_LX:
	case IFM_1000_SX:
	case IFM_1000_T:
		adapter->hw.mac.autoneg = DO_AUTO_NEG;
		adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
		break;
	case IFM_100_TX:
		adapter->hw.mac.autoneg = FALSE;
		adapter->hw.phy.autoneg_advertised = 0;
		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
			adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_FULL;
		else
			adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_HALF;
		break;
	case IFM_10_T:
		adapter->hw.mac.autoneg = FALSE;
		adapter->hw.phy.autoneg_advertised = 0;
		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
			adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_FULL;
		else
			adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_HALF;
		break;
	default:
		device_printf(adapter->dev, "Unsupported media type\n");
	}

	em_init_locked(adapter);
	EM_CORE_UNLOCK(adapter);

	return (0);
}

/*********************************************************************
 *
 *  This routine maps the mbufs to tx descriptors.
 *
 *  return 0 on success, positive on failure
 **********************************************************************/

static int
em_xmit(struct tx_ring *txr, struct mbuf **m_headp)
{
	struct adapter		*adapter = txr->adapter;
	bus_dma_segment_t	segs[EM_MAX_SCATTER];
	bus_dmamap_t		map;
	struct em_buffer	*tx_buffer, *tx_buffer_mapped;
	struct e1000_tx_desc	*ctxd = NULL;
	struct mbuf		*m_head;
	struct ether_header	*eh;
	struct ip		*ip = NULL;
	struct tcphdr		*tp = NULL;
	u32			txd_upper, txd_lower, txd_used, txd_saved;
	int			ip_off, poff;
	int			nsegs, i, j, first, last = 0;
	int			error, do_tso, tso_desc = 0;

	m_head = *m_headp;
	txd_upper = txd_lower = txd_used = txd_saved = 0;
	do_tso = ((m_head->m_pkthdr.csum_flags & CSUM_TSO) != 0);
	ip_off = poff = 0;

1770	 * Intel recommends entire IP/TCP header length reside in a single
1771	 * buffer. If multiple descriptors are used to describe the IP and
1772	 * TCP header, each descriptor should describe one or more
1773	 * complete headers; descriptors referencing only parts of headers
1774	 * are not supported. If all layer headers are not coalesced into
1775	 * a single buffer, each buffer should not cross a 4KB boundary,
1776	 * or be larger than the maximum read request size.
1777	 * Controller also requires modifing IP/TCP header to make TSO work
1778	 * The controller also requires modifying the IP/TCP header to
1779	 * make TSO work, so we first get a writable mbuf chain, then
1780	 * coalesce the ethernet/IP/TCP headers into a single buffer to
1781	 * meet the controller's requirement. This also simplifies
1782	 * IP/TCP/UDP checksum offloading, which has similar restrictions.
1783	if (do_tso || m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD) {
1784		if (do_tso || (m_head->m_next != NULL &&
1785		    m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD)) {
1786			if (M_WRITABLE(*m_headp) == 0) {
1787				m_head = m_dup(*m_headp, M_DONTWAIT);
1788				m_freem(*m_headp);
1789				if (m_head == NULL) {
1790					*m_headp = NULL;
1791					return (ENOBUFS);
1792				}
1793				*m_headp = m_head;
1794			}
1795		}
1796		/*
1797		 * XXX
1798		 * Assume IPv4, we don't have TSO/checksum offload support
1799		 * for IPv6 yet.
1800		 */
1801		ip_off = sizeof(struct ether_header);
1802		m_head = m_pullup(m_head, ip_off);
1803		if (m_head == NULL) {
1804			*m_headp = NULL;
1805			return (ENOBUFS);
1806		}
1807		eh = mtod(m_head, struct ether_header *);
1808		if (eh->ether_type == htons(ETHERTYPE_VLAN)) {
1809			ip_off = sizeof(struct ether_vlan_header);
1810			m_head = m_pullup(m_head, ip_off);
1811			if (m_head == NULL) {
1812				*m_headp = NULL;
1813				return (ENOBUFS);
1814			}
1815		}
1816		m_head = m_pullup(m_head, ip_off + sizeof(struct ip));
1817		if (m_head == NULL) {
1818			*m_headp = NULL;
1819			return (ENOBUFS);
1820		}
1821		ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1822		poff = ip_off + (ip->ip_hl << 2);
1823		m_head = m_pullup(m_head, poff + sizeof(struct tcphdr));
1824		if (m_head == NULL) {
1825			*m_headp = NULL;
1826			return (ENOBUFS);
1827		}
1828		if (do_tso) {
1829			tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1830			/*
1831			 * TSO workaround:
1832			 *   pull 4 more bytes of data into the header mbuf.
1833			 */
1834			m_head = m_pullup(m_head, poff + (tp->th_off << 2) + 4);
1835			if (m_head == NULL) {
1836				*m_headp = NULL;
1837				return (ENOBUFS);
1838			}
1839			ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1840			ip->ip_len = 0;
1841			ip->ip_sum = 0;
1842			/*
1843			 * The pseudo TCP checksum does not include the TCP
1844			 * payload length, so the driver must recompute it here
1845			 * in the form the hardware expects to see. This follows
1846			 * Microsoft's Large Send specification.
1847			 */
1848			tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1849			tp->th_sum = in_pseudo(ip->ip_src.s_addr,
1850			    ip->ip_dst.s_addr, htons(IPPROTO_TCP));
1851		} else if (m_head->m_pkthdr.csum_flags & CSUM_TCP) {
1852			tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1853			m_head = m_pullup(m_head, poff + (tp->th_off << 2));
1854			if (m_head == NULL) {
1855				*m_headp = NULL;
1856				return (ENOBUFS);
1857			}
1858			ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1859			tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1860		} else if (m_head->m_pkthdr.csum_flags & CSUM_UDP) {
1861			m_head = m_pullup(m_head, poff + sizeof(struct udphdr));
1862			if (m_head == NULL) {
1863				*m_headp = NULL;
1864				return (ENOBUFS);
1865			}
1866			ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1867		}
1868		*m_headp = m_head;
1869	}
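
	/*
	 * Worked example of the offsets computed above (illustrative,
	 * assuming an untagged IPv4/TCP frame with no IP or TCP options):
	 *
	 *   ip_off = sizeof(struct ether_header)         = 14
	 *   poff   = ip_off + (ip->ip_hl << 2) = 14 + 20 = 34
	 *   TCP header ends at poff + (tp->th_off << 2)  = 54
	 *
	 * so the TSO path pulls up 58 contiguous bytes: the headers plus
	 * the 4 extra payload bytes for the sentinel workaround.
	 */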
1870
1871	/*
1872	 * Map the packet for DMA
1873	 *
1874	 * Capture the first descriptor index;
1875	 * this descriptor will later store the
1876	 * index of the EOP, which is the only
1877	 * one that gets a DONE bit writeback.
1878	 */
1879	first = txr->next_avail_desc;
1880	tx_buffer = &txr->tx_buffers[first];
1881	tx_buffer_mapped = tx_buffer;
1882	map = tx_buffer->map;
1883
1884	error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
1885	    *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
1886
1887	/*
1888	 * There are two types of errors we can (try) to handle:
1889	 * - EFBIG means the mbuf chain was too long and bus_dma ran
1890	 *   out of segments.  Defragment the mbuf chain and try again.
1891	 * - ENOMEM means bus_dma could not obtain enough bounce buffers
1892	 *   at this point in time.  Defer sending and try again later.
1893	 * All other errors, in particular EINVAL, are fatal and prevent the
1894	 * mbuf chain from ever going through.  Drop it and report error.
1895	 */
1896	if (error == EFBIG) {
1897		struct mbuf *m;
1898
1899		m = m_defrag(*m_headp, M_DONTWAIT);
1900		if (m == NULL) {
1901			adapter->mbuf_alloc_failed++;
1902			m_freem(*m_headp);
1903			*m_headp = NULL;
1904			return (ENOBUFS);
1905		}
1906		*m_headp = m;
1907
1908		/* Try it again */
1909		error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
1910		    *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
1911
1912		if (error) {
1913			adapter->no_tx_dma_setup++;
1914			m_freem(*m_headp);
1915			*m_headp = NULL;
1916			return (error);
1917		}
1918	} else if (error != 0) {
1919		adapter->no_tx_dma_setup++;
1920		return (error);
1921	}
1922
1923	/*
1924	 * TSO Hardware workaround, if this packet is not
1925	 * TSO, and is only a single descriptor long, and
1926	 * it follows a TSO burst, then we need to add a
1927	 * sentinel descriptor to prevent premature writeback.
1928	 */
1929	if ((do_tso == 0) && (txr->tx_tso == TRUE)) {
1930		if (nsegs == 1)
1931			tso_desc = TRUE;
1932		txr->tx_tso = FALSE;
1933	}
1934
1935	if (nsegs > (txr->tx_avail - 2)) {
1936		txr->no_desc_avail++;
1937		bus_dmamap_unload(txr->txtag, map);
1938		return (ENOBUFS);
1939	}
1940	m_head = *m_headp;
1941
1942	/* Do hardware assists */
1943	if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
1944		em_tso_setup(txr, m_head, ip_off, ip, tp,
1945		    &txd_upper, &txd_lower);
1946		/* we need to make a final sentinel transmit desc */
1947		tso_desc = TRUE;
1948	} else if (m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD)
1949		em_transmit_checksum_setup(txr, m_head,
1950		    ip_off, ip, &txd_upper, &txd_lower);
1951
1952	i = txr->next_avail_desc;
1953
1954	/* Set up our transmit descriptors */
1955	for (j = 0; j < nsegs; j++) {
1956		bus_size_t seg_len;
1957		bus_addr_t seg_addr;
1958
1959		tx_buffer = &txr->tx_buffers[i];
1960		ctxd = &txr->tx_base[i];
1961		seg_addr = segs[j].ds_addr;
1962		seg_len  = segs[j].ds_len;
1963		/*
1964		** TSO Workaround:
1965		** If this is the last descriptor, we want to
1966		** split it so we have a small final sentinel
1967		*/
1968		if (tso_desc && (j == (nsegs -1)) && (seg_len > 8)) {
1969			seg_len -= 4;
1970			ctxd->buffer_addr = htole64(seg_addr);
1971			ctxd->lower.data = htole32(
1972			adapter->txd_cmd | txd_lower | seg_len);
1973			ctxd->upper.data =
1974			    htole32(txd_upper);
1975			if (++i == adapter->num_tx_desc)
1976				i = 0;
1977			/* Now make the sentinel */
1978			++txd_used; /* using an extra txd */
1979			ctxd = &txr->tx_base[i];
1980			tx_buffer = &txr->tx_buffers[i];
1981			ctxd->buffer_addr =
1982			    htole64(seg_addr + seg_len);
1983			ctxd->lower.data = htole32(
1984			adapter->txd_cmd | txd_lower | 4);
1985			ctxd->upper.data =
1986			    htole32(txd_upper);
1987			last = i;
1988			if (++i == adapter->num_tx_desc)
1989				i = 0;
1990		} else {
1991			ctxd->buffer_addr = htole64(seg_addr);
1992			ctxd->lower.data = htole32(
1993			adapter->txd_cmd | txd_lower | seg_len);
1994			ctxd->upper.data =
1995			    htole32(txd_upper);
1996			last = i;
1997			if (++i == adapter->num_tx_desc)
1998				i = 0;
1999		}
2000		tx_buffer->m_head = NULL;
2001		tx_buffer->next_eop = -1;
2002	}
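
	/*
	 * Worked example of the sentinel split above (illustrative): if a
	 * TSO packet's last DMA segment is 1448 bytes, it is written as a
	 * 1444-byte data descriptor followed by a 4-byte descriptor at
	 * seg_addr + 1444; txd_used counts the extra slot, and the EOP/RS
	 * bits set below land on that small final descriptor.
	 */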
2003
2004	txr->next_avail_desc = i;
2005	txr->tx_avail -= nsegs;
2006	if (tso_desc) /* TSO used an extra for sentinel */
2007		txr->tx_avail -= txd_used;
2008
2009	if (m_head->m_flags & M_VLANTAG) {
2010		/* Set the vlan id. */
2011		ctxd->upper.fields.special =
2012		    htole16(m_head->m_pkthdr.ether_vtag);
2013		/* Tell hardware to add tag */
2014		ctxd->lower.data |= htole32(E1000_TXD_CMD_VLE);
2015	}
2016
2017	tx_buffer->m_head = m_head;
2018	tx_buffer_mapped->map = tx_buffer->map;
2019	tx_buffer->map = map;
2020	bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);
2021
2022	/*
2023	 * Last Descriptor of Packet
2024	 * needs End Of Packet (EOP)
2025	 * and Report Status (RS)
2026	 */
2027	ctxd->lower.data |=
2028	    htole32(E1000_TXD_CMD_EOP | E1000_TXD_CMD_RS);
2029	/*
2030	 * Keep track in the first buffer which
2031	 * descriptor will be written back
2032	 */
2033	tx_buffer = &txr->tx_buffers[first];
2034	tx_buffer->next_eop = last;
2035	/* Update the watchdog time early and often */
2036	txr->watchdog_time = ticks;
2037
2038	/*
2039	 * Advance the Transmit Descriptor Tail (TDT), this tells the E1000
2040	 * that this frame is available to transmit.
2041	 */
2042	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
2043	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
2044	E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), i);
2045
2046	return (0);
2047}
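
/*
 * Hedged sketch (not part of the driver) of how a start routine might
 * drive em_xmit() above; the dequeue/requeue details are illustrative
 * assumptions, not a copy of em_start()/em_mq_start().
 */
#if 0
static void
example_start_locked(struct tx_ring *txr, struct ifnet *ifp)
{
	struct mbuf *m_head;

	while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
		IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
		if (m_head == NULL)
			break;
		/* em_xmit() may free or replace the chain on failure */
		if (em_xmit(txr, &m_head) != 0) {
			if (m_head != NULL)
				IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
			ifp->if_drv_flags |= IFF_DRV_OACTIVE;
			break;
		}
		/* Pass a copy of the frame to any BPF listeners */
		ETHER_BPF_MTAP(ifp, m_head);
	}
}
#endif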
2048
2049static void
2050em_set_promisc(struct adapter *adapter)
2051{
2052	struct ifnet	*ifp = adapter->ifp;
2053	u32		reg_rctl;
2054
2055	reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2056
2057	if (ifp->if_flags & IFF_PROMISC) {
2058		reg_rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
2059		/* Turn this on if you want to see bad packets */
2060		if (em_debug_sbp)
2061			reg_rctl |= E1000_RCTL_SBP;
2062		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2063	} else if (ifp->if_flags & IFF_ALLMULTI) {
2064		reg_rctl |= E1000_RCTL_MPE;
2065		reg_rctl &= ~E1000_RCTL_UPE;
2066		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2067	}
2068}
2069
2070static void
2071em_disable_promisc(struct adapter *adapter)
2072{
2073	u32	reg_rctl;
2074
2075	reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2076
2077	reg_rctl &=  (~E1000_RCTL_UPE);
2078	reg_rctl &=  (~E1000_RCTL_MPE);
2079	reg_rctl &=  (~E1000_RCTL_SBP);
2080	E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2081}
2082
2083
2084/*********************************************************************
2085 *  Multicast Update
2086 *
2087 *  This routine is called whenever the multicast address list is updated.
2088 *
2089 **********************************************************************/
2090
2091static void
2092em_set_multi(struct adapter *adapter)
2093{
2094	struct ifnet	*ifp = adapter->ifp;
2095	struct ifmultiaddr *ifma;
2096	u32 reg_rctl = 0;
2097	u8  *mta; /* Multicast array memory */
2098	int mcnt = 0;
2099
2100	IOCTL_DEBUGOUT("em_set_multi: begin");
2101
2102	mta = adapter->mta;
2103	bzero(mta, sizeof(u8) * ETH_ADDR_LEN * MAX_NUM_MULTICAST_ADDRESSES);
2104
2105	if (adapter->hw.mac.type == e1000_82542 &&
2106	    adapter->hw.revision_id == E1000_REVISION_2) {
2107		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2108		if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
2109			e1000_pci_clear_mwi(&adapter->hw);
2110		reg_rctl |= E1000_RCTL_RST;
2111		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2112		msec_delay(5);
2113	}
2114
2115#if __FreeBSD_version < 800000
2116	IF_ADDR_LOCK(ifp);
2117#else
2118	if_maddr_rlock(ifp);
2119#endif
2120	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
2121		if (ifma->ifma_addr->sa_family != AF_LINK)
2122			continue;
2123
2124		if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
2125			break;
2126
2127		bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
2128		    &mta[mcnt * ETH_ADDR_LEN], ETH_ADDR_LEN);
2129		mcnt++;
2130	}
2131#if __FreeBSD_version < 800000
2132	IF_ADDR_UNLOCK(ifp);
2133#else
2134	if_maddr_runlock(ifp);
2135#endif
2136	if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) {
2137		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2138		reg_rctl |= E1000_RCTL_MPE;
2139		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2140	} else
2141		e1000_update_mc_addr_list(&adapter->hw, mta, mcnt);
2142
2143	if (adapter->hw.mac.type == e1000_82542 &&
2144	    adapter->hw.revision_id == E1000_REVISION_2) {
2145		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2146		reg_rctl &= ~E1000_RCTL_RST;
2147		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2148		msec_delay(5);
2149		if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
2150			e1000_pci_set_mwi(&adapter->hw);
2151	}
2152}
2153
2154
2155/*********************************************************************
2156 *  Timer routine
2157 *
2158 *  This routine checks for link status and updates statistics.
2159 *
2160 **********************************************************************/
2161
2162static void
2163em_local_timer(void *arg)
2164{
2165	struct adapter	*adapter = arg;
2166	struct ifnet	*ifp = adapter->ifp;
2167	struct tx_ring	*txr = adapter->tx_rings;
2168
2169	EM_CORE_LOCK_ASSERT(adapter);
2170
2171	em_update_link_status(adapter);
2172	em_update_stats_counters(adapter);
2173
2174	/* Reset LAA into RAR[0] on 82571 */
2175	if ((adapter->hw.mac.type == e1000_82571) &&
2176	    e1000_get_laa_state_82571(&adapter->hw))
2177		e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);
2178
2179	/*
2180	** Don't do TX watchdog check if we've been paused
2181	*/
2182	if (adapter->pause_frames) {
2183		adapter->pause_frames = 0;
2184		goto out;
2185	}
2186	/*
2187	** Check the state of the TX queue(s); this
2188	** can be done without the lock because it's RO
2189	** and the HUNG state will be static if set.
2190	*/
2191	for (int i = 0; i < adapter->num_queues; i++, txr++)
2192		if (txr->queue_status == EM_QUEUE_HUNG)
2193			goto hung;
2194out:
2195	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
2196	return;
2197hung:
2198	/* Looks like we're hung */
2199	device_printf(adapter->dev, "Watchdog timeout -- resetting\n");
2200	device_printf(adapter->dev,
2201	    "Queue(%d) tdh = %d, hw tdt = %d\n", txr->me,
2202	    E1000_READ_REG(&adapter->hw, E1000_TDH(txr->me)),
2203	    E1000_READ_REG(&adapter->hw, E1000_TDT(txr->me)));
2204	device_printf(adapter->dev, "TX(%d) desc avail = %d, "
2205	    "Next TX to Clean = %d\n",
2206	    txr->me, txr->tx_avail, txr->next_to_clean);
2207	ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2208	adapter->watchdog_events++;
2209	EM_TX_UNLOCK(txr);
2210	em_init_locked(adapter);
2211}
2212
2213
2214static void
2215em_update_link_status(struct adapter *adapter)
2216{
2217	struct e1000_hw *hw = &adapter->hw;
2218	struct ifnet *ifp = adapter->ifp;
2219	device_t dev = adapter->dev;
2220	struct tx_ring *txr = adapter->tx_rings;
2221	u32 link_check = 0;
2222
2223	/* Get the cached link value or read phy for real */
2224	switch (hw->phy.media_type) {
2225	case e1000_media_type_copper:
2226		if (hw->mac.get_link_status) {
2227			/* Do the work to read phy */
2228			e1000_check_for_link(hw);
2229			link_check = !hw->mac.get_link_status;
2230			if (link_check) /* ESB2 fix */
2231				e1000_cfg_on_link_up(hw);
2232		} else
2233			link_check = TRUE;
2234		break;
2235	case e1000_media_type_fiber:
2236		e1000_check_for_link(hw);
2237		link_check = (E1000_READ_REG(hw, E1000_STATUS) &
2238		    E1000_STATUS_LU);
2239		break;
2240	case e1000_media_type_internal_serdes:
2241		e1000_check_for_link(hw);
2242		link_check = adapter->hw.mac.serdes_has_link;
2243		break;
2244	default:
2245	case e1000_media_type_unknown:
2246		break;
2247	}
2248
2249	/* Now check for a transition */
2250	if (link_check && (adapter->link_active == 0)) {
2251		e1000_get_speed_and_duplex(hw, &adapter->link_speed,
2252		    &adapter->link_duplex);
2253		/* Check if we must disable SPEED_MODE bit on PCI-E */
2254		if ((adapter->link_speed != SPEED_1000) &&
2255		    ((hw->mac.type == e1000_82571) ||
2256		    (hw->mac.type == e1000_82572))) {
2257			int tarc0;
2258			tarc0 = E1000_READ_REG(hw, E1000_TARC(0));
2259			tarc0 &= ~SPEED_MODE_BIT;
2260			E1000_WRITE_REG(hw, E1000_TARC(0), tarc0);
2261		}
2262		if (bootverbose)
2263			device_printf(dev, "Link is up %d Mbps %s\n",
2264			    adapter->link_speed,
2265			    ((adapter->link_duplex == FULL_DUPLEX) ?
2266			    "Full Duplex" : "Half Duplex"));
2267		adapter->link_active = 1;
2268		adapter->smartspeed = 0;
2269		ifp->if_baudrate = adapter->link_speed * 1000000;
2270		if_link_state_change(ifp, LINK_STATE_UP);
2271	} else if (!link_check && (adapter->link_active == 1)) {
2272		ifp->if_baudrate = adapter->link_speed = 0;
2273		adapter->link_duplex = 0;
2274		if (bootverbose)
2275			device_printf(dev, "Link is Down\n");
2276		adapter->link_active = 0;
2277		/* Link down, disable watchdog */
2278		for (int i = 0; i < adapter->num_queues; i++, txr++)
2279			txr->queue_status = EM_QUEUE_IDLE;
2280		if_link_state_change(ifp, LINK_STATE_DOWN);
2281	}
2282}
2283
2284/*********************************************************************
2285 *
2286 *  This routine disables all traffic on the adapter by issuing a
2287 *  global reset on the MAC and deallocates TX/RX buffers.
2288 *
2289 *  This routine should always be called with BOTH the CORE
2290 *  and TX locks.
2291 **********************************************************************/
2292
2293static void
2294em_stop(void *arg)
2295{
2296	struct adapter	*adapter = arg;
2297	struct ifnet	*ifp = adapter->ifp;
2298	struct tx_ring	*txr = adapter->tx_rings;
2299
2300	EM_CORE_LOCK_ASSERT(adapter);
2301
2302	INIT_DEBUGOUT("em_stop: begin");
2303
2304	em_disable_intr(adapter);
2305	callout_stop(&adapter->timer);
2306
2307	/* Tell the stack that the interface is no longer active */
2308	ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
2309
2310	/* Unarm watchdog timer. */
2311	for (int i = 0; i < adapter->num_queues; i++, txr++) {
2312		EM_TX_LOCK(txr);
2313		txr->queue_status = EM_QUEUE_IDLE;
2314		EM_TX_UNLOCK(txr);
2315	}
2316
2317	e1000_reset_hw(&adapter->hw);
2318	E1000_WRITE_REG(&adapter->hw, E1000_WUC, 0);
2319
2320	e1000_led_off(&adapter->hw);
2321	e1000_cleanup_led(&adapter->hw);
2322}
2323
2324
2325/*********************************************************************
2326 *
2327 *  Determine hardware revision.
2328 *
2329 **********************************************************************/
2330static void
2331em_identify_hardware(struct adapter *adapter)
2332{
2333	device_t dev = adapter->dev;
2334
2335	/* Make sure our PCI config space has the necessary stuff set */
2336	adapter->hw.bus.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2);
2337	if (!((adapter->hw.bus.pci_cmd_word & PCIM_CMD_BUSMASTEREN) &&
2338	    (adapter->hw.bus.pci_cmd_word & PCIM_CMD_MEMEN))) {
2339		device_printf(dev, "Memory Access and/or Bus Master bits "
2340		    "were not set!\n");
2341		adapter->hw.bus.pci_cmd_word |=
2342		(PCIM_CMD_BUSMASTEREN | PCIM_CMD_MEMEN);
2343		pci_write_config(dev, PCIR_COMMAND,
2344		    adapter->hw.bus.pci_cmd_word, 2);
2345	}
2346
2347	/* Save off the information about this board */
2348	adapter->hw.vendor_id = pci_get_vendor(dev);
2349	adapter->hw.device_id = pci_get_device(dev);
2350	adapter->hw.revision_id = pci_read_config(dev, PCIR_REVID, 1);
2351	adapter->hw.subsystem_vendor_id =
2352	    pci_read_config(dev, PCIR_SUBVEND_0, 2);
2353	adapter->hw.subsystem_device_id =
2354	    pci_read_config(dev, PCIR_SUBDEV_0, 2);
2355
2356	/* Do Shared Code Init and Setup */
2357	if (e1000_set_mac_type(&adapter->hw)) {
2358		device_printf(dev, "Setup init failure\n");
2359		return;
2360	}
2361}
2362
2363static int
2364em_allocate_pci_resources(struct adapter *adapter)
2365{
2366	device_t	dev = adapter->dev;
2367	int		rid;
2368
2369	rid = PCIR_BAR(0);
2370	adapter->memory = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
2371	    &rid, RF_ACTIVE);
2372	if (adapter->memory == NULL) {
2373		device_printf(dev, "Unable to allocate bus resource: memory\n");
2374		return (ENXIO);
2375	}
2376	adapter->osdep.mem_bus_space_tag =
2377	    rman_get_bustag(adapter->memory);
2378	adapter->osdep.mem_bus_space_handle =
2379	    rman_get_bushandle(adapter->memory);
2380	adapter->hw.hw_addr = (u8 *)&adapter->osdep.mem_bus_space_handle;
2381
2382	/* Default to a single queue */
2383	adapter->num_queues = 1;
2384
2385	/*
2386	 * Setup MSI/X or MSI if PCI Express
2387	 */
2388	adapter->msix = em_setup_msix(adapter);
2389
2390	adapter->hw.back = &adapter->osdep;
2391
2392	return (0);
2393}
2394
2395/*********************************************************************
2396 *
2397 *  Setup the Legacy or MSI Interrupt handler
2398 *
2399 **********************************************************************/
2400int
2401em_allocate_legacy(struct adapter *adapter)
2402{
2403	device_t dev = adapter->dev;
2404	int error, rid = 0;
2405
2406	/* Manually turn off all interrupts */
2407	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2408
2409	if (adapter->msix == 1) /* using MSI */
2410		rid = 1;
2411	/* We allocate a single interrupt resource */
2412	adapter->res = bus_alloc_resource_any(dev,
2413	    SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2414	if (adapter->res == NULL) {
2415		device_printf(dev, "Unable to allocate bus resource: "
2416		    "interrupt\n");
2417		return (ENXIO);
2418	}
2419
2420	/*
2421	 * Allocate a fast interrupt and the associated
2422	 * deferred processing contexts.
2423	 */
2424	TASK_INIT(&adapter->que_task, 0, em_handle_que, adapter);
2425	TASK_INIT(&adapter->link_task, 0, em_handle_link, adapter);
2426	adapter->tq = taskqueue_create_fast("em_taskq", M_NOWAIT,
2427	    taskqueue_thread_enqueue, &adapter->tq);
2428	taskqueue_start_threads(&adapter->tq, 1, PI_NET, "%s taskq",
2429	    device_get_nameunit(adapter->dev));
2430	if ((error = bus_setup_intr(dev, adapter->res, INTR_TYPE_NET,
2431	    em_irq_fast, NULL, adapter, &adapter->tag)) != 0) {
2432		device_printf(dev, "Failed to register fast interrupt "
2433			    "handler: %d\n", error);
2434		taskqueue_free(adapter->tq);
2435		adapter->tq = NULL;
2436		return (error);
2437	}
2438
2439	return (0);
2440}
2441
2442/*********************************************************************
2443 *
2444 *  Setup the MSIX Interrupt handlers
2445 *   This is not really Multiqueue; rather,
2446 *   it's just multiple interrupt vectors.
2447 *
2448 **********************************************************************/
2449int
2450em_allocate_msix(struct adapter *adapter)
2451{
2452	device_t	dev = adapter->dev;
2453	struct		tx_ring *txr = adapter->tx_rings;
2454	struct		rx_ring *rxr = adapter->rx_rings;
2455	int		error, rid, vector = 0;
2456
2457
2458	/* Make sure all interrupts are disabled */
2459	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2460
2461	/* First set up ring resources */
2462	for (int i = 0; i < adapter->num_queues; i++, txr++, rxr++) {
2463
2464		/* RX ring */
2465		rid = vector + 1;
2466
2467		rxr->res = bus_alloc_resource_any(dev,
2468		    SYS_RES_IRQ, &rid, RF_ACTIVE);
2469		if (rxr->res == NULL) {
2470			device_printf(dev,
2471			    "Unable to allocate bus resource: "
2472			    "RX MSIX Interrupt %d\n", i);
2473			return (ENXIO);
2474		}
2475		if ((error = bus_setup_intr(dev, rxr->res,
2476		    INTR_TYPE_NET | INTR_MPSAFE, NULL, em_msix_rx,
2477		    rxr, &rxr->tag)) != 0) {
2478			device_printf(dev, "Failed to register RX handler");
2479			return (error);
2480		}
2481#if __FreeBSD_version >= 800504
2482		bus_describe_intr(dev, rxr->res, rxr->tag, "rx %d", i);
2483#endif
2484		rxr->msix = vector++; /* NOTE increment vector for TX */
2485		TASK_INIT(&rxr->rx_task, 0, em_handle_rx, rxr);
2486		rxr->tq = taskqueue_create_fast("em_rxq", M_NOWAIT,
2487		    taskqueue_thread_enqueue, &rxr->tq);
2488		taskqueue_start_threads(&rxr->tq, 1, PI_NET, "%s rxq",
2489		    device_get_nameunit(adapter->dev));
2490		/*
2491		** Set the bit to enable interrupt
2492		** in E1000_IMS -- bits 20 and 21
2493		** are for RX0 and RX1, note this has
2494		** NOTHING to do with the MSIX vector
2495		*/
2496		rxr->ims = 1 << (20 + i);
2497		adapter->ivars |= (8 | rxr->msix) << (i * 4);
2498
2499		/* TX ring */
2500		rid = vector + 1;
2501		txr->res = bus_alloc_resource_any(dev,
2502		    SYS_RES_IRQ, &rid, RF_ACTIVE);
2503		if (txr->res == NULL) {
2504			device_printf(dev,
2505			    "Unable to allocate bus resource: "
2506			    "TX MSIX Interrupt %d\n", i);
2507			return (ENXIO);
2508		}
2509		if ((error = bus_setup_intr(dev, txr->res,
2510		    INTR_TYPE_NET | INTR_MPSAFE, NULL, em_msix_tx,
2511		    txr, &txr->tag)) != 0) {
2512			device_printf(dev, "Failed to register TX handler");
2513			return (error);
2514		}
2515#if __FreeBSD_version >= 800504
2516		bus_describe_intr(dev, txr->res, txr->tag, "tx %d", i);
2517#endif
2518		txr->msix = vector++; /* Increment vector for next pass */
2519		TASK_INIT(&txr->tx_task, 0, em_handle_tx, txr);
2520		txr->tq = taskqueue_create_fast("em_txq", M_NOWAIT,
2521		    taskqueue_thread_enqueue, &txr->tq);
2522		taskqueue_start_threads(&txr->tq, 1, PI_NET, "%s txq",
2523		    device_get_nameunit(adapter->dev));
2524		/*
2525		** Set the bit to enable interrupt
2526		** in E1000_IMS -- bits 22 and 23
2527		** are for TX0 and TX1, note this has
2528		** NOTHING to do with the MSIX vector
2529		*/
2530		txr->ims = 1 << (22 + i);
2531		adapter->ivars |= (8 | txr->msix) << (8 + (i * 4));
2532	}
2533
2534	/* Link interrupt */
2535	++rid;
2536	adapter->res = bus_alloc_resource_any(dev,
2537	    SYS_RES_IRQ, &rid, RF_ACTIVE);
2538	if (!adapter->res) {
2539		device_printf(dev, "Unable to allocate "
2540		    "bus resource: Link interrupt [%d]\n", rid);
2541		return (ENXIO);
2542	}
2543	/* Set the link handler function */
2544	error = bus_setup_intr(dev, adapter->res,
2545	    INTR_TYPE_NET | INTR_MPSAFE, NULL,
2546	    em_msix_link, adapter, &adapter->tag);
2547	if (error) {
2548		adapter->res = NULL;
2549		device_printf(dev, "Failed to register LINK handler");
2550		return (error);
2551	}
2552#if __FreeBSD_version >= 800504
2553	bus_describe_intr(dev, adapter->res, adapter->tag, "link");
2554#endif
2555	adapter->linkvec = vector;
2556	adapter->ivars |=  (8 | vector) << 16;
2557	adapter->ivars |= 0x80000000;
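
	/*
	 * Worked example: with a single queue the vectors assigned above
	 * are rx = 0, tx = 1, link = 2, so
	 *   rx:     (8 | 0) << 0  = 0x00000008
	 *   tx:     (8 | 1) << 8  = 0x00000900
	 *   link:   (8 | 2) << 16 = 0x000a0000
	 *   enable bit            = 0x80000000
	 * and adapter->ivars ends up as 0x800a0908.
	 */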
2558
2559	return (0);
2560}
2561
2562
2563static void
2564em_free_pci_resources(struct adapter *adapter)
2565{
2566	device_t	dev = adapter->dev;
2567	struct tx_ring	*txr;
2568	struct rx_ring	*rxr;
2569	int		rid;
2570
2571
2572	/*
2573	** Release all the queue interrupt resources:
2574	*/
2575	for (int i = 0; i < adapter->num_queues; i++) {
2576		txr = &adapter->tx_rings[i];
2577		rxr = &adapter->rx_rings[i];
2578		/* an early abort? */
2579		if ((txr == NULL) || (rxr == NULL))
2580			break;
2581		rid = txr->msix + 1;
2582		if (txr->tag != NULL) {
2583			bus_teardown_intr(dev, txr->res, txr->tag);
2584			txr->tag = NULL;
2585		}
2586		if (txr->res != NULL)
2587			bus_release_resource(dev, SYS_RES_IRQ,
2588			    rid, txr->res);
2589		rid = rxr->msix + 1;
2590		if (rxr->tag != NULL) {
2591			bus_teardown_intr(dev, rxr->res, rxr->tag);
2592			rxr->tag = NULL;
2593		}
2594		if (rxr->res != NULL)
2595			bus_release_resource(dev, SYS_RES_IRQ,
2596			    rid, rxr->res);
2597	}
2598
2599	if (adapter->linkvec) /* we are doing MSIX */
2600		rid = adapter->linkvec + 1;
2601	else
2602		rid = (adapter->msix != 0) ? 1 : 0;
2603
2604	if (adapter->tag != NULL) {
2605		bus_teardown_intr(dev, adapter->res, adapter->tag);
2606		adapter->tag = NULL;
2607	}
2608
2609	if (adapter->res != NULL)
2610		bus_release_resource(dev, SYS_RES_IRQ, rid, adapter->res);
2611
2612
2613	if (adapter->msix)
2614		pci_release_msi(dev);
2615
2616	if (adapter->msix_mem != NULL)
2617		bus_release_resource(dev, SYS_RES_MEMORY,
2618		    PCIR_BAR(EM_MSIX_BAR), adapter->msix_mem);
2619
2620	if (adapter->memory != NULL)
2621		bus_release_resource(dev, SYS_RES_MEMORY,
2622		    PCIR_BAR(0), adapter->memory);
2623
2624	if (adapter->flash != NULL)
2625		bus_release_resource(dev, SYS_RES_MEMORY,
2626		    EM_FLASH, adapter->flash);
2627}
2628
2629/*
2630 * Setup MSI or MSI/X
2631 */
2632static int
2633em_setup_msix(struct adapter *adapter)
2634{
2635	device_t dev = adapter->dev;
2636	int val = 0;
2637
2638
2639	/*
2640	** Setup MSI/X for Hartwell: tests have shown
2641	** use of two queues to be unstable, and to
2642	** provide no great gain anyway, so we simply
2643	** separate the interrupts and use a single queue.
2644	*/
2645	if ((adapter->hw.mac.type == e1000_82574) &&
2646	    (em_enable_msix == TRUE)) {
2647		/* Map the MSIX BAR */
2648		int rid = PCIR_BAR(EM_MSIX_BAR);
2649		adapter->msix_mem = bus_alloc_resource_any(dev,
2650		    SYS_RES_MEMORY, &rid, RF_ACTIVE);
2651		if (!adapter->msix_mem) {
2652			/* May not be enabled */
2653			device_printf(adapter->dev,
2654			    "Unable to map MSIX table\n");
2655			goto msi;
2656		}
2657		val = pci_msix_count(dev);
2658		if (val < 3) {
2659			bus_release_resource(dev, SYS_RES_MEMORY,
2660			    PCIR_BAR(EM_MSIX_BAR), adapter->msix_mem);
2661			adapter->msix_mem = NULL;
2662			device_printf(adapter->dev,
2663			    "MSIX: insufficient vectors, using MSI\n");
2664			goto msi;
2665		}
2666		val = 3;
2667		adapter->num_queues = 1;
2668		if (pci_alloc_msix(dev, &val) == 0) {
2669			device_printf(adapter->dev,
2670			    "Using MSIX interrupts "
2671			    "with %d vectors\n", val);
2672		}
2673
2674		return (val);
2675	}
2676msi:
2677	val = pci_msi_count(dev);
2678	if (val == 1 && pci_alloc_msi(dev, &val) == 0) {
2679		adapter->msix = 1;
2680		device_printf(adapter->dev, "Using an MSI interrupt\n");
2681		return (val);
2682	}
2683	/* Should only happen due to manual configuration */
2684	device_printf(adapter->dev, "No MSI/MSIX, using a Legacy IRQ\n");
2685	return (0);
2686}
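
/*
 * Summary of the returns above: em_setup_msix() yields 3 when MSI-X is in
 * use (separate RX, TX and link vectors), 1 for plain MSI, and 0 for the
 * legacy IRQ fallback. The caller stores the result in adapter->msix,
 * which em_allocate_legacy() tests for == 1 to select the MSI rid.
 */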
2687
2688
2689/*********************************************************************
2690 *
2691 *  Initialize the hardware to a configuration
2692 *  as specified by the adapter structure.
2693 *
2694 **********************************************************************/
2695static void
2696em_reset(struct adapter *adapter)
2697{
2698	device_t	dev = adapter->dev;
2699	struct ifnet	*ifp = adapter->ifp;
2700	struct e1000_hw	*hw = &adapter->hw;
2701	u16		rx_buffer_size;
2702
2703	INIT_DEBUGOUT("em_reset: begin");
2704
2705	/* Set up smart power down as default off on newer adapters. */
2706	if (!em_smart_pwr_down && (hw->mac.type == e1000_82571 ||
2707	    hw->mac.type == e1000_82572)) {
2708		u16 phy_tmp = 0;
2709
2710		/* Speed up time to link by disabling smart power down. */
2711		e1000_read_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, &phy_tmp);
2712		phy_tmp &= ~IGP02E1000_PM_SPD;
2713		e1000_write_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, phy_tmp);
2714	}
2715
2716	/*
2717	 * These parameters control the automatic generation (Tx) and
2718	 * response (Rx) to Ethernet PAUSE frames.
2719	 * - High water mark should allow for at least two frames to be
2720	 *   received after sending an XOFF.
2721	 * - Low water mark works best when it is very near the high water mark.
2722	 *   This allows the receiver to restart by sending XON when it has
2723	 *   drained a bit. Here we use an arbitrary value of 1500 which will
2724	 *   restart after one full frame is pulled from the buffer. There
2725	 *   could be several smaller frames in the buffer and if so they will
2726	 *   not trigger the XON until their total number reduces the buffer
2727	 *   by 1500.
2728	 * - The pause time is fairly large at 1000 x 512ns = 512 usec.
2729	 */
2730	rx_buffer_size = ((E1000_READ_REG(hw, E1000_PBA) & 0xffff) << 10 );
2731
2732	hw->fc.high_water = rx_buffer_size -
2733	    roundup2(adapter->max_frame_size, 1024);
2734	hw->fc.low_water = hw->fc.high_water - 1500;
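
	/*
	 * Worked example with assumed values: if the PBA low word reads
	 * 0x0020, rx_buffer_size = 0x20 << 10 = 32768 bytes; with a
	 * 1522-byte max frame, roundup2(1522, 1024) = 2048, giving
	 * high_water = 30720 and low_water = 29220.
	 */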
2735
2736	if (hw->mac.type == e1000_80003es2lan)
2737		hw->fc.pause_time = 0xFFFF;
2738	else
2739		hw->fc.pause_time = EM_FC_PAUSE_TIME;
2740
2741	hw->fc.send_xon = TRUE;
2742
2743	/* Set flow control; use the tunable setting if sane */
2744	hw->fc.requested_mode = adapter->fc_setting;
2745
2746	/* Workaround: no TX flow ctrl for PCH */
2747	if (hw->mac.type == e1000_pchlan)
2748		hw->fc.requested_mode = e1000_fc_rx_pause;
2749
2750	/* Override - settings for PCH2LAN, ya it's magic :) */
2751	if (hw->mac.type == e1000_pch2lan) {
2752		hw->fc.high_water = 0x5C20;
2753		hw->fc.low_water = 0x5048;
2754		hw->fc.pause_time = 0x0650;
2755		hw->fc.refresh_time = 0x0400;
2756		/* Jumbos need adjusted PBA */
2757		if (ifp->if_mtu > ETHERMTU)
2758			E1000_WRITE_REG(hw, E1000_PBA, 12);
2759		else
2760			E1000_WRITE_REG(hw, E1000_PBA, 26);
2761	}
2762
2763	/* Issue a global reset */
2764	e1000_reset_hw(hw);
2765	E1000_WRITE_REG(hw, E1000_WUC, 0);
2766	em_disable_aspm(adapter);
2767
2768	if (e1000_init_hw(hw) < 0) {
2769		device_printf(dev, "Hardware Initialization Failed\n");
2770		return;
2771	}
2772
2773	E1000_WRITE_REG(hw, E1000_VET, ETHERTYPE_VLAN);
2774	e1000_get_phy_info(hw);
2775	e1000_check_for_link(hw);
2776	return;
2777}
2778
2779/*********************************************************************
2780 *
2781 *  Setup networking device structure and register an interface.
2782 *
2783 **********************************************************************/
2784static int
2785em_setup_interface(device_t dev, struct adapter *adapter)
2786{
2787	struct ifnet   *ifp;
2788
2789	INIT_DEBUGOUT("em_setup_interface: begin");
2790
2791	ifp = adapter->ifp = if_alloc(IFT_ETHER);
2792	if (ifp == NULL) {
2793		device_printf(dev, "cannot allocate ifnet structure\n");
2794		return (-1);
2795	}
2796	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
2797	ifp->if_mtu = ETHERMTU;
2798	ifp->if_init = em_init;
2799	ifp->if_softc = adapter;
2800	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
2801	ifp->if_ioctl = em_ioctl;
2802	ifp->if_start = em_start;
2803	IFQ_SET_MAXLEN(&ifp->if_snd, adapter->num_tx_desc - 1);
2804	ifp->if_snd.ifq_drv_maxlen = adapter->num_tx_desc - 1;
2805	IFQ_SET_READY(&ifp->if_snd);
2806
2807	ether_ifattach(ifp, adapter->hw.mac.addr);
2808
2809	ifp->if_capabilities = ifp->if_capenable = 0;
2810
2811#ifdef EM_MULTIQUEUE
2812	/* Multiqueue tx functions */
2813	ifp->if_transmit = em_mq_start;
2814	ifp->if_qflush = em_qflush;
2815#endif
2816
2817	ifp->if_capabilities |= IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM;
2818	ifp->if_capenable |= IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM;
2819
2820	/* Enable TSO by default, can disable with ifconfig */
2821	ifp->if_capabilities |= IFCAP_TSO4;
2822	ifp->if_capenable |= IFCAP_TSO4;
2823
2824	/*
2825	 * Tell the upper layer(s) we
2826	 * support full VLAN capability
2827	 */
2828	ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);
2829	ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU;
2830	ifp->if_capenable |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU;
2831
2832	/*
2833	** Don't turn this on by default: if vlans are
2834	** created on another pseudo device (e.g. lagg),
2835	** then vlan events are not passed through, breaking
2836	** operation, but with HW FILTER off it works. If
2837	** using vlans directly on the em driver you can
2838	** enable this and get full hardware tag filtering.
2839	*/
2840	ifp->if_capabilities |= IFCAP_VLAN_HWFILTER;
2841
2842#ifdef DEVICE_POLLING
2843	ifp->if_capabilities |= IFCAP_POLLING;
2844#endif
2845
2846	/* Enable only WOL MAGIC by default */
2847	if (adapter->wol) {
2848		ifp->if_capabilities |= IFCAP_WOL;
2849		ifp->if_capenable |= IFCAP_WOL_MAGIC;
2850	}
2851
2852	/*
2853	 * Specify the media types supported by this adapter and register
2854	 * callbacks to update media and link information
2855	 */
2856	ifmedia_init(&adapter->media, IFM_IMASK,
2857	    em_media_change, em_media_status);
2858	if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
2859	    (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
2860		u_char fiber_type = IFM_1000_SX;	/* default type */
2861
2862		ifmedia_add(&adapter->media, IFM_ETHER | fiber_type | IFM_FDX,
2863			    0, NULL);
2864		ifmedia_add(&adapter->media, IFM_ETHER | fiber_type, 0, NULL);
2865	} else {
2866		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T, 0, NULL);
2867		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T | IFM_FDX,
2868			    0, NULL);
2869		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX,
2870			    0, NULL);
2871		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
2872			    0, NULL);
2873		if (adapter->hw.phy.type != e1000_phy_ife) {
2874			ifmedia_add(&adapter->media,
2875				IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
2876			ifmedia_add(&adapter->media,
2877				IFM_ETHER | IFM_1000_T, 0, NULL);
2878		}
2879	}
2880	ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
2881	ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
2882	return (0);
2883}
2884
2885
2886/*
2887 * Manage DMA'able memory.
2888 */
2889static void
2890em_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
2891{
2892	if (error)
2893		return;
2894	*(bus_addr_t *) arg = segs[0].ds_addr;
2895}
2896
2897static int
2898em_dma_malloc(struct adapter *adapter, bus_size_t size,
2899        struct em_dma_alloc *dma, int mapflags)
2900{
2901	int error;
2902
2903	error = bus_dma_tag_create(bus_get_dma_tag(adapter->dev), /* parent */
2904				EM_DBA_ALIGN, 0,	/* alignment, bounds */
2905				BUS_SPACE_MAXADDR,	/* lowaddr */
2906				BUS_SPACE_MAXADDR,	/* highaddr */
2907				NULL, NULL,		/* filter, filterarg */
2908				size,			/* maxsize */
2909				1,			/* nsegments */
2910				size,			/* maxsegsize */
2911				0,			/* flags */
2912				NULL,			/* lockfunc */
2913				NULL,			/* lockarg */
2914				&dma->dma_tag);
2915	if (error) {
2916		device_printf(adapter->dev,
2917		    "%s: bus_dma_tag_create failed: %d\n",
2918		    __func__, error);
2919		goto fail_0;
2920	}
2921
2922	error = bus_dmamem_alloc(dma->dma_tag, (void**) &dma->dma_vaddr,
2923	    BUS_DMA_NOWAIT | BUS_DMA_COHERENT, &dma->dma_map);
2924	if (error) {
2925		device_printf(adapter->dev,
2926		    "%s: bus_dmamem_alloc(%ju) failed: %d\n",
2927		    __func__, (uintmax_t)size, error);
2928		goto fail_2;
2929	}
2930
2931	dma->dma_paddr = 0;
2932	error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
2933	    size, em_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT);
2934	if (error || dma->dma_paddr == 0) {
2935		device_printf(adapter->dev,
2936		    "%s: bus_dmamap_load failed: %d\n",
2937		    __func__, error);
2938		goto fail_3;
2939	}
2940
2941	return (0);
2942
2943fail_3:
2944	bus_dmamap_unload(dma->dma_tag, dma->dma_map);
2945fail_2:
2946	bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
2947	bus_dma_tag_destroy(dma->dma_tag);
2948fail_0:
2949	dma->dma_map = NULL;
2950	dma->dma_tag = NULL;
2951
2952	return (error);
2953}
2954
2955static void
2956em_dma_free(struct adapter *adapter, struct em_dma_alloc *dma)
2957{
2958	if (dma->dma_tag == NULL)
2959		return;
2960	if (dma->dma_map != NULL) {
2961		bus_dmamap_sync(dma->dma_tag, dma->dma_map,
2962		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
2963		bus_dmamap_unload(dma->dma_tag, dma->dma_map);
2964		bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
2965		dma->dma_map = NULL;
2966	}
2967	bus_dma_tag_destroy(dma->dma_tag);
2968	dma->dma_tag = NULL;
2969}
2970
2971
2972/*********************************************************************
2973 *
2974 *  Allocate memory for the transmit and receive rings, and then
2975 *  the descriptors associated with each, called only once at attach.
2976 *
2977 **********************************************************************/
2978static int
2979em_allocate_queues(struct adapter *adapter)
2980{
2981	device_t		dev = adapter->dev;
2982	struct tx_ring		*txr = NULL;
2983	struct rx_ring		*rxr = NULL;
2984	int rsize, tsize, error = E1000_SUCCESS;
2985	int txconf = 0, rxconf = 0;
2986
2987
2988	/* Allocate the TX ring struct memory */
2989	if (!(adapter->tx_rings =
2990	    (struct tx_ring *) malloc(sizeof(struct tx_ring) *
2991	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2992		device_printf(dev, "Unable to allocate TX ring memory\n");
2993		error = ENOMEM;
2994		goto fail;
2995	}
2996
2997	/* Now allocate the RX */
2998	if (!(adapter->rx_rings =
2999	    (struct rx_ring *) malloc(sizeof(struct rx_ring) *
3000	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3001		device_printf(dev, "Unable to allocate RX ring memory\n");
3002		error = ENOMEM;
3003		goto rx_fail;
3004	}
3005
3006	tsize = roundup2(adapter->num_tx_desc *
3007	    sizeof(struct e1000_tx_desc), EM_DBA_ALIGN);
3008	/*
3009	 * Now set up the TX queues; txconf is needed to handle the
3010	 * possibility that things fail midcourse and we need to
3011	 * unwind the allocations gracefully.
3012	 */
3013	for (int i = 0; i < adapter->num_queues; i++, txconf++) {
3014		/* Set up some basics */
3015		txr = &adapter->tx_rings[i];
3016		txr->adapter = adapter;
3017		txr->me = i;
3018
3019		/* Initialize the TX lock */
3020		snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
3021		    device_get_nameunit(dev), txr->me);
3022		mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF);
3023
3024		if (em_dma_malloc(adapter, tsize,
3025			&txr->txdma, BUS_DMA_NOWAIT)) {
3026			device_printf(dev,
3027			    "Unable to allocate TX Descriptor memory\n");
3028			error = ENOMEM;
3029			goto err_tx_desc;
3030		}
3031		txr->tx_base = (struct e1000_tx_desc *)txr->txdma.dma_vaddr;
3032		bzero((void *)txr->tx_base, tsize);
3033
3034		if (em_allocate_transmit_buffers(txr)) {
3035			device_printf(dev,
3036			    "Critical Failure setting up transmit buffers\n");
3037			error = ENOMEM;
3038			goto err_tx_desc;
3039		}
3040#if __FreeBSD_version >= 800000
3041		/* Allocate a buf ring */
3042		txr->br = buf_ring_alloc(4096, M_DEVBUF,
3043		    M_WAITOK, &txr->tx_mtx);
3044#endif
3045	}
3046
3047	/*
3048	 * Next the RX queues...
3049	 */
3050	rsize = roundup2(adapter->num_rx_desc *
3051	    sizeof(struct e1000_rx_desc), EM_DBA_ALIGN);
3052	for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
3053		rxr = &adapter->rx_rings[i];
3054		rxr->adapter = adapter;
3055		rxr->me = i;
3056
3057		/* Initialize the RX lock */
3058		snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
3059		    device_get_nameunit(dev), rxr->me);
3060		mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF);
3061
3062		if (em_dma_malloc(adapter, rsize,
3063			&rxr->rxdma, BUS_DMA_NOWAIT)) {
3064			device_printf(dev,
3065			    "Unable to allocate RX Descriptor memory\n");
3066			error = ENOMEM;
3067			goto err_rx_desc;
3068		}
3069		rxr->rx_base = (struct e1000_rx_desc *)rxr->rxdma.dma_vaddr;
3070		bzero((void *)rxr->rx_base, rsize);
3071
3072		/* Allocate receive buffers for the ring */
3073		if (em_allocate_receive_buffers(rxr)) {
3074			device_printf(dev,
3075			    "Critical Failure setting up receive buffers\n");
3076			error = ENOMEM;
3077			goto err_rx_desc;
3078		}
3079	}
3080
3081	return (0);
3082
3083err_rx_desc:
3084	for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
3085		em_dma_free(adapter, &rxr->rxdma);
3086err_tx_desc:
3087	for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
3088		em_dma_free(adapter, &txr->txdma);
3089	free(adapter->rx_rings, M_DEVBUF);
3090rx_fail:
3091#if __FreeBSD_version >= 800000
3092	buf_ring_free(txr->br, M_DEVBUF);
3093#endif
3094	free(adapter->tx_rings, M_DEVBUF);
3095fail:
3096	return (error);
3097}
3098
3099
3100/*********************************************************************
3101 *
3102 *  Allocate memory for tx_buffer structures. The tx_buffer stores all
3103 *  the information needed to transmit a packet on the wire. This is
3104 *  called only once at attach, setup is done every reset.
3105 *  called only once at attach; setup is done on every reset.
3106 **********************************************************************/
3107static int
3108em_allocate_transmit_buffers(struct tx_ring *txr)
3109{
3110	struct adapter *adapter = txr->adapter;
3111	device_t dev = adapter->dev;
3112	struct em_buffer *txbuf;
3113	int error, i;
3114
3115	/*
3116	 * Setup DMA descriptor areas.
3117	 */
3118	if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
3119			       1, 0,			/* alignment, bounds */
3120			       BUS_SPACE_MAXADDR,	/* lowaddr */
3121			       BUS_SPACE_MAXADDR,	/* highaddr */
3122			       NULL, NULL,		/* filter, filterarg */
3123			       EM_TSO_SIZE,		/* maxsize */
3124			       EM_MAX_SCATTER,		/* nsegments */
3125			       PAGE_SIZE,		/* maxsegsize */
3126			       0,			/* flags */
3127			       NULL,			/* lockfunc */
3128			       NULL,			/* lockfuncarg */
3129			       &txr->txtag))) {
3130		device_printf(dev,"Unable to allocate TX DMA tag\n");
3131		goto fail;
3132	}
3133
3134	if (!(txr->tx_buffers =
3135	    (struct em_buffer *) malloc(sizeof(struct em_buffer) *
3136	    adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3137		device_printf(dev, "Unable to allocate tx_buffer memory\n");
3138		error = ENOMEM;
3139		goto fail;
3140	}
3141
3142	/* Create the descriptor buffer dma maps */
3143	txbuf = txr->tx_buffers;
3144	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3145		error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
3146		if (error != 0) {
3147			device_printf(dev, "Unable to create TX DMA map\n");
3148			goto fail;
3149		}
3150	}
3151
3152	return (0);
3153fail:
3154	/* We free everything; this handles the case where we failed midway */
3155	em_free_transmit_structures(adapter);
3156	return (error);
3157}
3158
3159/*********************************************************************
3160 *
3161 *  Initialize a transmit ring.
3162 *
3163 **********************************************************************/
3164static void
3165em_setup_transmit_ring(struct tx_ring *txr)
3166{
3167	struct adapter *adapter = txr->adapter;
3168	struct em_buffer *txbuf;
3169	int i;
3170
3171	/* Clear the old descriptor contents */
3172	EM_TX_LOCK(txr);
3173	bzero((void *)txr->tx_base,
3174	      (sizeof(struct e1000_tx_desc)) * adapter->num_tx_desc);
3175	/* Reset indices */
3176	txr->next_avail_desc = 0;
3177	txr->next_to_clean = 0;
3178
3179	/* Free any existing tx buffers. */
3180	txbuf = txr->tx_buffers;
3181	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3182		if (txbuf->m_head != NULL) {
3183			bus_dmamap_sync(txr->txtag, txbuf->map,
3184			    BUS_DMASYNC_POSTWRITE);
3185			bus_dmamap_unload(txr->txtag, txbuf->map);
3186			m_freem(txbuf->m_head);
3187			txbuf->m_head = NULL;
3188		}
3189		/* clear the watch index */
3190		txbuf->next_eop = -1;
3191	}
3192
3193	/* Set number of descriptors available */
3194	txr->tx_avail = adapter->num_tx_desc;
3195	txr->queue_status = EM_QUEUE_IDLE;
3196
3197	/* Clear checksum offload context. */
3198	txr->last_hw_offload = 0;
3199	txr->last_hw_ipcss = 0;
3200	txr->last_hw_ipcso = 0;
3201	txr->last_hw_tucss = 0;
3202	txr->last_hw_tucso = 0;
3203
3204	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3205	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3206	EM_TX_UNLOCK(txr);
3207}
3208
3209/*********************************************************************
3210 *
3211 *  Initialize all transmit rings.
3212 *
3213 **********************************************************************/
3214static void
3215em_setup_transmit_structures(struct adapter *adapter)
3216{
3217	struct tx_ring *txr = adapter->tx_rings;
3218
3219	for (int i = 0; i < adapter->num_queues; i++, txr++)
3220		em_setup_transmit_ring(txr);
3221
3222	return;
3223}
3224
3225/*********************************************************************
3226 *
3227 *  Enable transmit unit.
3228 *
3229 **********************************************************************/
3230static void
3231em_initialize_transmit_unit(struct adapter *adapter)
3232{
3233	struct tx_ring	*txr = adapter->tx_rings;
3234	struct e1000_hw	*hw = &adapter->hw;
3235	u32	tctl, tarc, tipg = 0;
3236
3237	INIT_DEBUGOUT("em_initialize_transmit_unit: begin");
3238
3239	for (int i = 0; i < adapter->num_queues; i++, txr++) {
3240		u64 bus_addr = txr->txdma.dma_paddr;
3241		/* Base and Len of TX Ring */
3242		E1000_WRITE_REG(hw, E1000_TDLEN(i),
3243	    	    adapter->num_tx_desc * sizeof(struct e1000_tx_desc));
3244		E1000_WRITE_REG(hw, E1000_TDBAH(i),
3245	    	    (u32)(bus_addr >> 32));
3246		E1000_WRITE_REG(hw, E1000_TDBAL(i),
3247	    	    (u32)bus_addr);
3248		/* Init the HEAD/TAIL indices */
3249		E1000_WRITE_REG(hw, E1000_TDT(i), 0);
3250		E1000_WRITE_REG(hw, E1000_TDH(i), 0);
3251
3252		HW_DEBUGOUT2("Base = %x, Length = %x\n",
3253		    E1000_READ_REG(&adapter->hw, E1000_TDBAL(i)),
3254		    E1000_READ_REG(&adapter->hw, E1000_TDLEN(i)));
3255
3256		txr->queue_status = EM_QUEUE_IDLE;
3257	}
3258
3259	/* Set the default values for the Tx Inter Packet Gap timer */
3260	switch (adapter->hw.mac.type) {
3261	case e1000_82542:
3262		tipg = DEFAULT_82542_TIPG_IPGT;
3263		tipg |= DEFAULT_82542_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT;
3264		tipg |= DEFAULT_82542_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT;
3265		break;
3266	case e1000_80003es2lan:
3267		tipg = DEFAULT_82543_TIPG_IPGR1;
3268		tipg |= DEFAULT_80003ES2LAN_TIPG_IPGR2 <<
3269		    E1000_TIPG_IPGR2_SHIFT;
3270		break;
3271	default:
3272		if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
3273		    (adapter->hw.phy.media_type ==
3274		    e1000_media_type_internal_serdes))
3275			tipg = DEFAULT_82543_TIPG_IPGT_FIBER;
3276		else
3277			tipg = DEFAULT_82543_TIPG_IPGT_COPPER;
3278		tipg |= DEFAULT_82543_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT;
3279		tipg |= DEFAULT_82543_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT;
3280	}
3281
3282	E1000_WRITE_REG(&adapter->hw, E1000_TIPG, tipg);
3283	E1000_WRITE_REG(&adapter->hw, E1000_TIDV, adapter->tx_int_delay.value);
3284
3285	if (adapter->hw.mac.type >= e1000_82540)
3286		E1000_WRITE_REG(&adapter->hw, E1000_TADV,
3287		    adapter->tx_abs_int_delay.value);
3288
3289	if ((adapter->hw.mac.type == e1000_82571) ||
3290	    (adapter->hw.mac.type == e1000_82572)) {
3291		tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
3292		tarc |= SPEED_MODE_BIT;
3293		E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3294	} else if (adapter->hw.mac.type == e1000_80003es2lan) {
3295		tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
3296		tarc |= 1;
3297		E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3298		tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(1));
3299		tarc |= 1;
3300		E1000_WRITE_REG(&adapter->hw, E1000_TARC(1), tarc);
3301	}
3302
3303	adapter->txd_cmd = E1000_TXD_CMD_IFCS;
3304	if (adapter->tx_int_delay.value > 0)
3305		adapter->txd_cmd |= E1000_TXD_CMD_IDE;
3306
3307	/* Program the Transmit Control Register */
3308	tctl = E1000_READ_REG(&adapter->hw, E1000_TCTL);
3309	tctl &= ~E1000_TCTL_CT;
3310	tctl |= (E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN |
3311		   (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT));
3312
3313	if (adapter->hw.mac.type >= e1000_82571)
3314		tctl |= E1000_TCTL_MULR;
3315
3316	/* This write will effectively turn on the transmit unit. */
3317	E1000_WRITE_REG(&adapter->hw, E1000_TCTL, tctl);
3318
3319}
3320
3321
3322/*********************************************************************
3323 *
3324 *  Free all transmit rings.
3325 *
3326 **********************************************************************/
3327static void
3328em_free_transmit_structures(struct adapter *adapter)
3329{
3330	struct tx_ring *txr = adapter->tx_rings;
3331
3332	for (int i = 0; i < adapter->num_queues; i++, txr++) {
3333		EM_TX_LOCK(txr);
3334		em_free_transmit_buffers(txr);
3335		em_dma_free(adapter, &txr->txdma);
3336		EM_TX_UNLOCK(txr);
3337		EM_TX_LOCK_DESTROY(txr);
3338	}
3339
3340	free(adapter->tx_rings, M_DEVBUF);
3341}
3342
3343/*********************************************************************
3344 *
3345 *  Free transmit ring related data structures.
3346 *
3347 **********************************************************************/
3348static void
3349em_free_transmit_buffers(struct tx_ring *txr)
3350{
3351	struct adapter		*adapter = txr->adapter;
3352	struct em_buffer	*txbuf;
3353
3354	INIT_DEBUGOUT("free_transmit_ring: begin");
3355
3356	if (txr->tx_buffers == NULL)
3357		return;
3358
3359	for (int i = 0; i < adapter->num_tx_desc; i++) {
3360		txbuf = &txr->tx_buffers[i];
3361		if (txbuf->m_head != NULL) {
3362			bus_dmamap_sync(txr->txtag, txbuf->map,
3363			    BUS_DMASYNC_POSTWRITE);
3364			bus_dmamap_unload(txr->txtag,
3365			    txbuf->map);
3366			m_freem(txbuf->m_head);
3367			txbuf->m_head = NULL;
3368			if (txbuf->map != NULL) {
3369				bus_dmamap_destroy(txr->txtag,
3370				    txbuf->map);
3371				txbuf->map = NULL;
3372			}
3373		} else if (txbuf->map != NULL) {
3374			bus_dmamap_unload(txr->txtag,
3375			    txbuf->map);
3376			bus_dmamap_destroy(txr->txtag,
3377			    txbuf->map);
3378			txbuf->map = NULL;
3379		}
3380	}
3381#if __FreeBSD_version >= 800000
3382	if (txr->br != NULL)
3383		buf_ring_free(txr->br, M_DEVBUF);
3384#endif
3385	if (txr->tx_buffers != NULL) {
3386		free(txr->tx_buffers, M_DEVBUF);
3387		txr->tx_buffers = NULL;
3388	}
3389	if (txr->txtag != NULL) {
3390		bus_dma_tag_destroy(txr->txtag);
3391		txr->txtag = NULL;
3392	}
3393	return;
3394}
3395
3396
3397/*********************************************************************
3398 *  The offload context is protocol specific (TCP/UDP) and thus
3399 *  only needs to be set when the protocol changes. A context
3400 *  change can be a performance detriment, and the offload
3401 *  might be better just disabled. The reason arises in the way
3402 *  in which the controller supports pipelined requests from the
3403 *  Tx data DMA. Up to four requests can be pipelined, and they may
3404 *  belong to the same packet or to multiple packets. However all
3405 *  requests for one packet are issued before a request is issued
3406 *  for a subsequent packet, and if a request for the next packet
3407 *  requires a context change, that request will be stalled
3408 *  until the previous request completes. This means setting up
3409 *  a new context effectively disables pipelined Tx data DMA, which
3410 *  in turn greatly slows down performance when sending small
3411 *  frames.
3412 **********************************************************************/
3413static void
3414em_transmit_checksum_setup(struct tx_ring *txr, struct mbuf *mp, int ip_off,
3415    struct ip *ip, u32 *txd_upper, u32 *txd_lower)
3416{
3417	struct adapter			*adapter = txr->adapter;
3418	struct e1000_context_desc	*TXD = NULL;
3419	struct em_buffer		*tx_buffer;
3420	int				cur, hdr_len;
3421	u32				cmd = 0;
3422	u16				offload = 0;
3423	u8				ipcso, ipcss, tucso, tucss;
3424
3425	ipcss = ipcso = tucss = tucso = 0;
3426	hdr_len = ip_off + (ip->ip_hl << 2);
3427	cur = txr->next_avail_desc;
3428
3429	/* Setup of IP header checksum. */
3430	if (mp->m_pkthdr.csum_flags & CSUM_IP) {
3431		*txd_upper |= E1000_TXD_POPTS_IXSM << 8;
3432		offload |= CSUM_IP;
3433		ipcss = ip_off;
3434		ipcso = ip_off + offsetof(struct ip, ip_sum);
3435		/*
3436		 * Start offset for header checksum calculation.
3437		 * End offset for header checksum calculation.
3438		 * Offset of place to put the checksum.
3439		 */
3440		TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
3441		TXD->lower_setup.ip_fields.ipcss = ipcss;
3442		TXD->lower_setup.ip_fields.ipcse = htole16(hdr_len);
3443		TXD->lower_setup.ip_fields.ipcso = ipcso;
3444		cmd |= E1000_TXD_CMD_IP;
3445	}
3446
3447	if (mp->m_pkthdr.csum_flags & CSUM_TCP) {
3448 		*txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
3449 		*txd_upper |= E1000_TXD_POPTS_TXSM << 8;
3450 		offload |= CSUM_TCP;
3451 		tucss = hdr_len;
3452 		tucso = hdr_len + offsetof(struct tcphdr, th_sum);
3453 		/*
3454 		 * Setting up a new checksum offload context for every frame
3455 		 * takes a lot of processing time in hardware. This also
3456 		 * reduces performance a lot for small frames, so avoid it
3457 		 * if the driver can reuse a previously configured checksum
3458 		 * offload context.
3459 		 */
3460 		if (txr->last_hw_offload == offload) {
3461 			if (offload & CSUM_IP) {
3462 				if (txr->last_hw_ipcss == ipcss &&
3463 				    txr->last_hw_ipcso == ipcso &&
3464 				    txr->last_hw_tucss == tucss &&
3465 				    txr->last_hw_tucso == tucso)
3466 					return;
3467 			} else {
3468 				if (txr->last_hw_tucss == tucss &&
3469 				    txr->last_hw_tucso == tucso)
3470 					return;
3471 			}
3472  		}
3473 		txr->last_hw_offload = offload;
3474 		txr->last_hw_tucss = tucss;
3475 		txr->last_hw_tucso = tucso;
3476 		/*
3477 		 * Start offset for payload checksum calculation.
3478 		 * End offset for payload checksum calculation.
3479 		 * Offset of place to put the checksum.
3480 		 */
3481		TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
3482 		TXD->upper_setup.tcp_fields.tucss = hdr_len;
3483 		TXD->upper_setup.tcp_fields.tucse = htole16(0);
3484 		TXD->upper_setup.tcp_fields.tucso = tucso;
3485 		cmd |= E1000_TXD_CMD_TCP;
3486 	} else if (mp->m_pkthdr.csum_flags & CSUM_UDP) {
3487 		*txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
3488 		*txd_upper |= E1000_TXD_POPTS_TXSM << 8;
 		offload |= CSUM_UDP;
3489 		tucss = hdr_len;
3490 		tucso = hdr_len + offsetof(struct udphdr, uh_sum);
3491 		/*
3492 		 * Setting up a new checksum offload context for every frame
3493 		 * takes a lot of processing time for the hardware. This also
3494 		 * reduces performance a lot for small frames, so avoid it if
3495 		 * the driver can reuse a previously configured checksum
3496 		 * offload context.
3497 		 */
3498 		if (txr->last_hw_offload == offload) {
3499 			if (offload & CSUM_IP) {
3500 				if (txr->last_hw_ipcss == ipcss &&
3501 				    txr->last_hw_ipcso == ipcso &&
3502 				    txr->last_hw_tucss == tucss &&
3503 				    txr->last_hw_tucso == tucso)
3504 					return;
3505 			} else {
3506 				if (txr->last_hw_tucss == tucss &&
3507 				    txr->last_hw_tucso == tucso)
3508 					return;
3509 			}
3510 		}
3511 		txr->last_hw_offload = offload;
3512 		txr->last_hw_tucss = tucss;
3513 		txr->last_hw_tucso = tucso;
3514 		/*
3515 		 * Start offset for payload checksum calculation.
3516 		 * End offset for payload checksum calculation.
3517 		 * Offset of place to put the checksum.
3518 		 */
3519		TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
3520 		TXD->upper_setup.tcp_fields.tucss = tucss;
3521 		TXD->upper_setup.tcp_fields.tucse = htole16(0);
3522 		TXD->upper_setup.tcp_fields.tucso = tucso;
3523  	}
3524
3525 	if (offload & CSUM_IP) {
3526 		txr->last_hw_ipcss = ipcss;
3527 		txr->last_hw_ipcso = ipcso;
3528  	}
3529
3530	TXD->tcp_seg_setup.data = htole32(0);
3531	TXD->cmd_and_length =
3532	    htole32(adapter->txd_cmd | E1000_TXD_CMD_DEXT | cmd);
3533	tx_buffer = &txr->tx_buffers[cur];
3534	tx_buffer->m_head = NULL;
3535	tx_buffer->next_eop = -1;
3536
3537	if (++cur == adapter->num_tx_desc)
3538		cur = 0;
3539
3540	txr->tx_avail--;
3541	txr->next_avail_desc = cur;
3542}
3543
3544
3545/**********************************************************************
3546 *
3547 *  Setup work for hardware segmentation offload (TSO)
3548 *
3549 **********************************************************************/
3550static void
3551em_tso_setup(struct tx_ring *txr, struct mbuf *mp, int ip_off,
3552    struct ip *ip, struct tcphdr *tp, u32 *txd_upper, u32 *txd_lower)
3553{
3554	struct adapter			*adapter = txr->adapter;
3555	struct e1000_context_desc	*TXD;
3556	struct em_buffer		*tx_buffer;
3557	int cur, hdr_len;
3558
3559	/*
3560	 * In theory we can reuse the same TSO context if and only if the
3561	 * frame is of the same type (IP/TCP) and has the same MSS. However,
3562	 * checking whether a frame has the same IP/TCP structure is
3563	 * hard, so just ignore that and always establish a
3564	 * new TSO context.
3565	 */
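	/*
	 * ip_hl and th_off both count 32-bit words, so hdr_len is the
	 * total byte length of all headers up to the TSO payload.
	 */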
3566	hdr_len = ip_off + (ip->ip_hl << 2) + (tp->th_off << 2);
3567	*txd_lower = (E1000_TXD_CMD_DEXT |	/* Extended descr type */
3568		      E1000_TXD_DTYP_D |	/* Data descr type */
3569		      E1000_TXD_CMD_TSE);	/* Do TSE on this packet */
3570
3571	/* IP and/or TCP header checksum calculation and insertion. */
3572	*txd_upper = (E1000_TXD_POPTS_IXSM | E1000_TXD_POPTS_TXSM) << 8;
3573
3574	cur = txr->next_avail_desc;
3575	tx_buffer = &txr->tx_buffers[cur];
3576	TXD = (struct e1000_context_desc *) &txr->tx_base[cur];
3577
3578	/*
3579	 * Start offset for header checksum calculation.
3580	 * End offset for header checksum calculation.
3581	 * Offset of place to put the checksum.
3582	 */
3583	TXD->lower_setup.ip_fields.ipcss = ip_off;
3584	TXD->lower_setup.ip_fields.ipcse =
3585	    htole16(ip_off + (ip->ip_hl << 2) - 1);
3586	TXD->lower_setup.ip_fields.ipcso = ip_off + offsetof(struct ip, ip_sum);
3587	/*
3588	 * Start offset for payload checksum calculation.
3589	 * End offset for payload checksum calculation.
3590	 * Offset of place to put the checksum.
3591	 */
3592	TXD->upper_setup.tcp_fields.tucss = ip_off + (ip->ip_hl << 2);
3593	TXD->upper_setup.tcp_fields.tucse = 0;
3594	TXD->upper_setup.tcp_fields.tucso =
3595	    ip_off + (ip->ip_hl << 2) + offsetof(struct tcphdr, th_sum);
3596	/*
3597	 * Payload size per packet w/o any headers.
3598	 * Length of all headers up to payload.
3599	 */
3600	TXD->tcp_seg_setup.fields.mss = htole16(mp->m_pkthdr.tso_segsz);
3601	TXD->tcp_seg_setup.fields.hdr_len = hdr_len;
3602
3603	TXD->cmd_and_length = htole32(adapter->txd_cmd |
3604				E1000_TXD_CMD_DEXT |	/* Extended descr */
3605				E1000_TXD_CMD_TSE |	/* TSE context */
3606				E1000_TXD_CMD_IP |	/* Do IP csum */
3607				E1000_TXD_CMD_TCP |	/* Do TCP checksum */
3608				(mp->m_pkthdr.len - (hdr_len))); /* Total len */
3609
3610	tx_buffer->m_head = NULL;
3611	tx_buffer->next_eop = -1;
3612
3613	if (++cur == adapter->num_tx_desc)
3614		cur = 0;
3615
3616	txr->tx_avail--;
3617	txr->next_avail_desc = cur;
3618	txr->tx_tso = TRUE;
3619}
3620
3621
3622/**********************************************************************
3623 *
3624 *  Examine each tx_buffer in the used queue. If the hardware is done
3625 *  processing the packet then free associated resources. The
3626 *  tx_buffer is put back on the free queue.
3627 *
3628 **********************************************************************/
3629static bool
3630em_txeof(struct tx_ring *txr)
3631{
3632	struct adapter	*adapter = txr->adapter;
3633        int first, last, done, processed;
3634        struct em_buffer *tx_buffer;
3635        struct e1000_tx_desc   *tx_desc, *eop_desc;
3636	struct ifnet   *ifp = adapter->ifp;
3637
3638	EM_TX_LOCK_ASSERT(txr);
3639
3640	/* No work, make sure watchdog is off */
3641        if (txr->tx_avail == adapter->num_tx_desc) {
3642		txr->queue_status = EM_QUEUE_IDLE;
3643                return (FALSE);
3644	}
3645
3646	processed = 0;
3647        first = txr->next_to_clean;
3648        tx_desc = &txr->tx_base[first];
3649        tx_buffer = &txr->tx_buffers[first];
3650	last = tx_buffer->next_eop;
3651        eop_desc = &txr->tx_base[last];
3652
3653	/*
3654	 * What this does is get the index of the
3655	 * first descriptor AFTER the EOP of the
3656	 * first packet, that way we can do the
3657	 * simple comparison on the inner while loop.
3658	 */
3659	if (++last == adapter->num_tx_desc)
3660 		last = 0;
3661	done = last;
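	/*
	 * Example: with 1024 descriptors, if 'first' is 1020 and the
	 * packet's EOP sits in slot 1023, 'done' wraps to 0 and the
	 * inner loop below cleans slots 1020..1023 before 'first'
	 * wraps around to match it.
	 */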
3662
3663        bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3664            BUS_DMASYNC_POSTREAD);
3665
3666        while (eop_desc->upper.fields.status & E1000_TXD_STAT_DD) {
3667		/* We clean the range of the packet */
3668		while (first != done) {
3669                	tx_desc->upper.data = 0;
3670                	tx_desc->lower.data = 0;
3671                	tx_desc->buffer_addr = 0;
3672                	++txr->tx_avail;
3673			++processed;
3674
3675			if (tx_buffer->m_head) {
3676				bus_dmamap_sync(txr->txtag,
3677				    tx_buffer->map,
3678				    BUS_DMASYNC_POSTWRITE);
3679				bus_dmamap_unload(txr->txtag,
3680				    tx_buffer->map);
3681                        	m_freem(tx_buffer->m_head);
3682                        	tx_buffer->m_head = NULL;
3683                	}
3684			tx_buffer->next_eop = -1;
3685			txr->watchdog_time = ticks;
3686
3687	                if (++first == adapter->num_tx_desc)
3688				first = 0;
3689
3690	                tx_buffer = &txr->tx_buffers[first];
3691			tx_desc = &txr->tx_base[first];
3692		}
3693		++ifp->if_opackets;
3694		/* See if we can continue to the next packet */
3695		last = tx_buffer->next_eop;
3696		if (last != -1) {
3697        		eop_desc = &txr->tx_base[last];
3698			/* Get new done point */
3699			if (++last == adapter->num_tx_desc) last = 0;
3700			done = last;
3701		} else
3702			break;
3703        }
3704        bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3705            BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3706
3707        txr->next_to_clean = first;
3708
3709	/*
3710	** Watchdog calculation: we know there's
3711	** work outstanding or the first return
3712	** would have been taken, so nothing processed
3713	** for too long indicates a hang. The local timer
3714	** will examine this and do a reset if needed.
3715	*/
3716	if ((!processed) && ((ticks - txr->watchdog_time) > EM_WATCHDOG))
3717		txr->queue_status = EM_QUEUE_HUNG;
3718
3719        /*
3720         * If we have enough room, clear IFF_DRV_OACTIVE
3721         * to tell the stack that it is OK to send packets.
3722         */
3723        if (txr->tx_avail > EM_TX_CLEANUP_THRESHOLD) {
3724                ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
3725		/* Disable watchdog if all clean */
3726                if (txr->tx_avail == adapter->num_tx_desc) {
3727			txr->queue_status = EM_QUEUE_IDLE;
3728			return (FALSE);
3729		}
3730        }
3731
3732	return (TRUE);
3733}
3734
3735
3736/*********************************************************************
3737 *
3738 *  Refresh RX descriptor mbufs from system mbuf buffer pool.
3739 *
3740 **********************************************************************/
3741static void
3742em_refresh_mbufs(struct rx_ring *rxr, int limit)
3743{
3744	struct adapter		*adapter = rxr->adapter;
3745	struct mbuf		*m;
3746	bus_dma_segment_t	segs[1];
3747	struct em_buffer	*rxbuf;
3748	int			i, error, nsegs, cleaned;
3749
3750	i = rxr->next_to_refresh;
3751	cleaned = -1;
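	/*
	 * 'cleaned' remembers the last index actually refreshed; if it
	 * stays -1, nothing was refreshed and the tail pointer update
	 * below is skipped.
	 */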
3752	while (i != limit) {
3753		rxbuf = &rxr->rx_buffers[i];
3754		/*
3755		** Just skip entries with a buffer,
3756		** they can only be due to an error
3757		** and are to be reused.
3758		*/
3759		if (rxbuf->m_head != NULL)
3760			goto reuse;
3761		m = m_getjcl(M_DONTWAIT, MT_DATA,
3762		    M_PKTHDR, adapter->rx_mbuf_sz);
3763		/*
3764		** If we have a temporary resource shortage
3765		** that causes a failure, just abort refresh
3766		** for now, we will return to this point when
3767		** reinvoked from em_rxeof.
3768		*/
3769		if (m == NULL)
3770			goto update;
3771		m->m_len = m->m_pkthdr.len = adapter->rx_mbuf_sz;
3772
3773		/* Use bus_dma machinery to setup the memory mapping  */
3774		error = bus_dmamap_load_mbuf_sg(rxr->rxtag, rxbuf->map,
3775		    m, segs, &nsegs, BUS_DMA_NOWAIT);
3776		if (error != 0) {
3777			m_free(m);
3778			goto update;
3779		}
3780
3781		/* If nsegs is wrong then the stack is corrupt. */
3782		KASSERT(nsegs == 1, ("Too many segments returned!"));
3783
3784		bus_dmamap_sync(rxr->rxtag,
3785		    rxbuf->map, BUS_DMASYNC_PREREAD);
3786		rxbuf->m_head = m;
3787		rxr->rx_base[i].buffer_addr = htole64(segs[0].ds_addr);
3788reuse:
3789		cleaned = i;
3790		/* Calculate next index */
3791		if (++i == adapter->num_rx_desc)
3792			i = 0;
3793		/* This is the work marker for refresh */
3794		rxr->next_to_refresh = i;
3795	}
3796update:
3797	/*
3798	** Update the tail pointer only if,
3799	** and only as far as, we have refreshed.
3800	*/
3801	if (cleaned != -1) /* Update tail index */
3802		E1000_WRITE_REG(&adapter->hw,
3803		    E1000_RDT(rxr->me), cleaned);
3804
3805	return;
3806}
3807
3808
3809/*********************************************************************
3810 *
3811 *  Allocate memory for rx_buffer structures. Since we use one
3812 *  rx_buffer per received packet, the maximum number of rx_buffer's
3813 *  that we'll need is equal to the number of receive descriptors
3814 *  that we've allocated.
3815 *
3816 **********************************************************************/
3817static int
3818em_allocate_receive_buffers(struct rx_ring *rxr)
3819{
3820	struct adapter		*adapter = rxr->adapter;
3821	device_t		dev = adapter->dev;
3822	struct em_buffer	*rxbuf;
3823	int			error;
3824
3825	rxr->rx_buffers = malloc(sizeof(struct em_buffer) *
3826	    adapter->num_rx_desc, M_DEVBUF, M_NOWAIT | M_ZERO);
3827	if (rxr->rx_buffers == NULL) {
3828		device_printf(dev, "Unable to allocate rx_buffer memory\n");
3829		return (ENOMEM);
3830	}
3831
3832	error = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */
3833				1, 0,			/* alignment, bounds */
3834				BUS_SPACE_MAXADDR,	/* lowaddr */
3835				BUS_SPACE_MAXADDR,	/* highaddr */
3836				NULL, NULL,		/* filter, filterarg */
3837				MJUM9BYTES,		/* maxsize */
3838				1,			/* nsegments */
3839				MJUM9BYTES,		/* maxsegsize */
3840				0,			/* flags */
3841				NULL,			/* lockfunc */
3842				NULL,			/* lockarg */
3843				&rxr->rxtag);
3844	if (error) {
3845		device_printf(dev, "%s: bus_dma_tag_create failed %d\n",
3846		    __func__, error);
3847		goto fail;
3848	}
3849
3850	rxbuf = rxr->rx_buffers;
3851	for (int i = 0; i < adapter->num_rx_desc; i++, rxbuf++) {
3852		rxbuf = &rxr->rx_buffers[i];
3853		error = bus_dmamap_create(rxr->rxtag, BUS_DMA_NOWAIT,
3854		    &rxbuf->map);
3855		if (error) {
3856			device_printf(dev, "%s: bus_dmamap_create failed: %d\n",
3857			    __func__, error);
3858			goto fail;
3859		}
3860	}
3861
3862	return (0);
3863
3864fail:
3865	em_free_receive_structures(adapter);
3866	return (error);
3867}
3868
3869
3870/*********************************************************************
3871 *
3872 *  Initialize a receive ring and its buffers.
3873 *
3874 **********************************************************************/
3875static int
3876em_setup_receive_ring(struct rx_ring *rxr)
3877{
3878	struct	adapter 	*adapter = rxr->adapter;
3879	struct em_buffer	*rxbuf;
3880	bus_dma_segment_t	seg[1];
3881	int			rsize, nsegs, error;
3882
3883
3884	/* Clear the ring contents */
3885	EM_RX_LOCK(rxr);
3886	rsize = roundup2(adapter->num_rx_desc *
3887	    sizeof(struct e1000_rx_desc), EM_DBA_ALIGN);
3888	bzero((void *)rxr->rx_base, rsize);
3889
3890	/*
3891	** Free current RX buffer structs and their mbufs
3892	*/
3893	for (int i = 0; i < adapter->num_rx_desc; i++) {
3894		rxbuf = &rxr->rx_buffers[i];
3895		if (rxbuf->m_head != NULL) {
3896			bus_dmamap_sync(rxr->rxtag, rxbuf->map,
3897			    BUS_DMASYNC_POSTREAD);
3898			bus_dmamap_unload(rxr->rxtag, rxbuf->map);
3899			m_freem(rxbuf->m_head);
3900		}
3901	}
3902
3903	/* Now replenish the mbufs */
3904	for (int j = 0; j != adapter->num_rx_desc; ++j) {
3905
3906		rxbuf = &rxr->rx_buffers[j];
3907		rxbuf->m_head = m_getjcl(M_DONTWAIT, MT_DATA,
3908		    M_PKTHDR, adapter->rx_mbuf_sz);
3909		if (rxbuf->m_head == NULL) {
			EM_RX_UNLOCK(rxr);	/* don't leak the lock on failure */
3910			return (ENOBUFS);
		}
3911		rxbuf->m_head->m_len = adapter->rx_mbuf_sz;
3912		rxbuf->m_head->m_flags &= ~M_HASFCS; /* we strip it */
3913		rxbuf->m_head->m_pkthdr.len = adapter->rx_mbuf_sz;
3914
3915		/* Get the memory mapping */
3916		error = bus_dmamap_load_mbuf_sg(rxr->rxtag,
3917		    rxbuf->map, rxbuf->m_head, seg,
3918		    &nsegs, BUS_DMA_NOWAIT);
3919		if (error != 0) {
3920			m_freem(rxbuf->m_head);
3921			rxbuf->m_head = NULL;
			EM_RX_UNLOCK(rxr);	/* don't leak the lock on failure */
3922			return (error);
3923		}
3924		bus_dmamap_sync(rxr->rxtag,
3925		    rxbuf->map, BUS_DMASYNC_PREREAD);
3926
3927		/* Update descriptor */
3928		rxr->rx_base[j].buffer_addr = htole64(seg[0].ds_addr);
3929	}
3930
3931
3932	/* Setup our descriptor indices */
3933	rxr->next_to_check = 0;
3934	rxr->next_to_refresh = 0;
3935
3936	bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
3937	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3938
3939	EM_RX_UNLOCK(rxr);
3940	return (0);
3941}
3942
3943/*********************************************************************
3944 *
3945 *  Initialize all receive rings.
3946 *
3947 **********************************************************************/
3948static int
3949em_setup_receive_structures(struct adapter *adapter)
3950{
3951	struct rx_ring *rxr = adapter->rx_rings;
3952	int j;
3953
3954	for (j = 0; j < adapter->num_queues; j++, rxr++)
3955		if (em_setup_receive_ring(rxr))
3956			goto fail;
3957
3958	return (0);
3959fail:
3960	/*
3961	 * Free RX buffers allocated so far, we will only handle
3962	 * the rings that completed, the failing case will have
3963	 * cleaned up for itself. 'j' failed, so it's the terminus.
3964	 */
3965	for (int i = 0; i < j; ++i) {
3966		rxr = &adapter->rx_rings[i];
3967		for (int n = 0; n < adapter->num_rx_desc; n++) {
3968			struct em_buffer *rxbuf;
3969			rxbuf = &rxr->rx_buffers[n];
3970			if (rxbuf->m_head != NULL) {
3971				bus_dmamap_sync(rxr->rxtag, rxbuf->map,
3972			  	  BUS_DMASYNC_POSTREAD);
3973				bus_dmamap_unload(rxr->rxtag, rxbuf->map);
3974				m_freem(rxbuf->m_head);
3975				rxbuf->m_head = NULL;
3976			}
3977		}
3978	}
3979
3980	return (ENOBUFS);
3981}
3982
3983/*********************************************************************
3984 *
3985 *  Free all receive rings.
3986 *
3987 **********************************************************************/
3988static void
3989em_free_receive_structures(struct adapter *adapter)
3990{
3991	struct rx_ring *rxr = adapter->rx_rings;
3992
3993	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
3994		em_free_receive_buffers(rxr);
3995		/* Free the ring memory as well */
3996		em_dma_free(adapter, &rxr->rxdma);
3997		EM_RX_LOCK_DESTROY(rxr);
3998	}
3999
4000	free(adapter->rx_rings, M_DEVBUF);
4001}
4002
4003
4004/*********************************************************************
4005 *
4006 *  Free receive ring data structures
4007 *
4008 **********************************************************************/
4009static void
4010em_free_receive_buffers(struct rx_ring *rxr)
4011{
4012	struct adapter		*adapter = rxr->adapter;
4013	struct em_buffer	*rxbuf = NULL;
4014
4015	INIT_DEBUGOUT("free_receive_buffers: begin");
4016
4017	if (rxr->rx_buffers != NULL) {
4018		for (int i = 0; i < adapter->num_rx_desc; i++) {
4019			rxbuf = &rxr->rx_buffers[i];
4020			if (rxbuf->map != NULL) {
4021				bus_dmamap_sync(rxr->rxtag, rxbuf->map,
4022				    BUS_DMASYNC_POSTREAD);
4023				bus_dmamap_unload(rxr->rxtag, rxbuf->map);
4024				bus_dmamap_destroy(rxr->rxtag, rxbuf->map);
4025			}
4026			if (rxbuf->m_head != NULL) {
4027				m_freem(rxbuf->m_head);
4028				rxbuf->m_head = NULL;
4029			}
4030		}
4031		free(rxr->rx_buffers, M_DEVBUF);
4032		rxr->rx_buffers = NULL;
4033	}
4034
4035	if (rxr->rxtag != NULL) {
4036		bus_dma_tag_destroy(rxr->rxtag);
4037		rxr->rxtag = NULL;
4038	}
4039
4040	return;
4041}
4042
4043
4044/*********************************************************************
4045 *
4046 *  Enable receive unit.
4047 *
4048 **********************************************************************/
4049#define MAX_INTS_PER_SEC	8000
4050#define DEFAULT_ITR	     (1000000000/(MAX_INTS_PER_SEC * 256))
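/* e.g. with MAX_INTS_PER_SEC = 8000: 1000000000 / (8000 * 256) = 488,
 * in the 256ns units that the ITR register expects. */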
4051
4052static void
4053em_initialize_receive_unit(struct adapter *adapter)
4054{
4055	struct rx_ring	*rxr = adapter->rx_rings;
4056	struct ifnet	*ifp = adapter->ifp;
4057	struct e1000_hw	*hw = &adapter->hw;
4058	u64	bus_addr;
4059	u32	rctl, rxcsum;
4060
4061	INIT_DEBUGOUT("em_initialize_receive_units: begin");
4062
4063	/*
4064	 * Make sure receives are disabled while setting
4065	 * up the descriptor ring
4066	 */
4067	rctl = E1000_READ_REG(hw, E1000_RCTL);
4068	E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
4069
4070	E1000_WRITE_REG(&adapter->hw, E1000_RADV,
4071	    adapter->rx_abs_int_delay.value);
4072	/*
4073	 * Set the interrupt throttling rate. Value is calculated
4074	 * as DEFAULT_ITR = 1/(MAX_INTS_PER_SEC * 256ns)
4075	 */
4076	E1000_WRITE_REG(hw, E1000_ITR, DEFAULT_ITR);
4077
4078	/*
4079	** When using MSIX interrupts we need to throttle
4080	** using the EITR register (82574 only)
4081	*/
4082	if (hw->mac.type == e1000_82574)
4083		for (int i = 0; i < 4; i++)
4084			E1000_WRITE_REG(hw, E1000_EITR_82574(i),
4085			    DEFAULT_ITR);
4086
4087	/* Disable accelerated acknowledgement */
4088	if (adapter->hw.mac.type == e1000_82574)
4089		E1000_WRITE_REG(hw, E1000_RFCTL, E1000_RFCTL_ACK_DIS);
4090
4091	if (ifp->if_capenable & IFCAP_RXCSUM) {
4092		rxcsum = E1000_READ_REG(hw, E1000_RXCSUM);
4093		rxcsum |= (E1000_RXCSUM_IPOFL | E1000_RXCSUM_TUOFL);
4094		E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum);
4095	}
4096
4097	/*
4098	** XXX TEMPORARY WORKAROUND: on some systems with 82573,
4099	** long latencies are observed, e.g. on the Lenovo X60. This
4100	** change eliminates the problem, but since having positive
4101	** values in RDTR is a known source of problems on other
4102	** platforms, another solution is being sought.
4103	*/
4104	if (hw->mac.type == e1000_82573)
4105		E1000_WRITE_REG(hw, E1000_RDTR, 0x20);
4106
4107	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4108		/* Setup the Base and Length of the Rx Descriptor Ring */
4109		bus_addr = rxr->rxdma.dma_paddr;
4110		E1000_WRITE_REG(hw, E1000_RDLEN(i),
4111		    adapter->num_rx_desc * sizeof(struct e1000_rx_desc));
4112		E1000_WRITE_REG(hw, E1000_RDBAH(i), (u32)(bus_addr >> 32));
4113		E1000_WRITE_REG(hw, E1000_RDBAL(i), (u32)bus_addr);
4114		/* Setup the Head and Tail Descriptor Pointers */
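		/*
		 * Head == tail means no descriptors are available to the
		 * hardware, so tail starts one slot behind head, handing
		 * the hardware all but one entry of the ring.
		 */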
4115		E1000_WRITE_REG(hw, E1000_RDH(i), 0);
4116		E1000_WRITE_REG(hw, E1000_RDT(i), adapter->num_rx_desc - 1);
4117	}
4118
4119	/* Set early receive threshold on appropriate hw */
4120	if (((adapter->hw.mac.type == e1000_ich9lan) ||
4121	    (adapter->hw.mac.type == e1000_pch2lan) ||
4122	    (adapter->hw.mac.type == e1000_ich10lan)) &&
4123	    (ifp->if_mtu > ETHERMTU)) {
4124		u32 rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(0));
4125		E1000_WRITE_REG(hw, E1000_RXDCTL(0), rxdctl | 3);
4126		E1000_WRITE_REG(hw, E1000_ERT, 0x100 | (1 << 13));
4127	}
4128
4129	if (adapter->hw.mac.type == e1000_pch2lan) {
4130		if (ifp->if_mtu > ETHERMTU)
4131			e1000_lv_jumbo_workaround_ich8lan(hw, TRUE);
4132		else
4133			e1000_lv_jumbo_workaround_ich8lan(hw, FALSE);
4134	}
4135
4136	/* Setup the Receive Control Register */
4137	rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
4138	rctl |= E1000_RCTL_EN | E1000_RCTL_BAM |
4139	    E1000_RCTL_LBM_NO | E1000_RCTL_RDMTS_HALF |
4140	    (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
4141
4142        /* Strip the CRC */
4143        rctl |= E1000_RCTL_SECRC;
4144
4145        /* Make sure VLAN Filters are off */
4146        rctl &= ~E1000_RCTL_VFE;
4147	rctl &= ~E1000_RCTL_SBP;
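	/*
	 * Map the receive mbuf cluster size to the hardware buffer-size
	 * encoding; BSEX selects the extended (4096 and up) size set.
	 */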
4148
4149	if (adapter->rx_mbuf_sz == MCLBYTES)
4150		rctl |= E1000_RCTL_SZ_2048;
4151	else if (adapter->rx_mbuf_sz == MJUMPAGESIZE)
4152		rctl |= E1000_RCTL_SZ_4096 | E1000_RCTL_BSEX;
4153	else if (adapter->rx_mbuf_sz > MJUMPAGESIZE)
4154		rctl |= E1000_RCTL_SZ_8192 | E1000_RCTL_BSEX;
4155
4156	if (ifp->if_mtu > ETHERMTU)
4157		rctl |= E1000_RCTL_LPE;
4158	else
4159		rctl &= ~E1000_RCTL_LPE;
4160
4161	/* Write out the settings */
4162	E1000_WRITE_REG(hw, E1000_RCTL, rctl);
4163
4164	return;
4165}
4166
4167
4168/*********************************************************************
4169 *
4170 *  This routine executes in interrupt context. It replenishes
4171 *  the mbufs in the descriptor and sends data which has been
4172 *  dma'ed into host memory to upper layer.
4173 *
4174 *  We loop at most count times if count is > 0, or until done if
4175 *  count < 0.
4176 *
4177 *  For polling we also now return the number of cleaned packets
4178 *********************************************************************/
4179static bool
4180em_rxeof(struct rx_ring *rxr, int count, int *done)
4181{
4182	struct adapter		*adapter = rxr->adapter;
4183	struct ifnet		*ifp = adapter->ifp;
4184	struct mbuf		*mp, *sendmp;
4185	u8			status = 0;
4186	u16 			len;
4187	int			i, processed, rxdone = 0;
4188	bool			eop;
4189	struct e1000_rx_desc	*cur;
4190
4191	EM_RX_LOCK(rxr);
4192
4193	for (i = rxr->next_to_check, processed = 0; count != 0;) {
4194
4195		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
4196			break;
4197
4198		bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4199		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
4200
4201		cur = &rxr->rx_base[i];
4202		status = cur->status;
4203		mp = sendmp = NULL;
4204
4205		if ((status & E1000_RXD_STAT_DD) == 0)
4206			break;
4207
4208		len = le16toh(cur->length);
4209		eop = (status & E1000_RXD_STAT_EOP) != 0;
4210
4211		if ((rxr->discard == TRUE) || (cur->errors &
4212		    E1000_RXD_ERR_FRAME_ERR_MASK)) {
4213			ifp->if_ierrors++;
4214			++rxr->rx_discarded;
4215			if (!eop) /* Catch subsequent segs */
4216				rxr->discard = TRUE;
4217			else
4218				rxr->discard = FALSE;
4219			em_rx_discard(rxr, i);
4220			goto next_desc;
4221		}
4222
4223		/* Assign correct length to the current fragment */
4224		mp = rxr->rx_buffers[i].m_head;
4225		mp->m_len = len;
4226
4227		/* Trigger for refresh */
4228		rxr->rx_buffers[i].m_head = NULL;
4229
4230		/* First segment? */
4231		if (rxr->fmp == NULL) {
4232			mp->m_pkthdr.len = len;
4233			rxr->fmp = rxr->lmp = mp;
4234		} else {
4235			/* Chain mbuf's together */
4236			mp->m_flags &= ~M_PKTHDR;
4237			rxr->lmp->m_next = mp;
4238			rxr->lmp = mp;
4239			rxr->fmp->m_pkthdr.len += len;
4240		}
4241
4242		if (eop) {
4243			--count;
4244			sendmp = rxr->fmp;
4245			sendmp->m_pkthdr.rcvif = ifp;
4246			ifp->if_ipackets++;
4247			em_receive_checksum(cur, sendmp);
4248#ifndef __NO_STRICT_ALIGNMENT
4249			if (adapter->max_frame_size >
4250			    (MCLBYTES - ETHER_ALIGN) &&
4251			    em_fixup_rx(rxr) != 0)
4252				goto skip;
4253#endif
4254			if (status & E1000_RXD_STAT_VP) {
4255				sendmp->m_pkthdr.ether_vtag =
4256				    (le16toh(cur->special) &
4257				    E1000_RXD_SPC_VLAN_MASK);
4258				sendmp->m_flags |= M_VLANTAG;
4259			}
4260#ifdef EM_MULTIQUEUE
4261			sendmp->m_pkthdr.flowid = rxr->msix;
4262			sendmp->m_flags |= M_FLOWID;
4263#endif
4264#ifndef __NO_STRICT_ALIGNMENT
4265skip:
4266#endif
4267			rxr->fmp = rxr->lmp = NULL;
4268		}
4269next_desc:
4270		/* Zero out the receive descriptors status. */
4271		cur->status = 0;
4272		++rxdone;	/* cumulative for POLL */
4273		++processed;
4274
4275		/* Advance our pointers to the next descriptor. */
4276		if (++i == adapter->num_rx_desc)
4277			i = 0;
4278
4279		/* Send to the stack */
4280		if (sendmp != NULL) {
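			/*
			 * Drop the RX lock across if_input(); the stack
			 * may re-enter the driver, so save and re-read
			 * next_to_check around the call.
			 */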
4281			rxr->next_to_check = i;
4282			EM_RX_UNLOCK(rxr);
4283			(*ifp->if_input)(ifp, sendmp);
4284			EM_RX_LOCK(rxr);
4285			i = rxr->next_to_check;
4286		}
4287
4288		/* Only refresh mbufs every 8 descriptors */
4289		if (processed == 8) {
4290			em_refresh_mbufs(rxr, i);
4291			processed = 0;
4292		}
4293	}
4294
4295	/* Catch any remaining refresh work */
4296	em_refresh_mbufs(rxr, i);
4297
4298	rxr->next_to_check = i;
4299	if (done != NULL)
4300		*done = rxdone;
4301	EM_RX_UNLOCK(rxr);
4302
4303	return ((status & E1000_RXD_STAT_DD) ? TRUE : FALSE);
4304}
4305
4306static __inline void
4307em_rx_discard(struct rx_ring *rxr, int i)
4308{
4309	struct adapter		*adapter = rxr->adapter;
4310	struct em_buffer	*rbuf;
4311	struct mbuf		*m;
4312
4313	rbuf = &rxr->rx_buffers[i];
4314	/* Free any previous pieces */
4315	if (rxr->fmp != NULL) {
4316		rxr->fmp->m_flags |= M_PKTHDR;
4317		m_freem(rxr->fmp);
4318		rxr->fmp = NULL;
4319		rxr->lmp = NULL;
4320	}
4321
4322	/* Reset state, keep loaded DMA map and reuse */
4323	m = rbuf->m_head;
4324	m->m_len = m->m_pkthdr.len = adapter->rx_mbuf_sz;
4325	m->m_flags |= M_PKTHDR;
4326	m->m_data = m->m_ext.ext_buf;
4327	m->m_next = NULL;
4328
4329	return;
4330}
4331
4332#ifndef __NO_STRICT_ALIGNMENT
4333/*
4334 * When jumbo frames are enabled we should realign the entire payload on
4335 * architectures with strict alignment. This is a serious design mistake of the
4336 * 8254x, as it nullifies the benefit of DMA operations. The 8254x only allows
4337 * RX buffer sizes of 2048/4096/8192/16384; what we really want is
4338 * 2048 - ETHER_ALIGN, which would align the payload. On architectures without
4339 * strict alignment restrictions the 8254x still performs unaligned memory
4340 * accesses, which reduce performance too. To avoid copying an entire frame
4341 * to realign it, we allocate a new mbuf and copy the ethernet header into it.
4342 * The new mbuf is prepended onto the existing mbuf chain.
4343 *
4344 * Be aware, the best performance of the 8254x is achieved only when jumbo
4345 * frames are not used at all on architectures with strict alignment.
4346 */
4347static int
4348em_fixup_rx(struct rx_ring *rxr)
4349{
4350	struct adapter *adapter = rxr->adapter;
4351	struct mbuf *m, *n;
4352	int error;
4353
4354	error = 0;
4355	m = rxr->fmp;
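	/*
	 * Fast path: when there is room in the cluster, slide the frame
	 * forward by ETHER_HDR_LEN (14) bytes, which leaves the IP header
	 * that follows the ethernet header 4-byte aligned.
	 */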
4356	if (m->m_len <= (MCLBYTES - ETHER_HDR_LEN)) {
4357		bcopy(m->m_data, m->m_data + ETHER_HDR_LEN, m->m_len);
4358		m->m_data += ETHER_HDR_LEN;
4359	} else {
4360		MGETHDR(n, M_DONTWAIT, MT_DATA);
4361		if (n != NULL) {
4362			bcopy(m->m_data, n->m_data, ETHER_HDR_LEN);
4363			m->m_data += ETHER_HDR_LEN;
4364			m->m_len -= ETHER_HDR_LEN;
4365			n->m_len = ETHER_HDR_LEN;
4366			M_MOVE_PKTHDR(n, m);
4367			n->m_next = m;
4368			rxr->fmp = n;
4369		} else {
4370			adapter->dropped_pkts++;
4371			m_freem(rxr->fmp);
4372			rxr->fmp = NULL;
4373			error = ENOMEM;
4374		}
4375	}
4376
4377	return (error);
4378}
4379#endif
4380
4381/*********************************************************************
4382 *
4383 *  Verify that the hardware indicated that the checksum is valid.
4384 *  Inform the stack about the status of checksum so that stack
4385 *  doesn't spend time verifying the checksum.
4386 *
4387 *********************************************************************/
4388static void
4389em_receive_checksum(struct e1000_rx_desc *rx_desc, struct mbuf *mp)
4390{
4391	/* Ignore Checksum bit is set */
4392	if (rx_desc->status & E1000_RXD_STAT_IXSM) {
4393		mp->m_pkthdr.csum_flags = 0;
4394		return;
4395	}
4396
4397	if (rx_desc->status & E1000_RXD_STAT_IPCS) {
4398		/* Did it pass? */
4399		if (!(rx_desc->errors & E1000_RXD_ERR_IPE)) {
4400			/* IP Checksum Good */
4401			mp->m_pkthdr.csum_flags = CSUM_IP_CHECKED;
4402			mp->m_pkthdr.csum_flags |= CSUM_IP_VALID;
4403
4404		} else {
4405			mp->m_pkthdr.csum_flags = 0;
4406		}
4407	}
4408
4409	if (rx_desc->status & E1000_RXD_STAT_TCPCS) {
4410		/* Did it pass? */
4411		if (!(rx_desc->errors & E1000_RXD_ERR_TCPE)) {
4412			mp->m_pkthdr.csum_flags |=
4413			(CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
4414			mp->m_pkthdr.csum_data = htons(0xffff);
4415		}
4416	}
4417}
4418
4419/*
4420 * This routine is run via a vlan
4421 * config EVENT
4422 */
4423static void
4424em_register_vlan(void *arg, struct ifnet *ifp, u16 vtag)
4425{
4426	struct adapter	*adapter = ifp->if_softc;
4427	u32		index, bit;
4428
4429	if (ifp->if_softc !=  arg)   /* Not our event */
4430		return;
4431
4432	if ((vtag == 0) || (vtag > 4095))       /* Invalid ID */
4433                return;
4434
4435	EM_CORE_LOCK(adapter);
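	/*
	 * The VFTA is 128 32-bit words covering the 4096 possible VLAN
	 * IDs: bits 5-11 of the tag select the word and bits 0-4 the bit
	 * within it, e.g. vtag 100 -> index 3, bit 4.
	 */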
4436	index = (vtag >> 5) & 0x7F;
4437	bit = vtag & 0x1F;
4438	adapter->shadow_vfta[index] |= (1 << bit);
4439	++adapter->num_vlans;
4440	/* Re-init to load the changes */
4441	if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
4442		em_init_locked(adapter);
4443	EM_CORE_UNLOCK(adapter);
4444}
4445
4446/*
4447 * This routine is run via a vlan
4448 * unconfig EVENT
4449 */
4450static void
4451em_unregister_vlan(void *arg, struct ifnet *ifp, u16 vtag)
4452{
4453	struct adapter	*adapter = ifp->if_softc;
4454	u32		index, bit;
4455
4456	if (ifp->if_softc !=  arg)
4457		return;
4458
4459	if ((vtag == 0) || (vtag > 4095))       /* Invalid */
4460                return;
4461
4462	EM_CORE_LOCK(adapter);
4463	index = (vtag >> 5) & 0x7F;
4464	bit = vtag & 0x1F;
4465	adapter->shadow_vfta[index] &= ~(1 << bit);
4466	--adapter->num_vlans;
4467	/* Re-init to load the changes */
4468	if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
4469		em_init_locked(adapter);
4470	EM_CORE_UNLOCK(adapter);
4471}
4472
4473static void
4474em_setup_vlan_hw_support(struct adapter *adapter)
4475{
4476	struct e1000_hw *hw = &adapter->hw;
4477	u32             reg;
4478
4479	/*
4480	** We get here thru init_locked, meaning
4481	** a soft reset; this has already cleared
4482	** the VFTA and other state, so if there
4483	** have been no vlans registered, do nothing.
4484	*/
4485	if (adapter->num_vlans == 0)
4486                return;
4487
4488	/*
4489	** A soft reset zeroes out the VFTA, so
4490	** we need to repopulate it now.
4491	*/
4492	for (int i = 0; i < EM_VFTA_SIZE; i++)
4493                if (adapter->shadow_vfta[i] != 0)
4494			E1000_WRITE_REG_ARRAY(hw, E1000_VFTA,
4495                            i, adapter->shadow_vfta[i]);
4496
4497	reg = E1000_READ_REG(hw, E1000_CTRL);
4498	reg |= E1000_CTRL_VME;
4499	E1000_WRITE_REG(hw, E1000_CTRL, reg);
4500
4501	/* Enable the Filter Table */
4502	reg = E1000_READ_REG(hw, E1000_RCTL);
4503	reg &= ~E1000_RCTL_CFIEN;
4504	reg |= E1000_RCTL_VFE;
4505	E1000_WRITE_REG(hw, E1000_RCTL, reg);
4506}
4507
4508static void
4509em_enable_intr(struct adapter *adapter)
4510{
4511	struct e1000_hw *hw = &adapter->hw;
4512	u32 ims_mask = IMS_ENABLE_MASK;
4513
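	/*
	 * The 82574 has additional cause bits for its MSIX vectors;
	 * they are armed via EIAC and must also be unmasked in IMS
	 * along with the legacy causes.
	 */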
4514	if (hw->mac.type == e1000_82574) {
4515		E1000_WRITE_REG(hw, EM_EIAC, EM_MSIX_MASK);
4516		ims_mask |= EM_MSIX_MASK;
4517	}
4518	E1000_WRITE_REG(hw, E1000_IMS, ims_mask);
4519}
4520
4521static void
4522em_disable_intr(struct adapter *adapter)
4523{
4524	struct e1000_hw *hw = &adapter->hw;
4525
4526	if (hw->mac.type == e1000_82574)
4527		E1000_WRITE_REG(hw, EM_EIAC, 0);
4528	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
4529}
4530
4531/*
4532 * Bit of a misnomer, what this really means is
4533 * to enable OS management of the system... aka
4534 * to disable special hardware management features
4535 */
4536static void
4537em_init_manageability(struct adapter *adapter)
4538{
4539	/* A shared code workaround */
4540#define E1000_82542_MANC2H E1000_MANC2H
4541	if (adapter->has_manage) {
4542		int manc2h = E1000_READ_REG(&adapter->hw, E1000_MANC2H);
4543		int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
4544
4545		/* disable hardware interception of ARP */
4546		manc &= ~(E1000_MANC_ARP_EN);
4547
4548                /* enable receiving management packets to the host */
4549		manc |= E1000_MANC_EN_MNG2HOST;
4550#define E1000_MNG2HOST_PORT_623 (1 << 5)
4551#define E1000_MNG2HOST_PORT_664 (1 << 6)
4552		manc2h |= E1000_MNG2HOST_PORT_623;
4553		manc2h |= E1000_MNG2HOST_PORT_664;
4554		E1000_WRITE_REG(&adapter->hw, E1000_MANC2H, manc2h);
4555		E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
4556	}
4557}
4558
4559/*
4560 * Give control back to hardware management
4561 * controller if there is one.
4562 */
4563static void
4564em_release_manageability(struct adapter *adapter)
4565{
4566	if (adapter->has_manage) {
4567		int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
4568
4569		/* re-enable hardware interception of ARP */
4570		manc |= E1000_MANC_ARP_EN;
4571		manc &= ~E1000_MANC_EN_MNG2HOST;
4572
4573		E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
4574	}
4575}
4576
4577/*
4578 * em_get_hw_control sets the {CTRL_EXT|FWSM}:DRV_LOAD bit.
4579 * For ASF and Pass Through versions of f/w this means
4580 * that the driver is loaded. For AMT version type f/w
4581 * this means that the network i/f is open.
4582 */
4583static void
4584em_get_hw_control(struct adapter *adapter)
4585{
4586	u32 ctrl_ext, swsm;
4587
4588	if (adapter->hw.mac.type == e1000_82573) {
4589		swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM);
4590		E1000_WRITE_REG(&adapter->hw, E1000_SWSM,
4591		    swsm | E1000_SWSM_DRV_LOAD);
4592		return;
4593	}
4594	/* else */
4595	ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4596	E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
4597	    ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
4598	return;
4599}
4600
4601/*
4602 * em_release_hw_control resets {CTRL_EXT|FWSM}:DRV_LOAD bit.
4603 * For ASF and Pass Through versions of f/w this means that
4604 * the driver is no longer loaded. For AMT versions of the
4605 * f/w this means that the network i/f is closed.
4606 */
4607static void
4608em_release_hw_control(struct adapter *adapter)
4609{
4610	u32 ctrl_ext, swsm;
4611
4612	if (!adapter->has_manage)
4613		return;
4614
4615	if (adapter->hw.mac.type == e1000_82573) {
4616		swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM);
4617		E1000_WRITE_REG(&adapter->hw, E1000_SWSM,
4618		    swsm & ~E1000_SWSM_DRV_LOAD);
4619		return;
4620	}
4621	/* else */
4622	ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4623	E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
4624	    ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
4625	return;
4626}
4627
4628static int
4629em_is_valid_ether_addr(u8 *addr)
4630{
4631	char zero_addr[6] = { 0, 0, 0, 0, 0, 0 };
4632
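	/*
	 * Reject group (multicast/broadcast) addresses, which have the
	 * low-order bit of the first octet set, as well as the all-zero
	 * address.
	 */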
4633	if ((addr[0] & 1) || (!bcmp(addr, zero_addr, ETHER_ADDR_LEN))) {
4634		return (FALSE);
4635	}
4636
4637	return (TRUE);
4638}
4639
4640/*
4641** Parse the interface capabilities with regard
4642** to both system management and wake-on-lan for
4643** later use.
4644*/
4645static void
4646em_get_wakeup(device_t dev)
4647{
4648	struct adapter	*adapter = device_get_softc(dev);
4649	u16		eeprom_data = 0, device_id, apme_mask;
4650
4651	adapter->has_manage = e1000_enable_mng_pass_thru(&adapter->hw);
4652	apme_mask = EM_EEPROM_APME;
4653
4654	switch (adapter->hw.mac.type) {
4655	case e1000_82573:
4656	case e1000_82583:
4657		adapter->has_amt = TRUE;
4658		/* Falls thru */
4659	case e1000_82571:
4660	case e1000_82572:
4661	case e1000_80003es2lan:
4662		if (adapter->hw.bus.func == 1) {
4663			e1000_read_nvm(&adapter->hw,
4664			    NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data);
4665			break;
4666		} else
4667			e1000_read_nvm(&adapter->hw,
4668			    NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
4669		break;
4670	case e1000_ich8lan:
4671	case e1000_ich9lan:
4672	case e1000_ich10lan:
4673	case e1000_pchlan:
4674	case e1000_pch2lan:
4675		apme_mask = E1000_WUC_APME;
4676		adapter->has_amt = TRUE;
4677		eeprom_data = E1000_READ_REG(&adapter->hw, E1000_WUC);
4678		break;
4679	default:
4680		e1000_read_nvm(&adapter->hw,
4681		    NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
4682		break;
4683	}
4684	if (eeprom_data & apme_mask)
4685		adapter->wol = (E1000_WUFC_MAG | E1000_WUFC_MC);
4686	/*
4687         * We have the eeprom settings, now apply the special cases
4688         * where the eeprom may be wrong or the board won't support
4689         * wake on lan on a particular port
4690	 */
4691	device_id = pci_get_device(dev);
4692        switch (device_id) {
4693	case E1000_DEV_ID_82571EB_FIBER:
4694		/* Wake events only supported on port A for dual fiber
4695		 * regardless of eeprom setting */
4696		if (E1000_READ_REG(&adapter->hw, E1000_STATUS) &
4697		    E1000_STATUS_FUNC_1)
4698			adapter->wol = 0;
4699		break;
4700	case E1000_DEV_ID_82571EB_QUAD_COPPER:
4701	case E1000_DEV_ID_82571EB_QUAD_FIBER:
4702	case E1000_DEV_ID_82571EB_QUAD_COPPER_LP:
4703                /* if quad port adapter, disable WoL on all but port A */
4704		if (global_quad_port_a != 0)
4705			adapter->wol = 0;
4706		/* Reset for multiple quad port adapters */
4707		if (++global_quad_port_a == 4)
4708			global_quad_port_a = 0;
4709                break;
4710	}
4711	return;
4712}
4713
4714
4715/*
4716 * Enable PCI Wake On Lan capability
4717 */
4718static void
4719em_enable_wakeup(device_t dev)
4720{
4721	struct adapter	*adapter = device_get_softc(dev);
4722	struct ifnet	*ifp = adapter->ifp;
4723	u32		pmc, ctrl, ctrl_ext, rctl;
4724	u16     	status;
4725
4726	if ((pci_find_extcap(dev, PCIY_PMG, &pmc) != 0))
4727		return;
4728
4729	/* Advertise the wakeup capability */
4730	ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
4731	ctrl |= (E1000_CTRL_SWDPIN2 | E1000_CTRL_SWDPIN3);
4732	E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
4733	E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
4734
4735	if ((adapter->hw.mac.type == e1000_ich8lan) ||
4736	    (adapter->hw.mac.type == e1000_pchlan) ||
4737	    (adapter->hw.mac.type == e1000_ich9lan) ||
4738	    (adapter->hw.mac.type == e1000_ich10lan)) {
4739		e1000_disable_gig_wol_ich8lan(&adapter->hw);
4740		e1000_hv_phy_powerdown_workaround_ich8lan(&adapter->hw);
4741	}
4742
4743	/* Keep the laser running on Fiber adapters */
4744	if (adapter->hw.phy.media_type == e1000_media_type_fiber ||
4745	    adapter->hw.phy.media_type == e1000_media_type_internal_serdes) {
4746		ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4747		ctrl_ext |= E1000_CTRL_EXT_SDP3_DATA;
4748		E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, ctrl_ext);
4749	}
4750
4751	/*
4752	** Determine type of Wakeup: note that wol
4753	** is set with all bits on by default.
4754	*/
4755	if ((ifp->if_capenable & IFCAP_WOL_MAGIC) == 0)
4756		adapter->wol &= ~E1000_WUFC_MAG;
4757
4758	if ((ifp->if_capenable & IFCAP_WOL_MCAST) == 0)
4759		adapter->wol &= ~E1000_WUFC_MC;
4760	else {
4761		rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
4762		rctl |= E1000_RCTL_MPE;
4763		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, rctl);
4764	}
4765
4766	if ((adapter->hw.mac.type == e1000_pchlan) ||
4767	    (adapter->hw.mac.type == e1000_pch2lan)) {
4768		if (em_enable_phy_wakeup(adapter))
4769			return;
4770	} else {
4771		E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
4772		E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
4773	}
4774
4775	if (adapter->hw.phy.type == e1000_phy_igp_3)
4776		e1000_igp3_phy_powerdown_workaround_ich8lan(&adapter->hw);
4777
4778        /* Request PME */
4779        status = pci_read_config(dev, pmc + PCIR_POWER_STATUS, 2);
4780	status &= ~(PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE);
4781	if (ifp->if_capenable & IFCAP_WOL)
4782		status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE;
4783        pci_write_config(dev, pmc + PCIR_POWER_STATUS, status, 2);
4784
4785	return;
4786}
4787
4788/*
4789** WOL in the newer chipset interfaces (pchlan)
4790** requires things to be copied into the PHY
4791*/
4792static int
4793em_enable_phy_wakeup(struct adapter *adapter)
4794{
4795	struct e1000_hw *hw = &adapter->hw;
4796	u32 mreg, ret = 0;
4797	u16 preg;
4798
4799	/* copy MAC RARs to PHY RARs */
4800	e1000_copy_rx_addrs_to_phy_ich8lan(hw);
4801
4802	/* copy MAC MTA to PHY MTA */
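	/* PHY registers are only 16 bits wide, so each 32-bit MTA entry
	 * is split across two consecutive BM_MTA registers. */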
4803	for (int i = 0; i < adapter->hw.mac.mta_reg_count; i++) {
4804		mreg = E1000_READ_REG_ARRAY(hw, E1000_MTA, i);
4805		e1000_write_phy_reg(hw, BM_MTA(i), (u16)(mreg & 0xFFFF));
4806		e1000_write_phy_reg(hw, BM_MTA(i) + 1,
4807		    (u16)((mreg >> 16) & 0xFFFF));
4808	}
4809
4810	/* configure PHY Rx Control register */
4811	e1000_read_phy_reg(&adapter->hw, BM_RCTL, &preg);
4812	mreg = E1000_READ_REG(hw, E1000_RCTL);
4813	if (mreg & E1000_RCTL_UPE)
4814		preg |= BM_RCTL_UPE;
4815	if (mreg & E1000_RCTL_MPE)
4816		preg |= BM_RCTL_MPE;
4817	preg &= ~(BM_RCTL_MO_MASK);
4818	if (mreg & E1000_RCTL_MO_3)
4819		preg |= (((mreg & E1000_RCTL_MO_3) >> E1000_RCTL_MO_SHIFT)
4820				<< BM_RCTL_MO_SHIFT);
4821	if (mreg & E1000_RCTL_BAM)
4822		preg |= BM_RCTL_BAM;
4823	if (mreg & E1000_RCTL_PMCF)
4824		preg |= BM_RCTL_PMCF;
4825	mreg = E1000_READ_REG(hw, E1000_CTRL);
4826	if (mreg & E1000_CTRL_RFCE)
4827		preg |= BM_RCTL_RFCE;
4828	e1000_write_phy_reg(&adapter->hw, BM_RCTL, preg);
4829
4830	/* enable PHY wakeup in MAC register */
4831	E1000_WRITE_REG(hw, E1000_WUC,
4832	    E1000_WUC_PHY_WAKE | E1000_WUC_PME_EN);
4833	E1000_WRITE_REG(hw, E1000_WUFC, adapter->wol);
4834
4835	/* configure and enable PHY wakeup in PHY registers */
4836	e1000_write_phy_reg(&adapter->hw, BM_WUFC, adapter->wol);
4837	e1000_write_phy_reg(&adapter->hw, BM_WUC, E1000_WUC_PME_EN);
4838
4839	/* activate PHY wakeup */
4840	ret = hw->phy.ops.acquire(hw);
4841	if (ret) {
4842		printf("Could not acquire PHY\n");
4843		return ret;
4844	}
4845	e1000_write_phy_reg_mdic(hw, IGP01E1000_PHY_PAGE_SELECT,
4846	                         (BM_WUC_ENABLE_PAGE << IGP_PAGE_SHIFT));
4847	ret = e1000_read_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, &preg);
4848	if (ret) {
4849		printf("Could not read PHY page 769\n");
4850		goto out;
4851	}
4852	preg |= BM_WUC_ENABLE_BIT | BM_WUC_HOST_WU_BIT;
4853	ret = e1000_write_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, preg);
4854	if (ret)
4855		printf("Could not set PHY Host Wakeup bit\n");
4856out:
4857	hw->phy.ops.release(hw);
4858
4859	return ret;
4860}
4861
4862static void
4863em_led_func(void *arg, int onoff)
4864{
4865	struct adapter	*adapter = arg;
4866
4867	EM_CORE_LOCK(adapter);
4868	if (onoff) {
4869		e1000_setup_led(&adapter->hw);
4870		e1000_led_on(&adapter->hw);
4871	} else {
4872		e1000_led_off(&adapter->hw);
4873		e1000_cleanup_led(&adapter->hw);
4874	}
4875	EM_CORE_UNLOCK(adapter);
4876}
4877
4878/*
4879** Disable the L0S and L1 LINK states
4880*/
4881static void
4882em_disable_aspm(struct adapter *adapter)
4883{
4884	int		base, reg;
4885	u16		link_cap, link_ctrl;
4886	device_t	dev = adapter->dev;
4887
4888	switch (adapter->hw.mac.type) {
4889		case e1000_82573:
4890		case e1000_82574:
4891		case e1000_82583:
4892			break;
4893		default:
4894			return;
4895	}
4896	if (pci_find_extcap(dev, PCIY_EXPRESS, &base) != 0)
4897		return;
4898	reg = base + PCIR_EXPRESS_LINK_CAP;
4899	link_cap = pci_read_config(dev, reg, 2);
4900	if ((link_cap & PCIM_LINK_CAP_ASPM) == 0)
4901		return;
4902	reg = base + PCIR_EXPRESS_LINK_CTL;
4903	link_ctrl = pci_read_config(dev, reg, 2);
4904	link_ctrl &= 0xFFFC; /* clear the ASPM L0s/L1 enable bits (bits 0-1) */
4905	pci_write_config(dev, reg, link_ctrl, 2);
4906	return;
4907}
4908
4909/**********************************************************************
4910 *
4911 *  Update the board statistics counters.
4912 *
4913 **********************************************************************/
4914static void
4915em_update_stats_counters(struct adapter *adapter)
4916{
4917	struct ifnet   *ifp;
4918
4919	if (adapter->hw.phy.media_type == e1000_media_type_copper ||
4920	   (E1000_READ_REG(&adapter->hw, E1000_STATUS) & E1000_STATUS_LU)) {
4921		adapter->stats.symerrs += E1000_READ_REG(&adapter->hw, E1000_SYMERRS);
4922		adapter->stats.sec += E1000_READ_REG(&adapter->hw, E1000_SEC);
4923	}
4924	adapter->stats.crcerrs += E1000_READ_REG(&adapter->hw, E1000_CRCERRS);
4925	adapter->stats.mpc += E1000_READ_REG(&adapter->hw, E1000_MPC);
4926	adapter->stats.scc += E1000_READ_REG(&adapter->hw, E1000_SCC);
4927	adapter->stats.ecol += E1000_READ_REG(&adapter->hw, E1000_ECOL);
4928
4929	adapter->stats.mcc += E1000_READ_REG(&adapter->hw, E1000_MCC);
4930	adapter->stats.latecol += E1000_READ_REG(&adapter->hw, E1000_LATECOL);
4931	adapter->stats.colc += E1000_READ_REG(&adapter->hw, E1000_COLC);
4932	adapter->stats.dc += E1000_READ_REG(&adapter->hw, E1000_DC);
4933	adapter->stats.rlec += E1000_READ_REG(&adapter->hw, E1000_RLEC);
4934	adapter->stats.xonrxc += E1000_READ_REG(&adapter->hw, E1000_XONRXC);
4935	adapter->stats.xontxc += E1000_READ_REG(&adapter->hw, E1000_XONTXC);
4936	/*
4937	** For watchdog management we need to know if we have been
4938	** paused during the last interval, so capture that here.
4939	*/
4940	adapter->pause_frames = E1000_READ_REG(&adapter->hw, E1000_XOFFRXC);
4941	adapter->stats.xoffrxc += adapter->pause_frames;
4942	adapter->stats.xofftxc += E1000_READ_REG(&adapter->hw, E1000_XOFFTXC);
4943	adapter->stats.fcruc += E1000_READ_REG(&adapter->hw, E1000_FCRUC);
4944	adapter->stats.prc64 += E1000_READ_REG(&adapter->hw, E1000_PRC64);
4945	adapter->stats.prc127 += E1000_READ_REG(&adapter->hw, E1000_PRC127);
4946	adapter->stats.prc255 += E1000_READ_REG(&adapter->hw, E1000_PRC255);
4947	adapter->stats.prc511 += E1000_READ_REG(&adapter->hw, E1000_PRC511);
4948	adapter->stats.prc1023 += E1000_READ_REG(&adapter->hw, E1000_PRC1023);
4949	adapter->stats.prc1522 += E1000_READ_REG(&adapter->hw, E1000_PRC1522);
4950	adapter->stats.gprc += E1000_READ_REG(&adapter->hw, E1000_GPRC);
4951	adapter->stats.bprc += E1000_READ_REG(&adapter->hw, E1000_BPRC);
4952	adapter->stats.mprc += E1000_READ_REG(&adapter->hw, E1000_MPRC);
4953	adapter->stats.gptc += E1000_READ_REG(&adapter->hw, E1000_GPTC);
4954
4955	/* For the 64-bit byte counters the low dword must be read first. */
4956	/* Both registers clear on the read of the high dword */
4957
4958	adapter->stats.gorc += E1000_READ_REG(&adapter->hw, E1000_GORCL) +
4959	    ((u64)E1000_READ_REG(&adapter->hw, E1000_GORCH) << 32);
4960	adapter->stats.gotc += E1000_READ_REG(&adapter->hw, E1000_GOTCL) +
4961	    ((u64)E1000_READ_REG(&adapter->hw, E1000_GOTCH) << 32);
4962
4963	adapter->stats.rnbc += E1000_READ_REG(&adapter->hw, E1000_RNBC);
4964	adapter->stats.ruc += E1000_READ_REG(&adapter->hw, E1000_RUC);
4965	adapter->stats.rfc += E1000_READ_REG(&adapter->hw, E1000_RFC);
4966	adapter->stats.roc += E1000_READ_REG(&adapter->hw, E1000_ROC);
4967	adapter->stats.rjc += E1000_READ_REG(&adapter->hw, E1000_RJC);
4968
4969	adapter->stats.tor += E1000_READ_REG(&adapter->hw, E1000_TORH);
4970	adapter->stats.tot += E1000_READ_REG(&adapter->hw, E1000_TOTH);
4971
4972	adapter->stats.tpr += E1000_READ_REG(&adapter->hw, E1000_TPR);
4973	adapter->stats.tpt += E1000_READ_REG(&adapter->hw, E1000_TPT);
4974	adapter->stats.ptc64 += E1000_READ_REG(&adapter->hw, E1000_PTC64);
4975	adapter->stats.ptc127 += E1000_READ_REG(&adapter->hw, E1000_PTC127);
4976	adapter->stats.ptc255 += E1000_READ_REG(&adapter->hw, E1000_PTC255);
4977	adapter->stats.ptc511 += E1000_READ_REG(&adapter->hw, E1000_PTC511);
4978	adapter->stats.ptc1023 += E1000_READ_REG(&adapter->hw, E1000_PTC1023);
4979	adapter->stats.ptc1522 += E1000_READ_REG(&adapter->hw, E1000_PTC1522);
4980	adapter->stats.mptc += E1000_READ_REG(&adapter->hw, E1000_MPTC);
4981	adapter->stats.bptc += E1000_READ_REG(&adapter->hw, E1000_BPTC);
4982
4983	/* Interrupt Counts */
4984
4985	adapter->stats.iac += E1000_READ_REG(&adapter->hw, E1000_IAC);
4986	adapter->stats.icrxptc += E1000_READ_REG(&adapter->hw, E1000_ICRXPTC);
4987	adapter->stats.icrxatc += E1000_READ_REG(&adapter->hw, E1000_ICRXATC);
4988	adapter->stats.ictxptc += E1000_READ_REG(&adapter->hw, E1000_ICTXPTC);
4989	adapter->stats.ictxatc += E1000_READ_REG(&adapter->hw, E1000_ICTXATC);
4990	adapter->stats.ictxqec += E1000_READ_REG(&adapter->hw, E1000_ICTXQEC);
4991	adapter->stats.ictxqmtc += E1000_READ_REG(&adapter->hw, E1000_ICTXQMTC);
4992	adapter->stats.icrxdmtc += E1000_READ_REG(&adapter->hw, E1000_ICRXDMTC);
4993	adapter->stats.icrxoc += E1000_READ_REG(&adapter->hw, E1000_ICRXOC);
4994
4995	if (adapter->hw.mac.type >= e1000_82543) {
4996		adapter->stats.algnerrc +=
4997		E1000_READ_REG(&adapter->hw, E1000_ALGNERRC);
4998		adapter->stats.rxerrc +=
4999		E1000_READ_REG(&adapter->hw, E1000_RXERRC);
5000		adapter->stats.tncrs +=
5001		E1000_READ_REG(&adapter->hw, E1000_TNCRS);
5002		adapter->stats.cexterr +=
5003		E1000_READ_REG(&adapter->hw, E1000_CEXTERR);
5004		adapter->stats.tsctc +=
5005		E1000_READ_REG(&adapter->hw, E1000_TSCTC);
5006		adapter->stats.tsctfc +=
5007		E1000_READ_REG(&adapter->hw, E1000_TSCTFC);
5008	}
5009	ifp = adapter->ifp;
5010
5011	ifp->if_collisions = adapter->stats.colc;
5012
5013	/* Rx Errors */
5014	ifp->if_ierrors = adapter->dropped_pkts + adapter->stats.rxerrc +
5015	    adapter->stats.crcerrs + adapter->stats.algnerrc +
5016	    adapter->stats.ruc + adapter->stats.roc +
5017	    adapter->stats.mpc + adapter->stats.cexterr;
5018
5019	/* Tx Errors */
5020	ifp->if_oerrors = adapter->stats.ecol +
5021	    adapter->stats.latecol + adapter->watchdog_events;
5022}
5023
5024/* Export a single 32-bit register via a read-only sysctl. */
5025static int
5026em_sysctl_reg_handler(SYSCTL_HANDLER_ARGS)
5027{
5028	struct adapter *adapter;
5029	u_int val;
5030
5031	adapter = oidp->oid_arg1;
5032	val = E1000_READ_REG(&adapter->hw, oidp->oid_arg2);
5033	return (sysctl_handle_int(oidp, &val, 0, req));
5034}
5035
5036/*
5037 * Add sysctl variables, one per statistic, to the system.
5038 */
5039static void
5040em_add_hw_stats(struct adapter *adapter)
5041{
5042	device_t dev = adapter->dev;
5043
5044	struct tx_ring *txr = adapter->tx_rings;
5045	struct rx_ring *rxr = adapter->rx_rings;
5046
5047	struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev);
5048	struct sysctl_oid *tree = device_get_sysctl_tree(dev);
5049	struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree);
5050	struct e1000_hw_stats *stats = &adapter->stats;
5051
5052	struct sysctl_oid *stat_node, *queue_node, *int_node;
5053	struct sysctl_oid_list *stat_list, *queue_list, *int_list;
5054
5055#define QUEUE_NAME_LEN 32
5056	char namebuf[QUEUE_NAME_LEN];
5057
5058	/* Driver Statistics */
5059	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "link_irq",
5060			CTLFLAG_RD, &adapter->link_irq, 0,
5061			"Link MSIX IRQ Handled");
5062	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "mbuf_alloc_fail",
5063			 CTLFLAG_RD, &adapter->mbuf_alloc_failed,
5064			 "Std mbuf failed");
5065	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "cluster_alloc_fail",
5066			 CTLFLAG_RD, &adapter->mbuf_cluster_failed,
5067			 "Std mbuf cluster failed");
5068	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "dropped",
5069			CTLFLAG_RD, &adapter->dropped_pkts,
5070			"Driver dropped packets");
5071	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_dma_fail",
5072			CTLFLAG_RD, &adapter->no_tx_dma_setup,
5073			"Driver tx dma failure in xmit");
5074	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_overruns",
5075			CTLFLAG_RD, &adapter->rx_overruns,
5076			"RX overruns");
5077	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "watchdog_timeouts",
5078			CTLFLAG_RD, &adapter->watchdog_events,
5079			"Watchdog timeouts");
5080
5081	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "device_control",
5082			CTLFLAG_RD, adapter, E1000_CTRL,
5083			em_sysctl_reg_handler, "IU",
5084			"Device Control Register");
5085	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rx_control",
5086			CTLFLAG_RD, adapter, E1000_RCTL,
5087			em_sysctl_reg_handler, "IU",
5088			"Receiver Control Register");
5089	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_high_water",
5090			CTLFLAG_RD, &adapter->hw.fc.high_water, 0,
5091			"Flow Control High Watermark");
5092	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_low_water",
5093			CTLFLAG_RD, &adapter->hw.fc.low_water, 0,
5094			"Flow Control Low Watermark");
5095
5096	for (int i = 0; i < adapter->num_queues; i++, rxr++, txr++) {
5097		snprintf(namebuf, QUEUE_NAME_LEN, "queue%d", i);
5098		queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
5099					    CTLFLAG_RD, NULL, "Queue Name");
5100		queue_list = SYSCTL_CHILDREN(queue_node);
5101
5102		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_head",
5103				CTLFLAG_RD, adapter, E1000_TDH(txr->me),
5104				em_sysctl_reg_handler, "IU",
5105 				"Transmit Descriptor Head");
5106		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_tail",
5107				CTLFLAG_RD, adapter, E1000_TDT(txr->me),
5108				em_sysctl_reg_handler, "IU",
5109 				"Transmit Descriptor Tail");
5110		SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "tx_irq",
5111				CTLFLAG_RD, &txr->tx_irq,
5112				"Queue MSI-X Transmit Interrupts");
5113		SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "no_desc_avail",
5114				CTLFLAG_RD, &txr->no_desc_avail,
5115				"Queue No Descriptor Available");
5116
5117		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_head",
5118				CTLFLAG_RD, adapter, E1000_RDH(rxr->me),
5119				em_sysctl_reg_handler, "IU",
5120				"Receive Descriptor Head");
5121		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_tail",
5122				CTLFLAG_RD, adapter, E1000_RDT(rxr->me),
5123				em_sysctl_reg_handler, "IU",
5124				"Receive Descriptor Tail");
5125		SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "rx_irq",
5126				CTLFLAG_RD, &rxr->rx_irq,
5127				"Queue MSI-X Receive Interrupts");
5128	}
5129
5130	/* MAC stats get their own sub node */
5131
5132	stat_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "mac_stats",
5133				    CTLFLAG_RD, NULL, "Statistics");
5134	stat_list = SYSCTL_CHILDREN(stat_node);
5135
5136	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "excess_coll",
5137			CTLFLAG_RD, &stats->ecol,
5138			"Excessive collisions");
5139	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "single_coll",
5140			CTLFLAG_RD, &stats->scc,
5141			"Single collisions");
5142	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "multiple_coll",
5143			CTLFLAG_RD, &stats->mcc,
5144			"Multiple collisions");
5145	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "late_coll",
5146			CTLFLAG_RD, &stats->latecol,
5147			"Late collisions");
5148	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "collision_count",
5149			CTLFLAG_RD, &stats->colc,
5150			"Collision Count");
5151	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "symbol_errors",
5152			CTLFLAG_RD, &adapter->stats.symerrs,
5153			"Symbol Errors");
5154	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "sequence_errors",
5155			CTLFLAG_RD, &adapter->stats.sec,
5156			"Sequence Errors");
5157	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "defer_count",
5158			CTLFLAG_RD, &adapter->stats.dc,
5159			"Defer Count");
5160	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "missed_packets",
5161			CTLFLAG_RD, &adapter->stats.mpc,
5162			"Missed Packets");
5163	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_no_buff",
5164			CTLFLAG_RD, &adapter->stats.rnbc,
5165			"Receive No Buffers");
5166	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_undersize",
5167			CTLFLAG_RD, &adapter->stats.ruc,
5168			"Receive Undersize");
5169	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_fragmented",
5170			CTLFLAG_RD, &adapter->stats.rfc,
5171			"Fragmented Packets Received");
5172	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_oversize",
5173			CTLFLAG_RD, &adapter->stats.roc,
5174			"Oversized Packets Received");
5175	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_jabber",
5176			CTLFLAG_RD, &adapter->stats.rjc,
5177			"Received Jabber");
5178	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_errs",
5179			CTLFLAG_RD, &adapter->stats.rxerrc,
5180			"Receive Errors");
5181	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "crc_errs",
5182			CTLFLAG_RD, &adapter->stats.crcerrs,
5183			"CRC errors");
5184	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "alignment_errs",
5185			CTLFLAG_RD, &adapter->stats.algnerrc,
5186			"Alignment Errors");
5187	/* On 82575 these are collision counts */
5188	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "coll_ext_errs",
5189			CTLFLAG_RD, &adapter->stats.cexterr,
5190			"Collision/Carrier extension errors");
5191	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xon_recvd",
5192			CTLFLAG_RD, &adapter->stats.xonrxc,
5193			"XON Received");
5194	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xon_txd",
5195			CTLFLAG_RD, &adapter->stats.xontxc,
5196			"XON Transmitted");
5197	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xoff_recvd",
5198			CTLFLAG_RD, &adapter->stats.xoffrxc,
5199			"XOFF Received");
5200	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xoff_txd",
5201			CTLFLAG_RD, &adapter->stats.xofftxc,
5202			"XOFF Transmitted");
5203
5204	/* Packet Reception Stats */
5205	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "total_pkts_recvd",
5206			CTLFLAG_RD, &adapter->stats.tpr,
5207			"Total Packets Received");
5208	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd",
5209			CTLFLAG_RD, &adapter->stats.gprc,
5210			"Good Packets Received");
5211	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_recvd",
5212			CTLFLAG_RD, &adapter->stats.bprc,
5213			"Broadcast Packets Received");
5214	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd",
5215			CTLFLAG_RD, &adapter->stats.mprc,
5216			"Multicast Packets Received");
5217	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_64",
5218			CTLFLAG_RD, &adapter->stats.prc64,
5219			"64 byte frames received");
5220	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_65_127",
5221			CTLFLAG_RD, &adapter->stats.prc127,
5222			"65-127 byte frames received");
5223	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_128_255",
5224			CTLFLAG_RD, &adapter->stats.prc255,
5225			"128-255 byte frames received");
5226	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_256_511",
5227			CTLFLAG_RD, &adapter->stats.prc511,
5228			"256-511 byte frames received");
5229	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_512_1023",
5230			CTLFLAG_RD, &adapter->stats.prc1023,
5231			"512-1023 byte frames received");
5232	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_1024_1522",
5233			CTLFLAG_RD, &adapter->stats.prc1522,
5234			"1024-1522 byte frames received");
5235	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd",
5236			CTLFLAG_RD, &adapter->stats.gorc,
5237			"Good Octets Received");
5238
5239	/* Packet Transmission Stats */
5240	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_txd",
5241			CTLFLAG_RD, &adapter->stats.gotc,
5242			"Good Octets Transmitted");
5243	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "total_pkts_txd",
5244			CTLFLAG_RD, &adapter->stats.tpt,
5245			"Total Packets Transmitted");
5246	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
5247			CTLFLAG_RD, &adapter->stats.gptc,
5248			"Good Packets Transmitted");
5249	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_txd",
5250			CTLFLAG_RD, &adapter->stats.bptc,
5251			"Broadcast Packets Transmitted");
5252	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_txd",
5253			CTLFLAG_RD, &adapter->stats.mptc,
5254			"Multicast Packets Transmitted");
5255	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_64",
5256			CTLFLAG_RD, &adapter->stats.ptc64,
5257			"64 byte frames transmitted");
5258	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_65_127",
5259			CTLFLAG_RD, &adapter->stats.ptc127,
5260			"65-127 byte frames transmitted");
5261	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_128_255",
5262			CTLFLAG_RD, &adapter->stats.ptc255,
5263			"128-255 byte frames transmitted");
5264	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_256_511",
5265			CTLFLAG_RD, &adapter->stats.ptc511,
5266			"256-511 byte frames transmitted");
5267	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_512_1023",
5268			CTLFLAG_RD, &adapter->stats.ptc1023,
5269			"512-1023 byte frames transmitted");
5270	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_1024_1522",
5271			CTLFLAG_RD, &adapter->stats.ptc1522,
5272			"1024-1522 byte frames transmitted");
5273	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tso_txd",
5274			CTLFLAG_RD, &adapter->stats.tsctc,
5275			"TSO Contexts Transmitted");
5276	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tso_ctx_fail",
5277			CTLFLAG_RD, &adapter->stats.tsctfc,
5278			"TSO Contexts Failed");
5279
5280
5281	/* Interrupt Stats */
5282
5283	int_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "interrupts",
5284				    CTLFLAG_RD, NULL, "Interrupt Statistics");
5285	int_list = SYSCTL_CHILDREN(int_node);
5286
5287	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "asserts",
5288			CTLFLAG_RD, &adapter->stats.iac,
5289			"Interrupt Assertion Count");
5290
5291	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_pkt_timer",
5292			CTLFLAG_RD, &adapter->stats.icrxptc,
5293			"Interrupt Cause Rx Pkt Timer Expire Count");
5294
5295	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_abs_timer",
5296			CTLFLAG_RD, &adapter->stats.icrxatc,
5297			"Interrupt Cause Rx Abs Timer Expire Count");
5298
5299	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_pkt_timer",
5300			CTLFLAG_RD, &adapter->stats.ictxptc,
5301			"Interrupt Cause Tx Pkt Timer Expire Count");
5302
5303	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_abs_timer",
5304			CTLFLAG_RD, &adapter->stats.ictxatc,
5305			"Interrupt Cause Tx Abs Timer Expire Count");
5306
5307	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_queue_empty",
5308			CTLFLAG_RD, &adapter->stats.ictxqec,
5309			"Interrupt Cause Tx Queue Empty Count");
5310
5311	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_queue_min_thresh",
5312			CTLFLAG_RD, &adapter->stats.ictxqmtc,
5313			"Interrupt Cause Tx Queue Min Thresh Count");
5314
5315	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_desc_min_thresh",
5316			CTLFLAG_RD, &adapter->stats.icrxdmtc,
5317			"Interrupt Cause Rx Desc Min Thresh Count");
5318
5319	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_overrun",
5320			CTLFLAG_RD, &adapter->stats.icrxoc,
5321			"Interrupt Cause Receiver Overrun Count");
5322}
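/*
 * Illustrative userland view of the subtrees built above (unit 0 is an
 * assumption for the example):
 *
 *	sysctl dev.em.0.mac_stats.excess_coll
 *	sysctl dev.em.0.interrupts.asserts
 *	sysctl -a dev.em.0.mac_stats	(dumps the whole stats subtree)
 */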
5323
5324/**********************************************************************
5325 *
5326 *  This routine provides a way to dump out the adapter eeprom,
 *  often a useful debug/service tool. Only the first 32 words are
 *  dumped; everything that matters lives within that range.
5329 *
5330 **********************************************************************/
5331static int
5332em_sysctl_nvm_info(SYSCTL_HANDLER_ARGS)
5333{
5334	struct adapter *adapter;
5335	int error;
5336	int result;
5337
5338	result = -1;
5339	error = sysctl_handle_int(oidp, &result, 0, req);
5340
5341	if (error || !req->newptr)
5342		return (error);
5343
5344	/*
5345	 * This value will cause a hex dump of the
5346	 * first 32 16-bit words of the EEPROM to
5347	 * the screen.
5348	 */
5349	if (result == 1) {
5350		adapter = (struct adapter *)arg1;
5351		em_print_nvm_info(adapter);
5352	}
5353
5354	return (error);
5355}
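/*
 * Usage sketch: writing 1 to the OID that attach code binds to this
 * handler triggers the dump; the node name below is an assumption for
 * illustration (the registration lives elsewhere in this file):
 *
 *	sysctl dev.em.0.nvm=1
 */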
5356
5357static void
5358em_print_nvm_info(struct adapter *adapter)
5359{
5360	u16	eeprom_data;
5361	int	i, j, row = 0;
5362
5363	/* It's a bit crude, but it gets the job done */
5364	printf("\nInterface EEPROM Dump:\n");
5365	printf("Offset\n0x0000  ");
5366	for (i = 0, j = 0; i < 32; i++, j++) {
5367		if (j == 8) { /* Make the offset block */
5368			j = 0; ++row;
5369			printf("\n0x00%x0  ", row);
5370		}
5371		e1000_read_nvm(&adapter->hw, i, 1, &eeprom_data);
5372		printf("%04x ", eeprom_data);
5373	}
5374	printf("\n");
5375}
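/*
 * The resulting console output has the shape below (word values are
 * purely illustrative): eight 16-bit words per row, four rows:
 *
 *	Interface EEPROM Dump:
 *	Offset
 *	0x0000  0011 2233 4455 6677 8899 aabb ccdd eeff
 *	0x0010  ...
 */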
5376
5377static int
5378em_sysctl_int_delay(SYSCTL_HANDLER_ARGS)
5379{
5380	struct em_int_delay_info *info;
5381	struct adapter *adapter;
5382	u32 regval;
5383	int error, usecs, ticks;
5384
5385	info = (struct em_int_delay_info *)arg1;
5386	usecs = info->value;
5387	error = sysctl_handle_int(oidp, &usecs, 0, req);
5388	if (error != 0 || req->newptr == NULL)
5389		return (error);
5390	if (usecs < 0 || usecs > EM_TICKS_TO_USECS(65535))
5391		return (EINVAL);
5392	info->value = usecs;
5393	ticks = EM_USECS_TO_TICKS(usecs);
5394
5395	adapter = info->adapter;
5396
5397	EM_CORE_LOCK(adapter);
5398	regval = E1000_READ_OFFSET(&adapter->hw, info->offset);
5399	regval = (regval & ~0xffff) | (ticks & 0xffff);
5400	/* Handle a few special cases. */
5401	switch (info->offset) {
5402	case E1000_RDTR:
5403		break;
5404	case E1000_TIDV:
5405		if (ticks == 0) {
5406			adapter->txd_cmd &= ~E1000_TXD_CMD_IDE;
5407			/* Don't write 0 into the TIDV register. */
5408			regval++;
5409		} else
5410			adapter->txd_cmd |= E1000_TXD_CMD_IDE;
5411		break;
5412	}
5413	E1000_WRITE_OFFSET(&adapter->hw, info->offset, regval);
5414	EM_CORE_UNLOCK(adapter);
5415	return (0);
5416}
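/*
 * Worked example (a sketch, assuming the if_em.h conversion of roughly
 * ticks = (1000 * usecs + 512) / 1024): writing 100 to an interrupt
 * delay OID stores usecs = 100 and programs (1000 * 100 + 512) / 1024
 * = 98 ticks into the register's low 16 bits, i.e. the hardware delay
 * granularity is about 1.024 usecs per tick.
 */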
5417
5418static void
5419em_add_int_delay_sysctl(struct adapter *adapter, const char *name,
5420	const char *description, struct em_int_delay_info *info,
5421	int offset, int value)
5422{
5423	info->adapter = adapter;
5424	info->offset = offset;
5425	info->value = value;
5426	SYSCTL_ADD_PROC(device_get_sysctl_ctx(adapter->dev),
5427	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
5428	    OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW,
5429	    info, 0, em_sysctl_int_delay, "I", description);
5430}
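/*
 * Sketch of a call site, as attach-time code might register the RDTR
 * receive delay (the OID name and the em_rx_int_delay_dflt default are
 * assumptions for illustration):
 *
 *	em_add_int_delay_sysctl(adapter, "rx_int_delay",
 *	    "receive interrupt delay in usecs", &adapter->rx_int_delay,
 *	    E1000_REGISTER(&adapter->hw, E1000_RDTR),
 *	    em_rx_int_delay_dflt);
 */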
5431
5432static void
5433em_add_rx_process_limit(struct adapter *adapter, const char *name,
5434	const char *description, int *limit, int value)
5435{
5436	*limit = value;
5437	SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
5438	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
5439	    OID_AUTO, name, CTLFLAG_RW, limit, value, description);
5440}
5441
5442static void
5443em_set_flow_cntrl(struct adapter *adapter, const char *name,
5444	const char *description, int *limit, int value)
5445{
5446	*limit = value;
5447	SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
5448	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
5449	    OID_AUTO, name, CTLFLAG_RW, limit, value, description);
5450}
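/*
 * Note: em_set_flow_cntrl() is structurally identical to
 * em_add_rx_process_limit(); it publishes a writable int that init
 * code later maps onto the requested flow-control mode.  Expected
 * values, assuming the shared-code e1000_fc_mode enum: 0 = none,
 * 1 = rx pause, 2 = tx pause, 3 = full.
 */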
5451
5452static int
5453em_sysctl_debug_info(SYSCTL_HANDLER_ARGS)
5454{
5455	struct adapter *adapter;
5456	int error;
5457	int result;
5458
5459	result = -1;
5460	error = sysctl_handle_int(oidp, &result, 0, req);
5461
5462	if (error || !req->newptr)
5463		return (error);
5464
5465	if (result == 1) {
5466		adapter = (struct adapter *)arg1;
5467		em_print_debug_info(adapter);
5468	}
5469
5470	return (error);
5471}
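/*
 * Usage sketch, mirroring the nvm handler above: writing 1 dumps
 * em_print_debug_info() to the console (node name assumed for the
 * example):
 *
 *	sysctl dev.em.0.debug=1
 */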
5472
5473/*
** This routine is meant to be fluid; add whatever is
** needed to debug a problem.  -jfv
5476*/
5477static void
5478em_print_debug_info(struct adapter *adapter)
5479{
5480	device_t dev = adapter->dev;
5481	struct tx_ring *txr = adapter->tx_rings;
5482	struct rx_ring *rxr = adapter->rx_rings;
5483
5484	if (adapter->ifp->if_drv_flags & IFF_DRV_RUNNING)
5485		printf("Interface is RUNNING ");
5486	else
5487		printf("Interface is NOT RUNNING ");
5488	if (adapter->ifp->if_drv_flags & IFF_DRV_OACTIVE)
5489		printf("and ACTIVE\n");
5490	else
5491		printf("and INACTIVE\n");
5492
5493	device_printf(dev, "hw tdh = %d, hw tdt = %d\n",
5494	    E1000_READ_REG(&adapter->hw, E1000_TDH(0)),
5495	    E1000_READ_REG(&adapter->hw, E1000_TDT(0)));
5496	device_printf(dev, "hw rdh = %d, hw rdt = %d\n",
5497	    E1000_READ_REG(&adapter->hw, E1000_RDH(0)),
5498	    E1000_READ_REG(&adapter->hw, E1000_RDT(0)));
5499	device_printf(dev, "Tx Queue Status = %d\n", txr->queue_status);
5500	device_printf(dev, "Tx descriptors avail = %d\n",
5501	    txr->tx_avail);
5502	device_printf(dev, "Tx descriptors avail failure = %ld\n",
5503	    txr->no_desc_avail);
5504	device_printf(dev, "Rx discarded packets = %ld\n",
5505	    rxr->rx_discarded);
5506	device_printf(dev, "Rx Next to Check = %d\n", rxr->next_to_check);
5507	device_printf(dev, "Rx Next to Refresh = %d\n", rxr->next_to_refresh);
5508}
5509