if_em.c revision 217591
1/******************************************************************************
2
3  Copyright (c) 2001-2010, Intel Corporation
4  All rights reserved.
5
6  Redistribution and use in source and binary forms, with or without
7  modification, are permitted provided that the following conditions are met:
8
9   1. Redistributions of source code must retain the above copyright notice,
10      this list of conditions and the following disclaimer.
11
12   2. Redistributions in binary form must reproduce the above copyright
13      notice, this list of conditions and the following disclaimer in the
14      documentation and/or other materials provided with the distribution.
15
16   3. Neither the name of the Intel Corporation nor the names of its
17      contributors may be used to endorse or promote products derived from
18      this software without specific prior written permission.
19
20  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
24  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30  POSSIBILITY OF SUCH DAMAGE.
31
32******************************************************************************/
33/*$FreeBSD: head/sys/dev/e1000/if_em.c 217591 2011-01-19 18:20:11Z jfv $*/
34
35#ifdef HAVE_KERNEL_OPTION_HEADERS
36#include "opt_device_polling.h"
37#include "opt_inet.h"
38#endif
39
40#include <sys/param.h>
41#include <sys/systm.h>
42#if __FreeBSD_version >= 800000
43#include <sys/buf_ring.h>
44#endif
45#include <sys/bus.h>
46#include <sys/endian.h>
47#include <sys/kernel.h>
48#include <sys/kthread.h>
49#include <sys/malloc.h>
50#include <sys/mbuf.h>
51#include <sys/module.h>
52#include <sys/rman.h>
53#include <sys/socket.h>
54#include <sys/sockio.h>
55#include <sys/sysctl.h>
56#include <sys/taskqueue.h>
57#include <sys/eventhandler.h>
58#include <machine/bus.h>
59#include <machine/resource.h>
60
61#include <net/bpf.h>
62#include <net/ethernet.h>
63#include <net/if.h>
64#include <net/if_arp.h>
65#include <net/if_dl.h>
66#include <net/if_media.h>
67
68#include <net/if_types.h>
69#include <net/if_vlan_var.h>
70
71#include <netinet/in_systm.h>
72#include <netinet/in.h>
73#include <netinet/if_ether.h>
74#include <netinet/ip.h>
75#include <netinet/ip6.h>
76#include <netinet/tcp.h>
77#include <netinet/udp.h>
78
79#include <machine/in_cksum.h>
80#include <dev/led/led.h>
81#include <dev/pci/pcivar.h>
82#include <dev/pci/pcireg.h>
83
84#include "e1000_api.h"
85#include "e1000_82571.h"
86#include "if_em.h"
87
88/*********************************************************************
89 *  Set this to one to display debug statistics
90 *********************************************************************/
91int	em_display_debug_stats = 0;
92
93/*********************************************************************
94 *  Driver version:
95 *********************************************************************/
96char em_driver_version[] = "7.1.9";
97
98/*********************************************************************
99 *  PCI Device ID Table
100 *
101 *  Used by probe to select devices to load on
102 *  Last field stores an index into e1000_strings
103 *  Last entry must be all 0s
104 *
105 *  { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
106 *********************************************************************/
107
108static em_vendor_info_t em_vendor_info_array[] =
109{
110	/* Intel(R) PRO/1000 Network Connection */
111	{ 0x8086, E1000_DEV_ID_82571EB_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
112	{ 0x8086, E1000_DEV_ID_82571EB_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
113	{ 0x8086, E1000_DEV_ID_82571EB_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
114	{ 0x8086, E1000_DEV_ID_82571EB_SERDES_DUAL,
115						PCI_ANY_ID, PCI_ANY_ID, 0},
116	{ 0x8086, E1000_DEV_ID_82571EB_SERDES_QUAD,
117						PCI_ANY_ID, PCI_ANY_ID, 0},
118	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER,
119						PCI_ANY_ID, PCI_ANY_ID, 0},
120	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER_LP,
121						PCI_ANY_ID, PCI_ANY_ID, 0},
122	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_FIBER,
123						PCI_ANY_ID, PCI_ANY_ID, 0},
124	{ 0x8086, E1000_DEV_ID_82571PT_QUAD_COPPER,
125						PCI_ANY_ID, PCI_ANY_ID, 0},
126	{ 0x8086, E1000_DEV_ID_82572EI_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
127	{ 0x8086, E1000_DEV_ID_82572EI_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
128	{ 0x8086, E1000_DEV_ID_82572EI_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
129	{ 0x8086, E1000_DEV_ID_82572EI,		PCI_ANY_ID, PCI_ANY_ID, 0},
130
131	{ 0x8086, E1000_DEV_ID_82573E,		PCI_ANY_ID, PCI_ANY_ID, 0},
132	{ 0x8086, E1000_DEV_ID_82573E_IAMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
133	{ 0x8086, E1000_DEV_ID_82573L,		PCI_ANY_ID, PCI_ANY_ID, 0},
134	{ 0x8086, E1000_DEV_ID_82583V,		PCI_ANY_ID, PCI_ANY_ID, 0},
135	{ 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_SPT,
136						PCI_ANY_ID, PCI_ANY_ID, 0},
137	{ 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_SPT,
138						PCI_ANY_ID, PCI_ANY_ID, 0},
139	{ 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_DPT,
140						PCI_ANY_ID, PCI_ANY_ID, 0},
141	{ 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_DPT,
142						PCI_ANY_ID, PCI_ANY_ID, 0},
143	{ 0x8086, E1000_DEV_ID_ICH8_IGP_M_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
144	{ 0x8086, E1000_DEV_ID_ICH8_IGP_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
145	{ 0x8086, E1000_DEV_ID_ICH8_IGP_C,	PCI_ANY_ID, PCI_ANY_ID, 0},
146	{ 0x8086, E1000_DEV_ID_ICH8_IFE,	PCI_ANY_ID, PCI_ANY_ID, 0},
147	{ 0x8086, E1000_DEV_ID_ICH8_IFE_GT,	PCI_ANY_ID, PCI_ANY_ID, 0},
148	{ 0x8086, E1000_DEV_ID_ICH8_IFE_G,	PCI_ANY_ID, PCI_ANY_ID, 0},
149	{ 0x8086, E1000_DEV_ID_ICH8_IGP_M,	PCI_ANY_ID, PCI_ANY_ID, 0},
150	{ 0x8086, E1000_DEV_ID_ICH8_82567V_3,	PCI_ANY_ID, PCI_ANY_ID, 0},
151	{ 0x8086, E1000_DEV_ID_ICH9_IGP_M_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
152	{ 0x8086, E1000_DEV_ID_ICH9_IGP_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
153	{ 0x8086, E1000_DEV_ID_ICH9_IGP_C,	PCI_ANY_ID, PCI_ANY_ID, 0},
154	{ 0x8086, E1000_DEV_ID_ICH9_IGP_M,	PCI_ANY_ID, PCI_ANY_ID, 0},
155	{ 0x8086, E1000_DEV_ID_ICH9_IGP_M_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
156	{ 0x8086, E1000_DEV_ID_ICH9_IFE,	PCI_ANY_ID, PCI_ANY_ID, 0},
157	{ 0x8086, E1000_DEV_ID_ICH9_IFE_GT,	PCI_ANY_ID, PCI_ANY_ID, 0},
158	{ 0x8086, E1000_DEV_ID_ICH9_IFE_G,	PCI_ANY_ID, PCI_ANY_ID, 0},
159	{ 0x8086, E1000_DEV_ID_ICH9_BM,		PCI_ANY_ID, PCI_ANY_ID, 0},
160	{ 0x8086, E1000_DEV_ID_82574L,		PCI_ANY_ID, PCI_ANY_ID, 0},
161	{ 0x8086, E1000_DEV_ID_82574LA,		PCI_ANY_ID, PCI_ANY_ID, 0},
162	{ 0x8086, E1000_DEV_ID_ICH10_R_BM_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
163	{ 0x8086, E1000_DEV_ID_ICH10_R_BM_LF,	PCI_ANY_ID, PCI_ANY_ID, 0},
164	{ 0x8086, E1000_DEV_ID_ICH10_R_BM_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
165	{ 0x8086, E1000_DEV_ID_ICH10_D_BM_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
166	{ 0x8086, E1000_DEV_ID_ICH10_D_BM_LF,	PCI_ANY_ID, PCI_ANY_ID, 0},
167	{ 0x8086, E1000_DEV_ID_ICH10_D_BM_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
168	{ 0x8086, E1000_DEV_ID_PCH_M_HV_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
169	{ 0x8086, E1000_DEV_ID_PCH_M_HV_LC,	PCI_ANY_ID, PCI_ANY_ID, 0},
170	{ 0x8086, E1000_DEV_ID_PCH_D_HV_DM,	PCI_ANY_ID, PCI_ANY_ID, 0},
171	{ 0x8086, E1000_DEV_ID_PCH_D_HV_DC,	PCI_ANY_ID, PCI_ANY_ID, 0},
172	{ 0x8086, E1000_DEV_ID_PCH2_LV_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
173	{ 0x8086, E1000_DEV_ID_PCH2_LV_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
174	/* required last entry */
175	{ 0, 0, 0, 0, 0}
176};
177
178/*********************************************************************
179 *  Table of branding strings for all supported NICs.
180 *********************************************************************/
181
182static char *em_strings[] = {
183	"Intel(R) PRO/1000 Network Connection"
184};
185
186/*********************************************************************
187 *  Function prototypes
188 *********************************************************************/
189static int	em_probe(device_t);
190static int	em_attach(device_t);
191static int	em_detach(device_t);
192static int	em_shutdown(device_t);
193static int	em_suspend(device_t);
194static int	em_resume(device_t);
195static void	em_start(struct ifnet *);
196static void	em_start_locked(struct ifnet *, struct tx_ring *);
197#ifdef EM_MULTIQUEUE
198static int	em_mq_start(struct ifnet *, struct mbuf *);
199static int	em_mq_start_locked(struct ifnet *,
200		    struct tx_ring *, struct mbuf *);
201static void	em_qflush(struct ifnet *);
202#endif
203static int	em_ioctl(struct ifnet *, u_long, caddr_t);
204static void	em_init(void *);
205static void	em_init_locked(struct adapter *);
206static void	em_stop(void *);
207static void	em_media_status(struct ifnet *, struct ifmediareq *);
208static int	em_media_change(struct ifnet *);
209static void	em_identify_hardware(struct adapter *);
210static int	em_allocate_pci_resources(struct adapter *);
211static int	em_allocate_legacy(struct adapter *);
212static int	em_allocate_msix(struct adapter *);
213static int	em_allocate_queues(struct adapter *);
214static int	em_setup_msix(struct adapter *);
215static void	em_free_pci_resources(struct adapter *);
216static void	em_local_timer(void *);
217static void	em_reset(struct adapter *);
218static int	em_setup_interface(device_t, struct adapter *);
219
220static void	em_setup_transmit_structures(struct adapter *);
221static void	em_initialize_transmit_unit(struct adapter *);
222static int	em_allocate_transmit_buffers(struct tx_ring *);
223static void	em_free_transmit_structures(struct adapter *);
224static void	em_free_transmit_buffers(struct tx_ring *);
225
226static int	em_setup_receive_structures(struct adapter *);
227static int	em_allocate_receive_buffers(struct rx_ring *);
228static void	em_initialize_receive_unit(struct adapter *);
229static void	em_free_receive_structures(struct adapter *);
230static void	em_free_receive_buffers(struct rx_ring *);
231
232static void	em_enable_intr(struct adapter *);
233static void	em_disable_intr(struct adapter *);
234static void	em_update_stats_counters(struct adapter *);
235static void	em_add_hw_stats(struct adapter *adapter);
236static bool	em_txeof(struct tx_ring *);
237static bool	em_rxeof(struct rx_ring *, int, int *);
238#ifndef __NO_STRICT_ALIGNMENT
239static int	em_fixup_rx(struct rx_ring *);
240#endif
241static void	em_receive_checksum(struct e1000_rx_desc *, struct mbuf *);
242static void	em_transmit_checksum_setup(struct tx_ring *, struct mbuf *, int,
243		    struct ip *, u32 *, u32 *);
244static void	em_tso_setup(struct tx_ring *, struct mbuf *, int, struct ip *,
245		    struct tcphdr *, u32 *, u32 *);
246static void	em_set_promisc(struct adapter *);
247static void	em_disable_promisc(struct adapter *);
248static void	em_set_multi(struct adapter *);
249static void	em_update_link_status(struct adapter *);
250static void	em_refresh_mbufs(struct rx_ring *, int);
251static void	em_register_vlan(void *, struct ifnet *, u16);
252static void	em_unregister_vlan(void *, struct ifnet *, u16);
253static void	em_setup_vlan_hw_support(struct adapter *);
254static int	em_xmit(struct tx_ring *, struct mbuf **);
255static int	em_dma_malloc(struct adapter *, bus_size_t,
256		    struct em_dma_alloc *, int);
257static void	em_dma_free(struct adapter *, struct em_dma_alloc *);
258static int	em_sysctl_nvm_info(SYSCTL_HANDLER_ARGS);
259static void	em_print_nvm_info(struct adapter *);
260static int	em_sysctl_debug_info(SYSCTL_HANDLER_ARGS);
261static void	em_print_debug_info(struct adapter *);
262static int 	em_is_valid_ether_addr(u8 *);
263static int	em_sysctl_int_delay(SYSCTL_HANDLER_ARGS);
264static void	em_add_int_delay_sysctl(struct adapter *, const char *,
265		    const char *, struct em_int_delay_info *, int, int);
266/* Management and WOL Support */
267static void	em_init_manageability(struct adapter *);
268static void	em_release_manageability(struct adapter *);
269static void     em_get_hw_control(struct adapter *);
270static void     em_release_hw_control(struct adapter *);
271static void	em_get_wakeup(device_t);
272static void     em_enable_wakeup(device_t);
273static int	em_enable_phy_wakeup(struct adapter *);
274static void	em_led_func(void *, int);
275static void	em_disable_aspm(struct adapter *);
276
277static int	em_irq_fast(void *);
278
279/* MSIX handlers */
280static void	em_msix_tx(void *);
281static void	em_msix_rx(void *);
282static void	em_msix_link(void *);
283static void	em_handle_tx(void *context, int pending);
284static void	em_handle_rx(void *context, int pending);
285static void	em_handle_link(void *context, int pending);
286
287static void	em_add_rx_process_limit(struct adapter *, const char *,
288		    const char *, int *, int);
289static void	em_set_flow_cntrl(struct adapter *, const char *,
290		    const char *, int *, int);
291
292static __inline void em_rx_discard(struct rx_ring *, int);
293
294#ifdef DEVICE_POLLING
295static poll_handler_t em_poll;
296#endif /* POLLING */
297
298/*********************************************************************
299 *  FreeBSD Device Interface Entry Points
300 *********************************************************************/
301
302static device_method_t em_methods[] = {
303	/* Device interface */
304	DEVMETHOD(device_probe, em_probe),
305	DEVMETHOD(device_attach, em_attach),
306	DEVMETHOD(device_detach, em_detach),
307	DEVMETHOD(device_shutdown, em_shutdown),
308	DEVMETHOD(device_suspend, em_suspend),
309	DEVMETHOD(device_resume, em_resume),
310	{0, 0}
311};
312
313static driver_t em_driver = {
314	"em", em_methods, sizeof(struct adapter),
315};
316
317devclass_t em_devclass;
318DRIVER_MODULE(em, pci, em_driver, em_devclass, 0, 0);
319MODULE_DEPEND(em, pci, 1, 1, 1);
320MODULE_DEPEND(em, ether, 1, 1, 1);
321
322/*********************************************************************
323 *  Tunable default values.
324 *********************************************************************/
325
326#define EM_TICKS_TO_USECS(ticks)	((1024 * (ticks) + 500) / 1000)
327#define EM_USECS_TO_TICKS(usecs)	((1000 * (usecs) + 512) / 1024)
328#define M_TSO_LEN			66
329
330/* Allow common code without TSO */
331#ifndef CSUM_TSO
332#define CSUM_TSO	0
333#endif
334
335static int em_tx_int_delay_dflt = EM_TICKS_TO_USECS(EM_TIDV);
336static int em_rx_int_delay_dflt = EM_TICKS_TO_USECS(EM_RDTR);
337TUNABLE_INT("hw.em.tx_int_delay", &em_tx_int_delay_dflt);
338TUNABLE_INT("hw.em.rx_int_delay", &em_rx_int_delay_dflt);
339
340static int em_tx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_TADV);
341static int em_rx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_RADV);
342TUNABLE_INT("hw.em.tx_abs_int_delay", &em_tx_abs_int_delay_dflt);
343TUNABLE_INT("hw.em.rx_abs_int_delay", &em_rx_abs_int_delay_dflt);
344
345static int em_rxd = EM_DEFAULT_RXD;
346static int em_txd = EM_DEFAULT_TXD;
347TUNABLE_INT("hw.em.rxd", &em_rxd);
348TUNABLE_INT("hw.em.txd", &em_txd);
349
350static int em_smart_pwr_down = FALSE;
351TUNABLE_INT("hw.em.smart_pwr_down", &em_smart_pwr_down);
352
353/* Controls whether promiscuous also shows bad packets */
354static int em_debug_sbp = FALSE;
355TUNABLE_INT("hw.em.sbp", &em_debug_sbp);
356
357static int em_enable_msix = TRUE;
358TUNABLE_INT("hw.em.enable_msix", &em_enable_msix);
359
360/* How many packets rxeof tries to clean at a time */
361static int em_rx_process_limit = 100;
362TUNABLE_INT("hw.em.rx_process_limit", &em_rx_process_limit);
363
364/* Flow control setting - default to FULL */
365static int em_fc_setting = e1000_fc_full;
366TUNABLE_INT("hw.em.fc_setting", &em_fc_setting);
367
368/* Global used in WOL setup with multiport cards */
369static int global_quad_port_a = 0;
370
371/*********************************************************************
372 *  Device identification routine
373 *
374 *  em_probe determines if the driver should be loaded on
375 *  adapter based on PCI vendor/device id of the adapter.
376 *
377 *  return BUS_PROBE_DEFAULT on success, positive on failure
378 *********************************************************************/
379
380static int
381em_probe(device_t dev)
382{
383	char		adapter_name[60];
384	u16		pci_vendor_id = 0;
385	u16		pci_device_id = 0;
386	u16		pci_subvendor_id = 0;
387	u16		pci_subdevice_id = 0;
388	em_vendor_info_t *ent;
389
390	INIT_DEBUGOUT("em_probe: begin");
391
392	pci_vendor_id = pci_get_vendor(dev);
393	if (pci_vendor_id != EM_VENDOR_ID)
394		return (ENXIO);
395
396	pci_device_id = pci_get_device(dev);
397	pci_subvendor_id = pci_get_subvendor(dev);
398	pci_subdevice_id = pci_get_subdevice(dev);
399
400	ent = em_vendor_info_array;
401	while (ent->vendor_id != 0) {
402		if ((pci_vendor_id == ent->vendor_id) &&
403		    (pci_device_id == ent->device_id) &&
404
405		    ((pci_subvendor_id == ent->subvendor_id) ||
406		    (ent->subvendor_id == PCI_ANY_ID)) &&
407
408		    ((pci_subdevice_id == ent->subdevice_id) ||
409		    (ent->subdevice_id == PCI_ANY_ID))) {
410			sprintf(adapter_name, "%s %s",
411				em_strings[ent->index],
412				em_driver_version);
413			device_set_desc_copy(dev, adapter_name);
414			return (BUS_PROBE_DEFAULT);
415		}
416		ent++;
417	}
418
419	return (ENXIO);
420}
421
422/*********************************************************************
423 *  Device initialization routine
424 *
425 *  The attach entry point is called when the driver is being loaded.
426 *  This routine identifies the type of hardware, allocates all resources
427 *  and initializes the hardware.
428 *
429 *  return 0 on success, positive on failure
430 *********************************************************************/
431
432static int
433em_attach(device_t dev)
434{
435	struct adapter	*adapter;
436	int		error = 0;
437
438	INIT_DEBUGOUT("em_attach: begin");
439
440	adapter = device_get_softc(dev);
441	adapter->dev = adapter->osdep.dev = dev;
442	EM_CORE_LOCK_INIT(adapter, device_get_nameunit(dev));
443
444	/* SYSCTL stuff */
445	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
446	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
447	    OID_AUTO, "nvm", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
448	    em_sysctl_nvm_info, "I", "NVM Information");
449
450	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
451	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
452	    OID_AUTO, "debug", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
453	    em_sysctl_debug_info, "I", "Debug Information");
454
455	callout_init_mtx(&adapter->timer, &adapter->core_mtx, 0);
456
457	/* Determine hardware and mac info */
458	em_identify_hardware(adapter);
459
460	/* Setup PCI resources */
461	if (em_allocate_pci_resources(adapter)) {
462		device_printf(dev, "Allocation of PCI resources failed\n");
463		error = ENXIO;
464		goto err_pci;
465	}
466
467	/*
468	** For ICH8 and family we need to
469	** map the flash memory, and this
470	** must happen after the MAC is
471	** identified
472	*/
473	if ((adapter->hw.mac.type == e1000_ich8lan) ||
474	    (adapter->hw.mac.type == e1000_ich9lan) ||
475	    (adapter->hw.mac.type == e1000_ich10lan) ||
476	    (adapter->hw.mac.type == e1000_pchlan) ||
477	    (adapter->hw.mac.type == e1000_pch2lan)) {
478		int rid = EM_BAR_TYPE_FLASH;
479		adapter->flash = bus_alloc_resource_any(dev,
480		    SYS_RES_MEMORY, &rid, RF_ACTIVE);
481		if (adapter->flash == NULL) {
482			device_printf(dev, "Mapping of Flash failed\n");
483			error = ENXIO;
484			goto err_pci;
485		}
486		/* This is used in the shared code */
487		adapter->hw.flash_address = (u8 *)adapter->flash;
488		adapter->osdep.flash_bus_space_tag =
489		    rman_get_bustag(adapter->flash);
490		adapter->osdep.flash_bus_space_handle =
491		    rman_get_bushandle(adapter->flash);
492	}
493
494	/* Do Shared Code initialization */
495	if (e1000_setup_init_funcs(&adapter->hw, TRUE)) {
496		device_printf(dev, "Setup of Shared code failed\n");
497		error = ENXIO;
498		goto err_pci;
499	}
500
501	e1000_get_bus_info(&adapter->hw);
502
503	/* Set up some sysctls for the tunable interrupt delays */
504	em_add_int_delay_sysctl(adapter, "rx_int_delay",
505	    "receive interrupt delay in usecs", &adapter->rx_int_delay,
506	    E1000_REGISTER(&adapter->hw, E1000_RDTR), em_rx_int_delay_dflt);
507	em_add_int_delay_sysctl(adapter, "tx_int_delay",
508	    "transmit interrupt delay in usecs", &adapter->tx_int_delay,
509	    E1000_REGISTER(&adapter->hw, E1000_TIDV), em_tx_int_delay_dflt);
510	em_add_int_delay_sysctl(adapter, "rx_abs_int_delay",
511	    "receive interrupt delay limit in usecs",
512	    &adapter->rx_abs_int_delay,
513	    E1000_REGISTER(&adapter->hw, E1000_RADV),
514	    em_rx_abs_int_delay_dflt);
515	em_add_int_delay_sysctl(adapter, "tx_abs_int_delay",
516	    "transmit interrupt delay limit in usecs",
517	    &adapter->tx_abs_int_delay,
518	    E1000_REGISTER(&adapter->hw, E1000_TADV),
519	    em_tx_abs_int_delay_dflt);
520
521	/* Sysctl for limiting the amount of work done in the taskqueue */
522	em_add_rx_process_limit(adapter, "rx_processing_limit",
523	    "max number of rx packets to process", &adapter->rx_process_limit,
524	    em_rx_process_limit);
525
526	/* Sysctl for setting the interface flow control */
527	em_set_flow_cntrl(adapter, "flow_control",
528	    "configure flow control",
529	    &adapter->fc_setting, em_fc_setting);
530
531	/*
532	 * Validate number of transmit and receive descriptors. It
533	 * must not exceed hardware maximum, and must be multiple
534	 * of E1000_DBA_ALIGN.
535	 */
536	if (((em_txd * sizeof(struct e1000_tx_desc)) % EM_DBA_ALIGN) != 0 ||
537	    (em_txd > EM_MAX_TXD) || (em_txd < EM_MIN_TXD)) {
538		device_printf(dev, "Using %d TX descriptors instead of %d!\n",
539		    EM_DEFAULT_TXD, em_txd);
540		adapter->num_tx_desc = EM_DEFAULT_TXD;
541	} else
542		adapter->num_tx_desc = em_txd;
543
544	if (((em_rxd * sizeof(struct e1000_rx_desc)) % EM_DBA_ALIGN) != 0 ||
545	    (em_rxd > EM_MAX_RXD) || (em_rxd < EM_MIN_RXD)) {
546		device_printf(dev, "Using %d RX descriptors instead of %d!\n",
547		    EM_DEFAULT_RXD, em_rxd);
548		adapter->num_rx_desc = EM_DEFAULT_RXD;
549	} else
550		adapter->num_rx_desc = em_rxd;
551
552	adapter->hw.mac.autoneg = DO_AUTO_NEG;
553	adapter->hw.phy.autoneg_wait_to_complete = FALSE;
554	adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
555
556	/* Copper options */
557	if (adapter->hw.phy.media_type == e1000_media_type_copper) {
558		adapter->hw.phy.mdix = AUTO_ALL_MODES;
559		adapter->hw.phy.disable_polarity_correction = FALSE;
560		adapter->hw.phy.ms_type = EM_MASTER_SLAVE;
561	}
562
563	/*
564	 * Set the frame limits assuming
565	 * standard ethernet sized frames.
566	 */
567	adapter->max_frame_size = ETHERMTU + ETHER_HDR_LEN + ETHERNET_FCS_SIZE;
568	adapter->min_frame_size = ETH_ZLEN + ETHERNET_FCS_SIZE;
569
570	/*
571	 * This controls when hardware reports transmit completion
572	 * status.
573	 */
574	adapter->hw.mac.report_tx_early = 1;
575
576	/*
577	** Get queue/ring memory
578	*/
579	if (em_allocate_queues(adapter)) {
580		error = ENOMEM;
581		goto err_pci;
582	}
583
584	/* Allocate multicast array memory. */
585	adapter->mta = malloc(sizeof(u8) * ETH_ADDR_LEN *
586	    MAX_NUM_MULTICAST_ADDRESSES, M_DEVBUF, M_NOWAIT);
587	if (adapter->mta == NULL) {
588		device_printf(dev, "Can not allocate multicast setup array\n");
589		error = ENOMEM;
590		goto err_late;
591	}
592
593	/* Check SOL/IDER usage */
594	if (e1000_check_reset_block(&adapter->hw))
595		device_printf(dev, "PHY reset is blocked"
596		    " due to SOL/IDER session.\n");
597
598	/*
599	** Start from a known state, this is
600	** important in reading the nvm and
601	** mac from that.
602	*/
603	e1000_reset_hw(&adapter->hw);
604
605	/* Make sure we have a good EEPROM before we read from it */
606	if (e1000_validate_nvm_checksum(&adapter->hw) < 0) {
607		/*
608		** Some PCI-E parts fail the first check due to
609		** the link being in sleep state, call it again,
610		** if it fails a second time its a real issue.
611		*/
612		if (e1000_validate_nvm_checksum(&adapter->hw) < 0) {
613			device_printf(dev,
614			    "The EEPROM Checksum Is Not Valid\n");
615			error = EIO;
616			goto err_late;
617		}
618	}
619
620	/* Copy the permanent MAC address out of the EEPROM */
621	if (e1000_read_mac_addr(&adapter->hw) < 0) {
622		device_printf(dev, "EEPROM read error while reading MAC"
623		    " address\n");
624		error = EIO;
625		goto err_late;
626	}
627
628	if (!em_is_valid_ether_addr(adapter->hw.mac.addr)) {
629		device_printf(dev, "Invalid MAC address\n");
630		error = EIO;
631		goto err_late;
632	}
633
634	/*
635	**  Do interrupt configuration
636	*/
637	if (adapter->msix > 1) /* Do MSIX */
638		error = em_allocate_msix(adapter);
639	else  /* MSI or Legacy */
640		error = em_allocate_legacy(adapter);
641	if (error)
642		goto err_late;
643
644	/*
645	 * Get Wake-on-Lan and Management info for later use
646	 */
647	em_get_wakeup(dev);
648
649	/* Setup OS specific network interface */
650	if (em_setup_interface(dev, adapter) != 0)
651		goto err_late;
652
653	em_reset(adapter);
654
655	/* Initialize statistics */
656	em_update_stats_counters(adapter);
657
658	adapter->hw.mac.get_link_status = 1;
659	em_update_link_status(adapter);
660
661	/* Register for VLAN events */
662	adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
663	    em_register_vlan, adapter, EVENTHANDLER_PRI_FIRST);
664	adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
665	    em_unregister_vlan, adapter, EVENTHANDLER_PRI_FIRST);
666
667	em_add_hw_stats(adapter);
668
669	/* Non-AMT based hardware can now take control from firmware */
670	if (adapter->has_manage && !adapter->has_amt)
671		em_get_hw_control(adapter);
672
673	/* Tell the stack that the interface is not active */
674	adapter->ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
675
676	adapter->led_dev = led_create(em_led_func, adapter,
677	    device_get_nameunit(dev));
678
679	INIT_DEBUGOUT("em_attach: end");
680
681	return (0);
682
683err_late:
684	em_free_transmit_structures(adapter);
685	em_free_receive_structures(adapter);
686	em_release_hw_control(adapter);
687	if (adapter->ifp != NULL)
688		if_free(adapter->ifp);
689err_pci:
690	em_free_pci_resources(adapter);
691	free(adapter->mta, M_DEVBUF);
692	EM_CORE_LOCK_DESTROY(adapter);
693
694	return (error);
695}
696
697/*********************************************************************
698 *  Device removal routine
699 *
700 *  The detach entry point is called when the driver is being removed.
701 *  This routine stops the adapter and deallocates all the resources
702 *  that were allocated for driver operation.
703 *
704 *  return 0 on success, positive on failure
705 *********************************************************************/
706
707static int
708em_detach(device_t dev)
709{
710	struct adapter	*adapter = device_get_softc(dev);
711	struct ifnet	*ifp = adapter->ifp;
712
713	INIT_DEBUGOUT("em_detach: begin");
714
715	/* Make sure VLANS are not using driver */
716	if (adapter->ifp->if_vlantrunk != NULL) {
717		device_printf(dev,"Vlan in use, detach first\n");
718		return (EBUSY);
719	}
720
721#ifdef DEVICE_POLLING
722	if (ifp->if_capenable & IFCAP_POLLING)
723		ether_poll_deregister(ifp);
724#endif
725
726	if (adapter->led_dev != NULL)
727		led_destroy(adapter->led_dev);
728
729	EM_CORE_LOCK(adapter);
730	adapter->in_detach = 1;
731	em_stop(adapter);
732	EM_CORE_UNLOCK(adapter);
733	EM_CORE_LOCK_DESTROY(adapter);
734
735	e1000_phy_hw_reset(&adapter->hw);
736
737	em_release_manageability(adapter);
738	em_release_hw_control(adapter);
739
740	/* Unregister VLAN events */
741	if (adapter->vlan_attach != NULL)
742		EVENTHANDLER_DEREGISTER(vlan_config, adapter->vlan_attach);
743	if (adapter->vlan_detach != NULL)
744		EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach);
745
746	ether_ifdetach(adapter->ifp);
747	callout_drain(&adapter->timer);
748
749	em_free_pci_resources(adapter);
750	bus_generic_detach(dev);
751	if_free(ifp);
752
753	em_free_transmit_structures(adapter);
754	em_free_receive_structures(adapter);
755
756	em_release_hw_control(adapter);
757	free(adapter->mta, M_DEVBUF);
758
759	return (0);
760}
761
762/*********************************************************************
763 *
764 *  Shutdown entry point
765 *
766 **********************************************************************/
767
768static int
769em_shutdown(device_t dev)
770{
771	return em_suspend(dev);
772}
773
774/*
775 * Suspend/resume device methods.
776 */
777static int
778em_suspend(device_t dev)
779{
780	struct adapter *adapter = device_get_softc(dev);
781
782	EM_CORE_LOCK(adapter);
783
784        em_release_manageability(adapter);
785	em_release_hw_control(adapter);
786	em_enable_wakeup(dev);
787
788	EM_CORE_UNLOCK(adapter);
789
790	return bus_generic_suspend(dev);
791}
792
793static int
794em_resume(device_t dev)
795{
796	struct adapter *adapter = device_get_softc(dev);
797	struct ifnet *ifp = adapter->ifp;
798
799	EM_CORE_LOCK(adapter);
800	em_init_locked(adapter);
801	em_init_manageability(adapter);
802	EM_CORE_UNLOCK(adapter);
803	em_start(ifp);
804
805	return bus_generic_resume(dev);
806}
807
808
809/*********************************************************************
810 *  Transmit entry point
811 *
812 *  em_start is called by the stack to initiate a transmit.
813 *  The driver will remain in this routine as long as there are
814 *  packets to transmit and transmit resources are available.
815 *  In case resources are not available stack is notified and
816 *  the packet is requeued.
817 **********************************************************************/
818
819#ifdef EM_MULTIQUEUE
820static int
821em_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr, struct mbuf *m)
822{
823	struct adapter  *adapter = txr->adapter;
824        struct mbuf     *next;
825        int             err = 0, enq = 0;
826
827	if ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) !=
828	    IFF_DRV_RUNNING || adapter->link_active == 0) {
829		if (m != NULL)
830			err = drbr_enqueue(ifp, txr->br, m);
831		return (err);
832	}
833
834        /* Call cleanup if number of TX descriptors low */
835	if (txr->tx_avail <= EM_TX_CLEANUP_THRESHOLD)
836		em_txeof(txr);
837
838	enq = 0;
839	if (m == NULL) {
840		next = drbr_dequeue(ifp, txr->br);
841	} else if (drbr_needs_enqueue(ifp, txr->br)) {
842		if ((err = drbr_enqueue(ifp, txr->br, m)) != 0)
843			return (err);
844		next = drbr_dequeue(ifp, txr->br);
845	} else
846		next = m;
847
848	/* Process the queue */
849	while (next != NULL) {
850		if ((err = em_xmit(txr, &next)) != 0) {
851                        if (next != NULL)
852                                err = drbr_enqueue(ifp, txr->br, next);
853                        break;
854		}
855		enq++;
856		drbr_stats_update(ifp, next->m_pkthdr.len, next->m_flags);
857		ETHER_BPF_MTAP(ifp, next);
858		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
859                        break;
860		if (txr->tx_avail < EM_MAX_SCATTER) {
861			ifp->if_drv_flags |= IFF_DRV_OACTIVE;
862			break;
863		}
864		next = drbr_dequeue(ifp, txr->br);
865	}
866
867	if (enq > 0) {
868                /* Set the watchdog */
869                txr->queue_status = EM_QUEUE_WORKING;
870		txr->watchdog_time = ticks;
871	}
872	return (err);
873}
874
875/*
876** Multiqueue capable stack interface
877*/
878static int
879em_mq_start(struct ifnet *ifp, struct mbuf *m)
880{
881	struct adapter	*adapter = ifp->if_softc;
882	struct tx_ring	*txr = adapter->tx_rings;
883	int 		error;
884
885	if (EM_TX_TRYLOCK(txr)) {
886		error = em_mq_start_locked(ifp, txr, m);
887		EM_TX_UNLOCK(txr);
888	} else
889		error = drbr_enqueue(ifp, txr->br, m);
890
891	return (error);
892}
893
894/*
895** Flush all ring buffers
896*/
897static void
898em_qflush(struct ifnet *ifp)
899{
900	struct adapter  *adapter = ifp->if_softc;
901	struct tx_ring  *txr = adapter->tx_rings;
902	struct mbuf     *m;
903
904	for (int i = 0; i < adapter->num_queues; i++, txr++) {
905		EM_TX_LOCK(txr);
906		while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
907			m_freem(m);
908		EM_TX_UNLOCK(txr);
909	}
910	if_qflush(ifp);
911}
912
913#endif /* EM_MULTIQUEUE */
914
915static void
916em_start_locked(struct ifnet *ifp, struct tx_ring *txr)
917{
918	struct adapter	*adapter = ifp->if_softc;
919	struct mbuf	*m_head;
920
921	EM_TX_LOCK_ASSERT(txr);
922
923	if ((ifp->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
924	    IFF_DRV_RUNNING)
925		return;
926
927	if (!adapter->link_active)
928		return;
929
930        /* Call cleanup if number of TX descriptors low */
931	if (txr->tx_avail <= EM_TX_CLEANUP_THRESHOLD)
932		em_txeof(txr);
933
934	while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
935		if (txr->tx_avail < EM_MAX_SCATTER) {
936			ifp->if_drv_flags |= IFF_DRV_OACTIVE;
937			break;
938		}
939                IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
940		if (m_head == NULL)
941			break;
942		/*
943		 *  Encapsulation can modify our pointer, and or make it
944		 *  NULL on failure.  In that event, we can't requeue.
945		 */
946		if (em_xmit(txr, &m_head)) {
947			if (m_head == NULL)
948				break;
949			ifp->if_drv_flags |= IFF_DRV_OACTIVE;
950			IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
951			break;
952		}
953
954		/* Send a copy of the frame to the BPF listener */
955		ETHER_BPF_MTAP(ifp, m_head);
956
957		/* Set timeout in case hardware has problems transmitting. */
958		txr->watchdog_time = ticks;
959                txr->queue_status = EM_QUEUE_WORKING;
960	}
961
962	return;
963}
964
965static void
966em_start(struct ifnet *ifp)
967{
968	struct adapter	*adapter = ifp->if_softc;
969	struct tx_ring	*txr = adapter->tx_rings;
970
971	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
972		EM_TX_LOCK(txr);
973		em_start_locked(ifp, txr);
974		EM_TX_UNLOCK(txr);
975	}
976	return;
977}
978
979/*********************************************************************
980 *  Ioctl entry point
981 *
982 *  em_ioctl is called when the user wants to configure the
983 *  interface.
984 *
985 *  return 0 on success, positive on failure
986 **********************************************************************/
987
988static int
989em_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
990{
991	struct adapter	*adapter = ifp->if_softc;
992	struct ifreq *ifr = (struct ifreq *)data;
993#ifdef INET
994	struct ifaddr *ifa = (struct ifaddr *)data;
995#endif
996	int error = 0;
997
998	if (adapter->in_detach)
999		return (error);
1000
1001	switch (command) {
1002	case SIOCSIFADDR:
1003#ifdef INET
1004		if (ifa->ifa_addr->sa_family == AF_INET) {
1005			/*
1006			 * XXX
1007			 * Since resetting hardware takes a very long time
1008			 * and results in link renegotiation we only
1009			 * initialize the hardware only when it is absolutely
1010			 * required.
1011			 */
1012			ifp->if_flags |= IFF_UP;
1013			if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) {
1014				EM_CORE_LOCK(adapter);
1015				em_init_locked(adapter);
1016				EM_CORE_UNLOCK(adapter);
1017			}
1018			arp_ifinit(ifp, ifa);
1019		} else
1020#endif
1021			error = ether_ioctl(ifp, command, data);
1022		break;
1023	case SIOCSIFMTU:
1024	    {
1025		int max_frame_size;
1026
1027		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)");
1028
1029		EM_CORE_LOCK(adapter);
1030		switch (adapter->hw.mac.type) {
1031		case e1000_82571:
1032		case e1000_82572:
1033		case e1000_ich9lan:
1034		case e1000_ich10lan:
1035		case e1000_pch2lan:
1036		case e1000_82574:
1037		case e1000_80003es2lan:	/* 9K Jumbo Frame size */
1038			max_frame_size = 9234;
1039			break;
1040		case e1000_pchlan:
1041			max_frame_size = 4096;
1042			break;
1043			/* Adapters that do not support jumbo frames */
1044		case e1000_82583:
1045		case e1000_ich8lan:
1046			max_frame_size = ETHER_MAX_LEN;
1047			break;
1048		default:
1049			max_frame_size = MAX_JUMBO_FRAME_SIZE;
1050		}
1051		if (ifr->ifr_mtu > max_frame_size - ETHER_HDR_LEN -
1052		    ETHER_CRC_LEN) {
1053			EM_CORE_UNLOCK(adapter);
1054			error = EINVAL;
1055			break;
1056		}
1057
1058		ifp->if_mtu = ifr->ifr_mtu;
1059		adapter->max_frame_size =
1060		    ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
1061		em_init_locked(adapter);
1062		EM_CORE_UNLOCK(adapter);
1063		break;
1064	    }
1065	case SIOCSIFFLAGS:
1066		IOCTL_DEBUGOUT("ioctl rcv'd:\
1067		    SIOCSIFFLAGS (Set Interface Flags)");
1068		EM_CORE_LOCK(adapter);
1069		if (ifp->if_flags & IFF_UP) {
1070			if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) {
1071				if ((ifp->if_flags ^ adapter->if_flags) &
1072				    (IFF_PROMISC | IFF_ALLMULTI)) {
1073					em_disable_promisc(adapter);
1074					em_set_promisc(adapter);
1075				}
1076			} else
1077				em_init_locked(adapter);
1078		} else
1079			if (ifp->if_drv_flags & IFF_DRV_RUNNING)
1080				em_stop(adapter);
1081		adapter->if_flags = ifp->if_flags;
1082		EM_CORE_UNLOCK(adapter);
1083		break;
1084	case SIOCADDMULTI:
1085	case SIOCDELMULTI:
1086		IOCTL_DEBUGOUT("ioctl rcv'd: SIOC(ADD|DEL)MULTI");
1087		if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1088			EM_CORE_LOCK(adapter);
1089			em_disable_intr(adapter);
1090			em_set_multi(adapter);
1091#ifdef DEVICE_POLLING
1092			if (!(ifp->if_capenable & IFCAP_POLLING))
1093#endif
1094				em_enable_intr(adapter);
1095			EM_CORE_UNLOCK(adapter);
1096		}
1097		break;
1098	case SIOCSIFMEDIA:
1099		/*
1100		** As the speed/duplex settings are being
1101		** changed, we need to reset the PHY.
1102		*/
1103		adapter->hw.phy.reset_disable = FALSE;
1104		/* Check SOL/IDER usage */
1105		EM_CORE_LOCK(adapter);
1106		if (e1000_check_reset_block(&adapter->hw)) {
1107			EM_CORE_UNLOCK(adapter);
1108			device_printf(adapter->dev, "Media change is"
1109			    " blocked due to SOL/IDER session.\n");
1110			break;
1111		}
1112		EM_CORE_UNLOCK(adapter);
1113		/* falls thru */
1114	case SIOCGIFMEDIA:
1115		IOCTL_DEBUGOUT("ioctl rcv'd: \
1116		    SIOCxIFMEDIA (Get/Set Interface Media)");
1117		error = ifmedia_ioctl(ifp, ifr, &adapter->media, command);
1118		break;
1119	case SIOCSIFCAP:
1120	    {
1121		int mask, reinit;
1122
1123		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFCAP (Set Capabilities)");
1124		reinit = 0;
1125		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
1126#ifdef DEVICE_POLLING
1127		if (mask & IFCAP_POLLING) {
1128			if (ifr->ifr_reqcap & IFCAP_POLLING) {
1129				error = ether_poll_register(em_poll, ifp);
1130				if (error)
1131					return (error);
1132				EM_CORE_LOCK(adapter);
1133				em_disable_intr(adapter);
1134				ifp->if_capenable |= IFCAP_POLLING;
1135				EM_CORE_UNLOCK(adapter);
1136			} else {
1137				error = ether_poll_deregister(ifp);
1138				/* Enable interrupt even in error case */
1139				EM_CORE_LOCK(adapter);
1140				em_enable_intr(adapter);
1141				ifp->if_capenable &= ~IFCAP_POLLING;
1142				EM_CORE_UNLOCK(adapter);
1143			}
1144		}
1145#endif
1146		if (mask & IFCAP_HWCSUM) {
1147			ifp->if_capenable ^= IFCAP_HWCSUM;
1148			reinit = 1;
1149		}
1150		if (mask & IFCAP_TSO4) {
1151			ifp->if_capenable ^= IFCAP_TSO4;
1152			reinit = 1;
1153		}
1154		if (mask & IFCAP_VLAN_HWTAGGING) {
1155			ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
1156			reinit = 1;
1157		}
1158		if (mask & IFCAP_VLAN_HWFILTER) {
1159			ifp->if_capenable ^= IFCAP_VLAN_HWFILTER;
1160			reinit = 1;
1161		}
1162		if ((mask & IFCAP_WOL) &&
1163		    (ifp->if_capabilities & IFCAP_WOL) != 0) {
1164			if (mask & IFCAP_WOL_MCAST)
1165				ifp->if_capenable ^= IFCAP_WOL_MCAST;
1166			if (mask & IFCAP_WOL_MAGIC)
1167				ifp->if_capenable ^= IFCAP_WOL_MAGIC;
1168		}
1169		if (reinit && (ifp->if_drv_flags & IFF_DRV_RUNNING))
1170			em_init(adapter);
1171		VLAN_CAPABILITIES(ifp);
1172		break;
1173	    }
1174
1175	default:
1176		error = ether_ioctl(ifp, command, data);
1177		break;
1178	}
1179
1180	return (error);
1181}
1182
1183
1184/*********************************************************************
1185 *  Init entry point
1186 *
1187 *  This routine is used in two ways. It is used by the stack as
1188 *  init entry point in network interface structure. It is also used
1189 *  by the driver as a hw/sw initialization routine to get to a
1190 *  consistent state.
1191 *
1192 *  return 0 on success, positive on failure
1193 **********************************************************************/
1194
1195static void
1196em_init_locked(struct adapter *adapter)
1197{
1198	struct ifnet	*ifp = adapter->ifp;
1199	device_t	dev = adapter->dev;
1200	u32		pba;
1201
1202	INIT_DEBUGOUT("em_init: begin");
1203
1204	EM_CORE_LOCK_ASSERT(adapter);
1205
1206	em_disable_intr(adapter);
1207	callout_stop(&adapter->timer);
1208
1209	/*
1210	 * Packet Buffer Allocation (PBA)
1211	 * Writing PBA sets the receive portion of the buffer
1212	 * the remainder is used for the transmit buffer.
1213	 */
1214	switch (adapter->hw.mac.type) {
1215	/* Total Packet Buffer on these is 48K */
1216	case e1000_82571:
1217	case e1000_82572:
1218	case e1000_80003es2lan:
1219			pba = E1000_PBA_32K; /* 32K for Rx, 16K for Tx */
1220		break;
1221	case e1000_82573: /* 82573: Total Packet Buffer is 32K */
1222			pba = E1000_PBA_12K; /* 12K for Rx, 20K for Tx */
1223		break;
1224	case e1000_82574:
1225	case e1000_82583:
1226			pba = E1000_PBA_20K; /* 20K for Rx, 20K for Tx */
1227		break;
1228	case e1000_ich8lan:
1229		pba = E1000_PBA_8K;
1230		break;
1231	case e1000_ich9lan:
1232	case e1000_ich10lan:
1233		pba = E1000_PBA_10K;
1234		break;
1235	case e1000_pchlan:
1236	case e1000_pch2lan:
1237		pba = E1000_PBA_26K;
1238		break;
1239	default:
1240		if (adapter->max_frame_size > 8192)
1241			pba = E1000_PBA_40K; /* 40K for Rx, 24K for Tx */
1242		else
1243			pba = E1000_PBA_48K; /* 48K for Rx, 16K for Tx */
1244	}
1245
1246	INIT_DEBUGOUT1("em_init: pba=%dK",pba);
1247	E1000_WRITE_REG(&adapter->hw, E1000_PBA, pba);
1248
1249	/* Get the latest mac address, User can use a LAA */
1250        bcopy(IF_LLADDR(adapter->ifp), adapter->hw.mac.addr,
1251              ETHER_ADDR_LEN);
1252
1253	/* Put the address into the Receive Address Array */
1254	e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);
1255
1256	/*
1257	 * With the 82571 adapter, RAR[0] may be overwritten
1258	 * when the other port is reset, we make a duplicate
1259	 * in RAR[14] for that eventuality, this assures
1260	 * the interface continues to function.
1261	 */
1262	if (adapter->hw.mac.type == e1000_82571) {
1263		e1000_set_laa_state_82571(&adapter->hw, TRUE);
1264		e1000_rar_set(&adapter->hw, adapter->hw.mac.addr,
1265		    E1000_RAR_ENTRIES - 1);
1266	}
1267
1268	/* Initialize the hardware */
1269	em_reset(adapter);
1270	em_update_link_status(adapter);
1271
1272	/* Setup VLAN support, basic and offload if available */
1273	E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);
1274
1275	/* Set hardware offload abilities */
1276	ifp->if_hwassist = 0;
1277	if (ifp->if_capenable & IFCAP_TXCSUM)
1278		ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
1279	if (ifp->if_capenable & IFCAP_TSO4)
1280		ifp->if_hwassist |= CSUM_TSO;
1281
1282	/* Configure for OS presence */
1283	em_init_manageability(adapter);
1284
1285	/* Prepare transmit descriptors and buffers */
1286	em_setup_transmit_structures(adapter);
1287	em_initialize_transmit_unit(adapter);
1288
1289	/* Setup Multicast table */
1290	em_set_multi(adapter);
1291
1292	/*
1293	** Figure out the desired mbuf
1294	** pool for doing jumbos
1295	*/
1296	if (adapter->max_frame_size <= 2048)
1297		adapter->rx_mbuf_sz = MCLBYTES;
1298	else if (adapter->max_frame_size <= 4096)
1299		adapter->rx_mbuf_sz = MJUMPAGESIZE;
1300	else
1301		adapter->rx_mbuf_sz = MJUM9BYTES;
1302
1303	/* Prepare receive descriptors and buffers */
1304	if (em_setup_receive_structures(adapter)) {
1305		device_printf(dev, "Could not setup receive structures\n");
1306		em_stop(adapter);
1307		return;
1308	}
1309	em_initialize_receive_unit(adapter);
1310
1311	/* Use real VLAN Filter support? */
1312	if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) {
1313		if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
1314			/* Use real VLAN Filter support */
1315			em_setup_vlan_hw_support(adapter);
1316		else {
1317			u32 ctrl;
1318			ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
1319			ctrl |= E1000_CTRL_VME;
1320			E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
1321		}
1322	}
1323
1324	/* Don't lose promiscuous settings */
1325	em_set_promisc(adapter);
1326
1327	ifp->if_drv_flags |= IFF_DRV_RUNNING;
1328	ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
1329
1330	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
1331	e1000_clear_hw_cntrs_base_generic(&adapter->hw);
1332
1333	/* MSI/X configuration for 82574 */
1334	if (adapter->hw.mac.type == e1000_82574) {
1335		int tmp;
1336		tmp = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
1337		tmp |= E1000_CTRL_EXT_PBA_CLR;
1338		E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, tmp);
1339		/* Set the IVAR - interrupt vector routing. */
1340		E1000_WRITE_REG(&adapter->hw, E1000_IVAR, adapter->ivars);
1341	}
1342
1343#ifdef DEVICE_POLLING
1344	/*
1345	 * Only enable interrupts if we are not polling, make sure
1346	 * they are off otherwise.
1347	 */
1348	if (ifp->if_capenable & IFCAP_POLLING)
1349		em_disable_intr(adapter);
1350	else
1351#endif /* DEVICE_POLLING */
1352		em_enable_intr(adapter);
1353
1354	/* AMT based hardware can now take control from firmware */
1355	if (adapter->has_manage && adapter->has_amt)
1356		em_get_hw_control(adapter);
1357
1358	/* Don't reset the phy next time init gets called */
1359	adapter->hw.phy.reset_disable = TRUE;
1360}
1361
1362static void
1363em_init(void *arg)
1364{
1365	struct adapter *adapter = arg;
1366
1367	EM_CORE_LOCK(adapter);
1368	em_init_locked(adapter);
1369	EM_CORE_UNLOCK(adapter);
1370}
1371
1372
1373#ifdef DEVICE_POLLING
1374/*********************************************************************
1375 *
1376 *  Legacy polling routine: note this only works with single queue
1377 *
1378 *********************************************************************/
1379static int
1380em_poll(struct ifnet *ifp, enum poll_cmd cmd, int count)
1381{
1382	struct adapter *adapter = ifp->if_softc;
1383	struct tx_ring	*txr = adapter->tx_rings;
1384	struct rx_ring	*rxr = adapter->rx_rings;
1385	u32		reg_icr;
1386	int		rx_done;
1387
1388	EM_CORE_LOCK(adapter);
1389	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
1390		EM_CORE_UNLOCK(adapter);
1391		return (0);
1392	}
1393
1394	if (cmd == POLL_AND_CHECK_STATUS) {
1395		reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1396		if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
1397			callout_stop(&adapter->timer);
1398			adapter->hw.mac.get_link_status = 1;
1399			em_update_link_status(adapter);
1400			callout_reset(&adapter->timer, hz,
1401			    em_local_timer, adapter);
1402		}
1403	}
1404	EM_CORE_UNLOCK(adapter);
1405
1406	em_rxeof(rxr, count, &rx_done);
1407
1408	EM_TX_LOCK(txr);
1409	em_txeof(txr);
1410#ifdef EM_MULTIQUEUE
1411	if (!drbr_empty(ifp, txr->br))
1412		em_mq_start_locked(ifp, txr, NULL);
1413#else
1414	if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1415		em_start_locked(ifp, txr);
1416#endif
1417	EM_TX_UNLOCK(txr);
1418
1419	return (rx_done);
1420}
1421#endif /* DEVICE_POLLING */
1422
1423
1424/*********************************************************************
1425 *
1426 *  Fast Legacy/MSI Combined Interrupt Service routine
1427 *
1428 *********************************************************************/
1429static int
1430em_irq_fast(void *arg)
1431{
1432	struct adapter	*adapter = arg;
1433	struct ifnet	*ifp;
1434	u32		reg_icr;
1435
1436	ifp = adapter->ifp;
1437
1438	reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1439
1440	/* Hot eject?  */
1441	if (reg_icr == 0xffffffff)
1442		return FILTER_STRAY;
1443
1444	/* Definitely not our interrupt.  */
1445	if (reg_icr == 0x0)
1446		return FILTER_STRAY;
1447
1448	/*
1449	 * Starting with the 82571 chip, bit 31 should be used to
1450	 * determine whether the interrupt belongs to us.
1451	 */
1452	if (adapter->hw.mac.type >= e1000_82571 &&
1453	    (reg_icr & E1000_ICR_INT_ASSERTED) == 0)
1454		return FILTER_STRAY;
1455
1456	em_disable_intr(adapter);
1457	taskqueue_enqueue(adapter->tq, &adapter->que_task);
1458
1459	/* Link status change */
1460	if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
1461		adapter->hw.mac.get_link_status = 1;
1462		taskqueue_enqueue(taskqueue_fast, &adapter->link_task);
1463	}
1464
1465	if (reg_icr & E1000_ICR_RXO)
1466		adapter->rx_overruns++;
1467	return FILTER_HANDLED;
1468}
1469
1470/* Combined RX/TX handler, used by Legacy and MSI */
1471static void
1472em_handle_que(void *context, int pending)
1473{
1474	struct adapter	*adapter = context;
1475	struct ifnet	*ifp = adapter->ifp;
1476	struct tx_ring	*txr = adapter->tx_rings;
1477	struct rx_ring	*rxr = adapter->rx_rings;
1478	bool		more;
1479
1480
1481	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1482		more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
1483
1484		EM_TX_LOCK(txr);
1485		em_txeof(txr);
1486#ifdef EM_MULTIQUEUE
1487		if (!drbr_empty(ifp, txr->br))
1488			em_mq_start_locked(ifp, txr, NULL);
1489#else
1490		if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1491			em_start_locked(ifp, txr);
1492#endif
1493		em_txeof(txr);
1494		EM_TX_UNLOCK(txr);
1495		if (more) {
1496			taskqueue_enqueue(adapter->tq, &adapter->que_task);
1497			return;
1498		}
1499	}
1500
1501	em_enable_intr(adapter);
1502	return;
1503}
1504
1505
1506/*********************************************************************
1507 *
1508 *  MSIX Interrupt Service Routines
1509 *
1510 **********************************************************************/
1511static void
1512em_msix_tx(void *arg)
1513{
1514	struct tx_ring *txr = arg;
1515	struct adapter *adapter = txr->adapter;
1516	bool		more;
1517
1518	++txr->tx_irq;
1519	EM_TX_LOCK(txr);
1520	more = em_txeof(txr);
1521	EM_TX_UNLOCK(txr);
1522	if (more)
1523		taskqueue_enqueue(txr->tq, &txr->tx_task);
1524	else
1525		/* Reenable this interrupt */
1526		E1000_WRITE_REG(&adapter->hw, E1000_IMS, txr->ims);
1527	return;
1528}
1529
1530/*********************************************************************
1531 *
1532 *  MSIX RX Interrupt Service routine
1533 *
1534 **********************************************************************/
1535
1536static void
1537em_msix_rx(void *arg)
1538{
1539	struct rx_ring	*rxr = arg;
1540	struct adapter	*adapter = rxr->adapter;
1541	bool		more;
1542
1543	++rxr->rx_irq;
1544	more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
1545	if (more)
1546		taskqueue_enqueue(rxr->tq, &rxr->rx_task);
1547	else
1548		/* Reenable this interrupt */
1549		E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxr->ims);
1550	return;
1551}
1552
1553/*********************************************************************
1554 *
1555 *  MSIX Link Fast Interrupt Service routine
1556 *
1557 **********************************************************************/
1558static void
1559em_msix_link(void *arg)
1560{
1561	struct adapter	*adapter = arg;
1562	u32		reg_icr;
1563
1564	++adapter->link_irq;
1565	reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1566
1567	if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
1568		adapter->hw.mac.get_link_status = 1;
1569		em_handle_link(adapter, 0);
1570	} else
1571		E1000_WRITE_REG(&adapter->hw, E1000_IMS,
1572		    EM_MSIX_LINK | E1000_IMS_LSC);
1573	return;
1574}
1575
1576static void
1577em_handle_rx(void *context, int pending)
1578{
1579	struct rx_ring	*rxr = context;
1580	struct adapter	*adapter = rxr->adapter;
1581        bool            more;
1582
1583	more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
1584	if (more)
1585		taskqueue_enqueue(rxr->tq, &rxr->rx_task);
1586	else
1587		/* Reenable this interrupt */
1588		E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxr->ims);
1589}
1590
1591static void
1592em_handle_tx(void *context, int pending)
1593{
1594	struct tx_ring	*txr = context;
1595	struct adapter	*adapter = txr->adapter;
1596	struct ifnet	*ifp = adapter->ifp;
1597
1598	EM_TX_LOCK(txr);
1599	em_txeof(txr);
1600#ifdef EM_MULTIQUEUE
1601	if (!drbr_empty(ifp, txr->br))
1602		em_mq_start_locked(ifp, txr, NULL);
1603#else
1604	if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1605		em_start_locked(ifp, txr);
1606#endif
1607	em_txeof(txr);
1608	E1000_WRITE_REG(&adapter->hw, E1000_IMS, txr->ims);
1609	EM_TX_UNLOCK(txr);
1610}
1611
1612static void
1613em_handle_link(void *context, int pending)
1614{
1615	struct adapter	*adapter = context;
1616	struct ifnet *ifp = adapter->ifp;
1617
1618	if (!(ifp->if_drv_flags & IFF_DRV_RUNNING))
1619		return;
1620
1621	EM_CORE_LOCK(adapter);
1622	callout_stop(&adapter->timer);
1623	em_update_link_status(adapter);
1624	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
1625	E1000_WRITE_REG(&adapter->hw, E1000_IMS,
1626	    EM_MSIX_LINK | E1000_IMS_LSC);
1627	EM_CORE_UNLOCK(adapter);
1628}
1629
1630
1631/*********************************************************************
1632 *
1633 *  Media Ioctl callback
1634 *
1635 *  This routine is called whenever the user queries the status of
1636 *  the interface using ifconfig.
1637 *
1638 **********************************************************************/
1639static void
1640em_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
1641{
1642	struct adapter *adapter = ifp->if_softc;
1643	u_char fiber_type = IFM_1000_SX;
1644
1645	INIT_DEBUGOUT("em_media_status: begin");
1646
1647	EM_CORE_LOCK(adapter);
1648	em_update_link_status(adapter);
1649
1650	ifmr->ifm_status = IFM_AVALID;
1651	ifmr->ifm_active = IFM_ETHER;
1652
1653	if (!adapter->link_active) {
1654		EM_CORE_UNLOCK(adapter);
1655		return;
1656	}
1657
1658	ifmr->ifm_status |= IFM_ACTIVE;
1659
1660	if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
1661	    (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
1662		ifmr->ifm_active |= fiber_type | IFM_FDX;
1663	} else {
1664		switch (adapter->link_speed) {
1665		case 10:
1666			ifmr->ifm_active |= IFM_10_T;
1667			break;
1668		case 100:
1669			ifmr->ifm_active |= IFM_100_TX;
1670			break;
1671		case 1000:
1672			ifmr->ifm_active |= IFM_1000_T;
1673			break;
1674		}
1675		if (adapter->link_duplex == FULL_DUPLEX)
1676			ifmr->ifm_active |= IFM_FDX;
1677		else
1678			ifmr->ifm_active |= IFM_HDX;
1679	}
1680	EM_CORE_UNLOCK(adapter);
1681}
1682
1683/*********************************************************************
1684 *
1685 *  Media Ioctl callback
1686 *
1687 *  This routine is called when the user changes speed/duplex using
1688 *  media/mediopt option with ifconfig.
1689 *
1690 **********************************************************************/
1691static int
1692em_media_change(struct ifnet *ifp)
1693{
1694	struct adapter *adapter = ifp->if_softc;
1695	struct ifmedia  *ifm = &adapter->media;
1696
1697	INIT_DEBUGOUT("em_media_change: begin");
1698
1699	if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
1700		return (EINVAL);
1701
1702	EM_CORE_LOCK(adapter);
1703	switch (IFM_SUBTYPE(ifm->ifm_media)) {
1704	case IFM_AUTO:
1705		adapter->hw.mac.autoneg = DO_AUTO_NEG;
1706		adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
1707		break;
1708	case IFM_1000_LX:
1709	case IFM_1000_SX:
1710	case IFM_1000_T:
1711		adapter->hw.mac.autoneg = DO_AUTO_NEG;
1712		adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
1713		break;
1714	case IFM_100_TX:
1715		adapter->hw.mac.autoneg = FALSE;
1716		adapter->hw.phy.autoneg_advertised = 0;
1717		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1718			adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_FULL;
1719		else
1720			adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_HALF;
1721		break;
1722	case IFM_10_T:
1723		adapter->hw.mac.autoneg = FALSE;
1724		adapter->hw.phy.autoneg_advertised = 0;
1725		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1726			adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_FULL;
1727		else
1728			adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_HALF;
1729		break;
1730	default:
1731		device_printf(adapter->dev, "Unsupported media type\n");
1732	}
1733
1734	em_init_locked(adapter);
1735	EM_CORE_UNLOCK(adapter);
1736
1737	return (0);
1738}
1739
1740/*********************************************************************
1741 *
1742 *  This routine maps the mbufs to tx descriptors.
1743 *
1744 *  return 0 on success, positive on failure
1745 **********************************************************************/
1746
1747static int
1748em_xmit(struct tx_ring *txr, struct mbuf **m_headp)
1749{
1750	struct adapter		*adapter = txr->adapter;
1751	bus_dma_segment_t	segs[EM_MAX_SCATTER];
1752	bus_dmamap_t		map;
1753	struct em_buffer	*tx_buffer, *tx_buffer_mapped;
1754	struct e1000_tx_desc	*ctxd = NULL;
1755	struct mbuf		*m_head;
1756	struct ether_header	*eh;
1757	struct ip		*ip = NULL;
1758	struct tcphdr		*tp = NULL;
1759	u32			txd_upper, txd_lower, txd_used, txd_saved;
1760	int			ip_off, poff;
1761	int			nsegs, i, j, first, last = 0;
1762	int			error, do_tso, tso_desc = 0;
1763
1764	m_head = *m_headp;
1765	txd_upper = txd_lower = txd_used = txd_saved = 0;
1766	do_tso = ((m_head->m_pkthdr.csum_flags & CSUM_TSO) != 0);
1767	ip_off = poff = 0;
1768
1769	/*
1770	 * Intel recommends entire IP/TCP header length reside in a single
1771	 * buffer. If multiple descriptors are used to describe the IP and
1772	 * TCP header, each descriptor should describe one or more
1773	 * complete headers; descriptors referencing only parts of headers
1774	 * are not supported. If all layer headers are not coalesced into
1775	 * a single buffer, each buffer should not cross a 4KB boundary,
1776	 * or be larger than the maximum read request size.
1777	 * Controller also requires modifing IP/TCP header to make TSO work
1778	 * so we firstly get a writable mbuf chain then coalesce ethernet/
1779	 * IP/TCP header into a single buffer to meet the requirement of
1780	 * controller. This also simplifies IP/TCP/UDP checksum offloading
1781	 * which also has similiar restrictions.
1782	 */
1783	if (do_tso || m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD) {
1784		if (do_tso || (m_head->m_next != NULL &&
1785		    m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD)) {
1786			if (M_WRITABLE(*m_headp) == 0) {
1787				m_head = m_dup(*m_headp, M_DONTWAIT);
1788				m_freem(*m_headp);
1789				if (m_head == NULL) {
1790					*m_headp = NULL;
1791					return (ENOBUFS);
1792				}
1793				*m_headp = m_head;
1794			}
1795		}
1796		/*
1797		 * XXX
1798		 * Assume IPv4, we don't have TSO/checksum offload support
1799		 * for IPv6 yet.
1800		 */
1801		ip_off = sizeof(struct ether_header);
1802		m_head = m_pullup(m_head, ip_off);
1803		if (m_head == NULL) {
1804			*m_headp = NULL;
1805			return (ENOBUFS);
1806		}
1807		eh = mtod(m_head, struct ether_header *);
1808		if (eh->ether_type == htons(ETHERTYPE_VLAN)) {
1809			ip_off = sizeof(struct ether_vlan_header);
1810			m_head = m_pullup(m_head, ip_off);
1811			if (m_head == NULL) {
1812				*m_headp = NULL;
1813				return (ENOBUFS);
1814			}
1815		}
1816		m_head = m_pullup(m_head, ip_off + sizeof(struct ip));
1817		if (m_head == NULL) {
1818			*m_headp = NULL;
1819			return (ENOBUFS);
1820		}
1821		ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1822		poff = ip_off + (ip->ip_hl << 2);
1823		if (do_tso) {
1824			m_head = m_pullup(m_head, poff + sizeof(struct tcphdr));
1825			if (m_head == NULL) {
1826				*m_headp = NULL;
1827				return (ENOBUFS);
1828			}
1829			tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1830			/*
1831			 * TSO workaround:
1832			 *   pull 4 more bytes of data into it.
1833			 */
1834			m_head = m_pullup(m_head, poff + (tp->th_off << 2) + 4);
1835			if (m_head == NULL) {
1836				*m_headp = NULL;
1837				return (ENOBUFS);
1838			}
1839			ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1840			ip->ip_len = 0;
1841			ip->ip_sum = 0;
1842			/*
1843			 * The pseudo TCP checksum does not include TCP payload
1844			 * length so driver should recompute the checksum here
1845			 * what hardware expect to see. This is adherence of
1846			 * Microsoft's Large Send specification.
1847			 */
1848			tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1849			tp->th_sum = in_pseudo(ip->ip_src.s_addr,
1850			    ip->ip_dst.s_addr, htons(IPPROTO_TCP));
1851		} else if (m_head->m_pkthdr.csum_flags & CSUM_TCP) {
1852			m_head = m_pullup(m_head, poff + sizeof(struct tcphdr));
1853			if (m_head == NULL) {
1854				*m_headp = NULL;
1855				return (ENOBUFS);
1856			}
1857			tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1858			m_head = m_pullup(m_head, poff + (tp->th_off << 2));
1859			if (m_head == NULL) {
1860				*m_headp = NULL;
1861				return (ENOBUFS);
1862			}
1863			ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1864			tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1865		} else if (m_head->m_pkthdr.csum_flags & CSUM_UDP) {
1866			m_head = m_pullup(m_head, poff + sizeof(struct udphdr));
1867			if (m_head == NULL) {
1868				*m_headp = NULL;
1869				return (ENOBUFS);
1870			}
1871			ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1872		}
1873		*m_headp = m_head;
1874	}
1875
1876	/*
1877	 * Map the packet for DMA
1878	 *
1879	 * Capture the first descriptor index,
1880	 * this descriptor will have the index
1881	 * of the EOP which is the only one that
1882	 * now gets a DONE bit writeback.
1883	 */
1884	first = txr->next_avail_desc;
1885	tx_buffer = &txr->tx_buffers[first];
1886	tx_buffer_mapped = tx_buffer;
1887	map = tx_buffer->map;
1888
1889	error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
1890	    *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
1891
1892	/*
1893	 * There are two types of errors we can (try) to handle:
1894	 * - EFBIG means the mbuf chain was too long and bus_dma ran
1895	 *   out of segments.  Defragment the mbuf chain and try again.
1896	 * - ENOMEM means bus_dma could not obtain enough bounce buffers
1897	 *   at this point in time.  Defer sending and try again later.
1898	 * All other errors, in particular EINVAL, are fatal and prevent the
1899	 * mbuf chain from ever going through.  Drop it and report error.
1900	 */
1901	if (error == EFBIG) {
1902		struct mbuf *m;
1903
1904		m = m_defrag(*m_headp, M_DONTWAIT);
1905		if (m == NULL) {
1906			adapter->mbuf_alloc_failed++;
1907			m_freem(*m_headp);
1908			*m_headp = NULL;
1909			return (ENOBUFS);
1910		}
1911		*m_headp = m;
1912
1913		/* Try it again */
1914		error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
1915		    *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
1916
1917		if (error == ENOMEM) {
1918			adapter->no_tx_dma_setup++;
1919			return (error);
1920		} else if (error != 0) {
1921			adapter->no_tx_dma_setup++;
1922			m_freem(*m_headp);
1923			*m_headp = NULL;
1924			return (error);
1925		}
1926
1927	} else if (error == ENOMEM) {
1928		adapter->no_tx_dma_setup++;
1929		return (error);
1930	} else if (error != 0) {
1931		adapter->no_tx_dma_setup++;
1932		m_freem(*m_headp);
1933		*m_headp = NULL;
1934		return (error);
1935	}
1936
1937	/*
1938	 * TSO Hardware workaround, if this packet is not
1939	 * TSO, and is only a single descriptor long, and
1940	 * it follows a TSO burst, then we need to add a
1941	 * sentinel descriptor to prevent premature writeback.
1942	 */
1943	if ((do_tso == 0) && (txr->tx_tso == TRUE)) {
1944		if (nsegs == 1)
1945			tso_desc = TRUE;
1946		txr->tx_tso = FALSE;
1947	}
1948
1949        if (nsegs > (txr->tx_avail - 2)) {
1950                txr->no_desc_avail++;
1951		bus_dmamap_unload(txr->txtag, map);
1952		return (ENOBUFS);
1953        }
1954	m_head = *m_headp;
1955
1956	/* Do hardware assists */
1957	if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
1958		em_tso_setup(txr, m_head, ip_off, ip, tp,
1959		    &txd_upper, &txd_lower);
1960		/* we need to make a final sentinel transmit desc */
1961		tso_desc = TRUE;
1962	} else if (m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD)
1963		em_transmit_checksum_setup(txr, m_head,
1964		    ip_off, ip, &txd_upper, &txd_lower);
1965
1966	i = txr->next_avail_desc;
1967
1968	/* Set up our transmit descriptors */
1969	for (j = 0; j < nsegs; j++) {
1970		bus_size_t seg_len;
1971		bus_addr_t seg_addr;
1972
1973		tx_buffer = &txr->tx_buffers[i];
1974		ctxd = &txr->tx_base[i];
1975		seg_addr = segs[j].ds_addr;
1976		seg_len  = segs[j].ds_len;
1977		/*
1978		** TSO Workaround:
1979		** If this is the last descriptor, we want to
1980		** split it so we have a small final sentinel
1981		*/
1982		if (tso_desc && (j == (nsegs -1)) && (seg_len > 8)) {
1983			seg_len -= 4;
1984			ctxd->buffer_addr = htole64(seg_addr);
1985			ctxd->lower.data = htole32(
1986			adapter->txd_cmd | txd_lower | seg_len);
1987			ctxd->upper.data =
1988			    htole32(txd_upper);
1989			if (++i == adapter->num_tx_desc)
1990				i = 0;
1991			/* Now make the sentinel */
1992			++txd_used; /* using an extra txd */
1993			ctxd = &txr->tx_base[i];
1994			tx_buffer = &txr->tx_buffers[i];
1995			ctxd->buffer_addr =
1996			    htole64(seg_addr + seg_len);
1997			ctxd->lower.data = htole32(
1998			adapter->txd_cmd | txd_lower | 4);
1999			ctxd->upper.data =
2000			    htole32(txd_upper);
2001			last = i;
2002			if (++i == adapter->num_tx_desc)
2003				i = 0;
2004		} else {
2005			ctxd->buffer_addr = htole64(seg_addr);
2006			ctxd->lower.data = htole32(
2007			adapter->txd_cmd | txd_lower | seg_len);
2008			ctxd->upper.data =
2009			    htole32(txd_upper);
2010			last = i;
2011			if (++i == adapter->num_tx_desc)
2012				i = 0;
2013		}
2014		tx_buffer->m_head = NULL;
2015		tx_buffer->next_eop = -1;
2016	}
2017
2018	txr->next_avail_desc = i;
2019	txr->tx_avail -= nsegs;
2020	if (tso_desc) /* TSO used an extra for sentinel */
2021		txr->tx_avail -= txd_used;
2022
2023	if (m_head->m_flags & M_VLANTAG) {
2024		/* Set the vlan id. */
2025		ctxd->upper.fields.special =
2026		    htole16(m_head->m_pkthdr.ether_vtag);
2027                /* Tell hardware to add tag */
2028                ctxd->lower.data |= htole32(E1000_TXD_CMD_VLE);
2029        }
2030
2031        tx_buffer->m_head = m_head;
2032	tx_buffer_mapped->map = tx_buffer->map;
2033	tx_buffer->map = map;
2034        bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);
2035
2036        /*
2037         * Last Descriptor of Packet
2038	 * needs End Of Packet (EOP)
2039	 * and Report Status (RS)
2040         */
2041        ctxd->lower.data |=
2042	    htole32(E1000_TXD_CMD_EOP | E1000_TXD_CMD_RS);
2043	/*
2044	 * Keep track in the first buffer which
2045	 * descriptor will be written back
2046	 */
2047	tx_buffer = &txr->tx_buffers[first];
2048	tx_buffer->next_eop = last;
2049	/* Update the watchdog time early and often */
2050	txr->watchdog_time = ticks;
2051
2052	/*
2053	 * Advance the Transmit Descriptor Tail (TDT), this tells the E1000
2054	 * that this frame is available to transmit.
2055	 */
2056	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
2057	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
2058	E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), i);
2059
2060	return (0);
2061}
2062
2063static void
2064em_set_promisc(struct adapter *adapter)
2065{
2066	struct ifnet	*ifp = adapter->ifp;
2067	u32		reg_rctl;
2068
2069	reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2070
2071	if (ifp->if_flags & IFF_PROMISC) {
2072		reg_rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
2073		/* Turn this on if you want to see bad packets */
2074		if (em_debug_sbp)
2075			reg_rctl |= E1000_RCTL_SBP;
2076		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2077	} else if (ifp->if_flags & IFF_ALLMULTI) {
2078		reg_rctl |= E1000_RCTL_MPE;
2079		reg_rctl &= ~E1000_RCTL_UPE;
2080		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2081	}
2082}
2083
2084static void
2085em_disable_promisc(struct adapter *adapter)
2086{
2087	u32	reg_rctl;
2088
2089	reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2090
2091	reg_rctl &=  (~E1000_RCTL_UPE);
2092	reg_rctl &=  (~E1000_RCTL_MPE);
2093	reg_rctl &=  (~E1000_RCTL_SBP);
2094	E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2095}
2096
2097
2098/*********************************************************************
2099 *  Multicast Update
2100 *
2101 *  This routine is called whenever multicast address list is updated.
2102 *
2103 **********************************************************************/
2104
2105static void
2106em_set_multi(struct adapter *adapter)
2107{
2108	struct ifnet	*ifp = adapter->ifp;
2109	struct ifmultiaddr *ifma;
2110	u32 reg_rctl = 0;
2111	u8  *mta; /* Multicast array memory */
2112	int mcnt = 0;
2113
2114	IOCTL_DEBUGOUT("em_set_multi: begin");
2115
2116	mta = adapter->mta;
2117	bzero(mta, sizeof(u8) * ETH_ADDR_LEN * MAX_NUM_MULTICAST_ADDRESSES);
2118
2119	if (adapter->hw.mac.type == e1000_82542 &&
2120	    adapter->hw.revision_id == E1000_REVISION_2) {
2121		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2122		if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
2123			e1000_pci_clear_mwi(&adapter->hw);
2124		reg_rctl |= E1000_RCTL_RST;
2125		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2126		msec_delay(5);
2127	}
2128
2129#if __FreeBSD_version < 800000
2130	IF_ADDR_LOCK(ifp);
2131#else
2132	if_maddr_rlock(ifp);
2133#endif
2134	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
2135		if (ifma->ifma_addr->sa_family != AF_LINK)
2136			continue;
2137
2138		if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
2139			break;
2140
2141		bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
2142		    &mta[mcnt * ETH_ADDR_LEN], ETH_ADDR_LEN);
2143		mcnt++;
2144	}
2145#if __FreeBSD_version < 800000
2146	IF_ADDR_UNLOCK(ifp);
2147#else
2148	if_maddr_runlock(ifp);
2149#endif
2150	if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) {
2151		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2152		reg_rctl |= E1000_RCTL_MPE;
2153		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2154	} else
2155		e1000_update_mc_addr_list(&adapter->hw, mta, mcnt);
2156
2157	if (adapter->hw.mac.type == e1000_82542 &&
2158	    adapter->hw.revision_id == E1000_REVISION_2) {
2159		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2160		reg_rctl &= ~E1000_RCTL_RST;
2161		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2162		msec_delay(5);
2163		if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
2164			e1000_pci_set_mwi(&adapter->hw);
2165	}
2166}
2167
2168
2169/*********************************************************************
2170 *  Timer routine
2171 *
2172 *  This routine checks for link status and updates statistics.
2173 *
2174 **********************************************************************/
2175
2176static void
2177em_local_timer(void *arg)
2178{
2179	struct adapter	*adapter = arg;
2180	struct ifnet	*ifp = adapter->ifp;
2181	struct tx_ring	*txr = adapter->tx_rings;
2182
2183	EM_CORE_LOCK_ASSERT(adapter);
2184
2185	em_update_link_status(adapter);
2186	em_update_stats_counters(adapter);
2187
2188	/* Reset LAA into RAR[0] on 82571 */
2189	if ((adapter->hw.mac.type == e1000_82571) &&
2190	    e1000_get_laa_state_82571(&adapter->hw))
2191		e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);
2192
2193	/*
2194	** Don't do TX watchdog check if we've been paused
2195	*/
2196	if (adapter->pause_frames) {
2197		adapter->pause_frames = 0;
2198		goto out;
2199	}
2200	/*
2201	** Check on the state of the TX queue(s), this
2202	** can be done without the lock because its RO
2203	** and the HUNG state will be static if set.
2204	*/
2205	for (int i = 0; i < adapter->num_queues; i++, txr++)
2206		if (txr->queue_status == EM_QUEUE_HUNG)
2207			goto hung;
2208out:
2209	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
2210	return;
2211hung:
2212	/* Looks like we're hung */
2213	device_printf(adapter->dev, "Watchdog timeout -- resetting\n");
2214	device_printf(adapter->dev,
2215	    "Queue(%d) tdh = %d, hw tdt = %d\n", txr->me,
2216	    E1000_READ_REG(&adapter->hw, E1000_TDH(txr->me)),
2217	    E1000_READ_REG(&adapter->hw, E1000_TDT(txr->me)));
2218	device_printf(adapter->dev,"TX(%d) desc avail = %d,"
2219	    "Next TX to Clean = %d\n",
2220	    txr->me, txr->tx_avail, txr->next_to_clean);
2221	ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2222	adapter->watchdog_events++;
2223	em_init_locked(adapter);
2224}
2225
2226
2227static void
2228em_update_link_status(struct adapter *adapter)
2229{
2230	struct e1000_hw *hw = &adapter->hw;
2231	struct ifnet *ifp = adapter->ifp;
2232	device_t dev = adapter->dev;
2233	struct tx_ring *txr = adapter->tx_rings;
2234	u32 link_check = 0;
2235
2236	/* Get the cached link value or read phy for real */
2237	switch (hw->phy.media_type) {
2238	case e1000_media_type_copper:
2239		if (hw->mac.get_link_status) {
2240			/* Do the work to read phy */
2241			e1000_check_for_link(hw);
2242			link_check = !hw->mac.get_link_status;
2243			if (link_check) /* ESB2 fix */
2244				e1000_cfg_on_link_up(hw);
2245		} else
2246			link_check = TRUE;
2247		break;
2248	case e1000_media_type_fiber:
2249		e1000_check_for_link(hw);
2250		link_check = (E1000_READ_REG(hw, E1000_STATUS) &
2251                                 E1000_STATUS_LU);
2252		break;
2253	case e1000_media_type_internal_serdes:
2254		e1000_check_for_link(hw);
2255		link_check = adapter->hw.mac.serdes_has_link;
2256		break;
2257	default:
2258	case e1000_media_type_unknown:
2259		break;
2260	}
2261
2262	/* Now check for a transition */
2263	if (link_check && (adapter->link_active == 0)) {
2264		e1000_get_speed_and_duplex(hw, &adapter->link_speed,
2265		    &adapter->link_duplex);
2266		/* Check if we must disable SPEED_MODE bit on PCI-E */
2267		if ((adapter->link_speed != SPEED_1000) &&
2268		    ((hw->mac.type == e1000_82571) ||
2269		    (hw->mac.type == e1000_82572))) {
2270			int tarc0;
2271			tarc0 = E1000_READ_REG(hw, E1000_TARC(0));
2272			tarc0 &= ~SPEED_MODE_BIT;
2273			E1000_WRITE_REG(hw, E1000_TARC(0), tarc0);
2274		}
2275		if (bootverbose)
2276			device_printf(dev, "Link is up %d Mbps %s\n",
2277			    adapter->link_speed,
2278			    ((adapter->link_duplex == FULL_DUPLEX) ?
2279			    "Full Duplex" : "Half Duplex"));
2280		adapter->link_active = 1;
2281		adapter->smartspeed = 0;
2282		ifp->if_baudrate = adapter->link_speed * 1000000;
2283		if_link_state_change(ifp, LINK_STATE_UP);
2284	} else if (!link_check && (adapter->link_active == 1)) {
2285		ifp->if_baudrate = adapter->link_speed = 0;
2286		adapter->link_duplex = 0;
2287		if (bootverbose)
2288			device_printf(dev, "Link is Down\n");
2289		adapter->link_active = 0;
2290		/* Link down, disable watchdog */
2291		for (int i = 0; i < adapter->num_queues; i++, txr++)
2292			txr->queue_status = EM_QUEUE_IDLE;
2293		if_link_state_change(ifp, LINK_STATE_DOWN);
2294	}
2295}
2296
2297/*********************************************************************
2298 *
2299 *  This routine disables all traffic on the adapter by issuing a
2300 *  global reset on the MAC and deallocates TX/RX buffers.
2301 *
2302 *  This routine should always be called with BOTH the CORE
2303 *  and TX locks.
2304 **********************************************************************/
2305
2306static void
2307em_stop(void *arg)
2308{
2309	struct adapter	*adapter = arg;
2310	struct ifnet	*ifp = adapter->ifp;
2311	struct tx_ring	*txr = adapter->tx_rings;
2312
2313	EM_CORE_LOCK_ASSERT(adapter);
2314
2315	INIT_DEBUGOUT("em_stop: begin");
2316
2317	em_disable_intr(adapter);
2318	callout_stop(&adapter->timer);
2319
2320	/* Tell the stack that the interface is no longer active */
2321	ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
2322
2323        /* Unarm watchdog timer. */
2324	for (int i = 0; i < adapter->num_queues; i++, txr++) {
2325		EM_TX_LOCK(txr);
2326		txr->queue_status = EM_QUEUE_IDLE;
2327		EM_TX_UNLOCK(txr);
2328	}
2329
2330	e1000_reset_hw(&adapter->hw);
2331	E1000_WRITE_REG(&adapter->hw, E1000_WUC, 0);
2332
2333	e1000_led_off(&adapter->hw);
2334	e1000_cleanup_led(&adapter->hw);
2335}
2336
2337
2338/*********************************************************************
2339 *
2340 *  Determine hardware revision.
2341 *
2342 **********************************************************************/
2343static void
2344em_identify_hardware(struct adapter *adapter)
2345{
2346	device_t dev = adapter->dev;
2347
2348	/* Make sure our PCI config space has the necessary stuff set */
2349	adapter->hw.bus.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2);
2350	if (!((adapter->hw.bus.pci_cmd_word & PCIM_CMD_BUSMASTEREN) &&
2351	    (adapter->hw.bus.pci_cmd_word & PCIM_CMD_MEMEN))) {
2352		device_printf(dev, "Memory Access and/or Bus Master bits "
2353		    "were not set!\n");
2354		adapter->hw.bus.pci_cmd_word |=
2355		(PCIM_CMD_BUSMASTEREN | PCIM_CMD_MEMEN);
2356		pci_write_config(dev, PCIR_COMMAND,
2357		    adapter->hw.bus.pci_cmd_word, 2);
2358	}
2359
2360	/* Save off the information about this board */
2361	adapter->hw.vendor_id = pci_get_vendor(dev);
2362	adapter->hw.device_id = pci_get_device(dev);
2363	adapter->hw.revision_id = pci_read_config(dev, PCIR_REVID, 1);
2364	adapter->hw.subsystem_vendor_id =
2365	    pci_read_config(dev, PCIR_SUBVEND_0, 2);
2366	adapter->hw.subsystem_device_id =
2367	    pci_read_config(dev, PCIR_SUBDEV_0, 2);
2368
2369	/* Do Shared Code Init and Setup */
2370	if (e1000_set_mac_type(&adapter->hw)) {
2371		device_printf(dev, "Setup init failure\n");
2372		return;
2373	}
2374}
2375
2376static int
2377em_allocate_pci_resources(struct adapter *adapter)
2378{
2379	device_t	dev = adapter->dev;
2380	int		rid;
2381
2382	rid = PCIR_BAR(0);
2383	adapter->memory = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
2384	    &rid, RF_ACTIVE);
2385	if (adapter->memory == NULL) {
2386		device_printf(dev, "Unable to allocate bus resource: memory\n");
2387		return (ENXIO);
2388	}
2389	adapter->osdep.mem_bus_space_tag =
2390	    rman_get_bustag(adapter->memory);
2391	adapter->osdep.mem_bus_space_handle =
2392	    rman_get_bushandle(adapter->memory);
2393	adapter->hw.hw_addr = (u8 *)&adapter->osdep.mem_bus_space_handle;
2394
2395	/* Default to a single queue */
2396	adapter->num_queues = 1;
2397
2398	/*
2399	 * Setup MSI/X or MSI if PCI Express
2400	 */
2401	adapter->msix = em_setup_msix(adapter);
2402
2403	adapter->hw.back = &adapter->osdep;
2404
2405	return (0);
2406}
2407
2408/*********************************************************************
2409 *
2410 *  Setup the Legacy or MSI Interrupt handler
2411 *
2412 **********************************************************************/
2413int
2414em_allocate_legacy(struct adapter *adapter)
2415{
2416	device_t dev = adapter->dev;
2417	int error, rid = 0;
2418
2419	/* Manually turn off all interrupts */
2420	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2421
2422	if (adapter->msix == 1) /* using MSI */
2423		rid = 1;
2424	/* We allocate a single interrupt resource */
2425	adapter->res = bus_alloc_resource_any(dev,
2426	    SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2427	if (adapter->res == NULL) {
2428		device_printf(dev, "Unable to allocate bus resource: "
2429		    "interrupt\n");
2430		return (ENXIO);
2431	}
2432
2433	/*
2434	 * Allocate a fast interrupt and the associated
2435	 * deferred processing contexts.
2436	 */
2437	TASK_INIT(&adapter->que_task, 0, em_handle_que, adapter);
2438	TASK_INIT(&adapter->link_task, 0, em_handle_link, adapter);
2439	adapter->tq = taskqueue_create_fast("em_taskq", M_NOWAIT,
2440	    taskqueue_thread_enqueue, &adapter->tq);
2441	taskqueue_start_threads(&adapter->tq, 1, PI_NET, "%s taskq",
2442	    device_get_nameunit(adapter->dev));
2443	if ((error = bus_setup_intr(dev, adapter->res, INTR_TYPE_NET,
2444	    em_irq_fast, NULL, adapter, &adapter->tag)) != 0) {
2445		device_printf(dev, "Failed to register fast interrupt "
2446			    "handler: %d\n", error);
2447		taskqueue_free(adapter->tq);
2448		adapter->tq = NULL;
2449		return (error);
2450	}
2451
2452	return (0);
2453}
2454
2455/*********************************************************************
2456 *
2457 *  Setup the MSIX Interrupt handlers
2458 *   This is not really Multiqueue, rather
2459 *   its just multiple interrupt vectors.
2460 *
2461 **********************************************************************/
2462int
2463em_allocate_msix(struct adapter *adapter)
2464{
2465	device_t	dev = adapter->dev;
2466	struct		tx_ring *txr = adapter->tx_rings;
2467	struct		rx_ring *rxr = adapter->rx_rings;
2468	int		error, rid, vector = 0;
2469
2470
2471	/* Make sure all interrupts are disabled */
2472	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2473
2474	/* First set up ring resources */
2475	for (int i = 0; i < adapter->num_queues; i++, txr++, rxr++) {
2476
2477		/* RX ring */
2478		rid = vector + 1;
2479
2480		rxr->res = bus_alloc_resource_any(dev,
2481		    SYS_RES_IRQ, &rid, RF_ACTIVE);
2482		if (rxr->res == NULL) {
2483			device_printf(dev,
2484			    "Unable to allocate bus resource: "
2485			    "RX MSIX Interrupt %d\n", i);
2486			return (ENXIO);
2487		}
2488		if ((error = bus_setup_intr(dev, rxr->res,
2489		    INTR_TYPE_NET | INTR_MPSAFE, NULL, em_msix_rx,
2490		    rxr, &rxr->tag)) != 0) {
2491			device_printf(dev, "Failed to register RX handler");
2492			return (error);
2493		}
2494#if __FreeBSD_version >= 800504
2495		bus_describe_intr(dev, rxr->res, rxr->tag, "rx %d", i);
2496#endif
2497		rxr->msix = vector++; /* NOTE increment vector for TX */
2498		TASK_INIT(&rxr->rx_task, 0, em_handle_rx, rxr);
2499		rxr->tq = taskqueue_create_fast("em_rxq", M_NOWAIT,
2500		    taskqueue_thread_enqueue, &rxr->tq);
2501		taskqueue_start_threads(&rxr->tq, 1, PI_NET, "%s rxq",
2502		    device_get_nameunit(adapter->dev));
2503		/*
2504		** Set the bit to enable interrupt
2505		** in E1000_IMS -- bits 20 and 21
2506		** are for RX0 and RX1, note this has
2507		** NOTHING to do with the MSIX vector
2508		*/
2509		rxr->ims = 1 << (20 + i);
2510		adapter->ivars |= (8 | rxr->msix) << (i * 4);
2511
2512		/* TX ring */
2513		rid = vector + 1;
2514		txr->res = bus_alloc_resource_any(dev,
2515		    SYS_RES_IRQ, &rid, RF_ACTIVE);
2516		if (txr->res == NULL) {
2517			device_printf(dev,
2518			    "Unable to allocate bus resource: "
2519			    "TX MSIX Interrupt %d\n", i);
2520			return (ENXIO);
2521		}
2522		if ((error = bus_setup_intr(dev, txr->res,
2523		    INTR_TYPE_NET | INTR_MPSAFE, NULL, em_msix_tx,
2524		    txr, &txr->tag)) != 0) {
2525			device_printf(dev, "Failed to register TX handler");
2526			return (error);
2527		}
2528#if __FreeBSD_version >= 800504
2529		bus_describe_intr(dev, txr->res, txr->tag, "tx %d", i);
2530#endif
2531		txr->msix = vector++; /* Increment vector for next pass */
2532		TASK_INIT(&txr->tx_task, 0, em_handle_tx, txr);
2533		txr->tq = taskqueue_create_fast("em_txq", M_NOWAIT,
2534		    taskqueue_thread_enqueue, &txr->tq);
2535		taskqueue_start_threads(&txr->tq, 1, PI_NET, "%s txq",
2536		    device_get_nameunit(adapter->dev));
2537		/*
2538		** Set the bit to enable interrupt
2539		** in E1000_IMS -- bits 22 and 23
2540		** are for TX0 and TX1, note this has
2541		** NOTHING to do with the MSIX vector
2542		*/
2543		txr->ims = 1 << (22 + i);
2544		adapter->ivars |= (8 | txr->msix) << (8 + (i * 4));
2545	}
2546
2547	/* Link interrupt */
2548	++rid;
2549	adapter->res = bus_alloc_resource_any(dev,
2550	    SYS_RES_IRQ, &rid, RF_ACTIVE);
2551	if (!adapter->res) {
2552		device_printf(dev,"Unable to allocate "
2553		    "bus resource: Link interrupt [%d]\n", rid);
2554		return (ENXIO);
2555        }
2556	/* Set the link handler function */
2557	error = bus_setup_intr(dev, adapter->res,
2558	    INTR_TYPE_NET | INTR_MPSAFE, NULL,
2559	    em_msix_link, adapter, &adapter->tag);
2560	if (error) {
2561		adapter->res = NULL;
2562		device_printf(dev, "Failed to register LINK handler");
2563		return (error);
2564	}
2565#if __FreeBSD_version >= 800504
2566		bus_describe_intr(dev, adapter->res, adapter->tag, "link");
2567#endif
2568	adapter->linkvec = vector;
2569	adapter->ivars |=  (8 | vector) << 16;
2570	adapter->ivars |= 0x80000000;
2571
2572	return (0);
2573}
2574
2575
2576static void
2577em_free_pci_resources(struct adapter *adapter)
2578{
2579	device_t	dev = adapter->dev;
2580	struct tx_ring	*txr;
2581	struct rx_ring	*rxr;
2582	int		rid;
2583
2584
2585	/*
2586	** Release all the queue interrupt resources:
2587	*/
2588	for (int i = 0; i < adapter->num_queues; i++) {
2589		txr = &adapter->tx_rings[i];
2590		rxr = &adapter->rx_rings[i];
2591		/* an early abort? */
2592		if ((txr == NULL) || (rxr == NULL))
2593			break;
2594		rid = txr->msix +1;
2595		if (txr->tag != NULL) {
2596			bus_teardown_intr(dev, txr->res, txr->tag);
2597			txr->tag = NULL;
2598		}
2599		if (txr->res != NULL)
2600			bus_release_resource(dev, SYS_RES_IRQ,
2601			    rid, txr->res);
2602		rid = rxr->msix +1;
2603		if (rxr->tag != NULL) {
2604			bus_teardown_intr(dev, rxr->res, rxr->tag);
2605			rxr->tag = NULL;
2606		}
2607		if (rxr->res != NULL)
2608			bus_release_resource(dev, SYS_RES_IRQ,
2609			    rid, rxr->res);
2610	}
2611
2612        if (adapter->linkvec) /* we are doing MSIX */
2613                rid = adapter->linkvec + 1;
2614        else
2615                (adapter->msix != 0) ? (rid = 1):(rid = 0);
2616
2617	if (adapter->tag != NULL) {
2618		bus_teardown_intr(dev, adapter->res, adapter->tag);
2619		adapter->tag = NULL;
2620	}
2621
2622	if (adapter->res != NULL)
2623		bus_release_resource(dev, SYS_RES_IRQ, rid, adapter->res);
2624
2625
2626	if (adapter->msix)
2627		pci_release_msi(dev);
2628
2629	if (adapter->msix_mem != NULL)
2630		bus_release_resource(dev, SYS_RES_MEMORY,
2631		    PCIR_BAR(EM_MSIX_BAR), adapter->msix_mem);
2632
2633	if (adapter->memory != NULL)
2634		bus_release_resource(dev, SYS_RES_MEMORY,
2635		    PCIR_BAR(0), adapter->memory);
2636
2637	if (adapter->flash != NULL)
2638		bus_release_resource(dev, SYS_RES_MEMORY,
2639		    EM_FLASH, adapter->flash);
2640}
2641
2642/*
2643 * Setup MSI or MSI/X
2644 */
2645static int
2646em_setup_msix(struct adapter *adapter)
2647{
2648	device_t dev = adapter->dev;
2649	int val = 0;
2650
2651
2652	/*
2653	** Setup MSI/X for Hartwell: tests have shown
2654	** use of two queues to be unstable, and to
2655	** provide no great gain anyway, so we simply
2656	** seperate the interrupts and use a single queue.
2657	*/
2658	if ((adapter->hw.mac.type == e1000_82574) &&
2659	    (em_enable_msix == TRUE)) {
2660		/* Map the MSIX BAR */
2661		int rid = PCIR_BAR(EM_MSIX_BAR);
2662		adapter->msix_mem = bus_alloc_resource_any(dev,
2663		    SYS_RES_MEMORY, &rid, RF_ACTIVE);
2664       		if (!adapter->msix_mem) {
2665			/* May not be enabled */
2666               		device_printf(adapter->dev,
2667			    "Unable to map MSIX table \n");
2668			goto msi;
2669       		}
2670		val = pci_msix_count(dev);
2671		if (val < 3) {
2672			bus_release_resource(dev, SYS_RES_MEMORY,
2673			    PCIR_BAR(EM_MSIX_BAR), adapter->msix_mem);
2674			adapter->msix_mem = NULL;
2675               		device_printf(adapter->dev,
2676			    "MSIX: insufficient vectors, using MSI\n");
2677			goto msi;
2678		}
2679		val = 3;
2680		adapter->num_queues = 1;
2681		if (pci_alloc_msix(dev, &val) == 0) {
2682			device_printf(adapter->dev,
2683			    "Using MSIX interrupts "
2684			    "with %d vectors\n", val);
2685		}
2686
2687		return (val);
2688	}
2689msi:
2690       	val = pci_msi_count(dev);
2691       	if (val == 1 && pci_alloc_msi(dev, &val) == 0) {
2692               	adapter->msix = 1;
2693               	device_printf(adapter->dev,"Using an MSI interrupt\n");
2694		return (val);
2695	}
2696	/* Should only happen due to manual configuration */
2697	device_printf(adapter->dev,"No MSI/MSIX using a Legacy IRQ\n");
2698	return (0);
2699}
2700
2701
2702/*********************************************************************
2703 *
2704 *  Initialize the hardware to a configuration
2705 *  as specified by the adapter structure.
2706 *
2707 **********************************************************************/
2708static void
2709em_reset(struct adapter *adapter)
2710{
2711	device_t	dev = adapter->dev;
2712	struct ifnet	*ifp = adapter->ifp;
2713	struct e1000_hw	*hw = &adapter->hw;
2714	u16		rx_buffer_size;
2715
2716	INIT_DEBUGOUT("em_reset: begin");
2717
2718	/* Set up smart power down as default off on newer adapters. */
2719	if (!em_smart_pwr_down && (hw->mac.type == e1000_82571 ||
2720	    hw->mac.type == e1000_82572)) {
2721		u16 phy_tmp = 0;
2722
2723		/* Speed up time to link by disabling smart power down. */
2724		e1000_read_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, &phy_tmp);
2725		phy_tmp &= ~IGP02E1000_PM_SPD;
2726		e1000_write_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, phy_tmp);
2727	}
2728
2729	/*
2730	 * These parameters control the automatic generation (Tx) and
2731	 * response (Rx) to Ethernet PAUSE frames.
2732	 * - High water mark should allow for at least two frames to be
2733	 *   received after sending an XOFF.
2734	 * - Low water mark works best when it is very near the high water mark.
2735	 *   This allows the receiver to restart by sending XON when it has
2736	 *   drained a bit. Here we use an arbitary value of 1500 which will
2737	 *   restart after one full frame is pulled from the buffer. There
2738	 *   could be several smaller frames in the buffer and if so they will
2739	 *   not trigger the XON until their total number reduces the buffer
2740	 *   by 1500.
2741	 * - The pause time is fairly large at 1000 x 512ns = 512 usec.
2742	 */
2743	rx_buffer_size = ((E1000_READ_REG(hw, E1000_PBA) & 0xffff) << 10 );
2744
2745	hw->fc.high_water = rx_buffer_size -
2746	    roundup2(adapter->max_frame_size, 1024);
2747	hw->fc.low_water = hw->fc.high_water - 1500;
2748
2749	if (hw->mac.type == e1000_80003es2lan)
2750		hw->fc.pause_time = 0xFFFF;
2751	else
2752		hw->fc.pause_time = EM_FC_PAUSE_TIME;
2753
2754	hw->fc.send_xon = TRUE;
2755
2756        /* Set Flow control, use the tunable location if sane */
2757	hw->fc.requested_mode = adapter->fc_setting;
2758
2759	/* Workaround: no TX flow ctrl for PCH */
2760	if (hw->mac.type == e1000_pchlan)
2761                hw->fc.requested_mode = e1000_fc_rx_pause;
2762
2763	/* Override - settings for PCH2LAN, ya its magic :) */
2764	if (hw->mac.type == e1000_pch2lan) {
2765		hw->fc.high_water = 0x5C20;
2766		hw->fc.low_water = 0x5048;
2767		hw->fc.pause_time = 0x0650;
2768		hw->fc.refresh_time = 0x0400;
2769		/* Jumbos need adjusted PBA */
2770		if (ifp->if_mtu > ETHERMTU)
2771			E1000_WRITE_REG(hw, E1000_PBA, 12);
2772		else
2773			E1000_WRITE_REG(hw, E1000_PBA, 26);
2774	}
2775
2776	/* Issue a global reset */
2777	e1000_reset_hw(hw);
2778	E1000_WRITE_REG(hw, E1000_WUC, 0);
2779	em_disable_aspm(adapter);
2780
2781	if (e1000_init_hw(hw) < 0) {
2782		device_printf(dev, "Hardware Initialization Failed\n");
2783		return;
2784	}
2785
2786	E1000_WRITE_REG(hw, E1000_VET, ETHERTYPE_VLAN);
2787	e1000_get_phy_info(hw);
2788	e1000_check_for_link(hw);
2789	return;
2790}
2791
2792/*********************************************************************
2793 *
2794 *  Setup networking device structure and register an interface.
2795 *
2796 **********************************************************************/
2797static int
2798em_setup_interface(device_t dev, struct adapter *adapter)
2799{
2800	struct ifnet   *ifp;
2801
2802	INIT_DEBUGOUT("em_setup_interface: begin");
2803
2804	ifp = adapter->ifp = if_alloc(IFT_ETHER);
2805	if (ifp == NULL) {
2806		device_printf(dev, "can not allocate ifnet structure\n");
2807		return (-1);
2808	}
2809	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
2810	ifp->if_mtu = ETHERMTU;
2811	ifp->if_init =  em_init;
2812	ifp->if_softc = adapter;
2813	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
2814	ifp->if_ioctl = em_ioctl;
2815	ifp->if_start = em_start;
2816	IFQ_SET_MAXLEN(&ifp->if_snd, adapter->num_tx_desc - 1);
2817	ifp->if_snd.ifq_drv_maxlen = adapter->num_tx_desc - 1;
2818	IFQ_SET_READY(&ifp->if_snd);
2819
2820	ether_ifattach(ifp, adapter->hw.mac.addr);
2821
2822	ifp->if_capabilities = ifp->if_capenable = 0;
2823
2824#ifdef EM_MULTIQUEUE
2825	/* Multiqueue tx functions */
2826	ifp->if_transmit = em_mq_start;
2827	ifp->if_qflush = em_qflush;
2828#endif
2829
2830	ifp->if_capabilities |= IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM;
2831	ifp->if_capenable |= IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM;
2832
2833	/* Enable TSO by default, can disable with ifconfig */
2834	ifp->if_capabilities |= IFCAP_TSO4;
2835	ifp->if_capenable |= IFCAP_TSO4;
2836
2837	/*
2838	 * Tell the upper layer(s) we
2839	 * support full VLAN capability
2840	 */
2841	ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);
2842	ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU;
2843	ifp->if_capenable |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU;
2844
2845	/*
2846	** Dont turn this on by default, if vlans are
2847	** created on another pseudo device (eg. lagg)
2848	** then vlan events are not passed thru, breaking
2849	** operation, but with HW FILTER off it works. If
2850	** using vlans directly on the em driver you can
2851	** enable this and get full hardware tag filtering.
2852	*/
2853	ifp->if_capabilities |= IFCAP_VLAN_HWFILTER;
2854
2855#ifdef DEVICE_POLLING
2856	ifp->if_capabilities |= IFCAP_POLLING;
2857#endif
2858
2859	/* Enable only WOL MAGIC by default */
2860	if (adapter->wol) {
2861		ifp->if_capabilities |= IFCAP_WOL;
2862		ifp->if_capenable |= IFCAP_WOL_MAGIC;
2863	}
2864
2865	/*
2866	 * Specify the media types supported by this adapter and register
2867	 * callbacks to update media and link information
2868	 */
2869	ifmedia_init(&adapter->media, IFM_IMASK,
2870	    em_media_change, em_media_status);
2871	if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
2872	    (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
2873		u_char fiber_type = IFM_1000_SX;	/* default type */
2874
2875		ifmedia_add(&adapter->media, IFM_ETHER | fiber_type | IFM_FDX,
2876			    0, NULL);
2877		ifmedia_add(&adapter->media, IFM_ETHER | fiber_type, 0, NULL);
2878	} else {
2879		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T, 0, NULL);
2880		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T | IFM_FDX,
2881			    0, NULL);
2882		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX,
2883			    0, NULL);
2884		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
2885			    0, NULL);
2886		if (adapter->hw.phy.type != e1000_phy_ife) {
2887			ifmedia_add(&adapter->media,
2888				IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
2889			ifmedia_add(&adapter->media,
2890				IFM_ETHER | IFM_1000_T, 0, NULL);
2891		}
2892	}
2893	ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
2894	ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
2895	return (0);
2896}
2897
2898
2899/*
2900 * Manage DMA'able memory.
2901 */
2902static void
2903em_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
2904{
2905	if (error)
2906		return;
2907	*(bus_addr_t *) arg = segs[0].ds_addr;
2908}
2909
2910static int
2911em_dma_malloc(struct adapter *adapter, bus_size_t size,
2912        struct em_dma_alloc *dma, int mapflags)
2913{
2914	int error;
2915
2916	error = bus_dma_tag_create(bus_get_dma_tag(adapter->dev), /* parent */
2917				EM_DBA_ALIGN, 0,	/* alignment, bounds */
2918				BUS_SPACE_MAXADDR,	/* lowaddr */
2919				BUS_SPACE_MAXADDR,	/* highaddr */
2920				NULL, NULL,		/* filter, filterarg */
2921				size,			/* maxsize */
2922				1,			/* nsegments */
2923				size,			/* maxsegsize */
2924				0,			/* flags */
2925				NULL,			/* lockfunc */
2926				NULL,			/* lockarg */
2927				&dma->dma_tag);
2928	if (error) {
2929		device_printf(adapter->dev,
2930		    "%s: bus_dma_tag_create failed: %d\n",
2931		    __func__, error);
2932		goto fail_0;
2933	}
2934
2935	error = bus_dmamem_alloc(dma->dma_tag, (void**) &dma->dma_vaddr,
2936	    BUS_DMA_NOWAIT | BUS_DMA_COHERENT, &dma->dma_map);
2937	if (error) {
2938		device_printf(adapter->dev,
2939		    "%s: bus_dmamem_alloc(%ju) failed: %d\n",
2940		    __func__, (uintmax_t)size, error);
2941		goto fail_2;
2942	}
2943
2944	dma->dma_paddr = 0;
2945	error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
2946	    size, em_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT);
2947	if (error || dma->dma_paddr == 0) {
2948		device_printf(adapter->dev,
2949		    "%s: bus_dmamap_load failed: %d\n",
2950		    __func__, error);
2951		goto fail_3;
2952	}
2953
2954	return (0);
2955
2956fail_3:
2957	bus_dmamap_unload(dma->dma_tag, dma->dma_map);
2958fail_2:
2959	bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
2960	bus_dma_tag_destroy(dma->dma_tag);
2961fail_0:
2962	dma->dma_map = NULL;
2963	dma->dma_tag = NULL;
2964
2965	return (error);
2966}
2967
2968static void
2969em_dma_free(struct adapter *adapter, struct em_dma_alloc *dma)
2970{
2971	if (dma->dma_tag == NULL)
2972		return;
2973	if (dma->dma_map != NULL) {
2974		bus_dmamap_sync(dma->dma_tag, dma->dma_map,
2975		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
2976		bus_dmamap_unload(dma->dma_tag, dma->dma_map);
2977		bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
2978		dma->dma_map = NULL;
2979	}
2980	bus_dma_tag_destroy(dma->dma_tag);
2981	dma->dma_tag = NULL;
2982}
2983
2984
2985/*********************************************************************
2986 *
2987 *  Allocate memory for the transmit and receive rings, and then
2988 *  the descriptors associated with each, called only once at attach.
2989 *
2990 **********************************************************************/
2991static int
2992em_allocate_queues(struct adapter *adapter)
2993{
2994	device_t		dev = adapter->dev;
2995	struct tx_ring		*txr = NULL;
2996	struct rx_ring		*rxr = NULL;
2997	int rsize, tsize, error = E1000_SUCCESS;
2998	int txconf = 0, rxconf = 0;
2999
3000
3001	/* Allocate the TX ring struct memory */
3002	if (!(adapter->tx_rings =
3003	    (struct tx_ring *) malloc(sizeof(struct tx_ring) *
3004	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3005		device_printf(dev, "Unable to allocate TX ring memory\n");
3006		error = ENOMEM;
3007		goto fail;
3008	}
3009
3010	/* Now allocate the RX */
3011	if (!(adapter->rx_rings =
3012	    (struct rx_ring *) malloc(sizeof(struct rx_ring) *
3013	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3014		device_printf(dev, "Unable to allocate RX ring memory\n");
3015		error = ENOMEM;
3016		goto rx_fail;
3017	}
3018
3019	tsize = roundup2(adapter->num_tx_desc *
3020	    sizeof(struct e1000_tx_desc), EM_DBA_ALIGN);
3021	/*
3022	 * Now set up the TX queues, txconf is needed to handle the
3023	 * possibility that things fail midcourse and we need to
3024	 * undo memory gracefully
3025	 */
3026	for (int i = 0; i < adapter->num_queues; i++, txconf++) {
3027		/* Set up some basics */
3028		txr = &adapter->tx_rings[i];
3029		txr->adapter = adapter;
3030		txr->me = i;
3031
3032		/* Initialize the TX lock */
3033		snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
3034		    device_get_nameunit(dev), txr->me);
3035		mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF);
3036
3037		if (em_dma_malloc(adapter, tsize,
3038			&txr->txdma, BUS_DMA_NOWAIT)) {
3039			device_printf(dev,
3040			    "Unable to allocate TX Descriptor memory\n");
3041			error = ENOMEM;
3042			goto err_tx_desc;
3043		}
3044		txr->tx_base = (struct e1000_tx_desc *)txr->txdma.dma_vaddr;
3045		bzero((void *)txr->tx_base, tsize);
3046
3047        	if (em_allocate_transmit_buffers(txr)) {
3048			device_printf(dev,
3049			    "Critical Failure setting up transmit buffers\n");
3050			error = ENOMEM;
3051			goto err_tx_desc;
3052        	}
3053#if __FreeBSD_version >= 800000
3054		/* Allocate a buf ring */
3055		txr->br = buf_ring_alloc(4096, M_DEVBUF,
3056		    M_WAITOK, &txr->tx_mtx);
3057#endif
3058	}
3059
3060	/*
3061	 * Next the RX queues...
3062	 */
3063	rsize = roundup2(adapter->num_rx_desc *
3064	    sizeof(struct e1000_rx_desc), EM_DBA_ALIGN);
3065	for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
3066		rxr = &adapter->rx_rings[i];
3067		rxr->adapter = adapter;
3068		rxr->me = i;
3069
3070		/* Initialize the RX lock */
3071		snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
3072		    device_get_nameunit(dev), txr->me);
3073		mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF);
3074
3075		if (em_dma_malloc(adapter, rsize,
3076			&rxr->rxdma, BUS_DMA_NOWAIT)) {
3077			device_printf(dev,
3078			    "Unable to allocate RxDescriptor memory\n");
3079			error = ENOMEM;
3080			goto err_rx_desc;
3081		}
3082		rxr->rx_base = (struct e1000_rx_desc *)rxr->rxdma.dma_vaddr;
3083		bzero((void *)rxr->rx_base, rsize);
3084
3085        	/* Allocate receive buffers for the ring*/
3086		if (em_allocate_receive_buffers(rxr)) {
3087			device_printf(dev,
3088			    "Critical Failure setting up receive buffers\n");
3089			error = ENOMEM;
3090			goto err_rx_desc;
3091		}
3092	}
3093
3094	return (0);
3095
3096err_rx_desc:
3097	for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
3098		em_dma_free(adapter, &rxr->rxdma);
3099err_tx_desc:
3100	for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
3101		em_dma_free(adapter, &txr->txdma);
3102	free(adapter->rx_rings, M_DEVBUF);
3103rx_fail:
3104#if __FreeBSD_version >= 800000
3105	buf_ring_free(txr->br, M_DEVBUF);
3106#endif
3107	free(adapter->tx_rings, M_DEVBUF);
3108fail:
3109	return (error);
3110}
3111
3112
3113/*********************************************************************
3114 *
3115 *  Allocate memory for tx_buffer structures. The tx_buffer stores all
3116 *  the information needed to transmit a packet on the wire. This is
3117 *  called only once at attach, setup is done every reset.
3118 *
3119 **********************************************************************/
3120static int
3121em_allocate_transmit_buffers(struct tx_ring *txr)
3122{
3123	struct adapter *adapter = txr->adapter;
3124	device_t dev = adapter->dev;
3125	struct em_buffer *txbuf;
3126	int error, i;
3127
3128	/*
3129	 * Setup DMA descriptor areas.
3130	 */
3131	if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
3132			       1, 0,			/* alignment, bounds */
3133			       BUS_SPACE_MAXADDR,	/* lowaddr */
3134			       BUS_SPACE_MAXADDR,	/* highaddr */
3135			       NULL, NULL,		/* filter, filterarg */
3136			       EM_TSO_SIZE,		/* maxsize */
3137			       EM_MAX_SCATTER,		/* nsegments */
3138			       PAGE_SIZE,		/* maxsegsize */
3139			       0,			/* flags */
3140			       NULL,			/* lockfunc */
3141			       NULL,			/* lockfuncarg */
3142			       &txr->txtag))) {
3143		device_printf(dev,"Unable to allocate TX DMA tag\n");
3144		goto fail;
3145	}
3146
3147	if (!(txr->tx_buffers =
3148	    (struct em_buffer *) malloc(sizeof(struct em_buffer) *
3149	    adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3150		device_printf(dev, "Unable to allocate tx_buffer memory\n");
3151		error = ENOMEM;
3152		goto fail;
3153	}
3154
3155        /* Create the descriptor buffer dma maps */
3156	txbuf = txr->tx_buffers;
3157	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3158		error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
3159		if (error != 0) {
3160			device_printf(dev, "Unable to create TX DMA map\n");
3161			goto fail;
3162		}
3163	}
3164
3165	return 0;
3166fail:
3167	/* We free all, it handles case where we are in the middle */
3168	em_free_transmit_structures(adapter);
3169	return (error);
3170}
3171
3172/*********************************************************************
3173 *
3174 *  Initialize a transmit ring.
3175 *
3176 **********************************************************************/
3177static void
3178em_setup_transmit_ring(struct tx_ring *txr)
3179{
3180	struct adapter *adapter = txr->adapter;
3181	struct em_buffer *txbuf;
3182	int i;
3183
3184	/* Clear the old descriptor contents */
3185	EM_TX_LOCK(txr);
3186	bzero((void *)txr->tx_base,
3187	      (sizeof(struct e1000_tx_desc)) * adapter->num_tx_desc);
3188	/* Reset indices */
3189	txr->next_avail_desc = 0;
3190	txr->next_to_clean = 0;
3191
3192	/* Free any existing tx buffers. */
3193        txbuf = txr->tx_buffers;
3194	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3195		if (txbuf->m_head != NULL) {
3196			bus_dmamap_sync(txr->txtag, txbuf->map,
3197			    BUS_DMASYNC_POSTWRITE);
3198			bus_dmamap_unload(txr->txtag, txbuf->map);
3199			m_freem(txbuf->m_head);
3200			txbuf->m_head = NULL;
3201		}
3202		/* clear the watch index */
3203		txbuf->next_eop = -1;
3204        }
3205
3206	/* Set number of descriptors available */
3207	txr->tx_avail = adapter->num_tx_desc;
3208	txr->queue_status = EM_QUEUE_IDLE;
3209
3210	/* Clear checksum offload context. */
3211	txr->last_hw_offload = 0;
3212	txr->last_hw_ipcss = 0;
3213	txr->last_hw_ipcso = 0;
3214	txr->last_hw_tucss = 0;
3215	txr->last_hw_tucso = 0;
3216
3217	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3218	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3219	EM_TX_UNLOCK(txr);
3220}
3221
3222/*********************************************************************
3223 *
3224 *  Initialize all transmit rings.
3225 *
3226 **********************************************************************/
3227static void
3228em_setup_transmit_structures(struct adapter *adapter)
3229{
3230	struct tx_ring *txr = adapter->tx_rings;
3231
3232	for (int i = 0; i < adapter->num_queues; i++, txr++)
3233		em_setup_transmit_ring(txr);
3234
3235	return;
3236}
3237
3238/*********************************************************************
3239 *
3240 *  Enable transmit unit.
3241 *
3242 **********************************************************************/
3243static void
3244em_initialize_transmit_unit(struct adapter *adapter)
3245{
3246	struct tx_ring	*txr = adapter->tx_rings;
3247	struct e1000_hw	*hw = &adapter->hw;
3248	u32	tctl, tarc, tipg = 0;
3249
3250	 INIT_DEBUGOUT("em_initialize_transmit_unit: begin");
3251
3252	for (int i = 0; i < adapter->num_queues; i++, txr++) {
3253		u64 bus_addr = txr->txdma.dma_paddr;
3254		/* Base and Len of TX Ring */
3255		E1000_WRITE_REG(hw, E1000_TDLEN(i),
3256	    	    adapter->num_tx_desc * sizeof(struct e1000_tx_desc));
3257		E1000_WRITE_REG(hw, E1000_TDBAH(i),
3258	    	    (u32)(bus_addr >> 32));
3259		E1000_WRITE_REG(hw, E1000_TDBAL(i),
3260	    	    (u32)bus_addr);
3261		/* Init the HEAD/TAIL indices */
3262		E1000_WRITE_REG(hw, E1000_TDT(i), 0);
3263		E1000_WRITE_REG(hw, E1000_TDH(i), 0);
3264
3265		HW_DEBUGOUT2("Base = %x, Length = %x\n",
3266		    E1000_READ_REG(&adapter->hw, E1000_TDBAL(i)),
3267		    E1000_READ_REG(&adapter->hw, E1000_TDLEN(i)));
3268
3269		txr->queue_status = EM_QUEUE_IDLE;
3270	}
3271
3272	/* Set the default values for the Tx Inter Packet Gap timer */
3273	switch (adapter->hw.mac.type) {
3274	case e1000_82542:
3275		tipg = DEFAULT_82542_TIPG_IPGT;
3276		tipg |= DEFAULT_82542_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT;
3277		tipg |= DEFAULT_82542_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT;
3278		break;
3279	case e1000_80003es2lan:
3280		tipg = DEFAULT_82543_TIPG_IPGR1;
3281		tipg |= DEFAULT_80003ES2LAN_TIPG_IPGR2 <<
3282		    E1000_TIPG_IPGR2_SHIFT;
3283		break;
3284	default:
3285		if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
3286		    (adapter->hw.phy.media_type ==
3287		    e1000_media_type_internal_serdes))
3288			tipg = DEFAULT_82543_TIPG_IPGT_FIBER;
3289		else
3290			tipg = DEFAULT_82543_TIPG_IPGT_COPPER;
3291		tipg |= DEFAULT_82543_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT;
3292		tipg |= DEFAULT_82543_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT;
3293	}
3294
3295	E1000_WRITE_REG(&adapter->hw, E1000_TIPG, tipg);
3296	E1000_WRITE_REG(&adapter->hw, E1000_TIDV, adapter->tx_int_delay.value);
3297
3298	if(adapter->hw.mac.type >= e1000_82540)
3299		E1000_WRITE_REG(&adapter->hw, E1000_TADV,
3300		    adapter->tx_abs_int_delay.value);
3301
3302	if ((adapter->hw.mac.type == e1000_82571) ||
3303	    (adapter->hw.mac.type == e1000_82572)) {
3304		tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
3305		tarc |= SPEED_MODE_BIT;
3306		E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3307	} else if (adapter->hw.mac.type == e1000_80003es2lan) {
3308		tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
3309		tarc |= 1;
3310		E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3311		tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(1));
3312		tarc |= 1;
3313		E1000_WRITE_REG(&adapter->hw, E1000_TARC(1), tarc);
3314	}
3315
3316	adapter->txd_cmd = E1000_TXD_CMD_IFCS;
3317	if (adapter->tx_int_delay.value > 0)
3318		adapter->txd_cmd |= E1000_TXD_CMD_IDE;
3319
3320	/* Program the Transmit Control Register */
3321	tctl = E1000_READ_REG(&adapter->hw, E1000_TCTL);
3322	tctl &= ~E1000_TCTL_CT;
3323	tctl |= (E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN |
3324		   (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT));
3325
3326	if (adapter->hw.mac.type >= e1000_82571)
3327		tctl |= E1000_TCTL_MULR;
3328
3329	/* This write will effectively turn on the transmit unit. */
3330	E1000_WRITE_REG(&adapter->hw, E1000_TCTL, tctl);
3331
3332}
3333
3334
3335/*********************************************************************
3336 *
3337 *  Free all transmit rings.
3338 *
3339 **********************************************************************/
3340static void
3341em_free_transmit_structures(struct adapter *adapter)
3342{
3343	struct tx_ring *txr = adapter->tx_rings;
3344
3345	for (int i = 0; i < adapter->num_queues; i++, txr++) {
3346		EM_TX_LOCK(txr);
3347		em_free_transmit_buffers(txr);
3348		em_dma_free(adapter, &txr->txdma);
3349		EM_TX_UNLOCK(txr);
3350		EM_TX_LOCK_DESTROY(txr);
3351	}
3352
3353	free(adapter->tx_rings, M_DEVBUF);
3354}
3355
3356/*********************************************************************
3357 *
3358 *  Free transmit ring related data structures.
3359 *
3360 **********************************************************************/
3361static void
3362em_free_transmit_buffers(struct tx_ring *txr)
3363{
3364	struct adapter		*adapter = txr->adapter;
3365	struct em_buffer	*txbuf;
3366
3367	INIT_DEBUGOUT("free_transmit_ring: begin");
3368
3369	if (txr->tx_buffers == NULL)
3370		return;
3371
3372	for (int i = 0; i < adapter->num_tx_desc; i++) {
3373		txbuf = &txr->tx_buffers[i];
3374		if (txbuf->m_head != NULL) {
3375			bus_dmamap_sync(txr->txtag, txbuf->map,
3376			    BUS_DMASYNC_POSTWRITE);
3377			bus_dmamap_unload(txr->txtag,
3378			    txbuf->map);
3379			m_freem(txbuf->m_head);
3380			txbuf->m_head = NULL;
3381			if (txbuf->map != NULL) {
3382				bus_dmamap_destroy(txr->txtag,
3383				    txbuf->map);
3384				txbuf->map = NULL;
3385			}
3386		} else if (txbuf->map != NULL) {
3387			bus_dmamap_unload(txr->txtag,
3388			    txbuf->map);
3389			bus_dmamap_destroy(txr->txtag,
3390			    txbuf->map);
3391			txbuf->map = NULL;
3392		}
3393	}
3394#if __FreeBSD_version >= 800000
3395	if (txr->br != NULL)
3396		buf_ring_free(txr->br, M_DEVBUF);
3397#endif
3398	if (txr->tx_buffers != NULL) {
3399		free(txr->tx_buffers, M_DEVBUF);
3400		txr->tx_buffers = NULL;
3401	}
3402	if (txr->txtag != NULL) {
3403		bus_dma_tag_destroy(txr->txtag);
3404		txr->txtag = NULL;
3405	}
3406	return;
3407}
3408
3409
3410/*********************************************************************
3411 *  The offload context is protocol specific (TCP/UDP) and thus
3412 *  only needs to be set when the protocol changes. The occasion
3413 *  of a context change can be a performance detriment, and
3414 *  might be better just disabled. The reason arises in the way
3415 *  in which the controller supports pipelined requests from the
3416 *  Tx data DMA. Up to four requests can be pipelined, and they may
3417 *  belong to the same packet or to multiple packets. However all
3418 *  requests for one packet are issued before a request is issued
3419 *  for a subsequent packet and if a request for the next packet
3420 *  requires a context change, that request will be stalled
3421 *  until the previous request completes. This means setting up
3422 *  a new context effectively disables pipelined Tx data DMA which
3423 *  in turn greatly slow down performance to send small sized
3424 *  frames.
3425 **********************************************************************/
3426static void
3427em_transmit_checksum_setup(struct tx_ring *txr, struct mbuf *mp, int ip_off,
3428    struct ip *ip, u32 *txd_upper, u32 *txd_lower)
3429{
3430	struct adapter			*adapter = txr->adapter;
3431	struct e1000_context_desc	*TXD = NULL;
3432	struct em_buffer		*tx_buffer;
3433	int				cur, hdr_len;
3434	u32				cmd = 0;
3435	u16				offload = 0;
3436	u8				ipcso, ipcss, tucso, tucss;
3437
3438	ipcss = ipcso = tucss = tucso = 0;
3439	hdr_len = ip_off + (ip->ip_hl << 2);
3440	cur = txr->next_avail_desc;
3441
3442	/* Setup of IP header checksum. */
3443	if (mp->m_pkthdr.csum_flags & CSUM_IP) {
3444		*txd_upper |= E1000_TXD_POPTS_IXSM << 8;
3445		offload |= CSUM_IP;
3446		ipcss = ip_off;
3447		ipcso = ip_off + offsetof(struct ip, ip_sum);
3448		/*
3449		 * Start offset for header checksum calculation.
3450		 * End offset for header checksum calculation.
3451		 * Offset of place to put the checksum.
3452		 */
3453		TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
3454		TXD->lower_setup.ip_fields.ipcss = ipcss;
3455		TXD->lower_setup.ip_fields.ipcse = htole16(hdr_len);
3456		TXD->lower_setup.ip_fields.ipcso = ipcso;
3457		cmd |= E1000_TXD_CMD_IP;
3458	}
3459
3460	if (mp->m_pkthdr.csum_flags & CSUM_TCP) {
3461 		*txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
3462 		*txd_upper |= E1000_TXD_POPTS_TXSM << 8;
3463 		offload |= CSUM_TCP;
3464 		tucss = hdr_len;
3465 		tucso = hdr_len + offsetof(struct tcphdr, th_sum);
3466 		/*
3467 		 * Setting up new checksum offload context for every frames
3468 		 * takes a lot of processing time for hardware. This also
3469 		 * reduces performance a lot for small sized frames so avoid
3470 		 * it if driver can use previously configured checksum
3471 		 * offload context.
3472 		 */
3473 		if (txr->last_hw_offload == offload) {
3474 			if (offload & CSUM_IP) {
3475 				if (txr->last_hw_ipcss == ipcss &&
3476 				    txr->last_hw_ipcso == ipcso &&
3477 				    txr->last_hw_tucss == tucss &&
3478 				    txr->last_hw_tucso == tucso)
3479 					return;
3480 			} else {
3481 				if (txr->last_hw_tucss == tucss &&
3482 				    txr->last_hw_tucso == tucso)
3483 					return;
3484 			}
3485  		}
3486 		txr->last_hw_offload = offload;
3487 		txr->last_hw_tucss = tucss;
3488 		txr->last_hw_tucso = tucso;
3489 		/*
3490 		 * Start offset for payload checksum calculation.
3491 		 * End offset for payload checksum calculation.
3492 		 * Offset of place to put the checksum.
3493 		 */
3494		TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
3495 		TXD->upper_setup.tcp_fields.tucss = hdr_len;
3496 		TXD->upper_setup.tcp_fields.tucse = htole16(0);
3497 		TXD->upper_setup.tcp_fields.tucso = tucso;
3498 		cmd |= E1000_TXD_CMD_TCP;
3499 	} else if (mp->m_pkthdr.csum_flags & CSUM_UDP) {
3500 		*txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
3501 		*txd_upper |= E1000_TXD_POPTS_TXSM << 8;
3502 		tucss = hdr_len;
3503 		tucso = hdr_len + offsetof(struct udphdr, uh_sum);
3504 		/*
3505 		 * Setting up new checksum offload context for every frames
3506 		 * takes a lot of processing time for hardware. This also
3507 		 * reduces performance a lot for small sized frames so avoid
3508 		 * it if driver can use previously configured checksum
3509 		 * offload context.
3510 		 */
3511 		if (txr->last_hw_offload == offload) {
3512 			if (offload & CSUM_IP) {
3513 				if (txr->last_hw_ipcss == ipcss &&
3514 				    txr->last_hw_ipcso == ipcso &&
3515 				    txr->last_hw_tucss == tucss &&
3516 				    txr->last_hw_tucso == tucso)
3517 					return;
3518 			} else {
3519 				if (txr->last_hw_tucss == tucss &&
3520 				    txr->last_hw_tucso == tucso)
3521 					return;
3522 			}
3523 		}
3524 		txr->last_hw_offload = offload;
3525 		txr->last_hw_tucss = tucss;
3526 		txr->last_hw_tucso = tucso;
3527 		/*
3528 		 * Start offset for header checksum calculation.
3529 		 * End offset for header checksum calculation.
3530 		 * Offset of place to put the checksum.
3531 		 */
3532		TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
3533 		TXD->upper_setup.tcp_fields.tucss = tucss;
3534 		TXD->upper_setup.tcp_fields.tucse = htole16(0);
3535 		TXD->upper_setup.tcp_fields.tucso = tucso;
3536  	}
3537
3538 	if (offload & CSUM_IP) {
3539 		txr->last_hw_ipcss = ipcss;
3540 		txr->last_hw_ipcso = ipcso;
3541  	}
3542
3543	TXD->tcp_seg_setup.data = htole32(0);
3544	TXD->cmd_and_length =
3545	    htole32(adapter->txd_cmd | E1000_TXD_CMD_DEXT | cmd);
3546	tx_buffer = &txr->tx_buffers[cur];
3547	tx_buffer->m_head = NULL;
3548	tx_buffer->next_eop = -1;
3549
3550	if (++cur == adapter->num_tx_desc)
3551		cur = 0;
3552
3553	txr->tx_avail--;
3554	txr->next_avail_desc = cur;
3555}
3556
3557
3558/**********************************************************************
3559 *
3560 *  Setup work for hardware segmentation offload (TSO)
3561 *
3562 **********************************************************************/
3563static void
3564em_tso_setup(struct tx_ring *txr, struct mbuf *mp, int ip_off,
3565    struct ip *ip, struct tcphdr *tp, u32 *txd_upper, u32 *txd_lower)
3566{
3567	struct adapter			*adapter = txr->adapter;
3568	struct e1000_context_desc	*TXD;
3569	struct em_buffer		*tx_buffer;
3570	int cur, hdr_len;
3571
3572	/*
3573	 * In theory we can use the same TSO context if and only if
3574	 * frame is the same type(IP/TCP) and the same MSS. However
3575	 * checking whether a frame has the same IP/TCP structure is
3576	 * hard thing so just ignore that and always restablish a
3577	 * new TSO context.
3578	 */
3579	hdr_len = ip_off + (ip->ip_hl << 2) + (tp->th_off << 2);
3580	*txd_lower = (E1000_TXD_CMD_DEXT |	/* Extended descr type */
3581		      E1000_TXD_DTYP_D |	/* Data descr type */
3582		      E1000_TXD_CMD_TSE);	/* Do TSE on this packet */
3583
3584	/* IP and/or TCP header checksum calculation and insertion. */
3585	*txd_upper = (E1000_TXD_POPTS_IXSM | E1000_TXD_POPTS_TXSM) << 8;
3586
3587	cur = txr->next_avail_desc;
3588	tx_buffer = &txr->tx_buffers[cur];
3589	TXD = (struct e1000_context_desc *) &txr->tx_base[cur];
3590
3591	/*
3592	 * Start offset for header checksum calculation.
3593	 * End offset for header checksum calculation.
3594	 * Offset of place put the checksum.
3595	 */
3596	TXD->lower_setup.ip_fields.ipcss = ip_off;
3597	TXD->lower_setup.ip_fields.ipcse =
3598	    htole16(ip_off + (ip->ip_hl << 2) - 1);
3599	TXD->lower_setup.ip_fields.ipcso = ip_off + offsetof(struct ip, ip_sum);
3600	/*
3601	 * Start offset for payload checksum calculation.
3602	 * End offset for payload checksum calculation.
3603	 * Offset of place to put the checksum.
3604	 */
3605	TXD->upper_setup.tcp_fields.tucss = ip_off + (ip->ip_hl << 2);
3606	TXD->upper_setup.tcp_fields.tucse = 0;
3607	TXD->upper_setup.tcp_fields.tucso =
3608	    ip_off + (ip->ip_hl << 2) + offsetof(struct tcphdr, th_sum);
3609	/*
3610	 * Payload size per packet w/o any headers.
3611	 * Length of all headers up to payload.
3612	 */
3613	TXD->tcp_seg_setup.fields.mss = htole16(mp->m_pkthdr.tso_segsz);
3614	TXD->tcp_seg_setup.fields.hdr_len = hdr_len;
3615
3616	TXD->cmd_and_length = htole32(adapter->txd_cmd |
3617				E1000_TXD_CMD_DEXT |	/* Extended descr */
3618				E1000_TXD_CMD_TSE |	/* TSE context */
3619				E1000_TXD_CMD_IP |	/* Do IP csum */
3620				E1000_TXD_CMD_TCP |	/* Do TCP checksum */
3621				(mp->m_pkthdr.len - (hdr_len))); /* Total len */
3622
3623	tx_buffer->m_head = NULL;
3624	tx_buffer->next_eop = -1;
3625
3626	if (++cur == adapter->num_tx_desc)
3627		cur = 0;
3628
3629	txr->tx_avail--;
3630	txr->next_avail_desc = cur;
3631	txr->tx_tso = TRUE;
3632}
3633
3634
3635/**********************************************************************
3636 *
3637 *  Examine each tx_buffer in the used queue. If the hardware is done
3638 *  processing the packet then free associated resources. The
3639 *  tx_buffer is put back on the free queue.
3640 *
3641 **********************************************************************/
3642static bool
3643em_txeof(struct tx_ring *txr)
3644{
3645	struct adapter	*adapter = txr->adapter;
3646        int first, last, done, processed;
3647        struct em_buffer *tx_buffer;
3648        struct e1000_tx_desc   *tx_desc, *eop_desc;
3649	struct ifnet   *ifp = adapter->ifp;
3650
3651	EM_TX_LOCK_ASSERT(txr);
3652
3653	/* No work, make sure watchdog is off */
3654        if (txr->tx_avail == adapter->num_tx_desc) {
3655		txr->queue_status = EM_QUEUE_IDLE;
3656                return (FALSE);
3657	}
3658
3659	processed = 0;
3660        first = txr->next_to_clean;
3661        tx_desc = &txr->tx_base[first];
3662        tx_buffer = &txr->tx_buffers[first];
3663	last = tx_buffer->next_eop;
3664        eop_desc = &txr->tx_base[last];
3665
3666	/*
3667	 * What this does is get the index of the
3668	 * first descriptor AFTER the EOP of the
3669	 * first packet, that way we can do the
3670	 * simple comparison on the inner while loop.
3671	 */
3672	if (++last == adapter->num_tx_desc)
3673 		last = 0;
3674	done = last;
3675
3676        bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3677            BUS_DMASYNC_POSTREAD);
3678
3679        while (eop_desc->upper.fields.status & E1000_TXD_STAT_DD) {
3680		/* We clean the range of the packet */
3681		while (first != done) {
3682                	tx_desc->upper.data = 0;
3683                	tx_desc->lower.data = 0;
3684                	tx_desc->buffer_addr = 0;
3685                	++txr->tx_avail;
3686			++processed;
3687
3688			if (tx_buffer->m_head) {
3689				bus_dmamap_sync(txr->txtag,
3690				    tx_buffer->map,
3691				    BUS_DMASYNC_POSTWRITE);
3692				bus_dmamap_unload(txr->txtag,
3693				    tx_buffer->map);
3694                        	m_freem(tx_buffer->m_head);
3695                        	tx_buffer->m_head = NULL;
3696                	}
3697			tx_buffer->next_eop = -1;
3698			txr->watchdog_time = ticks;
3699
3700	                if (++first == adapter->num_tx_desc)
3701				first = 0;
3702
3703	                tx_buffer = &txr->tx_buffers[first];
3704			tx_desc = &txr->tx_base[first];
3705		}
3706		++ifp->if_opackets;
3707		/* See if we can continue to the next packet */
3708		last = tx_buffer->next_eop;
3709		if (last != -1) {
3710        		eop_desc = &txr->tx_base[last];
3711			/* Get new done point */
3712			if (++last == adapter->num_tx_desc) last = 0;
3713			done = last;
3714		} else
3715			break;
3716        }
3717        bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3718            BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3719
3720        txr->next_to_clean = first;
3721
3722	/*
3723	** Watchdog calculation, we know there's
3724	** work outstanding or the first return
3725	** would have been taken, so none processed
3726	** for too long indicates a hang. local timer
3727	** will examine this and do a reset if needed.
3728	*/
3729	if ((!processed) && ((ticks - txr->watchdog_time) > EM_WATCHDOG))
3730		txr->queue_status = EM_QUEUE_HUNG;
3731
3732        /*
3733         * If we have enough room, clear IFF_DRV_OACTIVE
3734         * to tell the stack that it is OK to send packets.
3735         */
3736        if (txr->tx_avail > EM_TX_CLEANUP_THRESHOLD) {
3737                ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
3738		/* Disable watchdog if all clean */
3739                if (txr->tx_avail == adapter->num_tx_desc) {
3740			txr->queue_status = EM_QUEUE_IDLE;
3741			return (FALSE);
3742		}
3743        }
3744
3745	return (TRUE);
3746}
3747
3748
3749/*********************************************************************
3750 *
3751 *  Refresh RX descriptor mbufs from system mbuf buffer pool.
3752 *
3753 **********************************************************************/
3754static void
3755em_refresh_mbufs(struct rx_ring *rxr, int limit)
3756{
3757	struct adapter		*adapter = rxr->adapter;
3758	struct mbuf		*m;
3759	bus_dma_segment_t	segs[1];
3760	struct em_buffer	*rxbuf;
3761	int			i, error, nsegs, cleaned;
3762
3763	i = rxr->next_to_refresh;
3764	cleaned = -1;
3765	while (i != limit) {
3766		rxbuf = &rxr->rx_buffers[i];
3767		if (rxbuf->m_head == NULL) {
3768			m = m_getjcl(M_DONTWAIT, MT_DATA,
3769			    M_PKTHDR, adapter->rx_mbuf_sz);
3770			/*
3771			** If we have a temporary resource shortage
3772			** that causes a failure, just abort refresh
3773			** for now, we will return to this point when
3774			** reinvoked from em_rxeof.
3775			*/
3776			if (m == NULL)
3777				goto update;
3778		} else
3779			m = rxbuf->m_head;
3780
3781		m->m_len = m->m_pkthdr.len = adapter->rx_mbuf_sz;
3782		m->m_flags |= M_PKTHDR;
3783		m->m_data = m->m_ext.ext_buf;
3784
3785		/* Use bus_dma machinery to setup the memory mapping  */
3786		error = bus_dmamap_load_mbuf_sg(rxr->rxtag, rxbuf->map,
3787		    m, segs, &nsegs, BUS_DMA_NOWAIT);
3788		if (error != 0) {
3789			printf("Refresh mbufs: hdr dmamap load"
3790			    " failure - %d\n", error);
3791			m_free(m);
3792			rxbuf->m_head = NULL;
3793			goto update;
3794		}
3795		rxbuf->m_head = m;
3796		bus_dmamap_sync(rxr->rxtag,
3797		    rxbuf->map, BUS_DMASYNC_PREREAD);
3798		rxr->rx_base[i].buffer_addr = htole64(segs[0].ds_addr);
3799
3800		cleaned = i;
3801		/* Calculate next index */
3802		if (++i == adapter->num_rx_desc)
3803			i = 0;
3804		rxr->next_to_refresh = i;
3805	}
3806update:
3807	/*
3808	** Update the tail pointer only if,
3809	** and as far as we have refreshed.
3810	*/
3811	if (cleaned != -1) /* Update tail index */
3812		E1000_WRITE_REG(&adapter->hw,
3813		    E1000_RDT(rxr->me), cleaned);
3814
3815	return;
3816}
3817
3818
3819/*********************************************************************
3820 *
3821 *  Allocate memory for rx_buffer structures. Since we use one
3822 *  rx_buffer per received packet, the maximum number of rx_buffer's
3823 *  that we'll need is equal to the number of receive descriptors
3824 *  that we've allocated.
3825 *
3826 **********************************************************************/
3827static int
3828em_allocate_receive_buffers(struct rx_ring *rxr)
3829{
3830	struct adapter		*adapter = rxr->adapter;
3831	device_t		dev = adapter->dev;
3832	struct em_buffer	*rxbuf;
3833	int			error;
3834
3835	rxr->rx_buffers = malloc(sizeof(struct em_buffer) *
3836	    adapter->num_rx_desc, M_DEVBUF, M_NOWAIT | M_ZERO);
3837	if (rxr->rx_buffers == NULL) {
3838		device_printf(dev, "Unable to allocate rx_buffer memory\n");
3839		return (ENOMEM);
3840	}
3841
3842	error = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */
3843				1, 0,			/* alignment, bounds */
3844				BUS_SPACE_MAXADDR,	/* lowaddr */
3845				BUS_SPACE_MAXADDR,	/* highaddr */
3846				NULL, NULL,		/* filter, filterarg */
3847				MJUM9BYTES,		/* maxsize */
3848				1,			/* nsegments */
3849				MJUM9BYTES,		/* maxsegsize */
3850				0,			/* flags */
3851				NULL,			/* lockfunc */
3852				NULL,			/* lockarg */
3853				&rxr->rxtag);
3854	if (error) {
3855		device_printf(dev, "%s: bus_dma_tag_create failed %d\n",
3856		    __func__, error);
3857		goto fail;
3858	}
3859
3860	rxbuf = rxr->rx_buffers;
3861	for (int i = 0; i < adapter->num_rx_desc; i++, rxbuf++) {
3862		rxbuf = &rxr->rx_buffers[i];
3863		error = bus_dmamap_create(rxr->rxtag, BUS_DMA_NOWAIT,
3864		    &rxbuf->map);
3865		if (error) {
3866			device_printf(dev, "%s: bus_dmamap_create failed: %d\n",
3867			    __func__, error);
3868			goto fail;
3869		}
3870	}
3871
3872	return (0);
3873
3874fail:
3875	em_free_receive_structures(adapter);
3876	return (error);
3877}
3878
3879
3880/*********************************************************************
3881 *
3882 *  Initialize a receive ring and its buffers.
3883 *
3884 **********************************************************************/
3885static int
3886em_setup_receive_ring(struct rx_ring *rxr)
3887{
3888	struct	adapter 	*adapter = rxr->adapter;
3889	struct em_buffer	*rxbuf;
3890	bus_dma_segment_t	seg[1];
3891	int			rsize, nsegs, error;
3892
3893
3894	/* Clear the ring contents */
3895	EM_RX_LOCK(rxr);
3896	rsize = roundup2(adapter->num_rx_desc *
3897	    sizeof(struct e1000_rx_desc), EM_DBA_ALIGN);
3898	bzero((void *)rxr->rx_base, rsize);
3899
3900	/*
3901	** Free current RX buffer structs and their mbufs
3902	*/
3903	for (int i = 0; i < adapter->num_rx_desc; i++) {
3904		rxbuf = &rxr->rx_buffers[i];
3905		if (rxbuf->m_head != NULL) {
3906			bus_dmamap_sync(rxr->rxtag, rxbuf->map,
3907			    BUS_DMASYNC_POSTREAD);
3908			bus_dmamap_unload(rxr->rxtag, rxbuf->map);
3909			m_freem(rxbuf->m_head);
3910		}
3911	}
3912
3913	/* Now replenish the mbufs */
3914	for (int j = 0; j != adapter->num_rx_desc; ++j) {
3915
3916		rxbuf = &rxr->rx_buffers[j];
3917		rxbuf->m_head = m_getjcl(M_DONTWAIT, MT_DATA,
3918		    M_PKTHDR, adapter->rx_mbuf_sz);
3919		if (rxbuf->m_head == NULL)
3920			return (ENOBUFS);
3921		rxbuf->m_head->m_len = adapter->rx_mbuf_sz;
3922		rxbuf->m_head->m_flags &= ~M_HASFCS; /* we strip it */
3923		rxbuf->m_head->m_pkthdr.len = adapter->rx_mbuf_sz;
3924
3925		/* Get the memory mapping */
3926		error = bus_dmamap_load_mbuf_sg(rxr->rxtag,
3927		    rxbuf->map, rxbuf->m_head, seg,
3928		    &nsegs, BUS_DMA_NOWAIT);
3929		if (error != 0) {
3930			m_freem(rxbuf->m_head);
3931			rxbuf->m_head = NULL;
3932			return (error);
3933		}
3934		bus_dmamap_sync(rxr->rxtag,
3935		    rxbuf->map, BUS_DMASYNC_PREREAD);
3936
3937		/* Update descriptor */
3938		rxr->rx_base[j].buffer_addr = htole64(seg[0].ds_addr);
3939	}
3940
3941
3942	/* Setup our descriptor indices */
3943	rxr->next_to_check = 0;
3944	rxr->next_to_refresh = 0;
3945
3946	bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
3947	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3948
3949	EM_RX_UNLOCK(rxr);
3950	return (0);
3951}
3952
3953/*********************************************************************
3954 *
3955 *  Initialize all receive rings.
3956 *
3957 **********************************************************************/
3958static int
3959em_setup_receive_structures(struct adapter *adapter)
3960{
3961	struct rx_ring *rxr = adapter->rx_rings;
3962	int j;
3963
3964	for (j = 0; j < adapter->num_queues; j++, rxr++)
3965		if (em_setup_receive_ring(rxr))
3966			goto fail;
3967
3968	return (0);
3969fail:
3970	/*
3971	 * Free RX buffers allocated so far, we will only handle
3972	 * the rings that completed, the failing case will have
3973	 * cleaned up for itself. 'j' failed, so its the terminus.
3974	 */
3975	for (int i = 0; i < j; ++i) {
3976		rxr = &adapter->rx_rings[i];
3977		for (int n = 0; n < adapter->num_rx_desc; n++) {
3978			struct em_buffer *rxbuf;
3979			rxbuf = &rxr->rx_buffers[n];
3980			if (rxbuf->m_head != NULL) {
3981				bus_dmamap_sync(rxr->rxtag, rxbuf->map,
3982			  	  BUS_DMASYNC_POSTREAD);
3983				bus_dmamap_unload(rxr->rxtag, rxbuf->map);
3984				m_freem(rxbuf->m_head);
3985				rxbuf->m_head = NULL;
3986			}
3987		}
3988	}
3989
3990	return (ENOBUFS);
3991}
3992
3993/*********************************************************************
3994 *
3995 *  Free all receive rings.
3996 *
3997 **********************************************************************/
3998static void
3999em_free_receive_structures(struct adapter *adapter)
4000{
4001	struct rx_ring *rxr = adapter->rx_rings;
4002
4003	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4004		em_free_receive_buffers(rxr);
4005		/* Free the ring memory as well */
4006		em_dma_free(adapter, &rxr->rxdma);
4007		EM_RX_LOCK_DESTROY(rxr);
4008	}
4009
4010	free(adapter->rx_rings, M_DEVBUF);
4011}
4012
4013
4014/*********************************************************************
4015 *
4016 *  Free receive ring data structures
4017 *
4018 **********************************************************************/
4019static void
4020em_free_receive_buffers(struct rx_ring *rxr)
4021{
4022	struct adapter		*adapter = rxr->adapter;
4023	struct em_buffer	*rxbuf = NULL;
4024
4025	INIT_DEBUGOUT("free_receive_buffers: begin");
4026
4027	if (rxr->rx_buffers != NULL) {
4028		for (int i = 0; i < adapter->num_rx_desc; i++) {
4029			rxbuf = &rxr->rx_buffers[i];
4030			if (rxbuf->map != NULL) {
4031				bus_dmamap_sync(rxr->rxtag, rxbuf->map,
4032				    BUS_DMASYNC_POSTREAD);
4033				bus_dmamap_unload(rxr->rxtag, rxbuf->map);
4034				bus_dmamap_destroy(rxr->rxtag, rxbuf->map);
4035			}
4036			if (rxbuf->m_head != NULL) {
4037				m_freem(rxbuf->m_head);
4038				rxbuf->m_head = NULL;
4039			}
4040		}
4041		free(rxr->rx_buffers, M_DEVBUF);
4042		rxr->rx_buffers = NULL;
4043	}
4044
4045	if (rxr->rxtag != NULL) {
4046		bus_dma_tag_destroy(rxr->rxtag);
4047		rxr->rxtag = NULL;
4048	}
4049
4050	return;
4051}
4052
4053
4054/*********************************************************************
4055 *
4056 *  Enable receive unit.
4057 *
4058 **********************************************************************/
4059#define MAX_INTS_PER_SEC	8000
4060#define DEFAULT_ITR	     1000000000/(MAX_INTS_PER_SEC * 256)
4061
4062static void
4063em_initialize_receive_unit(struct adapter *adapter)
4064{
4065	struct rx_ring	*rxr = adapter->rx_rings;
4066	struct ifnet	*ifp = adapter->ifp;
4067	struct e1000_hw	*hw = &adapter->hw;
4068	u64	bus_addr;
4069	u32	rctl, rxcsum;
4070
4071	INIT_DEBUGOUT("em_initialize_receive_units: begin");
4072
4073	/*
4074	 * Make sure receives are disabled while setting
4075	 * up the descriptor ring
4076	 */
4077	rctl = E1000_READ_REG(hw, E1000_RCTL);
4078	E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
4079
4080	E1000_WRITE_REG(&adapter->hw, E1000_RADV,
4081	    adapter->rx_abs_int_delay.value);
4082	/*
4083	 * Set the interrupt throttling rate. Value is calculated
4084	 * as DEFAULT_ITR = 1/(MAX_INTS_PER_SEC * 256ns)
4085	 */
4086	E1000_WRITE_REG(hw, E1000_ITR, DEFAULT_ITR);
4087
4088	/*
4089	** When using MSIX interrupts we need to throttle
4090	** using the EITR register (82574 only)
4091	*/
4092	if (hw->mac.type == e1000_82574)
4093		for (int i = 0; i < 4; i++)
4094			E1000_WRITE_REG(hw, E1000_EITR_82574(i),
4095			    DEFAULT_ITR);
4096
4097	/* Disable accelerated ackknowledge */
4098	if (adapter->hw.mac.type == e1000_82574)
4099		E1000_WRITE_REG(hw, E1000_RFCTL, E1000_RFCTL_ACK_DIS);
4100
4101	if (ifp->if_capenable & IFCAP_RXCSUM) {
4102		rxcsum = E1000_READ_REG(hw, E1000_RXCSUM);
4103		rxcsum |= (E1000_RXCSUM_IPOFL | E1000_RXCSUM_TUOFL);
4104		E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum);
4105	}
4106
4107	/*
4108	** XXX TEMPORARY WORKAROUND: on some systems with 82573
4109	** long latencies are observed, like Lenovo X60. This
4110	** change eliminates the problem, but since having positive
4111	** values in RDTR is a known source of problems on other
4112	** platforms another solution is being sought.
4113	*/
4114	if (hw->mac.type == e1000_82573)
4115		E1000_WRITE_REG(hw, E1000_RDTR, 0x20);
4116
4117	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4118		/* Setup the Base and Length of the Rx Descriptor Ring */
4119		bus_addr = rxr->rxdma.dma_paddr;
4120		E1000_WRITE_REG(hw, E1000_RDLEN(i),
4121		    adapter->num_rx_desc * sizeof(struct e1000_rx_desc));
4122		E1000_WRITE_REG(hw, E1000_RDBAH(i), (u32)(bus_addr >> 32));
4123		E1000_WRITE_REG(hw, E1000_RDBAL(i), (u32)bus_addr);
4124		/* Setup the Head and Tail Descriptor Pointers */
4125		E1000_WRITE_REG(hw, E1000_RDH(i), 0);
4126		E1000_WRITE_REG(hw, E1000_RDT(i), adapter->num_rx_desc - 1);
4127	}
4128
4129	/* Set early receive threshold on appropriate hw */
4130	if (((adapter->hw.mac.type == e1000_ich9lan) ||
4131	    (adapter->hw.mac.type == e1000_pch2lan) ||
4132	    (adapter->hw.mac.type == e1000_ich10lan)) &&
4133	    (ifp->if_mtu > ETHERMTU)) {
4134		u32 rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(0));
4135		E1000_WRITE_REG(hw, E1000_RXDCTL(0), rxdctl | 3);
4136		E1000_WRITE_REG(hw, E1000_ERT, 0x100 | (1 << 13));
4137	}
4138
4139	if (adapter->hw.mac.type == e1000_pch2lan) {
4140		if (ifp->if_mtu > ETHERMTU)
4141			e1000_lv_jumbo_workaround_ich8lan(hw, TRUE);
4142		else
4143			e1000_lv_jumbo_workaround_ich8lan(hw, FALSE);
4144	}
4145
4146	/* Setup the Receive Control Register */
4147	rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
4148	rctl |= E1000_RCTL_EN | E1000_RCTL_BAM |
4149	    E1000_RCTL_LBM_NO | E1000_RCTL_RDMTS_HALF |
4150	    (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
4151
4152        /* Strip the CRC */
4153        rctl |= E1000_RCTL_SECRC;
4154
4155        /* Make sure VLAN Filters are off */
4156        rctl &= ~E1000_RCTL_VFE;
4157	rctl &= ~E1000_RCTL_SBP;
4158
4159	if (adapter->rx_mbuf_sz == MCLBYTES)
4160		rctl |= E1000_RCTL_SZ_2048;
4161	else if (adapter->rx_mbuf_sz == MJUMPAGESIZE)
4162		rctl |= E1000_RCTL_SZ_4096 | E1000_RCTL_BSEX;
4163	else if (adapter->rx_mbuf_sz > MJUMPAGESIZE)
4164		rctl |= E1000_RCTL_SZ_8192 | E1000_RCTL_BSEX;
4165
4166	if (ifp->if_mtu > ETHERMTU)
4167		rctl |= E1000_RCTL_LPE;
4168	else
4169		rctl &= ~E1000_RCTL_LPE;
4170
4171	/* Write out the settings */
4172	E1000_WRITE_REG(hw, E1000_RCTL, rctl);
4173
4174	return;
4175}
4176
4177
4178/*********************************************************************
4179 *
4180 *  This routine executes in interrupt context. It replenishes
4181 *  the mbufs in the descriptor and sends data which has been
4182 *  dma'ed into host memory to upper layer.
4183 *
4184 *  We loop at most count times if count is > 0, or until done if
4185 *  count < 0.
4186 *
4187 *  For polling we also now return the number of cleaned packets
4188 *********************************************************************/
4189static bool
4190em_rxeof(struct rx_ring *rxr, int count, int *done)
4191{
4192	struct adapter		*adapter = rxr->adapter;
4193	struct ifnet		*ifp = adapter->ifp;
4194	struct mbuf		*mp, *sendmp;
4195	u8			status = 0;
4196	u16 			len;
4197	int			i, processed, rxdone = 0;
4198	bool			eop;
4199	struct e1000_rx_desc	*cur;
4200
4201	EM_RX_LOCK(rxr);
4202
4203	for (i = rxr->next_to_check, processed = 0; count != 0;) {
4204
4205		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
4206			break;
4207
4208		bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4209		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
4210
4211		cur = &rxr->rx_base[i];
4212		status = cur->status;
4213		mp = sendmp = NULL;
4214
4215		if ((status & E1000_RXD_STAT_DD) == 0)
4216			break;
4217
4218		len = le16toh(cur->length);
4219		eop = (status & E1000_RXD_STAT_EOP) != 0;
4220
4221		if ((cur->errors & E1000_RXD_ERR_FRAME_ERR_MASK) ||
4222		    (rxr->discard == TRUE)) {
4223			ifp->if_ierrors++;
4224			++rxr->rx_discarded;
4225			if (!eop) /* Catch subsequent segs */
4226				rxr->discard = TRUE;
4227			else
4228				rxr->discard = FALSE;
4229			em_rx_discard(rxr, i);
4230			goto next_desc;
4231		}
4232
4233		/* Assign correct length to the current fragment */
4234		mp = rxr->rx_buffers[i].m_head;
4235		mp->m_len = len;
4236
4237		/* Trigger for refresh */
4238		rxr->rx_buffers[i].m_head = NULL;
4239
4240		/* First segment? */
4241		if (rxr->fmp == NULL) {
4242			mp->m_pkthdr.len = len;
4243			rxr->fmp = rxr->lmp = mp;
4244		} else {
4245			/* Chain mbuf's together */
4246			mp->m_flags &= ~M_PKTHDR;
4247			rxr->lmp->m_next = mp;
4248			rxr->lmp = mp;
4249			rxr->fmp->m_pkthdr.len += len;
4250		}
4251
4252		if (eop) {
4253			--count;
4254			sendmp = rxr->fmp;
4255			sendmp->m_pkthdr.rcvif = ifp;
4256			ifp->if_ipackets++;
4257			em_receive_checksum(cur, sendmp);
4258#ifndef __NO_STRICT_ALIGNMENT
4259			if (adapter->max_frame_size >
4260			    (MCLBYTES - ETHER_ALIGN) &&
4261			    em_fixup_rx(rxr) != 0)
4262				goto skip;
4263#endif
4264			if (status & E1000_RXD_STAT_VP) {
4265				sendmp->m_pkthdr.ether_vtag =
4266				    (le16toh(cur->special) &
4267				    E1000_RXD_SPC_VLAN_MASK);
4268				sendmp->m_flags |= M_VLANTAG;
4269			}
4270#ifdef EM_MULTIQUEUE
4271			sendmp->m_pkthdr.flowid = rxr->msix;
4272			sendmp->m_flags |= M_FLOWID;
4273#endif
4274#ifndef __NO_STRICT_ALIGNMENT
4275skip:
4276#endif
4277			rxr->fmp = rxr->lmp = NULL;
4278		}
4279next_desc:
4280		/* Zero out the receive descriptors status. */
4281		cur->status = 0;
4282		++rxdone;	/* cumulative for POLL */
4283		++processed;
4284
4285		/* Advance our pointers to the next descriptor. */
4286		if (++i == adapter->num_rx_desc)
4287			i = 0;
4288
4289		/* Send to the stack */
4290		if (sendmp != NULL) {
4291			rxr->next_to_check = i;
4292			EM_RX_UNLOCK(rxr);
4293			(*ifp->if_input)(ifp, sendmp);
4294			EM_RX_LOCK(rxr);
4295			i = rxr->next_to_check;
4296		}
4297
4298		/* Only refresh mbufs every 8 descriptors */
4299		if (processed == 8) {
4300			em_refresh_mbufs(rxr, i);
4301			processed = 0;
4302		}
4303	}
4304
4305	/* Catch any remaining refresh work */
4306	em_refresh_mbufs(rxr, i);
4307
4308	rxr->next_to_check = i;
4309	if (done != NULL)
4310		*done = rxdone;
4311	EM_RX_UNLOCK(rxr);
4312
4313	return ((status & E1000_RXD_STAT_DD) ? TRUE : FALSE);
4314}
4315
4316static __inline void
4317em_rx_discard(struct rx_ring *rxr, int i)
4318{
4319	struct em_buffer	*rbuf;
4320
4321	rbuf = &rxr->rx_buffers[i];
4322	/* Free any previous pieces */
4323	if (rxr->fmp != NULL) {
4324		rxr->fmp->m_flags |= M_PKTHDR;
4325		m_freem(rxr->fmp);
4326		rxr->fmp = NULL;
4327		rxr->lmp = NULL;
4328	}
4329	/*
4330	** Free buffer and allow em_refresh_mbufs()
4331	** to clean up and recharge buffer.
4332	*/
4333	if (rbuf->m_head) {
4334		m_free(rbuf->m_head);
4335		rbuf->m_head = NULL;
4336	}
4337	return;
4338}
4339
4340#ifndef __NO_STRICT_ALIGNMENT
4341/*
4342 * When jumbo frames are enabled we should realign entire payload on
4343 * architecures with strict alignment. This is serious design mistake of 8254x
4344 * as it nullifies DMA operations. 8254x just allows RX buffer size to be
4345 * 2048/4096/8192/16384. What we really want is 2048 - ETHER_ALIGN to align its
4346 * payload. On architecures without strict alignment restrictions 8254x still
4347 * performs unaligned memory access which would reduce the performance too.
4348 * To avoid copying over an entire frame to align, we allocate a new mbuf and
4349 * copy ethernet header to the new mbuf. The new mbuf is prepended into the
4350 * existing mbuf chain.
4351 *
4352 * Be aware, best performance of the 8254x is achived only when jumbo frame is
4353 * not used at all on architectures with strict alignment.
4354 */
4355static int
4356em_fixup_rx(struct rx_ring *rxr)
4357{
4358	struct adapter *adapter = rxr->adapter;
4359	struct mbuf *m, *n;
4360	int error;
4361
4362	error = 0;
4363	m = rxr->fmp;
4364	if (m->m_len <= (MCLBYTES - ETHER_HDR_LEN)) {
4365		bcopy(m->m_data, m->m_data + ETHER_HDR_LEN, m->m_len);
4366		m->m_data += ETHER_HDR_LEN;
4367	} else {
4368		MGETHDR(n, M_DONTWAIT, MT_DATA);
4369		if (n != NULL) {
4370			bcopy(m->m_data, n->m_data, ETHER_HDR_LEN);
4371			m->m_data += ETHER_HDR_LEN;
4372			m->m_len -= ETHER_HDR_LEN;
4373			n->m_len = ETHER_HDR_LEN;
4374			M_MOVE_PKTHDR(n, m);
4375			n->m_next = m;
4376			rxr->fmp = n;
4377		} else {
4378			adapter->dropped_pkts++;
4379			m_freem(rxr->fmp);
4380			rxr->fmp = NULL;
4381			error = ENOMEM;
4382		}
4383	}
4384
4385	return (error);
4386}
4387#endif
4388
4389/*********************************************************************
4390 *
4391 *  Verify that the hardware indicated that the checksum is valid.
4392 *  Inform the stack about the status of checksum so that stack
4393 *  doesn't spend time verifying the checksum.
4394 *
4395 *********************************************************************/
4396static void
4397em_receive_checksum(struct e1000_rx_desc *rx_desc, struct mbuf *mp)
4398{
4399	/* Ignore Checksum bit is set */
4400	if (rx_desc->status & E1000_RXD_STAT_IXSM) {
4401		mp->m_pkthdr.csum_flags = 0;
4402		return;
4403	}
4404
4405	if (rx_desc->status & E1000_RXD_STAT_IPCS) {
4406		/* Did it pass? */
4407		if (!(rx_desc->errors & E1000_RXD_ERR_IPE)) {
4408			/* IP Checksum Good */
4409			mp->m_pkthdr.csum_flags = CSUM_IP_CHECKED;
4410			mp->m_pkthdr.csum_flags |= CSUM_IP_VALID;
4411
4412		} else {
4413			mp->m_pkthdr.csum_flags = 0;
4414		}
4415	}
4416
4417	if (rx_desc->status & E1000_RXD_STAT_TCPCS) {
4418		/* Did it pass? */
4419		if (!(rx_desc->errors & E1000_RXD_ERR_TCPE)) {
4420			mp->m_pkthdr.csum_flags |=
4421			(CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
4422			mp->m_pkthdr.csum_data = htons(0xffff);
4423		}
4424	}
4425}
4426
4427/*
4428 * This routine is run via an vlan
4429 * config EVENT
4430 */
4431static void
4432em_register_vlan(void *arg, struct ifnet *ifp, u16 vtag)
4433{
4434	struct adapter	*adapter = ifp->if_softc;
4435	u32		index, bit;
4436
4437	if (ifp->if_softc !=  arg)   /* Not our event */
4438		return;
4439
4440	if ((vtag == 0) || (vtag > 4095))       /* Invalid ID */
4441                return;
4442
4443	EM_CORE_LOCK(adapter);
4444	index = (vtag >> 5) & 0x7F;
4445	bit = vtag & 0x1F;
4446	adapter->shadow_vfta[index] |= (1 << bit);
4447	++adapter->num_vlans;
4448	/* Re-init to load the changes */
4449	if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
4450		em_init_locked(adapter);
4451	EM_CORE_UNLOCK(adapter);
4452}
4453
4454/*
4455 * This routine is run via an vlan
4456 * unconfig EVENT
4457 */
4458static void
4459em_unregister_vlan(void *arg, struct ifnet *ifp, u16 vtag)
4460{
4461	struct adapter	*adapter = ifp->if_softc;
4462	u32		index, bit;
4463
4464	if (ifp->if_softc !=  arg)
4465		return;
4466
4467	if ((vtag == 0) || (vtag > 4095))       /* Invalid */
4468                return;
4469
4470	EM_CORE_LOCK(adapter);
4471	index = (vtag >> 5) & 0x7F;
4472	bit = vtag & 0x1F;
4473	adapter->shadow_vfta[index] &= ~(1 << bit);
4474	--adapter->num_vlans;
4475	/* Re-init to load the changes */
4476	if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
4477		em_init_locked(adapter);
4478	EM_CORE_UNLOCK(adapter);
4479}
4480
4481static void
4482em_setup_vlan_hw_support(struct adapter *adapter)
4483{
4484	struct e1000_hw *hw = &adapter->hw;
4485	u32             reg;
4486
4487	/*
4488	** We get here thru init_locked, meaning
4489	** a soft reset, this has already cleared
4490	** the VFTA and other state, so if there
4491	** have been no vlan's registered do nothing.
4492	*/
4493	if (adapter->num_vlans == 0)
4494                return;
4495
4496	/*
4497	** A soft reset zero's out the VFTA, so
4498	** we need to repopulate it now.
4499	*/
4500	for (int i = 0; i < EM_VFTA_SIZE; i++)
4501                if (adapter->shadow_vfta[i] != 0)
4502			E1000_WRITE_REG_ARRAY(hw, E1000_VFTA,
4503                            i, adapter->shadow_vfta[i]);
4504
4505	reg = E1000_READ_REG(hw, E1000_CTRL);
4506	reg |= E1000_CTRL_VME;
4507	E1000_WRITE_REG(hw, E1000_CTRL, reg);
4508
4509	/* Enable the Filter Table */
4510	reg = E1000_READ_REG(hw, E1000_RCTL);
4511	reg &= ~E1000_RCTL_CFIEN;
4512	reg |= E1000_RCTL_VFE;
4513	E1000_WRITE_REG(hw, E1000_RCTL, reg);
4514}
4515
4516static void
4517em_enable_intr(struct adapter *adapter)
4518{
4519	struct e1000_hw *hw = &adapter->hw;
4520	u32 ims_mask = IMS_ENABLE_MASK;
4521
4522	if (hw->mac.type == e1000_82574) {
4523		E1000_WRITE_REG(hw, EM_EIAC, EM_MSIX_MASK);
4524		ims_mask |= EM_MSIX_MASK;
4525	}
4526	E1000_WRITE_REG(hw, E1000_IMS, ims_mask);
4527}
4528
4529static void
4530em_disable_intr(struct adapter *adapter)
4531{
4532	struct e1000_hw *hw = &adapter->hw;
4533
4534	if (hw->mac.type == e1000_82574)
4535		E1000_WRITE_REG(hw, EM_EIAC, 0);
4536	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
4537}
4538
4539/*
4540 * Bit of a misnomer, what this really means is
4541 * to enable OS management of the system... aka
4542 * to disable special hardware management features
4543 */
4544static void
4545em_init_manageability(struct adapter *adapter)
4546{
4547	/* A shared code workaround */
4548#define E1000_82542_MANC2H E1000_MANC2H
4549	if (adapter->has_manage) {
4550		int manc2h = E1000_READ_REG(&adapter->hw, E1000_MANC2H);
4551		int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
4552
4553		/* disable hardware interception of ARP */
4554		manc &= ~(E1000_MANC_ARP_EN);
4555
4556                /* enable receiving management packets to the host */
4557		manc |= E1000_MANC_EN_MNG2HOST;
4558#define E1000_MNG2HOST_PORT_623 (1 << 5)
4559#define E1000_MNG2HOST_PORT_664 (1 << 6)
4560		manc2h |= E1000_MNG2HOST_PORT_623;
4561		manc2h |= E1000_MNG2HOST_PORT_664;
4562		E1000_WRITE_REG(&adapter->hw, E1000_MANC2H, manc2h);
4563		E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
4564	}
4565}
4566
4567/*
4568 * Give control back to hardware management
4569 * controller if there is one.
4570 */
4571static void
4572em_release_manageability(struct adapter *adapter)
4573{
4574	if (adapter->has_manage) {
4575		int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
4576
4577		/* re-enable hardware interception of ARP */
4578		manc |= E1000_MANC_ARP_EN;
4579		manc &= ~E1000_MANC_EN_MNG2HOST;
4580
4581		E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
4582	}
4583}
4584
4585/*
4586 * em_get_hw_control sets the {CTRL_EXT|FWSM}:DRV_LOAD bit.
4587 * For ASF and Pass Through versions of f/w this means
4588 * that the driver is loaded. For AMT version type f/w
4589 * this means that the network i/f is open.
4590 */
4591static void
4592em_get_hw_control(struct adapter *adapter)
4593{
4594	u32 ctrl_ext, swsm;
4595
4596	if (adapter->hw.mac.type == e1000_82573) {
4597		swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM);
4598		E1000_WRITE_REG(&adapter->hw, E1000_SWSM,
4599		    swsm | E1000_SWSM_DRV_LOAD);
4600		return;
4601	}
4602	/* else */
4603	ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4604	E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
4605	    ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
4606	return;
4607}
4608
4609/*
4610 * em_release_hw_control resets {CTRL_EXT|FWSM}:DRV_LOAD bit.
4611 * For ASF and Pass Through versions of f/w this means that
4612 * the driver is no longer loaded. For AMT versions of the
4613 * f/w this means that the network i/f is closed.
4614 */
4615static void
4616em_release_hw_control(struct adapter *adapter)
4617{
4618	u32 ctrl_ext, swsm;
4619
4620	if (!adapter->has_manage)
4621		return;
4622
4623	if (adapter->hw.mac.type == e1000_82573) {
4624		swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM);
4625		E1000_WRITE_REG(&adapter->hw, E1000_SWSM,
4626		    swsm & ~E1000_SWSM_DRV_LOAD);
4627		return;
4628	}
4629	/* else */
4630	ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4631	E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
4632	    ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
4633	return;
4634}
4635
4636static int
4637em_is_valid_ether_addr(u8 *addr)
4638{
4639	char zero_addr[6] = { 0, 0, 0, 0, 0, 0 };
4640
4641	if ((addr[0] & 1) || (!bcmp(addr, zero_addr, ETHER_ADDR_LEN))) {
4642		return (FALSE);
4643	}
4644
4645	return (TRUE);
4646}
4647
4648/*
4649** Parse the interface capabilities with regard
4650** to both system management and wake-on-lan for
4651** later use.
4652*/
4653static void
4654em_get_wakeup(device_t dev)
4655{
4656	struct adapter	*adapter = device_get_softc(dev);
4657	u16		eeprom_data = 0, device_id, apme_mask;
4658
4659	adapter->has_manage = e1000_enable_mng_pass_thru(&adapter->hw);
4660	apme_mask = EM_EEPROM_APME;
4661
4662	switch (adapter->hw.mac.type) {
4663	case e1000_82573:
4664	case e1000_82583:
4665		adapter->has_amt = TRUE;
4666		/* Falls thru */
4667	case e1000_82571:
4668	case e1000_82572:
4669	case e1000_80003es2lan:
4670		if (adapter->hw.bus.func == 1) {
4671			e1000_read_nvm(&adapter->hw,
4672			    NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data);
4673			break;
4674		} else
4675			e1000_read_nvm(&adapter->hw,
4676			    NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
4677		break;
4678	case e1000_ich8lan:
4679	case e1000_ich9lan:
4680	case e1000_ich10lan:
4681	case e1000_pchlan:
4682	case e1000_pch2lan:
4683		apme_mask = E1000_WUC_APME;
4684		adapter->has_amt = TRUE;
4685		eeprom_data = E1000_READ_REG(&adapter->hw, E1000_WUC);
4686		break;
4687	default:
4688		e1000_read_nvm(&adapter->hw,
4689		    NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
4690		break;
4691	}
4692	if (eeprom_data & apme_mask)
4693		adapter->wol = (E1000_WUFC_MAG | E1000_WUFC_MC);
4694	/*
4695         * We have the eeprom settings, now apply the special cases
4696         * where the eeprom may be wrong or the board won't support
4697         * wake on lan on a particular port
4698	 */
4699	device_id = pci_get_device(dev);
4700        switch (device_id) {
4701	case E1000_DEV_ID_82571EB_FIBER:
4702		/* Wake events only supported on port A for dual fiber
4703		 * regardless of eeprom setting */
4704		if (E1000_READ_REG(&adapter->hw, E1000_STATUS) &
4705		    E1000_STATUS_FUNC_1)
4706			adapter->wol = 0;
4707		break;
4708	case E1000_DEV_ID_82571EB_QUAD_COPPER:
4709	case E1000_DEV_ID_82571EB_QUAD_FIBER:
4710	case E1000_DEV_ID_82571EB_QUAD_COPPER_LP:
4711                /* if quad port adapter, disable WoL on all but port A */
4712		if (global_quad_port_a != 0)
4713			adapter->wol = 0;
4714		/* Reset for multiple quad port adapters */
4715		if (++global_quad_port_a == 4)
4716			global_quad_port_a = 0;
4717                break;
4718	}
4719	return;
4720}
4721
4722
4723/*
4724 * Enable PCI Wake On Lan capability
4725 */
4726static void
4727em_enable_wakeup(device_t dev)
4728{
4729	struct adapter	*adapter = device_get_softc(dev);
4730	struct ifnet	*ifp = adapter->ifp;
4731	u32		pmc, ctrl, ctrl_ext, rctl;
4732	u16     	status;
4733
4734	if ((pci_find_extcap(dev, PCIY_PMG, &pmc) != 0))
4735		return;
4736
4737	/* Advertise the wakeup capability */
4738	ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
4739	ctrl |= (E1000_CTRL_SWDPIN2 | E1000_CTRL_SWDPIN3);
4740	E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
4741	E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
4742
4743	if ((adapter->hw.mac.type == e1000_ich8lan) ||
4744	    (adapter->hw.mac.type == e1000_pchlan) ||
4745	    (adapter->hw.mac.type == e1000_ich9lan) ||
4746	    (adapter->hw.mac.type == e1000_ich10lan)) {
4747		e1000_disable_gig_wol_ich8lan(&adapter->hw);
4748		e1000_hv_phy_powerdown_workaround_ich8lan(&adapter->hw);
4749	}
4750
4751	/* Keep the laser running on Fiber adapters */
4752	if (adapter->hw.phy.media_type == e1000_media_type_fiber ||
4753	    adapter->hw.phy.media_type == e1000_media_type_internal_serdes) {
4754		ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4755		ctrl_ext |= E1000_CTRL_EXT_SDP3_DATA;
4756		E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, ctrl_ext);
4757	}
4758
4759	/*
4760	** Determine type of Wakeup: note that wol
4761	** is set with all bits on by default.
4762	*/
4763	if ((ifp->if_capenable & IFCAP_WOL_MAGIC) == 0)
4764		adapter->wol &= ~E1000_WUFC_MAG;
4765
4766	if ((ifp->if_capenable & IFCAP_WOL_MCAST) == 0)
4767		adapter->wol &= ~E1000_WUFC_MC;
4768	else {
4769		rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
4770		rctl |= E1000_RCTL_MPE;
4771		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, rctl);
4772	}
4773
4774	if ((adapter->hw.mac.type == e1000_pchlan) ||
4775	    (adapter->hw.mac.type == e1000_pch2lan)) {
4776		if (em_enable_phy_wakeup(adapter))
4777			return;
4778	} else {
4779		E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
4780		E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
4781	}
4782
4783	if (adapter->hw.phy.type == e1000_phy_igp_3)
4784		e1000_igp3_phy_powerdown_workaround_ich8lan(&adapter->hw);
4785
4786        /* Request PME */
4787        status = pci_read_config(dev, pmc + PCIR_POWER_STATUS, 2);
4788	status &= ~(PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE);
4789	if (ifp->if_capenable & IFCAP_WOL)
4790		status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE;
4791        pci_write_config(dev, pmc + PCIR_POWER_STATUS, status, 2);
4792
4793	return;
4794}
4795
4796/*
4797** WOL in the newer chipset interfaces (pchlan)
4798** require thing to be copied into the phy
4799*/
4800static int
4801em_enable_phy_wakeup(struct adapter *adapter)
4802{
4803	struct e1000_hw *hw = &adapter->hw;
4804	u32 mreg, ret = 0;
4805	u16 preg;
4806
4807	/* copy MAC RARs to PHY RARs */
4808	e1000_copy_rx_addrs_to_phy_ich8lan(hw);
4809
4810	/* copy MAC MTA to PHY MTA */
4811	for (int i = 0; i < adapter->hw.mac.mta_reg_count; i++) {
4812		mreg = E1000_READ_REG_ARRAY(hw, E1000_MTA, i);
4813		e1000_write_phy_reg(hw, BM_MTA(i), (u16)(mreg & 0xFFFF));
4814		e1000_write_phy_reg(hw, BM_MTA(i) + 1,
4815		    (u16)((mreg >> 16) & 0xFFFF));
4816	}
4817
4818	/* configure PHY Rx Control register */
4819	e1000_read_phy_reg(&adapter->hw, BM_RCTL, &preg);
4820	mreg = E1000_READ_REG(hw, E1000_RCTL);
4821	if (mreg & E1000_RCTL_UPE)
4822		preg |= BM_RCTL_UPE;
4823	if (mreg & E1000_RCTL_MPE)
4824		preg |= BM_RCTL_MPE;
4825	preg &= ~(BM_RCTL_MO_MASK);
4826	if (mreg & E1000_RCTL_MO_3)
4827		preg |= (((mreg & E1000_RCTL_MO_3) >> E1000_RCTL_MO_SHIFT)
4828				<< BM_RCTL_MO_SHIFT);
4829	if (mreg & E1000_RCTL_BAM)
4830		preg |= BM_RCTL_BAM;
4831	if (mreg & E1000_RCTL_PMCF)
4832		preg |= BM_RCTL_PMCF;
4833	mreg = E1000_READ_REG(hw, E1000_CTRL);
4834	if (mreg & E1000_CTRL_RFCE)
4835		preg |= BM_RCTL_RFCE;
4836	e1000_write_phy_reg(&adapter->hw, BM_RCTL, preg);
4837
4838	/* enable PHY wakeup in MAC register */
4839	E1000_WRITE_REG(hw, E1000_WUC,
4840	    E1000_WUC_PHY_WAKE | E1000_WUC_PME_EN);
4841	E1000_WRITE_REG(hw, E1000_WUFC, adapter->wol);
4842
4843	/* configure and enable PHY wakeup in PHY registers */
4844	e1000_write_phy_reg(&adapter->hw, BM_WUFC, adapter->wol);
4845	e1000_write_phy_reg(&adapter->hw, BM_WUC, E1000_WUC_PME_EN);
4846
4847	/* activate PHY wakeup */
4848	ret = hw->phy.ops.acquire(hw);
4849	if (ret) {
4850		printf("Could not acquire PHY\n");
4851		return ret;
4852	}
4853	e1000_write_phy_reg_mdic(hw, IGP01E1000_PHY_PAGE_SELECT,
4854	                         (BM_WUC_ENABLE_PAGE << IGP_PAGE_SHIFT));
4855	ret = e1000_read_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, &preg);
4856	if (ret) {
4857		printf("Could not read PHY page 769\n");
4858		goto out;
4859	}
4860	preg |= BM_WUC_ENABLE_BIT | BM_WUC_HOST_WU_BIT;
4861	ret = e1000_write_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, preg);
4862	if (ret)
4863		printf("Could not set PHY Host Wakeup bit\n");
4864out:
4865	hw->phy.ops.release(hw);
4866
4867	return ret;
4868}
4869
4870static void
4871em_led_func(void *arg, int onoff)
4872{
4873	struct adapter	*adapter = arg;
4874
4875	EM_CORE_LOCK(adapter);
4876	if (onoff) {
4877		e1000_setup_led(&adapter->hw);
4878		e1000_led_on(&adapter->hw);
4879	} else {
4880		e1000_led_off(&adapter->hw);
4881		e1000_cleanup_led(&adapter->hw);
4882	}
4883	EM_CORE_UNLOCK(adapter);
4884}
4885
4886/*
4887** Disable the L0S and L1 LINK states
4888*/
4889static void
4890em_disable_aspm(struct adapter *adapter)
4891{
4892	int		base, reg;
4893	u16		link_cap,link_ctrl;
4894	device_t	dev = adapter->dev;
4895
4896	switch (adapter->hw.mac.type) {
4897		case e1000_82573:
4898		case e1000_82574:
4899		case e1000_82583:
4900			break;
4901		default:
4902			return;
4903	}
4904	if (pci_find_extcap(dev, PCIY_EXPRESS, &base) != 0)
4905		return;
4906	reg = base + PCIR_EXPRESS_LINK_CAP;
4907	link_cap = pci_read_config(dev, reg, 2);
4908	if ((link_cap & PCIM_LINK_CAP_ASPM) == 0)
4909		return;
4910	reg = base + PCIR_EXPRESS_LINK_CTL;
4911	link_ctrl = pci_read_config(dev, reg, 2);
4912	link_ctrl &= 0xFFFC; /* turn off bit 1 and 2 */
4913	pci_write_config(dev, reg, link_ctrl, 2);
4914	return;
4915}
4916
4917/**********************************************************************
4918 *
4919 *  Update the board statistics counters.
4920 *
4921 **********************************************************************/
4922static void
4923em_update_stats_counters(struct adapter *adapter)
4924{
4925	struct ifnet   *ifp;
4926
4927	if(adapter->hw.phy.media_type == e1000_media_type_copper ||
4928	   (E1000_READ_REG(&adapter->hw, E1000_STATUS) & E1000_STATUS_LU)) {
4929		adapter->stats.symerrs += E1000_READ_REG(&adapter->hw, E1000_SYMERRS);
4930		adapter->stats.sec += E1000_READ_REG(&adapter->hw, E1000_SEC);
4931	}
4932	adapter->stats.crcerrs += E1000_READ_REG(&adapter->hw, E1000_CRCERRS);
4933	adapter->stats.mpc += E1000_READ_REG(&adapter->hw, E1000_MPC);
4934	adapter->stats.scc += E1000_READ_REG(&adapter->hw, E1000_SCC);
4935	adapter->stats.ecol += E1000_READ_REG(&adapter->hw, E1000_ECOL);
4936
4937	adapter->stats.mcc += E1000_READ_REG(&adapter->hw, E1000_MCC);
4938	adapter->stats.latecol += E1000_READ_REG(&adapter->hw, E1000_LATECOL);
4939	adapter->stats.colc += E1000_READ_REG(&adapter->hw, E1000_COLC);
4940	adapter->stats.dc += E1000_READ_REG(&adapter->hw, E1000_DC);
4941	adapter->stats.rlec += E1000_READ_REG(&adapter->hw, E1000_RLEC);
4942	adapter->stats.xonrxc += E1000_READ_REG(&adapter->hw, E1000_XONRXC);
4943	adapter->stats.xontxc += E1000_READ_REG(&adapter->hw, E1000_XONTXC);
4944	/*
4945	** For watchdog management we need to know if we have been
4946	** paused during the last interval, so capture that here.
4947	*/
4948	adapter->pause_frames = E1000_READ_REG(&adapter->hw, E1000_XOFFRXC);
4949	adapter->stats.xoffrxc += adapter->pause_frames;
4950	adapter->stats.xofftxc += E1000_READ_REG(&adapter->hw, E1000_XOFFTXC);
4951	adapter->stats.fcruc += E1000_READ_REG(&adapter->hw, E1000_FCRUC);
4952	adapter->stats.prc64 += E1000_READ_REG(&adapter->hw, E1000_PRC64);
4953	adapter->stats.prc127 += E1000_READ_REG(&adapter->hw, E1000_PRC127);
4954	adapter->stats.prc255 += E1000_READ_REG(&adapter->hw, E1000_PRC255);
4955	adapter->stats.prc511 += E1000_READ_REG(&adapter->hw, E1000_PRC511);
4956	adapter->stats.prc1023 += E1000_READ_REG(&adapter->hw, E1000_PRC1023);
4957	adapter->stats.prc1522 += E1000_READ_REG(&adapter->hw, E1000_PRC1522);
4958	adapter->stats.gprc += E1000_READ_REG(&adapter->hw, E1000_GPRC);
4959	adapter->stats.bprc += E1000_READ_REG(&adapter->hw, E1000_BPRC);
4960	adapter->stats.mprc += E1000_READ_REG(&adapter->hw, E1000_MPRC);
4961	adapter->stats.gptc += E1000_READ_REG(&adapter->hw, E1000_GPTC);
4962
4963	/* For the 64-bit byte counters the low dword must be read first. */
4964	/* Both registers clear on the read of the high dword */
4965
4966	adapter->stats.gorc += E1000_READ_REG(&adapter->hw, E1000_GORCL) +
4967	    ((u64)E1000_READ_REG(&adapter->hw, E1000_GORCH) << 32);
4968	adapter->stats.gotc += E1000_READ_REG(&adapter->hw, E1000_GOTCL) +
4969	    ((u64)E1000_READ_REG(&adapter->hw, E1000_GOTCH) << 32);
4970
4971	adapter->stats.rnbc += E1000_READ_REG(&adapter->hw, E1000_RNBC);
4972	adapter->stats.ruc += E1000_READ_REG(&adapter->hw, E1000_RUC);
4973	adapter->stats.rfc += E1000_READ_REG(&adapter->hw, E1000_RFC);
4974	adapter->stats.roc += E1000_READ_REG(&adapter->hw, E1000_ROC);
4975	adapter->stats.rjc += E1000_READ_REG(&adapter->hw, E1000_RJC);
4976
4977	adapter->stats.tor += E1000_READ_REG(&adapter->hw, E1000_TORH);
4978	adapter->stats.tot += E1000_READ_REG(&adapter->hw, E1000_TOTH);
4979
4980	adapter->stats.tpr += E1000_READ_REG(&adapter->hw, E1000_TPR);
4981	adapter->stats.tpt += E1000_READ_REG(&adapter->hw, E1000_TPT);
4982	adapter->stats.ptc64 += E1000_READ_REG(&adapter->hw, E1000_PTC64);
4983	adapter->stats.ptc127 += E1000_READ_REG(&adapter->hw, E1000_PTC127);
4984	adapter->stats.ptc255 += E1000_READ_REG(&adapter->hw, E1000_PTC255);
4985	adapter->stats.ptc511 += E1000_READ_REG(&adapter->hw, E1000_PTC511);
4986	adapter->stats.ptc1023 += E1000_READ_REG(&adapter->hw, E1000_PTC1023);
4987	adapter->stats.ptc1522 += E1000_READ_REG(&adapter->hw, E1000_PTC1522);
4988	adapter->stats.mptc += E1000_READ_REG(&adapter->hw, E1000_MPTC);
4989	adapter->stats.bptc += E1000_READ_REG(&adapter->hw, E1000_BPTC);
4990
4991	/* Interrupt Counts */
4992
4993	adapter->stats.iac += E1000_READ_REG(&adapter->hw, E1000_IAC);
4994	adapter->stats.icrxptc += E1000_READ_REG(&adapter->hw, E1000_ICRXPTC);
4995	adapter->stats.icrxatc += E1000_READ_REG(&adapter->hw, E1000_ICRXATC);
4996	adapter->stats.ictxptc += E1000_READ_REG(&adapter->hw, E1000_ICTXPTC);
4997	adapter->stats.ictxatc += E1000_READ_REG(&adapter->hw, E1000_ICTXATC);
4998	adapter->stats.ictxqec += E1000_READ_REG(&adapter->hw, E1000_ICTXQEC);
4999	adapter->stats.ictxqmtc += E1000_READ_REG(&adapter->hw, E1000_ICTXQMTC);
5000	adapter->stats.icrxdmtc += E1000_READ_REG(&adapter->hw, E1000_ICRXDMTC);
5001	adapter->stats.icrxoc += E1000_READ_REG(&adapter->hw, E1000_ICRXOC);
5002
5003	if (adapter->hw.mac.type >= e1000_82543) {
5004		adapter->stats.algnerrc +=
5005		E1000_READ_REG(&adapter->hw, E1000_ALGNERRC);
5006		adapter->stats.rxerrc +=
5007		E1000_READ_REG(&adapter->hw, E1000_RXERRC);
5008		adapter->stats.tncrs +=
5009		E1000_READ_REG(&adapter->hw, E1000_TNCRS);
5010		adapter->stats.cexterr +=
5011		E1000_READ_REG(&adapter->hw, E1000_CEXTERR);
5012		adapter->stats.tsctc +=
5013		E1000_READ_REG(&adapter->hw, E1000_TSCTC);
5014		adapter->stats.tsctfc +=
5015		E1000_READ_REG(&adapter->hw, E1000_TSCTFC);
5016	}
5017	ifp = adapter->ifp;
5018
5019	ifp->if_collisions = adapter->stats.colc;
5020
5021	/* Rx Errors */
5022	ifp->if_ierrors = adapter->dropped_pkts + adapter->stats.rxerrc +
5023	    adapter->stats.crcerrs + adapter->stats.algnerrc +
5024	    adapter->stats.ruc + adapter->stats.roc +
5025	    adapter->stats.mpc + adapter->stats.cexterr;
5026
5027	/* Tx Errors */
5028	ifp->if_oerrors = adapter->stats.ecol +
5029	    adapter->stats.latecol + adapter->watchdog_events;
5030}
5031
5032/* Export a single 32-bit register via a read-only sysctl. */
5033static int
5034em_sysctl_reg_handler(SYSCTL_HANDLER_ARGS)
5035{
5036	struct adapter *adapter;
5037	u_int val;
5038
5039	adapter = oidp->oid_arg1;
5040	val = E1000_READ_REG(&adapter->hw, oidp->oid_arg2);
5041	return (sysctl_handle_int(oidp, &val, 0, req));
5042}
5043
5044/*
5045 * Add sysctl variables, one per statistic, to the system.
5046 */
5047static void
5048em_add_hw_stats(struct adapter *adapter)
5049{
5050	device_t dev = adapter->dev;
5051
5052	struct tx_ring *txr = adapter->tx_rings;
5053	struct rx_ring *rxr = adapter->rx_rings;
5054
5055	struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev);
5056	struct sysctl_oid *tree = device_get_sysctl_tree(dev);
5057	struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree);
5058	struct e1000_hw_stats *stats = &adapter->stats;
5059
5060	struct sysctl_oid *stat_node, *queue_node, *int_node;
5061	struct sysctl_oid_list *stat_list, *queue_list, *int_list;
5062
5063#define QUEUE_NAME_LEN 32
5064	char namebuf[QUEUE_NAME_LEN];
5065
5066	/* Driver Statistics */
5067	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "link_irq",
5068			CTLFLAG_RD, &adapter->link_irq,
5069			"Link MSIX IRQ Handled");
5070	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "mbuf_alloc_fail",
5071			 CTLFLAG_RD, &adapter->mbuf_alloc_failed,
5072			 "Std mbuf failed");
5073	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "cluster_alloc_fail",
5074			 CTLFLAG_RD, &adapter->mbuf_cluster_failed,
5075			 "Std mbuf cluster failed");
5076	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "dropped",
5077			CTLFLAG_RD, &adapter->dropped_pkts,
5078			"Driver dropped packets");
5079	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_dma_fail",
5080			CTLFLAG_RD, &adapter->no_tx_dma_setup,
5081			"Driver tx dma failure in xmit");
5082	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_overruns",
5083			CTLFLAG_RD, &adapter->rx_overruns,
5084			"RX overruns");
5085	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "watchdog_timeouts",
5086			CTLFLAG_RD, &adapter->watchdog_events,
5087			"Watchdog timeouts");
5088
5089	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "device_control",
5090			CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_CTRL,
5091			em_sysctl_reg_handler, "IU",
5092			"Device Control Register");
5093	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rx_control",
5094			CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_RCTL,
5095			em_sysctl_reg_handler, "IU",
5096			"Receiver Control Register");
5097	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_high_water",
5098			CTLFLAG_RD, &adapter->hw.fc.high_water, 0,
5099			"Flow Control High Watermark");
5100	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_low_water",
5101			CTLFLAG_RD, &adapter->hw.fc.low_water, 0,
5102			"Flow Control Low Watermark");
5103
5104	for (int i = 0; i < adapter->num_queues; i++, rxr++, txr++) {
5105		snprintf(namebuf, QUEUE_NAME_LEN, "queue%d", i);
5106		queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
5107					    CTLFLAG_RD, NULL, "Queue Name");
5108		queue_list = SYSCTL_CHILDREN(queue_node);
5109
5110		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_head",
5111				CTLTYPE_UINT | CTLFLAG_RD, adapter,
5112				E1000_TDH(txr->me),
5113				em_sysctl_reg_handler, "IU",
5114 				"Transmit Descriptor Head");
5115		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_tail",
5116				CTLTYPE_UINT | CTLFLAG_RD, adapter,
5117				E1000_TDT(txr->me),
5118				em_sysctl_reg_handler, "IU",
5119 				"Transmit Descriptor Tail");
5120		SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "tx_irq",
5121				CTLFLAG_RD, &txr->tx_irq,
5122				"Queue MSI-X Transmit Interrupts");
5123		SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "no_desc_avail",
5124				CTLFLAG_RD, &txr->no_desc_avail,
5125				"Queue No Descriptor Available");
5126
5127		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_head",
5128				CTLTYPE_UINT | CTLFLAG_RD, adapter,
5129				E1000_RDH(rxr->me),
5130				em_sysctl_reg_handler, "IU",
5131				"Receive Descriptor Head");
5132		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_tail",
5133				CTLTYPE_UINT | CTLFLAG_RD, adapter,
5134				E1000_RDT(rxr->me),
5135				em_sysctl_reg_handler, "IU",
5136				"Receive Descriptor Tail");
5137		SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "rx_irq",
5138				CTLFLAG_RD, &rxr->rx_irq,
5139				"Queue MSI-X Receive Interrupts");
5140	}
5141
5142	/* MAC stats get their own sub node */
5143
5144	stat_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "mac_stats",
5145				    CTLFLAG_RD, NULL, "Statistics");
5146	stat_list = SYSCTL_CHILDREN(stat_node);
5147
5148	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "excess_coll",
5149			CTLFLAG_RD, &stats->ecol,
5150			"Excessive collisions");
5151	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "single_coll",
5152			CTLFLAG_RD, &stats->scc,
5153			"Single collisions");
5154	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "multiple_coll",
5155			CTLFLAG_RD, &stats->mcc,
5156			"Multiple collisions");
5157	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "late_coll",
5158			CTLFLAG_RD, &stats->latecol,
5159			"Late collisions");
5160	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "collision_count",
5161			CTLFLAG_RD, &stats->colc,
5162			"Collision Count");
5163	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "symbol_errors",
5164			CTLFLAG_RD, &adapter->stats.symerrs,
5165			"Symbol Errors");
5166	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "sequence_errors",
5167			CTLFLAG_RD, &adapter->stats.sec,
5168			"Sequence Errors");
5169	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "defer_count",
5170			CTLFLAG_RD, &adapter->stats.dc,
5171			"Defer Count");
5172	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "missed_packets",
5173			CTLFLAG_RD, &adapter->stats.mpc,
5174			"Missed Packets");
5175	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_no_buff",
5176			CTLFLAG_RD, &adapter->stats.rnbc,
5177			"Receive No Buffers");
5178	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_undersize",
5179			CTLFLAG_RD, &adapter->stats.ruc,
5180			"Receive Undersize");
5181	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_fragmented",
5182			CTLFLAG_RD, &adapter->stats.rfc,
5183			"Fragmented Packets Received ");
5184	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_oversize",
5185			CTLFLAG_RD, &adapter->stats.roc,
5186			"Oversized Packets Received");
5187	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_jabber",
5188			CTLFLAG_RD, &adapter->stats.rjc,
5189			"Recevied Jabber");
5190	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_errs",
5191			CTLFLAG_RD, &adapter->stats.rxerrc,
5192			"Receive Errors");
5193	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "crc_errs",
5194			CTLFLAG_RD, &adapter->stats.crcerrs,
5195			"CRC errors");
5196	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "alignment_errs",
5197			CTLFLAG_RD, &adapter->stats.algnerrc,
5198			"Alignment Errors");
5199	/* On 82575 these are collision counts */
5200	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "coll_ext_errs",
5201			CTLFLAG_RD, &adapter->stats.cexterr,
5202			"Collision/Carrier extension errors");
5203	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_recvd",
5204			CTLFLAG_RD, &adapter->stats.xonrxc,
5205			"XON Received");
5206	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_txd",
5207			CTLFLAG_RD, &adapter->stats.xontxc,
5208			"XON Transmitted");
5209	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_recvd",
5210			CTLFLAG_RD, &adapter->stats.xoffrxc,
5211			"XOFF Received");
5212	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_txd",
5213			CTLFLAG_RD, &adapter->stats.xofftxc,
5214			"XOFF Transmitted");
5215
5216	/* Packet Reception Stats */
5217	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_recvd",
5218			CTLFLAG_RD, &adapter->stats.tpr,
5219			"Total Packets Received ");
5220	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd",
5221			CTLFLAG_RD, &adapter->stats.gprc,
5222			"Good Packets Received");
5223	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_recvd",
5224			CTLFLAG_RD, &adapter->stats.bprc,
5225			"Broadcast Packets Received");
5226	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd",
5227			CTLFLAG_RD, &adapter->stats.mprc,
5228			"Multicast Packets Received");
5229	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_64",
5230			CTLFLAG_RD, &adapter->stats.prc64,
5231			"64 byte frames received ");
5232	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_65_127",
5233			CTLFLAG_RD, &adapter->stats.prc127,
5234			"65-127 byte frames received");
5235	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_128_255",
5236			CTLFLAG_RD, &adapter->stats.prc255,
5237			"128-255 byte frames received");
5238	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_256_511",
5239			CTLFLAG_RD, &adapter->stats.prc511,
5240			"256-511 byte frames received");
5241	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_512_1023",
5242			CTLFLAG_RD, &adapter->stats.prc1023,
5243			"512-1023 byte frames received");
5244	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_1024_1522",
5245			CTLFLAG_RD, &adapter->stats.prc1522,
5246			"1023-1522 byte frames received");
5247 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd",
5248 			CTLFLAG_RD, &adapter->stats.gorc,
5249 			"Good Octets Received");
5250
5251	/* Packet Transmission Stats */
5252 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_txd",
5253 			CTLFLAG_RD, &adapter->stats.gotc,
5254 			"Good Octets Transmitted");
5255	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_txd",
5256			CTLFLAG_RD, &adapter->stats.tpt,
5257			"Total Packets Transmitted");
5258	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
5259			CTLFLAG_RD, &adapter->stats.gptc,
5260			"Good Packets Transmitted");
5261	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_txd",
5262			CTLFLAG_RD, &adapter->stats.bptc,
5263			"Broadcast Packets Transmitted");
5264	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_txd",
5265			CTLFLAG_RD, &adapter->stats.mptc,
5266			"Multicast Packets Transmitted");
5267	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_64",
5268			CTLFLAG_RD, &adapter->stats.ptc64,
5269			"64 byte frames transmitted ");
5270	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_65_127",
5271			CTLFLAG_RD, &adapter->stats.ptc127,
5272			"65-127 byte frames transmitted");
5273	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_128_255",
5274			CTLFLAG_RD, &adapter->stats.ptc255,
5275			"128-255 byte frames transmitted");
5276	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_256_511",
5277			CTLFLAG_RD, &adapter->stats.ptc511,
5278			"256-511 byte frames transmitted");
5279	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_512_1023",
5280			CTLFLAG_RD, &adapter->stats.ptc1023,
5281			"512-1023 byte frames transmitted");
5282	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_1024_1522",
5283			CTLFLAG_RD, &adapter->stats.ptc1522,
5284			"1024-1522 byte frames transmitted");
5285	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tso_txd",
5286			CTLFLAG_RD, &adapter->stats.tsctc,
5287			"TSO Contexts Transmitted");
5288	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tso_ctx_fail",
5289			CTLFLAG_RD, &adapter->stats.tsctfc,
5290			"TSO Contexts Failed");
5291
5292
5293	/* Interrupt Stats */
5294
5295	int_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "interrupts",
5296				    CTLFLAG_RD, NULL, "Interrupt Statistics");
5297	int_list = SYSCTL_CHILDREN(int_node);
5298
5299	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "asserts",
5300			CTLFLAG_RD, &adapter->stats.iac,
5301			"Interrupt Assertion Count");
5302
5303	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_pkt_timer",
5304			CTLFLAG_RD, &adapter->stats.icrxptc,
5305			"Interrupt Cause Rx Pkt Timer Expire Count");
5306
5307	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_abs_timer",
5308			CTLFLAG_RD, &adapter->stats.icrxatc,
5309			"Interrupt Cause Rx Abs Timer Expire Count");
5310
5311	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_pkt_timer",
5312			CTLFLAG_RD, &adapter->stats.ictxptc,
5313			"Interrupt Cause Tx Pkt Timer Expire Count");
5314
5315	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_abs_timer",
5316			CTLFLAG_RD, &adapter->stats.ictxatc,
5317			"Interrupt Cause Tx Abs Timer Expire Count");
5318
5319	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_queue_empty",
5320			CTLFLAG_RD, &adapter->stats.ictxqec,
5321			"Interrupt Cause Tx Queue Empty Count");
5322
5323	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_queue_min_thresh",
5324			CTLFLAG_RD, &adapter->stats.ictxqmtc,
5325			"Interrupt Cause Tx Queue Min Thresh Count");
5326
5327	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_desc_min_thresh",
5328			CTLFLAG_RD, &adapter->stats.icrxdmtc,
5329			"Interrupt Cause Rx Desc Min Thresh Count");
5330
5331	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_overrun",
5332			CTLFLAG_RD, &adapter->stats.icrxoc,
5333			"Interrupt Cause Receiver Overrun Count");
5334}
5335
5336/**********************************************************************
5337 *
5338 *  This routine provides a way to dump out the adapter eeprom,
5339 *  often a useful debug/service tool. This only dumps the first
5340 *  32 words, stuff that matters is in that extent.
5341 *
5342 **********************************************************************/
5343static int
5344em_sysctl_nvm_info(SYSCTL_HANDLER_ARGS)
5345{
5346	struct adapter *adapter;
5347	int error;
5348	int result;
5349
5350	result = -1;
5351	error = sysctl_handle_int(oidp, &result, 0, req);
5352
5353	if (error || !req->newptr)
5354		return (error);
5355
5356	/*
5357	 * This value will cause a hex dump of the
5358	 * first 32 16-bit words of the EEPROM to
5359	 * the screen.
5360	 */
5361	if (result == 1) {
5362		adapter = (struct adapter *)arg1;
5363		em_print_nvm_info(adapter);
5364        }
5365
5366	return (error);
5367}
5368
5369static void
5370em_print_nvm_info(struct adapter *adapter)
5371{
5372	u16	eeprom_data;
5373	int	i, j, row = 0;
5374
5375	/* Its a bit crude, but it gets the job done */
5376	printf("\nInterface EEPROM Dump:\n");
5377	printf("Offset\n0x0000  ");
5378	for (i = 0, j = 0; i < 32; i++, j++) {
5379		if (j == 8) { /* Make the offset block */
5380			j = 0; ++row;
5381			printf("\n0x00%x0  ",row);
5382		}
5383		e1000_read_nvm(&adapter->hw, i, 1, &eeprom_data);
5384		printf("%04x ", eeprom_data);
5385	}
5386	printf("\n");
5387}
5388
5389static int
5390em_sysctl_int_delay(SYSCTL_HANDLER_ARGS)
5391{
5392	struct em_int_delay_info *info;
5393	struct adapter *adapter;
5394	u32 regval;
5395	int error, usecs, ticks;
5396
5397	info = (struct em_int_delay_info *)arg1;
5398	usecs = info->value;
5399	error = sysctl_handle_int(oidp, &usecs, 0, req);
5400	if (error != 0 || req->newptr == NULL)
5401		return (error);
5402	if (usecs < 0 || usecs > EM_TICKS_TO_USECS(65535))
5403		return (EINVAL);
5404	info->value = usecs;
5405	ticks = EM_USECS_TO_TICKS(usecs);
5406
5407	adapter = info->adapter;
5408
5409	EM_CORE_LOCK(adapter);
5410	regval = E1000_READ_OFFSET(&adapter->hw, info->offset);
5411	regval = (regval & ~0xffff) | (ticks & 0xffff);
5412	/* Handle a few special cases. */
5413	switch (info->offset) {
5414	case E1000_RDTR:
5415		break;
5416	case E1000_TIDV:
5417		if (ticks == 0) {
5418			adapter->txd_cmd &= ~E1000_TXD_CMD_IDE;
5419			/* Don't write 0 into the TIDV register. */
5420			regval++;
5421		} else
5422			adapter->txd_cmd |= E1000_TXD_CMD_IDE;
5423		break;
5424	}
5425	E1000_WRITE_OFFSET(&adapter->hw, info->offset, regval);
5426	EM_CORE_UNLOCK(adapter);
5427	return (0);
5428}
5429
5430static void
5431em_add_int_delay_sysctl(struct adapter *adapter, const char *name,
5432	const char *description, struct em_int_delay_info *info,
5433	int offset, int value)
5434{
5435	info->adapter = adapter;
5436	info->offset = offset;
5437	info->value = value;
5438	SYSCTL_ADD_PROC(device_get_sysctl_ctx(adapter->dev),
5439	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
5440	    OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW,
5441	    info, 0, em_sysctl_int_delay, "I", description);
5442}
5443
5444static void
5445em_add_rx_process_limit(struct adapter *adapter, const char *name,
5446	const char *description, int *limit, int value)
5447{
5448	*limit = value;
5449	SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
5450	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
5451	    OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW, limit, value, description);
5452}
5453
5454static void
5455em_set_flow_cntrl(struct adapter *adapter, const char *name,
5456	const char *description, int *limit, int value)
5457{
5458	*limit = value;
5459	SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
5460	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
5461	    OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW, limit, value, description);
5462}
5463
5464static int
5465em_sysctl_debug_info(SYSCTL_HANDLER_ARGS)
5466{
5467	struct adapter *adapter;
5468	int error;
5469	int result;
5470
5471	result = -1;
5472	error = sysctl_handle_int(oidp, &result, 0, req);
5473
5474	if (error || !req->newptr)
5475		return (error);
5476
5477	if (result == 1) {
5478		adapter = (struct adapter *)arg1;
5479		em_print_debug_info(adapter);
5480        }
5481
5482	return (error);
5483}
5484
5485/*
5486** This routine is meant to be fluid, add whatever is
5487** needed for debugging a problem.  -jfv
5488*/
5489static void
5490em_print_debug_info(struct adapter *adapter)
5491{
5492	device_t dev = adapter->dev;
5493	struct tx_ring *txr = adapter->tx_rings;
5494	struct rx_ring *rxr = adapter->rx_rings;
5495
5496	if (adapter->ifp->if_drv_flags & IFF_DRV_RUNNING)
5497		printf("Interface is RUNNING ");
5498	else
5499		printf("Interface is NOT RUNNING\n");
5500	if (adapter->ifp->if_drv_flags & IFF_DRV_OACTIVE)
5501		printf("and ACTIVE\n");
5502	else
5503		printf("and INACTIVE\n");
5504
5505	device_printf(dev, "hw tdh = %d, hw tdt = %d\n",
5506	    E1000_READ_REG(&adapter->hw, E1000_TDH(0)),
5507	    E1000_READ_REG(&adapter->hw, E1000_TDT(0)));
5508	device_printf(dev, "hw rdh = %d, hw rdt = %d\n",
5509	    E1000_READ_REG(&adapter->hw, E1000_RDH(0)),
5510	    E1000_READ_REG(&adapter->hw, E1000_RDT(0)));
5511	device_printf(dev, "Tx Queue Status = %d\n", txr->queue_status);
5512	device_printf(dev, "TX descriptors avail = %d\n",
5513	    txr->tx_avail);
5514	device_printf(dev, "Tx Descriptors avail failure = %ld\n",
5515	    txr->no_desc_avail);
5516	device_printf(dev, "RX discarded packets = %ld\n",
5517	    rxr->rx_discarded);
5518	device_printf(dev, "RX Next to Check = %d\n", rxr->next_to_check);
5519	device_printf(dev, "RX Next to Refresh = %d\n", rxr->next_to_refresh);
5520}
5521