/******************************************************************************

  Copyright (c) 2001-2010, Intel Corporation
  All rights reserved.

  Redistribution and use in source and binary forms, with or without
  modification, are permitted provided that the following conditions are met:

   1. Redistributions of source code must retain the above copyright notice,
      this list of conditions and the following disclaimer.

   2. Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.

   3. Neither the name of the Intel Corporation nor the names of its
      contributors may be used to endorse or promote products derived from
      this software without specific prior written permission.

  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  POSSIBILITY OF SUCH DAMAGE.

******************************************************************************/
/*$FreeBSD: head/sys/dev/e1000/if_em.c 217318 2011-01-12 19:53:23Z mdf $*/

#ifdef HAVE_KERNEL_OPTION_HEADERS
#include "opt_device_polling.h"
#include "opt_inet.h"
#endif

#include <sys/param.h>
#include <sys/systm.h>
#if __FreeBSD_version >= 800000
#include <sys/buf_ring.h>
#endif
#include <sys/bus.h>
#include <sys/endian.h>
#include <sys/kernel.h>
#include <sys/kthread.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/module.h>
#include <sys/rman.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/sysctl.h>
#include <sys/taskqueue.h>
#include <sys/eventhandler.h>
#include <machine/bus.h>
#include <machine/resource.h>

#include <net/bpf.h>
#include <net/ethernet.h>
#include <net/if.h>
#include <net/if_arp.h>
#include <net/if_dl.h>
#include <net/if_media.h>

#include <net/if_types.h>
#include <net/if_vlan_var.h>

#include <netinet/in_systm.h>
#include <netinet/in.h>
#include <netinet/if_ether.h>
#include <netinet/ip.h>
#include <netinet/ip6.h>
#include <netinet/tcp.h>
#include <netinet/udp.h>

#include <machine/in_cksum.h>
#include <dev/led/led.h>
#include <dev/pci/pcivar.h>
#include <dev/pci/pcireg.h>

#include "e1000_api.h"
#include "e1000_82571.h"
#include "if_em.h"

/*********************************************************************
 *  Set this to one to display debug statistics
 *********************************************************************/
int	em_display_debug_stats = 0;

/*********************************************************************
 *  Driver version:
 *********************************************************************/
char em_driver_version[] = "7.1.9";

/*********************************************************************
 *  PCI Device ID Table
 *
 *  Used by probe to select the devices this driver attaches to.
 *  The last field stores an index into em_strings.
 *  The last entry must be all 0s.
 *
 *  { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
 *********************************************************************/

static em_vendor_info_t em_vendor_info_array[] =
{
	/* Intel(R) PRO/1000 Network Connection */
	{ 0x8086, E1000_DEV_ID_82571EB_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_SERDES_DUAL,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_SERDES_QUAD,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER_LP,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_FIBER,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571PT_QUAD_COPPER,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82572EI_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82572EI_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82572EI_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82572EI,		PCI_ANY_ID, PCI_ANY_ID, 0},

	{ 0x8086, E1000_DEV_ID_82573E,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82573E_IAMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82573L,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82583V,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_SPT,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_SPT,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_DPT,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_DPT,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IGP_M_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IGP_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IGP_C,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IFE,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IFE_GT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IFE_G,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IGP_M,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_82567V_3,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IGP_M_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IGP_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IGP_C,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IGP_M,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IGP_M_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IFE,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IFE_GT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IFE_G,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_BM,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82574L,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82574LA,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_R_BM_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_R_BM_LF,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_R_BM_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_D_BM_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_D_BM_LF,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_D_BM_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_M_HV_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_M_HV_LC,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_D_HV_DM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_D_HV_DC,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH2_LV_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH2_LV_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
	/* required last entry */
	{ 0, 0, 0, 0, 0}
};

/*********************************************************************
 *  Table of branding strings for all supported NICs.
 *********************************************************************/

static char *em_strings[] = {
	"Intel(R) PRO/1000 Network Connection"
};

/*********************************************************************
 *  Function prototypes
 *********************************************************************/
static int	em_probe(device_t);
static int	em_attach(device_t);
static int	em_detach(device_t);
static int	em_shutdown(device_t);
static int	em_suspend(device_t);
static int	em_resume(device_t);
static void	em_start(struct ifnet *);
static void	em_start_locked(struct ifnet *, struct tx_ring *);
#ifdef EM_MULTIQUEUE
static int	em_mq_start(struct ifnet *, struct mbuf *);
static int	em_mq_start_locked(struct ifnet *,
		    struct tx_ring *, struct mbuf *);
static void	em_qflush(struct ifnet *);
#endif
static int	em_ioctl(struct ifnet *, u_long, caddr_t);
static void	em_init(void *);
static void	em_init_locked(struct adapter *);
static void	em_stop(void *);
static void	em_media_status(struct ifnet *, struct ifmediareq *);
static int	em_media_change(struct ifnet *);
static void	em_identify_hardware(struct adapter *);
static int	em_allocate_pci_resources(struct adapter *);
static int	em_allocate_legacy(struct adapter *);
static int	em_allocate_msix(struct adapter *);
static int	em_allocate_queues(struct adapter *);
static int	em_setup_msix(struct adapter *);
static void	em_free_pci_resources(struct adapter *);
static void	em_local_timer(void *);
static void	em_reset(struct adapter *);
static int	em_setup_interface(device_t, struct adapter *);

static void	em_setup_transmit_structures(struct adapter *);
static void	em_initialize_transmit_unit(struct adapter *);
static int	em_allocate_transmit_buffers(struct tx_ring *);
static void	em_free_transmit_structures(struct adapter *);
static void	em_free_transmit_buffers(struct tx_ring *);

static int	em_setup_receive_structures(struct adapter *);
static int	em_allocate_receive_buffers(struct rx_ring *);
static void	em_initialize_receive_unit(struct adapter *);
static void	em_free_receive_structures(struct adapter *);
static void	em_free_receive_buffers(struct rx_ring *);

static void	em_enable_intr(struct adapter *);
static void	em_disable_intr(struct adapter *);
static void	em_update_stats_counters(struct adapter *);
static void	em_add_hw_stats(struct adapter *adapter);
static bool	em_txeof(struct tx_ring *);
static bool	em_rxeof(struct rx_ring *, int, int *);
#ifndef __NO_STRICT_ALIGNMENT
static int	em_fixup_rx(struct rx_ring *);
#endif
static void	em_receive_checksum(struct e1000_rx_desc *, struct mbuf *);
static void	em_transmit_checksum_setup(struct tx_ring *, struct mbuf *, int,
		    struct ip *, u32 *, u32 *);
static void	em_tso_setup(struct tx_ring *, struct mbuf *, int, struct ip *,
		    struct tcphdr *, u32 *, u32 *);
static void	em_set_promisc(struct adapter *);
static void	em_disable_promisc(struct adapter *);
static void	em_set_multi(struct adapter *);
static void	em_update_link_status(struct adapter *);
static void	em_refresh_mbufs(struct rx_ring *, int);
static void	em_register_vlan(void *, struct ifnet *, u16);
static void	em_unregister_vlan(void *, struct ifnet *, u16);
static void	em_setup_vlan_hw_support(struct adapter *);
static int	em_xmit(struct tx_ring *, struct mbuf **);
static int	em_dma_malloc(struct adapter *, bus_size_t,
		    struct em_dma_alloc *, int);
static void	em_dma_free(struct adapter *, struct em_dma_alloc *);
static int	em_sysctl_nvm_info(SYSCTL_HANDLER_ARGS);
static void	em_print_nvm_info(struct adapter *);
static int	em_sysctl_debug_info(SYSCTL_HANDLER_ARGS);
static void	em_print_debug_info(struct adapter *);
static int	em_is_valid_ether_addr(u8 *);
static int	em_sysctl_int_delay(SYSCTL_HANDLER_ARGS);
static void	em_add_int_delay_sysctl(struct adapter *, const char *,
		    const char *, struct em_int_delay_info *, int, int);
/* Management and WOL Support */
static void	em_init_manageability(struct adapter *);
static void	em_release_manageability(struct adapter *);
static void	em_get_hw_control(struct adapter *);
static void	em_release_hw_control(struct adapter *);
static void	em_get_wakeup(device_t);
static void	em_enable_wakeup(device_t);
static int	em_enable_phy_wakeup(struct adapter *);
static void	em_led_func(void *, int);
static void	em_disable_aspm(struct adapter *);

static int	em_irq_fast(void *);

/* MSIX handlers */
static void	em_msix_tx(void *);
static void	em_msix_rx(void *);
static void	em_msix_link(void *);
static void	em_handle_tx(void *context, int pending);
static void	em_handle_rx(void *context, int pending);
static void	em_handle_link(void *context, int pending);

static void	em_add_rx_process_limit(struct adapter *, const char *,
		    const char *, int *, int);
static void	em_set_flow_cntrl(struct adapter *, const char *,
		    const char *, int *, int);

static __inline void em_rx_discard(struct rx_ring *, int);

#ifdef DEVICE_POLLING
static poll_handler_t em_poll;
#endif /* DEVICE_POLLING */

/*********************************************************************
 *  FreeBSD Device Interface Entry Points
 *********************************************************************/

static device_method_t em_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe, em_probe),
	DEVMETHOD(device_attach, em_attach),
	DEVMETHOD(device_detach, em_detach),
	DEVMETHOD(device_shutdown, em_shutdown),
	DEVMETHOD(device_suspend, em_suspend),
	DEVMETHOD(device_resume, em_resume),
	{0, 0}
};

static driver_t em_driver = {
	"em", em_methods, sizeof(struct adapter),
};

devclass_t em_devclass;
DRIVER_MODULE(em, pci, em_driver, em_devclass, 0, 0);
MODULE_DEPEND(em, pci, 1, 1, 1);
MODULE_DEPEND(em, ether, 1, 1, 1);
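
/*
 * Usage note (illustrative): built as a module this driver becomes
 * if_em.ko, so it can typically be loaded with "kldload if_em" or at
 * boot by adding if_em_load="YES" to /boot/loader.conf.
 */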

/*********************************************************************
 *  Tunable default values.
 *********************************************************************/

#define EM_TICKS_TO_USECS(ticks)	((1024 * (ticks) + 500) / 1000)
#define EM_USECS_TO_TICKS(usecs)	((1000 * (usecs) + 512) / 1024)
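/*
 * The hardware interrupt delay registers count in units of 1.024
 * microseconds, hence the 1024/1000 scaling (with rounding) above.
 * Worked example: EM_USECS_TO_TICKS(100) = (1000 * 100 + 512) / 1024
 * = 98 ticks, and EM_TICKS_TO_USECS(98) = (1024 * 98 + 500) / 1000
 * = 100 usecs again.
 */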
#define M_TSO_LEN			66

/* Allow common code without TSO */
#ifndef CSUM_TSO
#define CSUM_TSO	0
#endif

static int em_tx_int_delay_dflt = EM_TICKS_TO_USECS(EM_TIDV);
static int em_rx_int_delay_dflt = EM_TICKS_TO_USECS(EM_RDTR);
TUNABLE_INT("hw.em.tx_int_delay", &em_tx_int_delay_dflt);
TUNABLE_INT("hw.em.rx_int_delay", &em_rx_int_delay_dflt);

static int em_tx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_TADV);
static int em_rx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_RADV);
TUNABLE_INT("hw.em.tx_abs_int_delay", &em_tx_abs_int_delay_dflt);
TUNABLE_INT("hw.em.rx_abs_int_delay", &em_rx_abs_int_delay_dflt);

static int em_rxd = EM_DEFAULT_RXD;
static int em_txd = EM_DEFAULT_TXD;
TUNABLE_INT("hw.em.rxd", &em_rxd);
TUNABLE_INT("hw.em.txd", &em_txd);

static int em_smart_pwr_down = FALSE;
TUNABLE_INT("hw.em.smart_pwr_down", &em_smart_pwr_down);

/* Controls whether promiscuous also shows bad packets */
static int em_debug_sbp = FALSE;
TUNABLE_INT("hw.em.sbp", &em_debug_sbp);

static int em_enable_msix = TRUE;
TUNABLE_INT("hw.em.enable_msix", &em_enable_msix);

/* How many packets rxeof tries to clean at a time */
static int em_rx_process_limit = 100;
TUNABLE_INT("hw.em.rx_process_limit", &em_rx_process_limit);

/* Flow control setting - default to FULL */
static int em_fc_setting = e1000_fc_full;
TUNABLE_INT("hw.em.fc_setting", &em_fc_setting);
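
/*
 * These TUNABLE_INTs are read from the kernel environment at module
 * load time; illustrative /boot/loader.conf settings:
 *
 *	hw.em.rx_int_delay="32"
 *	hw.em.rx_process_limit="200"
 *	hw.em.enable_msix="0"
 */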

/* Global used in WOL setup with multiport cards */
static int global_quad_port_a = 0;

/*********************************************************************
 *  Device identification routine
 *
 *  em_probe determines if the driver should be loaded on the
 *  adapter, based on the PCI vendor/device ID of the adapter.
 *
 *  return BUS_PROBE_DEFAULT on success, positive on failure
 *********************************************************************/

static int
em_probe(device_t dev)
{
	char		adapter_name[60];
	u16		pci_vendor_id = 0;
	u16		pci_device_id = 0;
	u16		pci_subvendor_id = 0;
	u16		pci_subdevice_id = 0;
	em_vendor_info_t *ent;

	INIT_DEBUGOUT("em_probe: begin");

	pci_vendor_id = pci_get_vendor(dev);
	if (pci_vendor_id != EM_VENDOR_ID)
		return (ENXIO);

	pci_device_id = pci_get_device(dev);
	pci_subvendor_id = pci_get_subvendor(dev);
	pci_subdevice_id = pci_get_subdevice(dev);

	ent = em_vendor_info_array;
	while (ent->vendor_id != 0) {
		if ((pci_vendor_id == ent->vendor_id) &&
		    (pci_device_id == ent->device_id) &&

		    ((pci_subvendor_id == ent->subvendor_id) ||
		    (ent->subvendor_id == PCI_ANY_ID)) &&

		    ((pci_subdevice_id == ent->subdevice_id) ||
		    (ent->subdevice_id == PCI_ANY_ID))) {
			sprintf(adapter_name, "%s %s",
				em_strings[ent->index],
				em_driver_version);
			device_set_desc_copy(dev, adapter_name);
			return (BUS_PROBE_DEFAULT);
		}
		ent++;
	}

	return (ENXIO);
}

/*********************************************************************
 *  Device initialization routine
 *
 *  The attach entry point is called when the driver is being loaded.
 *  This routine identifies the type of hardware, allocates all resources
 *  and initializes the hardware.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/

static int
em_attach(device_t dev)
{
	struct adapter	*adapter;
	int		error = 0;

	INIT_DEBUGOUT("em_attach: begin");

	adapter = device_get_softc(dev);
	adapter->dev = adapter->osdep.dev = dev;
	EM_CORE_LOCK_INIT(adapter, device_get_nameunit(dev));

	/* SYSCTL stuff */
	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
	    OID_AUTO, "nvm", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
	    em_sysctl_nvm_info, "I", "NVM Information");

	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
	    OID_AUTO, "debug", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
	    em_sysctl_debug_info, "I", "Debug Information");

	callout_init_mtx(&adapter->timer, &adapter->core_mtx, 0);

	/* Determine hardware and mac info */
	em_identify_hardware(adapter);

	/* Setup PCI resources */
	if (em_allocate_pci_resources(adapter)) {
		device_printf(dev, "Allocation of PCI resources failed\n");
		error = ENXIO;
		goto err_pci;
	}

	/*
	** For ICH8 and family we need to
	** map the flash memory, and this
	** must happen after the MAC is
	** identified.
	*/
	if ((adapter->hw.mac.type == e1000_ich8lan) ||
	    (adapter->hw.mac.type == e1000_ich9lan) ||
	    (adapter->hw.mac.type == e1000_ich10lan) ||
	    (adapter->hw.mac.type == e1000_pchlan) ||
	    (adapter->hw.mac.type == e1000_pch2lan)) {
		int rid = EM_BAR_TYPE_FLASH;
		adapter->flash = bus_alloc_resource_any(dev,
		    SYS_RES_MEMORY, &rid, RF_ACTIVE);
		if (adapter->flash == NULL) {
			device_printf(dev, "Mapping of Flash failed\n");
			error = ENXIO;
			goto err_pci;
		}
		/* This is used in the shared code */
		adapter->hw.flash_address = (u8 *)adapter->flash;
		adapter->osdep.flash_bus_space_tag =
		    rman_get_bustag(adapter->flash);
		adapter->osdep.flash_bus_space_handle =
		    rman_get_bushandle(adapter->flash);
	}

	/* Do Shared Code initialization */
	if (e1000_setup_init_funcs(&adapter->hw, TRUE)) {
		device_printf(dev, "Setup of Shared code failed\n");
		error = ENXIO;
		goto err_pci;
	}

	e1000_get_bus_info(&adapter->hw);

	/* Set up some sysctls for the tunable interrupt delays */
	em_add_int_delay_sysctl(adapter, "rx_int_delay",
	    "receive interrupt delay in usecs", &adapter->rx_int_delay,
	    E1000_REGISTER(&adapter->hw, E1000_RDTR), em_rx_int_delay_dflt);
	em_add_int_delay_sysctl(adapter, "tx_int_delay",
	    "transmit interrupt delay in usecs", &adapter->tx_int_delay,
	    E1000_REGISTER(&adapter->hw, E1000_TIDV), em_tx_int_delay_dflt);
	em_add_int_delay_sysctl(adapter, "rx_abs_int_delay",
	    "receive interrupt delay limit in usecs",
	    &adapter->rx_abs_int_delay,
	    E1000_REGISTER(&adapter->hw, E1000_RADV),
	    em_rx_abs_int_delay_dflt);
	em_add_int_delay_sysctl(adapter, "tx_abs_int_delay",
	    "transmit interrupt delay limit in usecs",
	    &adapter->tx_abs_int_delay,
	    E1000_REGISTER(&adapter->hw, E1000_TADV),
	    em_tx_abs_int_delay_dflt);

	/* Sysctl for limiting the amount of work done in the taskqueue */
	em_add_rx_process_limit(adapter, "rx_processing_limit",
	    "max number of rx packets to process", &adapter->rx_process_limit,
	    em_rx_process_limit);

	/* Sysctl for setting the interface flow control */
	em_set_flow_cntrl(adapter, "flow_control",
	    "configure flow control",
	    &adapter->fc_setting, em_fc_setting);

	/*
	 * Validate the number of transmit and receive descriptors.  Each
	 * count must not exceed the hardware maximum and must be a
	 * multiple of EM_DBA_ALIGN.
	 */
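	/*
	 * For example (assuming the usual 16-byte legacy descriptors and
	 * an EM_DBA_ALIGN of 128 bytes), a valid count must be a multiple
	 * of 128 / 16 = 8 descriptors.
	 */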
	if (((em_txd * sizeof(struct e1000_tx_desc)) % EM_DBA_ALIGN) != 0 ||
	    (em_txd > EM_MAX_TXD) || (em_txd < EM_MIN_TXD)) {
		device_printf(dev, "Using %d TX descriptors instead of %d!\n",
		    EM_DEFAULT_TXD, em_txd);
		adapter->num_tx_desc = EM_DEFAULT_TXD;
	} else
		adapter->num_tx_desc = em_txd;

	if (((em_rxd * sizeof(struct e1000_rx_desc)) % EM_DBA_ALIGN) != 0 ||
	    (em_rxd > EM_MAX_RXD) || (em_rxd < EM_MIN_RXD)) {
		device_printf(dev, "Using %d RX descriptors instead of %d!\n",
		    EM_DEFAULT_RXD, em_rxd);
		adapter->num_rx_desc = EM_DEFAULT_RXD;
	} else
		adapter->num_rx_desc = em_rxd;

	adapter->hw.mac.autoneg = DO_AUTO_NEG;
	adapter->hw.phy.autoneg_wait_to_complete = FALSE;
	adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;

	/* Copper options */
	if (adapter->hw.phy.media_type == e1000_media_type_copper) {
		adapter->hw.phy.mdix = AUTO_ALL_MODES;
		adapter->hw.phy.disable_polarity_correction = FALSE;
		adapter->hw.phy.ms_type = EM_MASTER_SLAVE;
	}

	/*
	 * Set the frame limits assuming
	 * standard ethernet sized frames.
	 */
	adapter->max_frame_size = ETHERMTU + ETHER_HDR_LEN + ETHERNET_FCS_SIZE;
	adapter->min_frame_size = ETH_ZLEN + ETHERNET_FCS_SIZE;

	/*
	 * This controls when hardware reports transmit completion
	 * status.
	 */
	adapter->hw.mac.report_tx_early = 1;

	/*
	** Get queue/ring memory
	*/
	if (em_allocate_queues(adapter)) {
		error = ENOMEM;
		goto err_pci;
	}

	/* Allocate multicast array memory. */
	adapter->mta = malloc(sizeof(u8) * ETH_ADDR_LEN *
	    MAX_NUM_MULTICAST_ADDRESSES, M_DEVBUF, M_NOWAIT);
	if (adapter->mta == NULL) {
		device_printf(dev, "Can not allocate multicast setup array\n");
		error = ENOMEM;
		goto err_late;
	}

	/* Check SOL/IDER usage */
	if (e1000_check_reset_block(&adapter->hw))
		device_printf(dev, "PHY reset is blocked"
		    " due to SOL/IDER session.\n");

	/*
	** Start from a known state; this is
	** important for reading the NVM and
	** MAC address correctly.
	*/
	e1000_reset_hw(&adapter->hw);

	/* Make sure we have a good EEPROM before we read from it */
	if (e1000_validate_nvm_checksum(&adapter->hw) < 0) {
		/*
		** Some PCI-E parts fail the first check due to
		** the link being in sleep state; call it again.
		** If it fails a second time, it is a real issue.
		*/
		if (e1000_validate_nvm_checksum(&adapter->hw) < 0) {
			device_printf(dev,
			    "The EEPROM Checksum Is Not Valid\n");
			error = EIO;
			goto err_late;
		}
	}

	/* Copy the permanent MAC address out of the EEPROM */
	if (e1000_read_mac_addr(&adapter->hw) < 0) {
		device_printf(dev, "EEPROM read error while reading MAC"
		    " address\n");
		error = EIO;
		goto err_late;
	}

	if (!em_is_valid_ether_addr(adapter->hw.mac.addr)) {
		device_printf(dev, "Invalid MAC address\n");
		error = EIO;
		goto err_late;
	}

	/*
	**  Do interrupt configuration
	*/
	if (adapter->msix > 1) /* Do MSIX */
		error = em_allocate_msix(adapter);
	else  /* MSI or Legacy */
		error = em_allocate_legacy(adapter);
	if (error)
		goto err_late;

	/*
	 * Get Wake-on-LAN and Management info for later use
	 */
	em_get_wakeup(dev);

	/* Setup OS specific network interface */
	if (em_setup_interface(dev, adapter) != 0)
		goto err_late;

	em_reset(adapter);

	/* Initialize statistics */
	em_update_stats_counters(adapter);

	adapter->hw.mac.get_link_status = 1;
	em_update_link_status(adapter);

	/* Register for VLAN events */
	adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
	    em_register_vlan, adapter, EVENTHANDLER_PRI_FIRST);
	adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
	    em_unregister_vlan, adapter, EVENTHANDLER_PRI_FIRST);

	em_add_hw_stats(adapter);

	/* Non-AMT based hardware can now take control from firmware */
	if (adapter->has_manage && !adapter->has_amt)
		em_get_hw_control(adapter);

	/* Tell the stack that the interface is not active */
	adapter->ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);

	adapter->led_dev = led_create(em_led_func, adapter,
	    device_get_nameunit(dev));

	INIT_DEBUGOUT("em_attach: end");

	return (0);

err_late:
	em_free_transmit_structures(adapter);
	em_free_receive_structures(adapter);
	em_release_hw_control(adapter);
	if (adapter->ifp != NULL)
		if_free(adapter->ifp);
err_pci:
	em_free_pci_resources(adapter);
	free(adapter->mta, M_DEVBUF);
	EM_CORE_LOCK_DESTROY(adapter);

	return (error);
}

/*********************************************************************
 *  Device removal routine
 *
 *  The detach entry point is called when the driver is being removed.
 *  This routine stops the adapter and deallocates all the resources
 *  that were allocated for driver operation.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/

static int
em_detach(device_t dev)
{
	struct adapter	*adapter = device_get_softc(dev);
	struct ifnet	*ifp = adapter->ifp;

	INIT_DEBUGOUT("em_detach: begin");

	/* Make sure VLANs are not using the driver */
	if (adapter->ifp->if_vlantrunk != NULL) {
		device_printf(dev,"Vlan in use, detach first\n");
		return (EBUSY);
	}

#ifdef DEVICE_POLLING
	if (ifp->if_capenable & IFCAP_POLLING)
		ether_poll_deregister(ifp);
#endif

	if (adapter->led_dev != NULL)
		led_destroy(adapter->led_dev);

	EM_CORE_LOCK(adapter);
	adapter->in_detach = 1;
	em_stop(adapter);
	EM_CORE_UNLOCK(adapter);
	EM_CORE_LOCK_DESTROY(adapter);

	e1000_phy_hw_reset(&adapter->hw);

	em_release_manageability(adapter);
	em_release_hw_control(adapter);

	/* Unregister VLAN events */
	if (adapter->vlan_attach != NULL)
		EVENTHANDLER_DEREGISTER(vlan_config, adapter->vlan_attach);
	if (adapter->vlan_detach != NULL)
		EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach);

	ether_ifdetach(adapter->ifp);
	callout_drain(&adapter->timer);

	em_free_pci_resources(adapter);
	bus_generic_detach(dev);
	if_free(ifp);

	em_free_transmit_structures(adapter);
	em_free_receive_structures(adapter);

	em_release_hw_control(adapter);
	free(adapter->mta, M_DEVBUF);

	return (0);
}

/*********************************************************************
 *
 *  Shutdown entry point
 *
 **********************************************************************/

static int
em_shutdown(device_t dev)
{
	return em_suspend(dev);
}

/*
 * Suspend/resume device methods.
 */
static int
em_suspend(device_t dev)
{
	struct adapter *adapter = device_get_softc(dev);

	EM_CORE_LOCK(adapter);

	em_release_manageability(adapter);
	em_release_hw_control(adapter);
	em_enable_wakeup(dev);

	EM_CORE_UNLOCK(adapter);

	return bus_generic_suspend(dev);
}

static int
em_resume(device_t dev)
{
	struct adapter *adapter = device_get_softc(dev);
	struct ifnet *ifp = adapter->ifp;

	EM_CORE_LOCK(adapter);
	em_init_locked(adapter);
	em_init_manageability(adapter);
	EM_CORE_UNLOCK(adapter);
	em_start(ifp);

	return bus_generic_resume(dev);
}


/*********************************************************************
 *  Transmit entry point
 *
 *  em_start is called by the stack to initiate a transmit.
 *  The driver will remain in this routine as long as there are
 *  packets to transmit and transmit resources are available.
 *  In case resources are not available, the stack is notified
 *  and the packet is requeued.
 **********************************************************************/

#ifdef EM_MULTIQUEUE
static int
em_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr, struct mbuf *m)
{
	struct adapter	*adapter = txr->adapter;
	struct mbuf	*next;
	int		err = 0, enq = 0;

	if ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) !=
	    IFF_DRV_RUNNING || adapter->link_active == 0) {
		if (m != NULL)
			err = drbr_enqueue(ifp, txr->br, m);
		return (err);
	}

	/* Call cleanup if number of TX descriptors low */
	if (txr->tx_avail <= EM_TX_CLEANUP_THRESHOLD)
		em_txeof(txr);

	enq = 0;
	if (m == NULL) {
		next = drbr_dequeue(ifp, txr->br);
	} else if (drbr_needs_enqueue(ifp, txr->br)) {
		if ((err = drbr_enqueue(ifp, txr->br, m)) != 0)
			return (err);
		next = drbr_dequeue(ifp, txr->br);
	} else
		next = m;

	/* Process the queue */
	while (next != NULL) {
		if ((err = em_xmit(txr, &next)) != 0) {
			if (next != NULL)
				err = drbr_enqueue(ifp, txr->br, next);
			break;
		}
		enq++;
		drbr_stats_update(ifp, next->m_pkthdr.len, next->m_flags);
		ETHER_BPF_MTAP(ifp, next);
		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
			break;
		if (txr->tx_avail < EM_MAX_SCATTER) {
			ifp->if_drv_flags |= IFF_DRV_OACTIVE;
			break;
		}
		next = drbr_dequeue(ifp, txr->br);
	}

	if (enq > 0) {
		/* Set the watchdog */
		txr->queue_status = EM_QUEUE_WORKING;
		txr->watchdog_time = ticks;
	}
	return (err);
}

/*
** Multiqueue capable stack interface
*/
static int
em_mq_start(struct ifnet *ifp, struct mbuf *m)
{
	struct adapter	*adapter = ifp->if_softc;
	struct tx_ring	*txr = adapter->tx_rings;
	int		error;

	if (EM_TX_TRYLOCK(txr)) {
		error = em_mq_start_locked(ifp, txr, m);
		EM_TX_UNLOCK(txr);
	} else
		error = drbr_enqueue(ifp, txr->br, m);

	return (error);
}

/*
** Flush all ring buffers
*/
static void
em_qflush(struct ifnet *ifp)
{
	struct adapter	*adapter = ifp->if_softc;
	struct tx_ring	*txr = adapter->tx_rings;
	struct mbuf	*m;

	for (int i = 0; i < adapter->num_queues; i++, txr++) {
		EM_TX_LOCK(txr);
		while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
			m_freem(m);
		EM_TX_UNLOCK(txr);
	}
	if_qflush(ifp);
}

#endif /* EM_MULTIQUEUE */

static void
em_start_locked(struct ifnet *ifp, struct tx_ring *txr)
{
	struct adapter	*adapter = ifp->if_softc;
	struct mbuf	*m_head;

	EM_TX_LOCK_ASSERT(txr);

	if ((ifp->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
	    IFF_DRV_RUNNING)
		return;

	if (!adapter->link_active)
		return;

	/* Call cleanup if number of TX descriptors low */
	if (txr->tx_avail <= EM_TX_CLEANUP_THRESHOLD)
		em_txeof(txr);

	while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
		if (txr->tx_avail < EM_MAX_SCATTER) {
			ifp->if_drv_flags |= IFF_DRV_OACTIVE;
			break;
		}
		IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
		if (m_head == NULL)
			break;
		/*
		 *  Encapsulation can modify our pointer, and/or make it
		 *  NULL on failure.  In that event, we can't requeue.
		 */
		if (em_xmit(txr, &m_head)) {
			if (m_head == NULL)
				break;
			ifp->if_drv_flags |= IFF_DRV_OACTIVE;
			IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
			break;
		}

		/* Send a copy of the frame to the BPF listener */
		ETHER_BPF_MTAP(ifp, m_head);

		/* Set timeout in case hardware has problems transmitting. */
		txr->watchdog_time = ticks;
		txr->queue_status = EM_QUEUE_WORKING;
	}

	return;
}

static void
em_start(struct ifnet *ifp)
{
	struct adapter	*adapter = ifp->if_softc;
	struct tx_ring	*txr = adapter->tx_rings;

	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
		EM_TX_LOCK(txr);
		em_start_locked(ifp, txr);
		EM_TX_UNLOCK(txr);
	}
	return;
}

/*********************************************************************
 *  Ioctl entry point
 *
 *  em_ioctl is called when the user wants to configure the
 *  interface.
 *
 *  return 0 on success, positive on failure
 **********************************************************************/

static int
em_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
{
	struct adapter	*adapter = ifp->if_softc;
	struct ifreq *ifr = (struct ifreq *)data;
#ifdef INET
	struct ifaddr *ifa = (struct ifaddr *)data;
#endif
	int error = 0;

	if (adapter->in_detach)
		return (error);

	switch (command) {
	case SIOCSIFADDR:
#ifdef INET
		if (ifa->ifa_addr->sa_family == AF_INET) {
			/*
			 * XXX
			 * Since resetting hardware takes a very long time
			 * and results in link renegotiation, we only
			 * initialize the hardware when it is absolutely
			 * required.
			 */
			ifp->if_flags |= IFF_UP;
			if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) {
				EM_CORE_LOCK(adapter);
				em_init_locked(adapter);
				EM_CORE_UNLOCK(adapter);
			}
			arp_ifinit(ifp, ifa);
		} else
#endif
			error = ether_ioctl(ifp, command, data);
		break;
	case SIOCSIFMTU:
	    {
		int max_frame_size;

		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)");

		EM_CORE_LOCK(adapter);
		switch (adapter->hw.mac.type) {
		case e1000_82571:
		case e1000_82572:
		case e1000_ich9lan:
		case e1000_ich10lan:
		case e1000_pch2lan:
		case e1000_82574:
		case e1000_80003es2lan:	/* 9K Jumbo Frame size */
			max_frame_size = 9234;
			break;
		case e1000_pchlan:
			max_frame_size = 4096;
			break;
			/* Adapters that do not support jumbo frames */
		case e1000_82583:
		case e1000_ich8lan:
			max_frame_size = ETHER_MAX_LEN;
			break;
		default:
			max_frame_size = MAX_JUMBO_FRAME_SIZE;
		}
		if (ifr->ifr_mtu > max_frame_size - ETHER_HDR_LEN -
		    ETHER_CRC_LEN) {
			EM_CORE_UNLOCK(adapter);
			error = EINVAL;
			break;
		}

		ifp->if_mtu = ifr->ifr_mtu;
		adapter->max_frame_size =
		    ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
		em_init_locked(adapter);
		EM_CORE_UNLOCK(adapter);
		break;
	    }
	case SIOCSIFFLAGS:
		IOCTL_DEBUGOUT("ioctl rcv'd:\
		    SIOCSIFFLAGS (Set Interface Flags)");
		EM_CORE_LOCK(adapter);
		if (ifp->if_flags & IFF_UP) {
			if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) {
				if ((ifp->if_flags ^ adapter->if_flags) &
				    (IFF_PROMISC | IFF_ALLMULTI)) {
					em_disable_promisc(adapter);
					em_set_promisc(adapter);
				}
			} else
				em_init_locked(adapter);
		} else
			if (ifp->if_drv_flags & IFF_DRV_RUNNING)
				em_stop(adapter);
		adapter->if_flags = ifp->if_flags;
		EM_CORE_UNLOCK(adapter);
		break;
	case SIOCADDMULTI:
	case SIOCDELMULTI:
		IOCTL_DEBUGOUT("ioctl rcv'd: SIOC(ADD|DEL)MULTI");
		if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
			EM_CORE_LOCK(adapter);
			em_disable_intr(adapter);
			em_set_multi(adapter);
#ifdef DEVICE_POLLING
			if (!(ifp->if_capenable & IFCAP_POLLING))
#endif
				em_enable_intr(adapter);
			EM_CORE_UNLOCK(adapter);
		}
		break;
	case SIOCSIFMEDIA:
		/*
		** As the speed/duplex settings are being
		** changed, we need to reset the PHY.
		*/
		adapter->hw.phy.reset_disable = FALSE;
		/* Check SOL/IDER usage */
		EM_CORE_LOCK(adapter);
		if (e1000_check_reset_block(&adapter->hw)) {
			EM_CORE_UNLOCK(adapter);
			device_printf(adapter->dev, "Media change is"
			    " blocked due to SOL/IDER session.\n");
			break;
		}
		EM_CORE_UNLOCK(adapter);
		/* falls thru */
	case SIOCGIFMEDIA:
		IOCTL_DEBUGOUT("ioctl rcv'd: \
		    SIOCxIFMEDIA (Get/Set Interface Media)");
		error = ifmedia_ioctl(ifp, ifr, &adapter->media, command);
		break;
	case SIOCSIFCAP:
	    {
		int mask, reinit;

		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFCAP (Set Capabilities)");
		reinit = 0;
		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
#ifdef DEVICE_POLLING
		if (mask & IFCAP_POLLING) {
			if (ifr->ifr_reqcap & IFCAP_POLLING) {
				error = ether_poll_register(em_poll, ifp);
				if (error)
					return (error);
				EM_CORE_LOCK(adapter);
				em_disable_intr(adapter);
				ifp->if_capenable |= IFCAP_POLLING;
				EM_CORE_UNLOCK(adapter);
			} else {
				error = ether_poll_deregister(ifp);
				/* Enable interrupt even in error case */
				EM_CORE_LOCK(adapter);
				em_enable_intr(adapter);
				ifp->if_capenable &= ~IFCAP_POLLING;
				EM_CORE_UNLOCK(adapter);
			}
		}
#endif
		if (mask & IFCAP_HWCSUM) {
			ifp->if_capenable ^= IFCAP_HWCSUM;
			reinit = 1;
		}
		if (mask & IFCAP_TSO4) {
			ifp->if_capenable ^= IFCAP_TSO4;
			reinit = 1;
		}
		if (mask & IFCAP_VLAN_HWTAGGING) {
			ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
			reinit = 1;
		}
		if (mask & IFCAP_VLAN_HWFILTER) {
			ifp->if_capenable ^= IFCAP_VLAN_HWFILTER;
			reinit = 1;
		}
		if ((mask & IFCAP_WOL) &&
		    (ifp->if_capabilities & IFCAP_WOL) != 0) {
			if (mask & IFCAP_WOL_MCAST)
				ifp->if_capenable ^= IFCAP_WOL_MCAST;
			if (mask & IFCAP_WOL_MAGIC)
				ifp->if_capenable ^= IFCAP_WOL_MAGIC;
		}
		if (reinit && (ifp->if_drv_flags & IFF_DRV_RUNNING))
			em_init(adapter);
		VLAN_CAPABILITIES(ifp);
		break;
	    }

	default:
		error = ether_ioctl(ifp, command, data);
		break;
	}

	return (error);
}


/*********************************************************************
 *  Init entry point
 *
 *  This routine is used in two ways.  It is used by the stack as
 *  the init entry point in the network interface structure.  It is
 *  also used by the driver as a hw/sw initialization routine to
 *  get to a consistent state.
 **********************************************************************/

static void
em_init_locked(struct adapter *adapter)
{
	struct ifnet	*ifp = adapter->ifp;
	device_t	dev = adapter->dev;
	u32		pba;

	INIT_DEBUGOUT("em_init: begin");

	EM_CORE_LOCK_ASSERT(adapter);

	em_disable_intr(adapter);
	callout_stop(&adapter->timer);

	/*
	 * Packet Buffer Allocation (PBA)
	 * Writing PBA sets the receive portion of the buffer;
	 * the remainder is used for the transmit buffer.
	 */
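	/*
	 * For example, on the 48K-buffer parts below (82571/82572/
	 * 80003es2lan), E1000_PBA_32K reserves 32K for receive and
	 * leaves 16K for transmit, as the per-case comments note.
	 */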
	switch (adapter->hw.mac.type) {
	/* Total Packet Buffer on these is 48K */
	case e1000_82571:
	case e1000_82572:
	case e1000_80003es2lan:
		pba = E1000_PBA_32K; /* 32K for Rx, 16K for Tx */
		break;
	case e1000_82573: /* 82573: Total Packet Buffer is 32K */
		pba = E1000_PBA_12K; /* 12K for Rx, 20K for Tx */
		break;
	case e1000_82574:
	case e1000_82583:
		pba = E1000_PBA_20K; /* 20K for Rx, 20K for Tx */
		break;
	case e1000_ich8lan:
		pba = E1000_PBA_8K;
		break;
	case e1000_ich9lan:
	case e1000_ich10lan:
		pba = E1000_PBA_10K;
		break;
	case e1000_pchlan:
	case e1000_pch2lan:
		pba = E1000_PBA_26K;
		break;
	default:
		if (adapter->max_frame_size > 8192)
			pba = E1000_PBA_40K; /* 40K for Rx, 24K for Tx */
		else
			pba = E1000_PBA_48K; /* 48K for Rx, 16K for Tx */
	}

	INIT_DEBUGOUT1("em_init: pba=%dK", pba);
	E1000_WRITE_REG(&adapter->hw, E1000_PBA, pba);

	/* Get the latest MAC address; user can use a LAA */
	bcopy(IF_LLADDR(adapter->ifp), adapter->hw.mac.addr,
	    ETHER_ADDR_LEN);

	/* Put the address into the Receive Address Array */
	e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);

	/*
	 * With the 82571 adapter, RAR[0] may be overwritten
	 * when the other port is reset.  We make a duplicate
	 * in RAR[14] for that eventuality; this assures
	 * the interface continues to function.
	 */
	if (adapter->hw.mac.type == e1000_82571) {
		e1000_set_laa_state_82571(&adapter->hw, TRUE);
		e1000_rar_set(&adapter->hw, adapter->hw.mac.addr,
		    E1000_RAR_ENTRIES - 1);
	}

	/* Initialize the hardware */
	em_reset(adapter);
	em_update_link_status(adapter);

	/* Setup VLAN support, basic and offload if available */
	E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);

	/* Set hardware offload abilities */
	ifp->if_hwassist = 0;
	if (ifp->if_capenable & IFCAP_TXCSUM)
		ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
	if (ifp->if_capenable & IFCAP_TSO4)
		ifp->if_hwassist |= CSUM_TSO;

	/* Configure for OS presence */
	em_init_manageability(adapter);

	/* Prepare transmit descriptors and buffers */
	em_setup_transmit_structures(adapter);
	em_initialize_transmit_unit(adapter);

	/* Setup Multicast table */
	em_set_multi(adapter);

	/*
	** Figure out the desired mbuf
	** pool for doing jumbos
	*/
	if (adapter->max_frame_size <= 2048)
		adapter->rx_mbuf_sz = MCLBYTES;
	else if (adapter->max_frame_size <= 4096)
		adapter->rx_mbuf_sz = MJUMPAGESIZE;
	else
		adapter->rx_mbuf_sz = MJUM9BYTES;

	/* Prepare receive descriptors and buffers */
	if (em_setup_receive_structures(adapter)) {
		device_printf(dev, "Could not setup receive structures\n");
		em_stop(adapter);
		return;
	}
	em_initialize_receive_unit(adapter);

	/* Use real VLAN Filter support? */
	if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) {
		if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
			/* Use real VLAN Filter support */
			em_setup_vlan_hw_support(adapter);
		else {
			u32 ctrl;
			ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
			ctrl |= E1000_CTRL_VME;
			E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
		}
	}

	/* Don't lose promiscuous settings */
	em_set_promisc(adapter);

	ifp->if_drv_flags |= IFF_DRV_RUNNING;
	ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;

	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
	e1000_clear_hw_cntrs_base_generic(&adapter->hw);

	/* MSI/X configuration for 82574 */
	if (adapter->hw.mac.type == e1000_82574) {
		int tmp;
		tmp = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
		tmp |= E1000_CTRL_EXT_PBA_CLR;
		E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, tmp);
		/* Set the IVAR - interrupt vector routing. */
		E1000_WRITE_REG(&adapter->hw, E1000_IVAR, adapter->ivars);
	}

#ifdef DEVICE_POLLING
	/*
	 * Only enable interrupts if we are not polling; make sure
	 * they are off otherwise.
	 */
	if (ifp->if_capenable & IFCAP_POLLING)
		em_disable_intr(adapter);
	else
#endif /* DEVICE_POLLING */
		em_enable_intr(adapter);

	/* AMT based hardware can now take control from firmware */
	if (adapter->has_manage && adapter->has_amt)
		em_get_hw_control(adapter);

	/* Don't reset the phy next time init gets called */
	adapter->hw.phy.reset_disable = TRUE;
}

static void
em_init(void *arg)
{
	struct adapter *adapter = arg;

	EM_CORE_LOCK(adapter);
	em_init_locked(adapter);
	EM_CORE_UNLOCK(adapter);
}


#ifdef DEVICE_POLLING
/*********************************************************************
 *
 *  Legacy polling routine: note this only works with single queue
 *
 *********************************************************************/
static int
em_poll(struct ifnet *ifp, enum poll_cmd cmd, int count)
{
	struct adapter *adapter = ifp->if_softc;
	struct tx_ring	*txr = adapter->tx_rings;
	struct rx_ring	*rxr = adapter->rx_rings;
	u32		reg_icr;
	int		rx_done;

	EM_CORE_LOCK(adapter);
	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
		EM_CORE_UNLOCK(adapter);
		return (0);
	}

	if (cmd == POLL_AND_CHECK_STATUS) {
		reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
		if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
			callout_stop(&adapter->timer);
			adapter->hw.mac.get_link_status = 1;
			em_update_link_status(adapter);
			callout_reset(&adapter->timer, hz,
			    em_local_timer, adapter);
		}
	}
	EM_CORE_UNLOCK(adapter);

	em_rxeof(rxr, count, &rx_done);

	EM_TX_LOCK(txr);
	em_txeof(txr);
#ifdef EM_MULTIQUEUE
	if (!drbr_empty(ifp, txr->br))
		em_mq_start_locked(ifp, txr, NULL);
#else
	if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
		em_start_locked(ifp, txr);
#endif
	EM_TX_UNLOCK(txr);

	return (rx_done);
}
#endif /* DEVICE_POLLING */


/*********************************************************************
 *
 *  Fast Legacy/MSI Combined Interrupt Service routine
 *
 *********************************************************************/
static int
em_irq_fast(void *arg)
{
	struct adapter	*adapter = arg;
	struct ifnet	*ifp;
	u32		reg_icr;

	ifp = adapter->ifp;

	reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);

	/* Hot eject? */
	if (reg_icr == 0xffffffff)
		return FILTER_STRAY;

	/* Definitely not our interrupt. */
	if (reg_icr == 0x0)
		return FILTER_STRAY;

	/*
	 * Starting with the 82571 chip, bit 31 should be used to
	 * determine whether the interrupt belongs to us.
	 */
	if (adapter->hw.mac.type >= e1000_82571 &&
	    (reg_icr & E1000_ICR_INT_ASSERTED) == 0)
		return FILTER_STRAY;

	em_disable_intr(adapter);
	taskqueue_enqueue(adapter->tq, &adapter->que_task);

	/* Link status change */
	if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
		adapter->hw.mac.get_link_status = 1;
		taskqueue_enqueue(taskqueue_fast, &adapter->link_task);
	}

	if (reg_icr & E1000_ICR_RXO)
		adapter->rx_overruns++;
	return FILTER_HANDLED;
}

/* Combined RX/TX handler, used by Legacy and MSI */
static void
em_handle_que(void *context, int pending)
{
	struct adapter	*adapter = context;
	struct ifnet	*ifp = adapter->ifp;
	struct tx_ring	*txr = adapter->tx_rings;
	struct rx_ring	*rxr = adapter->rx_rings;
	bool		more;

	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
		more = em_rxeof(rxr, adapter->rx_process_limit, NULL);

		EM_TX_LOCK(txr);
		em_txeof(txr);
#ifdef EM_MULTIQUEUE
		if (!drbr_empty(ifp, txr->br))
			em_mq_start_locked(ifp, txr, NULL);
#else
		if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
			em_start_locked(ifp, txr);
#endif
		em_txeof(txr);
		EM_TX_UNLOCK(txr);
		if (more) {
			taskqueue_enqueue(adapter->tq, &adapter->que_task);
			return;
		}
	}

	em_enable_intr(adapter);
	return;
}


/*********************************************************************
 *
 *  MSIX Interrupt Service Routines
 *
 **********************************************************************/
static void
em_msix_tx(void *arg)
{
	struct tx_ring *txr = arg;
	struct adapter *adapter = txr->adapter;
	bool		more;

	++txr->tx_irq;
	EM_TX_LOCK(txr);
	more = em_txeof(txr);
	EM_TX_UNLOCK(txr);
	if (more)
		taskqueue_enqueue(txr->tq, &txr->tx_task);
	else
		/* Reenable this interrupt */
		E1000_WRITE_REG(&adapter->hw, E1000_IMS, txr->ims);
	return;
}

/*********************************************************************
 *
 *  MSIX RX Interrupt Service routine
 *
 **********************************************************************/

static void
em_msix_rx(void *arg)
{
	struct rx_ring	*rxr = arg;
	struct adapter	*adapter = rxr->adapter;
	bool		more;

	++rxr->rx_irq;
	more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
	if (more)
		taskqueue_enqueue(rxr->tq, &rxr->rx_task);
	else
		/* Reenable this interrupt */
		E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxr->ims);
	return;
}

/*********************************************************************
 *
 *  MSIX Link Fast Interrupt Service routine
 *
 **********************************************************************/
static void
em_msix_link(void *arg)
{
	struct adapter	*adapter = arg;
	u32		reg_icr;

	++adapter->link_irq;
	reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);

	if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
		adapter->hw.mac.get_link_status = 1;
		em_handle_link(adapter, 0);
	} else
		E1000_WRITE_REG(&adapter->hw, E1000_IMS,
		    EM_MSIX_LINK | E1000_IMS_LSC);
	return;
}

static void
em_handle_rx(void *context, int pending)
{
	struct rx_ring	*rxr = context;
	struct adapter	*adapter = rxr->adapter;
	bool		more;

	more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
	if (more)
		taskqueue_enqueue(rxr->tq, &rxr->rx_task);
	else
		/* Reenable this interrupt */
		E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxr->ims);
}

static void
em_handle_tx(void *context, int pending)
{
	struct tx_ring	*txr = context;
	struct adapter	*adapter = txr->adapter;
	struct ifnet	*ifp = adapter->ifp;

	EM_TX_LOCK(txr);
	em_txeof(txr);
#ifdef EM_MULTIQUEUE
	if (!drbr_empty(ifp, txr->br))
		em_mq_start_locked(ifp, txr, NULL);
#else
	if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
		em_start_locked(ifp, txr);
#endif
	em_txeof(txr);
	E1000_WRITE_REG(&adapter->hw, E1000_IMS, txr->ims);
	EM_TX_UNLOCK(txr);
}

static void
em_handle_link(void *context, int pending)
{
	struct adapter	*adapter = context;
	struct ifnet *ifp = adapter->ifp;

	if (!(ifp->if_drv_flags & IFF_DRV_RUNNING))
		return;

	EM_CORE_LOCK(adapter);
	callout_stop(&adapter->timer);
	em_update_link_status(adapter);
	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
	E1000_WRITE_REG(&adapter->hw, E1000_IMS,
	    EM_MSIX_LINK | E1000_IMS_LSC);
	EM_CORE_UNLOCK(adapter);
}


/*********************************************************************
 *
 *  Media Ioctl callback
 *
 *  This routine is called whenever the user queries the status of
 *  the interface using ifconfig.
 *
 **********************************************************************/
static void
em_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
{
	struct adapter *adapter = ifp->if_softc;
	u_char fiber_type = IFM_1000_SX;

	INIT_DEBUGOUT("em_media_status: begin");

	EM_CORE_LOCK(adapter);
	em_update_link_status(adapter);

	ifmr->ifm_status = IFM_AVALID;
	ifmr->ifm_active = IFM_ETHER;

	if (!adapter->link_active) {
		EM_CORE_UNLOCK(adapter);
		return;
	}

	ifmr->ifm_status |= IFM_ACTIVE;

	if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
	    (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
		ifmr->ifm_active |= fiber_type | IFM_FDX;
	} else {
		switch (adapter->link_speed) {
		case 10:
			ifmr->ifm_active |= IFM_10_T;
			break;
		case 100:
			ifmr->ifm_active |= IFM_100_TX;
			break;
		case 1000:
			ifmr->ifm_active |= IFM_1000_T;
			break;
		}
		if (adapter->link_duplex == FULL_DUPLEX)
			ifmr->ifm_active |= IFM_FDX;
		else
			ifmr->ifm_active |= IFM_HDX;
	}
	EM_CORE_UNLOCK(adapter);
}

/*********************************************************************
 *
 *  Media Ioctl callback
 *
 *  This routine is called when the user changes speed/duplex using
 *  the media/mediaopt options with ifconfig.
 *
 **********************************************************************/
static int
em_media_change(struct ifnet *ifp)
{
	struct adapter *adapter = ifp->if_softc;
	struct ifmedia  *ifm = &adapter->media;

	INIT_DEBUGOUT("em_media_change: begin");

	if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
		return (EINVAL);

	EM_CORE_LOCK(adapter);
	switch (IFM_SUBTYPE(ifm->ifm_media)) {
	case IFM_AUTO:
		adapter->hw.mac.autoneg = DO_AUTO_NEG;
		adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
		break;
	case IFM_1000_LX:
	case IFM_1000_SX:
	case IFM_1000_T:
		adapter->hw.mac.autoneg = DO_AUTO_NEG;
		adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
		break;
	case IFM_100_TX:
		adapter->hw.mac.autoneg = FALSE;
		adapter->hw.phy.autoneg_advertised = 0;
		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
			adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_FULL;
		else
			adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_HALF;
		break;
	case IFM_10_T:
		adapter->hw.mac.autoneg = FALSE;
		adapter->hw.phy.autoneg_advertised = 0;
		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
			adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_FULL;
		else
			adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_HALF;
		break;
	default:
		device_printf(adapter->dev, "Unsupported media type\n");
	}

	em_init_locked(adapter);
	EM_CORE_UNLOCK(adapter);

	return (0);
}
1739
1740/*********************************************************************
1741 *
1742 *  This routine maps the mbufs to tx descriptors.
1743 *
1744 *  return 0 on success, positive on failure
1745 **********************************************************************/
1746
1747static int
1748em_xmit(struct tx_ring *txr, struct mbuf **m_headp)
1749{
1750	struct adapter		*adapter = txr->adapter;
1751	bus_dma_segment_t	segs[EM_MAX_SCATTER];
1752	bus_dmamap_t		map;
1753	struct em_buffer	*tx_buffer, *tx_buffer_mapped;
1754	struct e1000_tx_desc	*ctxd = NULL;
1755	struct mbuf		*m_head;
1756	struct ether_header	*eh;
1757	struct ip		*ip = NULL;
1758	struct tcphdr		*tp = NULL;
1759	u32			txd_upper, txd_lower, txd_used, txd_saved;
1760	int			ip_off, poff;
1761	int			nsegs, i, j, first, last = 0;
1762	int			error, do_tso, tso_desc = 0;
1763
1764	m_head = *m_headp;
1765	txd_upper = txd_lower = txd_used = txd_saved = 0;
1766	do_tso = ((m_head->m_pkthdr.csum_flags & CSUM_TSO) != 0);
1767	ip_off = poff = 0;
1768
1769	/*
1770	 * Intel recommends entire IP/TCP header length reside in a single
1771	 * buffer. If multiple descriptors are used to describe the IP and
1772	 * TCP header, each descriptor should describe one or more
1773	 * complete headers; descriptors referencing only parts of headers
1774	 * are not supported. If all layer headers are not coalesced into
1775	 * a single buffer, each buffer should not cross a 4KB boundary,
1776	 * or be larger than the maximum read request size.
1777	 * The controller also requires modifying the IP/TCP header to make
1778	 * TSO work, so we first get a writable mbuf chain, then coalesce the
1779	 * ethernet/IP/TCP headers into a single buffer to meet the hardware's
1780	 * requirements. This also simplifies IP/TCP/UDP checksum offloading,
1781	 * which has similar restrictions.
1782	 */
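	/*
	 * For instance (illustrative, untagged IPv4/TCP, no IP options):
	 * ip_off = sizeof(struct ether_header) = 14 and
	 * poff = ip_off + (ip->ip_hl << 2) = 14 + 20 = 34 below.
	 */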
1783	if (do_tso || m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD) {
1784		if (do_tso || (m_head->m_next != NULL &&
1785		    m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD)) {
1786			if (M_WRITABLE(*m_headp) == 0) {
1787				m_head = m_dup(*m_headp, M_DONTWAIT);
1788				m_freem(*m_headp);
1789				if (m_head == NULL) {
1790					*m_headp = NULL;
1791					return (ENOBUFS);
1792				}
1793				*m_headp = m_head;
1794			}
1795		}
1796		/*
1797		 * XXX
1798		 * Assume IPv4, we don't have TSO/checksum offload support
1799		 * for IPv6 yet.
1800		 */
1801		ip_off = sizeof(struct ether_header);
1802		m_head = m_pullup(m_head, ip_off);
1803		if (m_head == NULL) {
1804			*m_headp = NULL;
1805			return (ENOBUFS);
1806		}
1807		eh = mtod(m_head, struct ether_header *);
1808		if (eh->ether_type == htons(ETHERTYPE_VLAN)) {
1809			ip_off = sizeof(struct ether_vlan_header);
1810			m_head = m_pullup(m_head, ip_off);
1811			if (m_head == NULL) {
1812				*m_headp = NULL;
1813				return (ENOBUFS);
1814			}
1815		}
1816		m_head = m_pullup(m_head, ip_off + sizeof(struct ip));
1817		if (m_head == NULL) {
1818			*m_headp = NULL;
1819			return (ENOBUFS);
1820		}
1821		ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1822		poff = ip_off + (ip->ip_hl << 2);
1823		m_head = m_pullup(m_head, poff + sizeof(struct tcphdr));
1824		if (m_head == NULL) {
1825			*m_headp = NULL;
1826			return (ENOBUFS);
1827		}
1828		if (do_tso) {
1829			tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1830			/*
1831			 * TSO workaround:
1832			 *   pull 4 more bytes of data into the first mbuf.
1833			 */
1834			m_head = m_pullup(m_head, poff + (tp->th_off << 2) + 4);
1835			if (m_head == NULL) {
1836				*m_headp = NULL;
1837				return (ENOBUFS);
1838			}
1839			ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1840			ip->ip_len = 0;
1841			ip->ip_sum = 0;
1842			/*
1843			 * The TCP pseudo header checksum must not include
1844			 * the TCP payload length, so the driver recomputes
1845			 * the checksum here to match what the hardware
1846			 * expects, per Microsoft's Large Send specification.
1847			 */
1848			tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1849			tp->th_sum = in_pseudo(ip->ip_src.s_addr,
1850			    ip->ip_dst.s_addr, htons(IPPROTO_TCP));
1851		} else if (m_head->m_pkthdr.csum_flags & CSUM_TCP) {
1852			tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1853			m_head = m_pullup(m_head, poff + (tp->th_off << 2));
1854			if (m_head == NULL) {
1855				*m_headp = NULL;
1856				return (ENOBUFS);
1857			}
1858			ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1859			tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1860		} else if (m_head->m_pkthdr.csum_flags & CSUM_UDP) {
1861			m_head = m_pullup(m_head, poff + sizeof(struct udphdr));
1862			if (m_head == NULL) {
1863				*m_headp = NULL;
1864				return (ENOBUFS);
1865			}
1866			ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1867		}
1868		*m_headp = m_head;
1869	}
1870
1871	/*
1872	 * Map the packet for DMA
1873	 *
1874	 * Capture the first descriptor index,
1875	 * this descriptor will have the index
1876	 * of the EOP which is the only one that
1877	 * now gets a DONE bit writeback.
1878	 */
1879	first = txr->next_avail_desc;
1880	tx_buffer = &txr->tx_buffers[first];
1881	tx_buffer_mapped = tx_buffer;
1882	map = tx_buffer->map;
1883
1884	error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
1885	    *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
1886
1887	/*
1888	 * There are two types of errors we can (try) to handle:
1889	 * - EFBIG means the mbuf chain was too long and bus_dma ran
1890	 *   out of segments.  Defragment the mbuf chain and try again.
1891	 * - ENOMEM means bus_dma could not obtain enough bounce buffers
1892	 *   at this point in time.  Defer sending and try again later.
1893	 * All other errors, in particular EINVAL, are fatal and prevent the
1894	 * mbuf chain from ever going through.  Drop it and report error.
1895	 */
1896	if (error == EFBIG) {
1897		struct mbuf *m;
1898
1899		m = m_defrag(*m_headp, M_DONTWAIT);
1900		if (m == NULL) {
1901			adapter->mbuf_alloc_failed++;
1902			m_freem(*m_headp);
1903			*m_headp = NULL;
1904			return (ENOBUFS);
1905		}
1906		*m_headp = m;
1907
1908		/* Try it again */
1909		error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
1910		    *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
1911
1912		if (error == ENOMEM) {
1913			adapter->no_tx_dma_setup++;
1914			return (error);
1915		} else if (error != 0) {
1916			adapter->no_tx_dma_setup++;
1917			m_freem(*m_headp);
1918			*m_headp = NULL;
1919			return (error);
1920		}
1921
1922	} else if (error == ENOMEM) {
1923		adapter->no_tx_dma_setup++;
1924		return (error);
1925	} else if (error != 0) {
1926		adapter->no_tx_dma_setup++;
1927		m_freem(*m_headp);
1928		*m_headp = NULL;
1929		return (error);
1930	}
1931
1932	/*
1933	 * TSO Hardware workaround, if this packet is not
1934	 * TSO, and is only a single descriptor long, and
1935	 * it follows a TSO burst, then we need to add a
1936	 * sentinel descriptor to prevent premature writeback.
1937	 */
1938	if ((do_tso == 0) && (txr->tx_tso == TRUE)) {
1939		if (nsegs == 1)
1940			tso_desc = TRUE;
1941		txr->tx_tso = FALSE;
1942	}
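	/*
	 * Illustration: when tso_desc is set, the descriptor loop below
	 * splits the packet's final segment (if longer than 8 bytes) into
	 * (len - 4) bytes plus a 4-byte sentinel descriptor, so the DONE
	 * bit is not written back before the TSO descriptors have drained.
	 */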
1943
1944	if (nsegs > (txr->tx_avail - 2)) {
1945		txr->no_desc_avail++;
1946		bus_dmamap_unload(txr->txtag, map);
1947		return (ENOBUFS);
1948	}
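	/*
	 * Note: the two-descriptor headroom presumably leaves room for an
	 * offload context descriptor plus a possible TSO sentinel.
	 */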
1949	m_head = *m_headp;
1950
1951	/* Do hardware assists */
1952	if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
1953		em_tso_setup(txr, m_head, ip_off, ip, tp,
1954		    &txd_upper, &txd_lower);
1955		/* we need to make a final sentinel transmit desc */
1956		tso_desc = TRUE;
1957	} else if (m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD)
1958		em_transmit_checksum_setup(txr, m_head,
1959		    ip_off, ip, &txd_upper, &txd_lower);
1960
1961	i = txr->next_avail_desc;
1962
1963	/* Set up our transmit descriptors */
1964	for (j = 0; j < nsegs; j++) {
1965		bus_size_t seg_len;
1966		bus_addr_t seg_addr;
1967
1968		tx_buffer = &txr->tx_buffers[i];
1969		ctxd = &txr->tx_base[i];
1970		seg_addr = segs[j].ds_addr;
1971		seg_len  = segs[j].ds_len;
1972		/*
1973		** TSO Workaround:
1974		** If this is the last descriptor, we want to
1975		** split it so we have a small final sentinel
1976		*/
1977		if (tso_desc && (j == (nsegs -1)) && (seg_len > 8)) {
1978			seg_len -= 4;
1979			ctxd->buffer_addr = htole64(seg_addr);
1980			ctxd->lower.data = htole32(
1981			adapter->txd_cmd | txd_lower | seg_len);
1982			ctxd->upper.data =
1983			    htole32(txd_upper);
1984			if (++i == adapter->num_tx_desc)
1985				i = 0;
1986			/* Now make the sentinel */
1987			++txd_used; /* using an extra txd */
1988			ctxd = &txr->tx_base[i];
1989			tx_buffer = &txr->tx_buffers[i];
1990			ctxd->buffer_addr =
1991			    htole64(seg_addr + seg_len);
1992			ctxd->lower.data = htole32(
1993			adapter->txd_cmd | txd_lower | 4);
1994			ctxd->upper.data =
1995			    htole32(txd_upper);
1996			last = i;
1997			if (++i == adapter->num_tx_desc)
1998				i = 0;
1999		} else {
2000			ctxd->buffer_addr = htole64(seg_addr);
2001			ctxd->lower.data = htole32(
2002			adapter->txd_cmd | txd_lower | seg_len);
2003			ctxd->upper.data =
2004			    htole32(txd_upper);
2005			last = i;
2006			if (++i == adapter->num_tx_desc)
2007				i = 0;
2008		}
2009		tx_buffer->m_head = NULL;
2010		tx_buffer->next_eop = -1;
2011	}
2012
2013	txr->next_avail_desc = i;
2014	txr->tx_avail -= nsegs;
2015	if (tso_desc) /* TSO used an extra for sentinel */
2016		txr->tx_avail -= txd_used;
2017
2018	if (m_head->m_flags & M_VLANTAG) {
2019		/* Set the vlan id. */
2020		ctxd->upper.fields.special =
2021		    htole16(m_head->m_pkthdr.ether_vtag);
2022		/* Tell hardware to add tag */
2023		ctxd->lower.data |= htole32(E1000_TXD_CMD_VLE);
2024	}
2025
2026	tx_buffer->m_head = m_head;
2027	tx_buffer_mapped->map = tx_buffer->map;
2028	tx_buffer->map = map;
2029	bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);
2030
2031	/*
2032	 * The last descriptor of the packet
2033	 * needs End Of Packet (EOP)
2034	 * and Report Status (RS).
2035	 */
2036	ctxd->lower.data |=
2037	    htole32(E1000_TXD_CMD_EOP | E1000_TXD_CMD_RS);
2038	/*
2039	 * Keep track in the first buffer which
2040	 * descriptor will be written back
2041	 */
2042	tx_buffer = &txr->tx_buffers[first];
2043	tx_buffer->next_eop = last;
2044	/* Update the watchdog time early and often */
2045	txr->watchdog_time = ticks;
2046
2047	/*
2048	 * Advance the Transmit Descriptor Tail (TDT), this tells the E1000
2049	 * that this frame is available to transmit.
2050	 */
2051	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
2052	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
2053	E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), i);
2054
2055	return (0);
2056}
2057
2058static void
2059em_set_promisc(struct adapter *adapter)
2060{
2061	struct ifnet	*ifp = adapter->ifp;
2062	u32		reg_rctl;
2063
2064	reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2065
2066	if (ifp->if_flags & IFF_PROMISC) {
2067		reg_rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
2068		/* Turn this on if you want to see bad packets */
2069		if (em_debug_sbp)
2070			reg_rctl |= E1000_RCTL_SBP;
2071		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2072	} else if (ifp->if_flags & IFF_ALLMULTI) {
2073		reg_rctl |= E1000_RCTL_MPE;
2074		reg_rctl &= ~E1000_RCTL_UPE;
2075		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2076	}
2077}
2078
2079static void
2080em_disable_promisc(struct adapter *adapter)
2081{
2082	u32	reg_rctl;
2083
2084	reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2085
2086	reg_rctl &=  (~E1000_RCTL_UPE);
2087	reg_rctl &=  (~E1000_RCTL_MPE);
2088	reg_rctl &=  (~E1000_RCTL_SBP);
2089	E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2090}
2091
2092
2093/*********************************************************************
2094 *  Multicast Update
2095 *
2096 *  This routine is called whenever multicast address list is updated.
2097 *
2098 **********************************************************************/
2099
2100static void
2101em_set_multi(struct adapter *adapter)
2102{
2103	struct ifnet	*ifp = adapter->ifp;
2104	struct ifmultiaddr *ifma;
2105	u32 reg_rctl = 0;
2106	u8  *mta; /* Multicast array memory */
2107	int mcnt = 0;
2108
2109	IOCTL_DEBUGOUT("em_set_multi: begin");
2110
2111	mta = adapter->mta;
2112	bzero(mta, sizeof(u8) * ETH_ADDR_LEN * MAX_NUM_MULTICAST_ADDRESSES);
2113
2114	if (adapter->hw.mac.type == e1000_82542 &&
2115	    adapter->hw.revision_id == E1000_REVISION_2) {
2116		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2117		if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
2118			e1000_pci_clear_mwi(&adapter->hw);
2119		reg_rctl |= E1000_RCTL_RST;
2120		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2121		msec_delay(5);
2122	}
2123
2124#if __FreeBSD_version < 800000
2125	IF_ADDR_LOCK(ifp);
2126#else
2127	if_maddr_rlock(ifp);
2128#endif
2129	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
2130		if (ifma->ifma_addr->sa_family != AF_LINK)
2131			continue;
2132
2133		if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
2134			break;
2135
2136		bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
2137		    &mta[mcnt * ETH_ADDR_LEN], ETH_ADDR_LEN);
2138		mcnt++;
2139	}
2140#if __FreeBSD_version < 800000
2141	IF_ADDR_UNLOCK(ifp);
2142#else
2143	if_maddr_runlock(ifp);
2144#endif
2145	if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) {
2146		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2147		reg_rctl |= E1000_RCTL_MPE;
2148		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2149	} else
2150		e1000_update_mc_addr_list(&adapter->hw, mta, mcnt);
2151
2152	if (adapter->hw.mac.type == e1000_82542 &&
2153	    adapter->hw.revision_id == E1000_REVISION_2) {
2154		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2155		reg_rctl &= ~E1000_RCTL_RST;
2156		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2157		msec_delay(5);
2158		if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
2159			e1000_pci_set_mwi(&adapter->hw);
2160	}
2161}
2162
2163
2164/*********************************************************************
2165 *  Timer routine
2166 *
2167 *  This routine checks for link status and updates statistics.
2168 *
2169 **********************************************************************/
2170
2171static void
2172em_local_timer(void *arg)
2173{
2174	struct adapter	*adapter = arg;
2175	struct ifnet	*ifp = adapter->ifp;
2176	struct tx_ring	*txr = adapter->tx_rings;
2177
2178	EM_CORE_LOCK_ASSERT(adapter);
2179
2180	em_update_link_status(adapter);
2181	em_update_stats_counters(adapter);
2182
2183	/* Reset LAA into RAR[0] on 82571 */
2184	if ((adapter->hw.mac.type == e1000_82571) &&
2185	    e1000_get_laa_state_82571(&adapter->hw))
2186		e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);
2187
2188	/*
2189	** Don't do TX watchdog check if we've been paused
2190	*/
2191	if (adapter->pause_frames) {
2192		adapter->pause_frames = 0;
2193		goto out;
2194	}
2195	/*
2196	** Check on the state of the TX queue(s); this
2197	** can be done without the TX lock because it is
2198	** read-only and the HUNG state is static once set.
2199	*/
2200	for (int i = 0; i < adapter->num_queues; i++, txr++)
2201		if (txr->queue_status == EM_QUEUE_HUNG)
2202			goto hung;
2203out:
2204	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
2205	return;
2206hung:
2207	/* Looks like we're hung */
2208	device_printf(adapter->dev, "Watchdog timeout -- resetting\n");
2209	device_printf(adapter->dev,
2210	    "Queue(%d) tdh = %d, hw tdt = %d\n", txr->me,
2211	    E1000_READ_REG(&adapter->hw, E1000_TDH(txr->me)),
2212	    E1000_READ_REG(&adapter->hw, E1000_TDT(txr->me)));
2213	device_printf(adapter->dev, "TX(%d) desc avail = %d, "
2214	    "Next TX to Clean = %d\n",
2215	    txr->me, txr->tx_avail, txr->next_to_clean);
2216	ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2217	adapter->watchdog_events++;
2218	em_init_locked(adapter);
2219}
2220
2221
2222static void
2223em_update_link_status(struct adapter *adapter)
2224{
2225	struct e1000_hw *hw = &adapter->hw;
2226	struct ifnet *ifp = adapter->ifp;
2227	device_t dev = adapter->dev;
2228	struct tx_ring *txr = adapter->tx_rings;
2229	u32 link_check = 0;
2230
2231	/* Get the cached link value or read phy for real */
2232	switch (hw->phy.media_type) {
2233	case e1000_media_type_copper:
2234		if (hw->mac.get_link_status) {
2235			/* Do the work to read phy */
2236			e1000_check_for_link(hw);
2237			link_check = !hw->mac.get_link_status;
2238			if (link_check) /* ESB2 fix */
2239				e1000_cfg_on_link_up(hw);
2240		} else
2241			link_check = TRUE;
2242		break;
2243	case e1000_media_type_fiber:
2244		e1000_check_for_link(hw);
2245		link_check = (E1000_READ_REG(hw, E1000_STATUS) &
2246                                 E1000_STATUS_LU);
2247		break;
2248	case e1000_media_type_internal_serdes:
2249		e1000_check_for_link(hw);
2250		link_check = adapter->hw.mac.serdes_has_link;
2251		break;
2252	default:
2253	case e1000_media_type_unknown:
2254		break;
2255	}
2256
2257	/* Now check for a transition */
2258	if (link_check && (adapter->link_active == 0)) {
2259		e1000_get_speed_and_duplex(hw, &adapter->link_speed,
2260		    &adapter->link_duplex);
2261		/* Check if we must disable SPEED_MODE bit on PCI-E */
2262		if ((adapter->link_speed != SPEED_1000) &&
2263		    ((hw->mac.type == e1000_82571) ||
2264		    (hw->mac.type == e1000_82572))) {
2265			int tarc0;
2266			tarc0 = E1000_READ_REG(hw, E1000_TARC(0));
2267			tarc0 &= ~SPEED_MODE_BIT;
2268			E1000_WRITE_REG(hw, E1000_TARC(0), tarc0);
2269		}
2270		if (bootverbose)
2271			device_printf(dev, "Link is up %d Mbps %s\n",
2272			    adapter->link_speed,
2273			    ((adapter->link_duplex == FULL_DUPLEX) ?
2274			    "Full Duplex" : "Half Duplex"));
2275		adapter->link_active = 1;
2276		adapter->smartspeed = 0;
2277		ifp->if_baudrate = adapter->link_speed * 1000000;
2278		if_link_state_change(ifp, LINK_STATE_UP);
2279	} else if (!link_check && (adapter->link_active == 1)) {
2280		ifp->if_baudrate = adapter->link_speed = 0;
2281		adapter->link_duplex = 0;
2282		if (bootverbose)
2283			device_printf(dev, "Link is Down\n");
2284		adapter->link_active = 0;
2285		/* Link down, disable watchdog */
2286		for (int i = 0; i < adapter->num_queues; i++, txr++)
2287			txr->queue_status = EM_QUEUE_IDLE;
2288		if_link_state_change(ifp, LINK_STATE_DOWN);
2289	}
2290}
2291
2292/*********************************************************************
2293 *
2294 *  This routine disables all traffic on the adapter by issuing a
2295 *  global reset on the MAC and deallocates TX/RX buffers.
2296 *
2297 *  This routine should always be called with BOTH the CORE
2298 *  and TX locks.
2299 **********************************************************************/
2300
2301static void
2302em_stop(void *arg)
2303{
2304	struct adapter	*adapter = arg;
2305	struct ifnet	*ifp = adapter->ifp;
2306	struct tx_ring	*txr = adapter->tx_rings;
2307
2308	EM_CORE_LOCK_ASSERT(adapter);
2309
2310	INIT_DEBUGOUT("em_stop: begin");
2311
2312	em_disable_intr(adapter);
2313	callout_stop(&adapter->timer);
2314
2315	/* Tell the stack that the interface is no longer active */
2316	ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
2317
2318        /* Unarm watchdog timer. */
2319	for (int i = 0; i < adapter->num_queues; i++, txr++) {
2320		EM_TX_LOCK(txr);
2321		txr->queue_status = EM_QUEUE_IDLE;
2322		EM_TX_UNLOCK(txr);
2323	}
2324
2325	e1000_reset_hw(&adapter->hw);
2326	E1000_WRITE_REG(&adapter->hw, E1000_WUC, 0);
2327
2328	e1000_led_off(&adapter->hw);
2329	e1000_cleanup_led(&adapter->hw);
2330}
2331
2332
2333/*********************************************************************
2334 *
2335 *  Determine hardware revision.
2336 *
2337 **********************************************************************/
2338static void
2339em_identify_hardware(struct adapter *adapter)
2340{
2341	device_t dev = adapter->dev;
2342
2343	/* Make sure our PCI config space has the necessary stuff set */
2344	adapter->hw.bus.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2);
2345	if (!((adapter->hw.bus.pci_cmd_word & PCIM_CMD_BUSMASTEREN) &&
2346	    (adapter->hw.bus.pci_cmd_word & PCIM_CMD_MEMEN))) {
2347		device_printf(dev, "Memory Access and/or Bus Master bits "
2348		    "were not set!\n");
2349		adapter->hw.bus.pci_cmd_word |=
2350		(PCIM_CMD_BUSMASTEREN | PCIM_CMD_MEMEN);
2351		pci_write_config(dev, PCIR_COMMAND,
2352		    adapter->hw.bus.pci_cmd_word, 2);
2353	}
2354
2355	/* Save off the information about this board */
2356	adapter->hw.vendor_id = pci_get_vendor(dev);
2357	adapter->hw.device_id = pci_get_device(dev);
2358	adapter->hw.revision_id = pci_read_config(dev, PCIR_REVID, 1);
2359	adapter->hw.subsystem_vendor_id =
2360	    pci_read_config(dev, PCIR_SUBVEND_0, 2);
2361	adapter->hw.subsystem_device_id =
2362	    pci_read_config(dev, PCIR_SUBDEV_0, 2);
2363
2364	/* Do Shared Code Init and Setup */
2365	if (e1000_set_mac_type(&adapter->hw)) {
2366		device_printf(dev, "Setup init failure\n");
2367		return;
2368	}
2369}
2370
2371static int
2372em_allocate_pci_resources(struct adapter *adapter)
2373{
2374	device_t	dev = adapter->dev;
2375	int		rid;
2376
2377	rid = PCIR_BAR(0);
2378	adapter->memory = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
2379	    &rid, RF_ACTIVE);
2380	if (adapter->memory == NULL) {
2381		device_printf(dev, "Unable to allocate bus resource: memory\n");
2382		return (ENXIO);
2383	}
2384	adapter->osdep.mem_bus_space_tag =
2385	    rman_get_bustag(adapter->memory);
2386	adapter->osdep.mem_bus_space_handle =
2387	    rman_get_bushandle(adapter->memory);
2388	adapter->hw.hw_addr = (u8 *)&adapter->osdep.mem_bus_space_handle;
2389
2390	/* Default to a single queue */
2391	adapter->num_queues = 1;
2392
2393	/*
2394	 * Setup MSI/X or MSI if PCI Express
2395	 */
2396	adapter->msix = em_setup_msix(adapter);
2397
2398	adapter->hw.back = &adapter->osdep;
2399
2400	return (0);
2401}
2402
2403/*********************************************************************
2404 *
2405 *  Setup the Legacy or MSI Interrupt handler
2406 *
2407 **********************************************************************/
2408int
2409em_allocate_legacy(struct adapter *adapter)
2410{
2411	device_t dev = adapter->dev;
2412	int error, rid = 0;
2413
2414	/* Manually turn off all interrupts */
2415	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2416
2417	if (adapter->msix == 1) /* using MSI */
2418		rid = 1;
2419	/* We allocate a single interrupt resource */
2420	adapter->res = bus_alloc_resource_any(dev,
2421	    SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2422	if (adapter->res == NULL) {
2423		device_printf(dev, "Unable to allocate bus resource: "
2424		    "interrupt\n");
2425		return (ENXIO);
2426	}
2427
2428	/*
2429	 * Allocate a fast interrupt and the associated
2430	 * deferred processing contexts.
2431	 */
2432	TASK_INIT(&adapter->que_task, 0, em_handle_que, adapter);
2433	TASK_INIT(&adapter->link_task, 0, em_handle_link, adapter);
2434	adapter->tq = taskqueue_create_fast("em_taskq", M_NOWAIT,
2435	    taskqueue_thread_enqueue, &adapter->tq);
2436	taskqueue_start_threads(&adapter->tq, 1, PI_NET, "%s taskq",
2437	    device_get_nameunit(adapter->dev));
2438	if ((error = bus_setup_intr(dev, adapter->res, INTR_TYPE_NET,
2439	    em_irq_fast, NULL, adapter, &adapter->tag)) != 0) {
2440		device_printf(dev, "Failed to register fast interrupt "
2441			    "handler: %d\n", error);
2442		taskqueue_free(adapter->tq);
2443		adapter->tq = NULL;
2444		return (error);
2445	}
2446
2447	return (0);
2448}
2449
2450/*********************************************************************
2451 *
2452 *  Setup the MSIX Interrupt handlers
2453 *   This is not really Multiqueue; rather,
2454 *   it is just multiple interrupt vectors.
2455 *
2456 **********************************************************************/
2457int
2458em_allocate_msix(struct adapter *adapter)
2459{
2460	device_t	dev = adapter->dev;
2461	struct		tx_ring *txr = adapter->tx_rings;
2462	struct		rx_ring *rxr = adapter->rx_rings;
2463	int		error, rid, vector = 0;
2464
2465
2466	/* Make sure all interrupts are disabled */
2467	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2468
2469	/* First set up ring resources */
2470	for (int i = 0; i < adapter->num_queues; i++, txr++, rxr++) {
2471
2472		/* RX ring */
2473		rid = vector + 1;
2474
2475		rxr->res = bus_alloc_resource_any(dev,
2476		    SYS_RES_IRQ, &rid, RF_ACTIVE);
2477		if (rxr->res == NULL) {
2478			device_printf(dev,
2479			    "Unable to allocate bus resource: "
2480			    "RX MSIX Interrupt %d\n", i);
2481			return (ENXIO);
2482		}
2483		if ((error = bus_setup_intr(dev, rxr->res,
2484		    INTR_TYPE_NET | INTR_MPSAFE, NULL, em_msix_rx,
2485		    rxr, &rxr->tag)) != 0) {
2486			device_printf(dev, "Failed to register RX handler");
2487			return (error);
2488		}
2489#if __FreeBSD_version >= 800504
2490		bus_describe_intr(dev, rxr->res, rxr->tag, "rx %d", i);
2491#endif
2492		rxr->msix = vector++; /* NOTE increment vector for TX */
2493		TASK_INIT(&rxr->rx_task, 0, em_handle_rx, rxr);
2494		rxr->tq = taskqueue_create_fast("em_rxq", M_NOWAIT,
2495		    taskqueue_thread_enqueue, &rxr->tq);
2496		taskqueue_start_threads(&rxr->tq, 1, PI_NET, "%s rxq",
2497		    device_get_nameunit(adapter->dev));
2498		/*
2499		** Set the bit to enable interrupt
2500		** in E1000_IMS -- bits 20 and 21
2501		** are for RX0 and RX1, note this has
2502		** NOTHING to do with the MSIX vector
2503		*/
2504		rxr->ims = 1 << (20 + i);
2505		adapter->ivars |= (8 | rxr->msix) << (i * 4);
2506
2507		/* TX ring */
2508		rid = vector + 1;
2509		txr->res = bus_alloc_resource_any(dev,
2510		    SYS_RES_IRQ, &rid, RF_ACTIVE);
2511		if (txr->res == NULL) {
2512			device_printf(dev,
2513			    "Unable to allocate bus resource: "
2514			    "TX MSIX Interrupt %d\n", i);
2515			return (ENXIO);
2516		}
2517		if ((error = bus_setup_intr(dev, txr->res,
2518		    INTR_TYPE_NET | INTR_MPSAFE, NULL, em_msix_tx,
2519		    txr, &txr->tag)) != 0) {
2520			device_printf(dev, "Failed to register TX handler");
2521			return (error);
2522		}
2523#if __FreeBSD_version >= 800504
2524		bus_describe_intr(dev, txr->res, txr->tag, "tx %d", i);
2525#endif
2526		txr->msix = vector++; /* Increment vector for next pass */
2527		TASK_INIT(&txr->tx_task, 0, em_handle_tx, txr);
2528		txr->tq = taskqueue_create_fast("em_txq", M_NOWAIT,
2529		    taskqueue_thread_enqueue, &txr->tq);
2530		taskqueue_start_threads(&txr->tq, 1, PI_NET, "%s txq",
2531		    device_get_nameunit(adapter->dev));
2532		/*
2533		** Set the bit to enable interrupt
2534		** in E1000_IMS -- bits 22 and 23
2535		** are for TX0 and TX1, note this has
2536		** NOTHING to do with the MSIX vector
2537		*/
2538		txr->ims = 1 << (22 + i);
2539		adapter->ivars |= (8 | txr->msix) << (8 + (i * 4));
2540	}
2541
2542	/* Link interrupt */
2543	++rid;
2544	adapter->res = bus_alloc_resource_any(dev,
2545	    SYS_RES_IRQ, &rid, RF_ACTIVE);
2546	if (!adapter->res) {
2547		device_printf(dev,"Unable to allocate "
2548		    "bus resource: Link interrupt [%d]\n", rid);
2549		return (ENXIO);
2550        }
2551	/* Set the link handler function */
2552	error = bus_setup_intr(dev, adapter->res,
2553	    INTR_TYPE_NET | INTR_MPSAFE, NULL,
2554	    em_msix_link, adapter, &adapter->tag);
2555	if (error) {
2556		adapter->res = NULL;
2557		device_printf(dev, "Failed to register LINK handler");
2558		return (error);
2559	}
2560#if __FreeBSD_version >= 800504
2561		bus_describe_intr(dev, adapter->res, adapter->tag, "link");
2562#endif
2563	adapter->linkvec = vector;
2564	adapter->ivars |=  (8 | vector) << 16;
2565	adapter->ivars |= 0x80000000;
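	/*
	 * Layout sketch for the single-queue case (rx vector 0, tx vector 1,
	 * link vector 2): each 4-bit IVAR field is (0x8 valid bit | vector),
	 * giving ivars = 0x80000000 | 0xA0000 | 0x900 | 0x8 = 0x800A0908.
	 */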
2566
2567	return (0);
2568}
2569
2570
2571static void
2572em_free_pci_resources(struct adapter *adapter)
2573{
2574	device_t	dev = adapter->dev;
2575	struct tx_ring	*txr;
2576	struct rx_ring	*rxr;
2577	int		rid;
2578
2579
2580	/*
2581	** Release all the queue interrupt resources:
2582	*/
2583	for (int i = 0; i < adapter->num_queues; i++) {
2584		txr = &adapter->tx_rings[i];
2585		rxr = &adapter->rx_rings[i];
2586		/* an early abort? */
2587		if ((txr == NULL) || (rxr == NULL))
2588			break;
2589		rid = txr->msix + 1;
2590		if (txr->tag != NULL) {
2591			bus_teardown_intr(dev, txr->res, txr->tag);
2592			txr->tag = NULL;
2593		}
2594		if (txr->res != NULL)
2595			bus_release_resource(dev, SYS_RES_IRQ,
2596			    rid, txr->res);
2597		rid = rxr->msix + 1;
2598		if (rxr->tag != NULL) {
2599			bus_teardown_intr(dev, rxr->res, rxr->tag);
2600			rxr->tag = NULL;
2601		}
2602		if (rxr->res != NULL)
2603			bus_release_resource(dev, SYS_RES_IRQ,
2604			    rid, rxr->res);
2605	}
2606
2607	if (adapter->linkvec) /* we are doing MSIX */
2608		rid = adapter->linkvec + 1;
2609	else
2610		rid = (adapter->msix != 0) ? 1 : 0;
2611
2612	if (adapter->tag != NULL) {
2613		bus_teardown_intr(dev, adapter->res, adapter->tag);
2614		adapter->tag = NULL;
2615	}
2616
2617	if (adapter->res != NULL)
2618		bus_release_resource(dev, SYS_RES_IRQ, rid, adapter->res);
2619
2620
2621	if (adapter->msix)
2622		pci_release_msi(dev);
2623
2624	if (adapter->msix_mem != NULL)
2625		bus_release_resource(dev, SYS_RES_MEMORY,
2626		    PCIR_BAR(EM_MSIX_BAR), adapter->msix_mem);
2627
2628	if (adapter->memory != NULL)
2629		bus_release_resource(dev, SYS_RES_MEMORY,
2630		    PCIR_BAR(0), adapter->memory);
2631
2632	if (adapter->flash != NULL)
2633		bus_release_resource(dev, SYS_RES_MEMORY,
2634		    EM_FLASH, adapter->flash);
2635}
2636
2637/*
2638 * Setup MSI or MSI/X
2639 */
2640static int
2641em_setup_msix(struct adapter *adapter)
2642{
2643	device_t dev = adapter->dev;
2644	int val = 0;
2645
2646
2647	/*
2648	** Setup MSI/X for Hartwell: tests have shown
2649	** use of two queues to be unstable, and to
2650	** provide no great gain anyway, so we simply
2651	** separate the interrupts and use a single queue.
2652	*/
2653	if ((adapter->hw.mac.type == e1000_82574) &&
2654	    (em_enable_msix == TRUE)) {
2655		/* Map the MSIX BAR */
2656		int rid = PCIR_BAR(EM_MSIX_BAR);
2657		adapter->msix_mem = bus_alloc_resource_any(dev,
2658		    SYS_RES_MEMORY, &rid, RF_ACTIVE);
2659		if (!adapter->msix_mem) {
2660			/* May not be enabled */
2661			device_printf(adapter->dev,
2662			    "Unable to map MSIX table\n");
2663			goto msi;
2664		}
2665		val = pci_msix_count(dev);
2666		if (val < 3) {
2667			bus_release_resource(dev, SYS_RES_MEMORY,
2668			    PCIR_BAR(EM_MSIX_BAR), adapter->msix_mem);
2669			adapter->msix_mem = NULL;
2670			device_printf(adapter->dev,
2671			    "MSIX: insufficient vectors, using MSI\n");
2672			goto msi;
2673		}
2674		val = 3;
2675		adapter->num_queues = 1;
2676		if (pci_alloc_msix(dev, &val) == 0) {
2677			device_printf(adapter->dev,
2678			    "Using MSIX interrupts "
2679			    "with %d vectors\n", val);
2680		}
2681
2682		return (val);
2683	}
2684msi:
2685	val = pci_msi_count(dev);
2686	if (val == 1 && pci_alloc_msi(dev, &val) == 0) {
2687		adapter->msix = 1;
2688		device_printf(adapter->dev, "Using an MSI interrupt\n");
2689		return (val);
2690	}
2691	/* Should only happen due to manual configuration */
2692	device_printf(adapter->dev, "No MSI/MSIX, using a Legacy IRQ\n");
2693	return (0);
2694}
2695
2696
2697/*********************************************************************
2698 *
2699 *  Initialize the hardware to a configuration
2700 *  as specified by the adapter structure.
2701 *
2702 **********************************************************************/
2703static void
2704em_reset(struct adapter *adapter)
2705{
2706	device_t	dev = adapter->dev;
2707	struct ifnet	*ifp = adapter->ifp;
2708	struct e1000_hw	*hw = &adapter->hw;
2709	u16		rx_buffer_size;
2710
2711	INIT_DEBUGOUT("em_reset: begin");
2712
2713	/* Set up smart power down as default off on newer adapters. */
2714	if (!em_smart_pwr_down && (hw->mac.type == e1000_82571 ||
2715	    hw->mac.type == e1000_82572)) {
2716		u16 phy_tmp = 0;
2717
2718		/* Speed up time to link by disabling smart power down. */
2719		e1000_read_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, &phy_tmp);
2720		phy_tmp &= ~IGP02E1000_PM_SPD;
2721		e1000_write_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, phy_tmp);
2722	}
2723
2724	/*
2725	 * These parameters control the automatic generation (Tx) and
2726	 * response (Rx) to Ethernet PAUSE frames.
2727	 * - High water mark should allow for at least two frames to be
2728	 *   received after sending an XOFF.
2729	 * - Low water mark works best when it is very near the high water mark.
2730	 *   This allows the receiver to restart by sending XON when it has
2731	 *   drained a bit. Here we use an arbitrary value of 1500, which will
2732	 *   restart after one full frame is pulled from the buffer. There
2733	 *   could be several smaller frames in the buffer and if so they will
2734	 *   not trigger the XON until their total number reduces the buffer
2735	 *   by 1500.
2736	 * - The pause time is fairly large at 1000 x 512ns = 512 usec.
2737	 */
2738	rx_buffer_size = ((E1000_READ_REG(hw, E1000_PBA) & 0xffff) << 10 );
2739
2740	hw->fc.high_water = rx_buffer_size -
2741	    roundup2(adapter->max_frame_size, 1024);
2742	hw->fc.low_water = hw->fc.high_water - 1500;
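	/*
	 * Worked example (illustrative values): if the PBA register reads
	 * 48 (a 48KB packet buffer), rx_buffer_size = 48 << 10 = 49152.
	 * With a 1518-byte max frame, roundup2(1518, 1024) = 2048, so
	 * high_water = 49152 - 2048 = 47104 and low_water = 45604.
	 */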
2743
2744	if (hw->mac.type == e1000_80003es2lan)
2745		hw->fc.pause_time = 0xFFFF;
2746	else
2747		hw->fc.pause_time = EM_FC_PAUSE_TIME;
2748
2749	hw->fc.send_xon = TRUE;
2750
2751	/* Set flow control; use the tunable setting if sane */
2752	hw->fc.requested_mode = adapter->fc_setting;
2753
2754	/* Workaround: no TX flow ctrl for PCH */
2755	if (hw->mac.type == e1000_pchlan)
2756		hw->fc.requested_mode = e1000_fc_rx_pause;
2757
2758	/* Override - settings for PCH2LAN; yes, these values are magic :) */
2759	if (hw->mac.type == e1000_pch2lan) {
2760		hw->fc.high_water = 0x5C20;
2761		hw->fc.low_water = 0x5048;
2762		hw->fc.pause_time = 0x0650;
2763		hw->fc.refresh_time = 0x0400;
2764		/* Jumbos need adjusted PBA */
2765		if (ifp->if_mtu > ETHERMTU)
2766			E1000_WRITE_REG(hw, E1000_PBA, 12);
2767		else
2768			E1000_WRITE_REG(hw, E1000_PBA, 26);
2769	}
2770
2771	/* Issue a global reset */
2772	e1000_reset_hw(hw);
2773	E1000_WRITE_REG(hw, E1000_WUC, 0);
2774	em_disable_aspm(adapter);
2775
2776	if (e1000_init_hw(hw) < 0) {
2777		device_printf(dev, "Hardware Initialization Failed\n");
2778		return;
2779	}
2780
2781	E1000_WRITE_REG(hw, E1000_VET, ETHERTYPE_VLAN);
2782	e1000_get_phy_info(hw);
2783	e1000_check_for_link(hw);
2784	return;
2785}
2786
2787/*********************************************************************
2788 *
2789 *  Setup networking device structure and register an interface.
2790 *
2791 **********************************************************************/
2792static int
2793em_setup_interface(device_t dev, struct adapter *adapter)
2794{
2795	struct ifnet   *ifp;
2796
2797	INIT_DEBUGOUT("em_setup_interface: begin");
2798
2799	ifp = adapter->ifp = if_alloc(IFT_ETHER);
2800	if (ifp == NULL) {
2801		device_printf(dev, "can not allocate ifnet structure\n");
2802		return (-1);
2803	}
2804	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
2805	ifp->if_mtu = ETHERMTU;
2806	ifp->if_init =  em_init;
2807	ifp->if_softc = adapter;
2808	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
2809	ifp->if_ioctl = em_ioctl;
2810	ifp->if_start = em_start;
2811	IFQ_SET_MAXLEN(&ifp->if_snd, adapter->num_tx_desc - 1);
2812	ifp->if_snd.ifq_drv_maxlen = adapter->num_tx_desc - 1;
2813	IFQ_SET_READY(&ifp->if_snd);
2814
2815	ether_ifattach(ifp, adapter->hw.mac.addr);
2816
2817	ifp->if_capabilities = ifp->if_capenable = 0;
2818
2819#ifdef EM_MULTIQUEUE
2820	/* Multiqueue tx functions */
2821	ifp->if_transmit = em_mq_start;
2822	ifp->if_qflush = em_qflush;
2823#endif
2824
2825	ifp->if_capabilities |= IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM;
2826	ifp->if_capenable |= IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM;
2827
2828	/* Enable TSO by default, can disable with ifconfig */
2829	ifp->if_capabilities |= IFCAP_TSO4;
2830	ifp->if_capenable |= IFCAP_TSO4;
2831
2832	/*
2833	 * Tell the upper layer(s) we
2834	 * support full VLAN capability
2835	 */
2836	ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);
2837	ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU;
2838	ifp->if_capenable |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU;
2839
2840	/*
2841	** Dont turn this on by default, if vlans are
2842	** created on another pseudo device (eg. lagg)
2843	** then vlan events are not passed thru, breaking
2844	** operation, but with HW FILTER off it works. If
2845	** using vlans directly on the em driver you can
2846	** enable this and get full hardware tag filtering.
2847	*/
2848	ifp->if_capabilities |= IFCAP_VLAN_HWFILTER;
2849
2850#ifdef DEVICE_POLLING
2851	ifp->if_capabilities |= IFCAP_POLLING;
2852#endif
2853
2854	/* Enable only WOL MAGIC by default */
2855	if (adapter->wol) {
2856		ifp->if_capabilities |= IFCAP_WOL;
2857		ifp->if_capenable |= IFCAP_WOL_MAGIC;
2858	}
2859
2860	/*
2861	 * Specify the media types supported by this adapter and register
2862	 * callbacks to update media and link information
2863	 */
2864	ifmedia_init(&adapter->media, IFM_IMASK,
2865	    em_media_change, em_media_status);
2866	if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
2867	    (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
2868		u_char fiber_type = IFM_1000_SX;	/* default type */
2869
2870		ifmedia_add(&adapter->media, IFM_ETHER | fiber_type | IFM_FDX,
2871			    0, NULL);
2872		ifmedia_add(&adapter->media, IFM_ETHER | fiber_type, 0, NULL);
2873	} else {
2874		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T, 0, NULL);
2875		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T | IFM_FDX,
2876			    0, NULL);
2877		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX,
2878			    0, NULL);
2879		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
2880			    0, NULL);
2881		if (adapter->hw.phy.type != e1000_phy_ife) {
2882			ifmedia_add(&adapter->media,
2883				IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
2884			ifmedia_add(&adapter->media,
2885				IFM_ETHER | IFM_1000_T, 0, NULL);
2886		}
2887	}
2888	ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
2889	ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
2890	return (0);
2891}
2892
2893
2894/*
2895 * Manage DMA'able memory.
2896 */
2897static void
2898em_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
2899{
2900	if (error)
2901		return;
2902	*(bus_addr_t *) arg = segs[0].ds_addr;
2903}
2904
2905static int
2906em_dma_malloc(struct adapter *adapter, bus_size_t size,
2907        struct em_dma_alloc *dma, int mapflags)
2908{
2909	int error;
2910
2911	error = bus_dma_tag_create(bus_get_dma_tag(adapter->dev), /* parent */
2912				EM_DBA_ALIGN, 0,	/* alignment, bounds */
2913				BUS_SPACE_MAXADDR,	/* lowaddr */
2914				BUS_SPACE_MAXADDR,	/* highaddr */
2915				NULL, NULL,		/* filter, filterarg */
2916				size,			/* maxsize */
2917				1,			/* nsegments */
2918				size,			/* maxsegsize */
2919				0,			/* flags */
2920				NULL,			/* lockfunc */
2921				NULL,			/* lockarg */
2922				&dma->dma_tag);
2923	if (error) {
2924		device_printf(adapter->dev,
2925		    "%s: bus_dma_tag_create failed: %d\n",
2926		    __func__, error);
2927		goto fail_0;
2928	}
2929
2930	error = bus_dmamem_alloc(dma->dma_tag, (void**) &dma->dma_vaddr,
2931	    BUS_DMA_NOWAIT | BUS_DMA_COHERENT, &dma->dma_map);
2932	if (error) {
2933		device_printf(adapter->dev,
2934		    "%s: bus_dmamem_alloc(%ju) failed: %d\n",
2935		    __func__, (uintmax_t)size, error);
2936		goto fail_2;
2937	}
2938
2939	dma->dma_paddr = 0;
2940	error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
2941	    size, em_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT);
2942	if (error || dma->dma_paddr == 0) {
2943		device_printf(adapter->dev,
2944		    "%s: bus_dmamap_load failed: %d\n",
2945		    __func__, error);
2946		goto fail_3;
2947	}
2948
2949	return (0);
2950
2951fail_3:
2952	bus_dmamap_unload(dma->dma_tag, dma->dma_map);
2953fail_2:
2954	bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
2955	bus_dma_tag_destroy(dma->dma_tag);
2956fail_0:
2957	dma->dma_map = NULL;
2958	dma->dma_tag = NULL;
2959
2960	return (error);
2961}
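/*
 * Usage sketch: callers pair em_dma_malloc() with em_dma_free(); e.g. the
 * ring setup below calls em_dma_malloc(adapter, tsize, &txr->txdma,
 * BUS_DMA_NOWAIT) and unwinds with em_dma_free(adapter, &txr->txdma) on
 * its error paths.
 */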
2962
2963static void
2964em_dma_free(struct adapter *adapter, struct em_dma_alloc *dma)
2965{
2966	if (dma->dma_tag == NULL)
2967		return;
2968	if (dma->dma_map != NULL) {
2969		bus_dmamap_sync(dma->dma_tag, dma->dma_map,
2970		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
2971		bus_dmamap_unload(dma->dma_tag, dma->dma_map);
2972		bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
2973		dma->dma_map = NULL;
2974	}
2975	bus_dma_tag_destroy(dma->dma_tag);
2976	dma->dma_tag = NULL;
2977}
2978
2979
2980/*********************************************************************
2981 *
2982 *  Allocate memory for the transmit and receive rings, and then
2983 *  the descriptors associated with each, called only once at attach.
2984 *
2985 **********************************************************************/
2986static int
2987em_allocate_queues(struct adapter *adapter)
2988{
2989	device_t		dev = adapter->dev;
2990	struct tx_ring		*txr = NULL;
2991	struct rx_ring		*rxr = NULL;
2992	int rsize, tsize, error = E1000_SUCCESS;
2993	int txconf = 0, rxconf = 0;
2994
2995
2996	/* Allocate the TX ring struct memory */
2997	if (!(adapter->tx_rings =
2998	    (struct tx_ring *) malloc(sizeof(struct tx_ring) *
2999	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3000		device_printf(dev, "Unable to allocate TX ring memory\n");
3001		error = ENOMEM;
3002		goto fail;
3003	}
3004
3005	/* Now allocate the RX */
3006	if (!(adapter->rx_rings =
3007	    (struct rx_ring *) malloc(sizeof(struct rx_ring) *
3008	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3009		device_printf(dev, "Unable to allocate RX ring memory\n");
3010		error = ENOMEM;
3011		goto rx_fail;
3012	}
3013
3014	tsize = roundup2(adapter->num_tx_desc *
3015	    sizeof(struct e1000_tx_desc), EM_DBA_ALIGN);
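	/*
	 * e.g. assuming the default of 1024 TX descriptors at 16 bytes
	 * each, tsize = roundup2(16384, EM_DBA_ALIGN) = 16384, since the
	 * ring is already a multiple of the 128-byte alignment.
	 */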
3016	/*
3017	 * Now set up the TX queues, txconf is needed to handle the
3018	 * possibility that things fail midcourse and we need to
3019	 * undo memory gracefully
3020	 */
3021	for (int i = 0; i < adapter->num_queues; i++, txconf++) {
3022		/* Set up some basics */
3023		txr = &adapter->tx_rings[i];
3024		txr->adapter = adapter;
3025		txr->me = i;
3026
3027		/* Initialize the TX lock */
3028		snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
3029		    device_get_nameunit(dev), txr->me);
3030		mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF);
3031
3032		if (em_dma_malloc(adapter, tsize,
3033			&txr->txdma, BUS_DMA_NOWAIT)) {
3034			device_printf(dev,
3035			    "Unable to allocate TX Descriptor memory\n");
3036			error = ENOMEM;
3037			goto err_tx_desc;
3038		}
3039		txr->tx_base = (struct e1000_tx_desc *)txr->txdma.dma_vaddr;
3040		bzero((void *)txr->tx_base, tsize);
3041
3042        	if (em_allocate_transmit_buffers(txr)) {
3043			device_printf(dev,
3044			    "Critical Failure setting up transmit buffers\n");
3045			error = ENOMEM;
3046			goto err_tx_desc;
3047        	}
3048#if __FreeBSD_version >= 800000
3049		/* Allocate a buf ring */
3050		txr->br = buf_ring_alloc(4096, M_DEVBUF,
3051		    M_WAITOK, &txr->tx_mtx);
3052#endif
3053	}
3054
3055	/*
3056	 * Next the RX queues...
3057	 */
3058	rsize = roundup2(adapter->num_rx_desc *
3059	    sizeof(struct e1000_rx_desc), EM_DBA_ALIGN);
3060	for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
3061		rxr = &adapter->rx_rings[i];
3062		rxr->adapter = adapter;
3063		rxr->me = i;
3064
3065		/* Initialize the RX lock */
3066		snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
3067		    device_get_nameunit(dev), rxr->me);
3068		mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF);
3069
3070		if (em_dma_malloc(adapter, rsize,
3071			&rxr->rxdma, BUS_DMA_NOWAIT)) {
3072			device_printf(dev,
3073			    "Unable to allocate RxDescriptor memory\n");
3074			error = ENOMEM;
3075			goto err_rx_desc;
3076		}
3077		rxr->rx_base = (struct e1000_rx_desc *)rxr->rxdma.dma_vaddr;
3078		bzero((void *)rxr->rx_base, rsize);
3079
3080        	/* Allocate receive buffers for the ring*/
3081		if (em_allocate_receive_buffers(rxr)) {
3082			device_printf(dev,
3083			    "Critical Failure setting up receive buffers\n");
3084			error = ENOMEM;
3085			goto err_rx_desc;
3086		}
3087	}
3088
3089	return (0);
3090
3091err_rx_desc:
3092	for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
3093		em_dma_free(adapter, &rxr->rxdma);
3094err_tx_desc:
3095	for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
3096		em_dma_free(adapter, &txr->txdma);
3097	free(adapter->rx_rings, M_DEVBUF);
3098rx_fail:
3099#if __FreeBSD_version >= 800000
3100	buf_ring_free(txr->br, M_DEVBUF);
3101#endif
3102	free(adapter->tx_rings, M_DEVBUF);
3103fail:
3104	return (error);
3105}
3106
3107
3108/*********************************************************************
3109 *
3110 *  Allocate memory for tx_buffer structures. The tx_buffer stores all
3111 *  the information needed to transmit a packet on the wire. This is
3112 *  called only once at attach, setup is done every reset.
3113 *
3114 **********************************************************************/
3115static int
3116em_allocate_transmit_buffers(struct tx_ring *txr)
3117{
3118	struct adapter *adapter = txr->adapter;
3119	device_t dev = adapter->dev;
3120	struct em_buffer *txbuf;
3121	int error, i;
3122
3123	/*
3124	 * Setup DMA descriptor areas.
3125	 */
3126	if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
3127			       1, 0,			/* alignment, bounds */
3128			       BUS_SPACE_MAXADDR,	/* lowaddr */
3129			       BUS_SPACE_MAXADDR,	/* highaddr */
3130			       NULL, NULL,		/* filter, filterarg */
3131			       EM_TSO_SIZE,		/* maxsize */
3132			       EM_MAX_SCATTER,		/* nsegments */
3133			       PAGE_SIZE,		/* maxsegsize */
3134			       0,			/* flags */
3135			       NULL,			/* lockfunc */
3136			       NULL,			/* lockfuncarg */
3137			       &txr->txtag))) {
3138		device_printf(dev,"Unable to allocate TX DMA tag\n");
3139		goto fail;
3140	}
3141
3142	if (!(txr->tx_buffers =
3143	    (struct em_buffer *) malloc(sizeof(struct em_buffer) *
3144	    adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3145		device_printf(dev, "Unable to allocate tx_buffer memory\n");
3146		error = ENOMEM;
3147		goto fail;
3148	}
3149
3150        /* Create the descriptor buffer dma maps */
3151	txbuf = txr->tx_buffers;
3152	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3153		error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
3154		if (error != 0) {
3155			device_printf(dev, "Unable to create TX DMA map\n");
3156			goto fail;
3157		}
3158	}
3159
3160	return 0;
3161fail:
3162	/* We free all, it handles case where we are in the middle */
3163	em_free_transmit_structures(adapter);
3164	return (error);
3165}
3166
3167/*********************************************************************
3168 *
3169 *  Initialize a transmit ring.
3170 *
3171 **********************************************************************/
3172static void
3173em_setup_transmit_ring(struct tx_ring *txr)
3174{
3175	struct adapter *adapter = txr->adapter;
3176	struct em_buffer *txbuf;
3177	int i;
3178
3179	/* Clear the old descriptor contents */
3180	EM_TX_LOCK(txr);
3181	bzero((void *)txr->tx_base,
3182	      (sizeof(struct e1000_tx_desc)) * adapter->num_tx_desc);
3183	/* Reset indices */
3184	txr->next_avail_desc = 0;
3185	txr->next_to_clean = 0;
3186
3187	/* Free any existing tx buffers. */
3188        txbuf = txr->tx_buffers;
3189	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3190		if (txbuf->m_head != NULL) {
3191			bus_dmamap_sync(txr->txtag, txbuf->map,
3192			    BUS_DMASYNC_POSTWRITE);
3193			bus_dmamap_unload(txr->txtag, txbuf->map);
3194			m_freem(txbuf->m_head);
3195			txbuf->m_head = NULL;
3196		}
3197		/* clear the watch index */
3198		txbuf->next_eop = -1;
3199        }
3200
3201	/* Set number of descriptors available */
3202	txr->tx_avail = adapter->num_tx_desc;
3203	txr->queue_status = EM_QUEUE_IDLE;
3204
3205	/* Clear checksum offload context. */
3206	txr->last_hw_offload = 0;
3207	txr->last_hw_ipcss = 0;
3208	txr->last_hw_ipcso = 0;
3209	txr->last_hw_tucss = 0;
3210	txr->last_hw_tucso = 0;
3211
3212	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3213	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3214	EM_TX_UNLOCK(txr);
3215}
3216
3217/*********************************************************************
3218 *
3219 *  Initialize all transmit rings.
3220 *
3221 **********************************************************************/
3222static void
3223em_setup_transmit_structures(struct adapter *adapter)
3224{
3225	struct tx_ring *txr = adapter->tx_rings;
3226
3227	for (int i = 0; i < adapter->num_queues; i++, txr++)
3228		em_setup_transmit_ring(txr);
3229
3230	return;
3231}
3232
3233/*********************************************************************
3234 *
3235 *  Enable transmit unit.
3236 *
3237 **********************************************************************/
3238static void
3239em_initialize_transmit_unit(struct adapter *adapter)
3240{
3241	struct tx_ring	*txr = adapter->tx_rings;
3242	struct e1000_hw	*hw = &adapter->hw;
3243	u32	tctl, tarc, tipg = 0;
3244
3245	INIT_DEBUGOUT("em_initialize_transmit_unit: begin");
3246
3247	for (int i = 0; i < adapter->num_queues; i++, txr++) {
3248		u64 bus_addr = txr->txdma.dma_paddr;
3249		/* Base and Len of TX Ring */
3250		E1000_WRITE_REG(hw, E1000_TDLEN(i),
3251	    	    adapter->num_tx_desc * sizeof(struct e1000_tx_desc));
3252		E1000_WRITE_REG(hw, E1000_TDBAH(i),
3253	    	    (u32)(bus_addr >> 32));
3254		E1000_WRITE_REG(hw, E1000_TDBAL(i),
3255	    	    (u32)bus_addr);
3256		/* Init the HEAD/TAIL indices */
3257		E1000_WRITE_REG(hw, E1000_TDT(i), 0);
3258		E1000_WRITE_REG(hw, E1000_TDH(i), 0);
3259
3260		HW_DEBUGOUT2("Base = %x, Length = %x\n",
3261		    E1000_READ_REG(&adapter->hw, E1000_TDBAL(i)),
3262		    E1000_READ_REG(&adapter->hw, E1000_TDLEN(i)));
3263
3264		txr->queue_status = EM_QUEUE_IDLE;
3265	}
3266
3267	/* Set the default values for the Tx Inter Packet Gap timer */
3268	switch (adapter->hw.mac.type) {
3269	case e1000_82542:
3270		tipg = DEFAULT_82542_TIPG_IPGT;
3271		tipg |= DEFAULT_82542_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT;
3272		tipg |= DEFAULT_82542_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT;
3273		break;
3274	case e1000_80003es2lan:
3275		tipg = DEFAULT_82543_TIPG_IPGR1;
3276		tipg |= DEFAULT_80003ES2LAN_TIPG_IPGR2 <<
3277		    E1000_TIPG_IPGR2_SHIFT;
3278		break;
3279	default:
3280		if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
3281		    (adapter->hw.phy.media_type ==
3282		    e1000_media_type_internal_serdes))
3283			tipg = DEFAULT_82543_TIPG_IPGT_FIBER;
3284		else
3285			tipg = DEFAULT_82543_TIPG_IPGT_COPPER;
3286		tipg |= DEFAULT_82543_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT;
3287		tipg |= DEFAULT_82543_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT;
3288	}
3289
3290	E1000_WRITE_REG(&adapter->hw, E1000_TIPG, tipg);
3291	E1000_WRITE_REG(&adapter->hw, E1000_TIDV, adapter->tx_int_delay.value);
3292
3293	if (adapter->hw.mac.type >= e1000_82540)
3294		E1000_WRITE_REG(&adapter->hw, E1000_TADV,
3295		    adapter->tx_abs_int_delay.value);
3296
3297	if ((adapter->hw.mac.type == e1000_82571) ||
3298	    (adapter->hw.mac.type == e1000_82572)) {
3299		tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
3300		tarc |= SPEED_MODE_BIT;
3301		E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3302	} else if (adapter->hw.mac.type == e1000_80003es2lan) {
3303		tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
3304		tarc |= 1;
3305		E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3306		tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(1));
3307		tarc |= 1;
3308		E1000_WRITE_REG(&adapter->hw, E1000_TARC(1), tarc);
3309	}
3310
3311	adapter->txd_cmd = E1000_TXD_CMD_IFCS;
3312	if (adapter->tx_int_delay.value > 0)
3313		adapter->txd_cmd |= E1000_TXD_CMD_IDE;
3314
3315	/* Program the Transmit Control Register */
3316	tctl = E1000_READ_REG(&adapter->hw, E1000_TCTL);
3317	tctl &= ~E1000_TCTL_CT;
3318	tctl |= (E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN |
3319		   (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT));
3320
3321	if (adapter->hw.mac.type >= e1000_82571)
3322		tctl |= E1000_TCTL_MULR;
3323
3324	/* This write will effectively turn on the transmit unit. */
3325	E1000_WRITE_REG(&adapter->hw, E1000_TCTL, tctl);
3326
3327}
3328
3329
3330/*********************************************************************
3331 *
3332 *  Free all transmit rings.
3333 *
3334 **********************************************************************/
3335static void
3336em_free_transmit_structures(struct adapter *adapter)
3337{
3338	struct tx_ring *txr = adapter->tx_rings;
3339
3340	for (int i = 0; i < adapter->num_queues; i++, txr++) {
3341		EM_TX_LOCK(txr);
3342		em_free_transmit_buffers(txr);
3343		em_dma_free(adapter, &txr->txdma);
3344		EM_TX_UNLOCK(txr);
3345		EM_TX_LOCK_DESTROY(txr);
3346	}
3347
3348	free(adapter->tx_rings, M_DEVBUF);
3349}
3350
3351/*********************************************************************
3352 *
3353 *  Free transmit ring related data structures.
3354 *
3355 **********************************************************************/
3356static void
3357em_free_transmit_buffers(struct tx_ring *txr)
3358{
3359	struct adapter		*adapter = txr->adapter;
3360	struct em_buffer	*txbuf;
3361
3362	INIT_DEBUGOUT("free_transmit_ring: begin");
3363
3364	if (txr->tx_buffers == NULL)
3365		return;
3366
3367	for (int i = 0; i < adapter->num_tx_desc; i++) {
3368		txbuf = &txr->tx_buffers[i];
3369		if (txbuf->m_head != NULL) {
3370			bus_dmamap_sync(txr->txtag, txbuf->map,
3371			    BUS_DMASYNC_POSTWRITE);
3372			bus_dmamap_unload(txr->txtag,
3373			    txbuf->map);
3374			m_freem(txbuf->m_head);
3375			txbuf->m_head = NULL;
3376			if (txbuf->map != NULL) {
3377				bus_dmamap_destroy(txr->txtag,
3378				    txbuf->map);
3379				txbuf->map = NULL;
3380			}
3381		} else if (txbuf->map != NULL) {
3382			bus_dmamap_unload(txr->txtag,
3383			    txbuf->map);
3384			bus_dmamap_destroy(txr->txtag,
3385			    txbuf->map);
3386			txbuf->map = NULL;
3387		}
3388	}
3389#if __FreeBSD_version >= 800000
3390	if (txr->br != NULL)
3391		buf_ring_free(txr->br, M_DEVBUF);
3392#endif
3393	if (txr->tx_buffers != NULL) {
3394		free(txr->tx_buffers, M_DEVBUF);
3395		txr->tx_buffers = NULL;
3396	}
3397	if (txr->txtag != NULL) {
3398		bus_dma_tag_destroy(txr->txtag);
3399		txr->txtag = NULL;
3400	}
3401	return;
3402}
3403
3404
3405/*********************************************************************
3406 *  The offload context is protocol specific (TCP/UDP) and thus
3407 *  only needs to be set when the protocol changes. The occasion
3408 *  of a context change can be a performance detriment, and
3409 *  might be better just disabled. The reason arises in the way
3410 *  in which the controller supports pipelined requests from the
3411 *  Tx data DMA. Up to four requests can be pipelined, and they may
3412 *  belong to the same packet or to multiple packets. However all
3413 *  requests for one packet are issued before a request is issued
3414 *  for a subsequent packet and if a request for the next packet
3415 *  requires a context change, that request will be stalled
3416 *  until the previous request completes. This means setting up
3417 *  a new context effectively disables pipelined Tx data DMA which
3418 *  in turn greatly slow down performance to send small sized
3419 *  frames.
3420 **********************************************************************/
3421static void
3422em_transmit_checksum_setup(struct tx_ring *txr, struct mbuf *mp, int ip_off,
3423    struct ip *ip, u32 *txd_upper, u32 *txd_lower)
3424{
3425	struct adapter			*adapter = txr->adapter;
3426	struct e1000_context_desc	*TXD = NULL;
3427	struct em_buffer		*tx_buffer;
3428	int				cur, hdr_len;
3429	u32				cmd = 0;
3430	u16				offload = 0;
3431	u8				ipcso, ipcss, tucso, tucss;
3432
3433	ipcss = ipcso = tucss = tucso = 0;
3434	hdr_len = ip_off + (ip->ip_hl << 2);
3435	cur = txr->next_avail_desc;
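	/*
	 * Example offsets (illustrative, untagged IPv4/TCP, no options):
	 * ip_off = 14, hdr_len = 34; then ipcss = 14,
	 * ipcso = 14 + offsetof(struct ip, ip_sum) = 24, tucss = 34 and
	 * tucso = 34 + offsetof(struct tcphdr, th_sum) = 50.
	 */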
3436
3437	/* Setup of IP header checksum. */
3438	if (mp->m_pkthdr.csum_flags & CSUM_IP) {
3439		*txd_upper |= E1000_TXD_POPTS_IXSM << 8;
3440		offload |= CSUM_IP;
3441		ipcss = ip_off;
3442		ipcso = ip_off + offsetof(struct ip, ip_sum);
3443		/*
3444		 * Start offset for header checksum calculation.
3445		 * End offset for header checksum calculation.
3446		 * Offset of place to put the checksum.
3447		 */
3448		TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
3449		TXD->lower_setup.ip_fields.ipcss = ipcss;
3450		TXD->lower_setup.ip_fields.ipcse = htole16(hdr_len);
3451		TXD->lower_setup.ip_fields.ipcso = ipcso;
3452		cmd |= E1000_TXD_CMD_IP;
3453	}
3454
3455	if (mp->m_pkthdr.csum_flags & CSUM_TCP) {
3456 		*txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
3457 		*txd_upper |= E1000_TXD_POPTS_TXSM << 8;
3458 		offload |= CSUM_TCP;
3459 		tucss = hdr_len;
3460 		tucso = hdr_len + offsetof(struct tcphdr, th_sum);
3461 		/*
3462 		 * Setting up a new checksum offload context for every frame
3463 		 * takes a lot of processing time for hardware. This also
3464 		 * reduces performance a lot for small sized frames, so avoid
3465 		 * it if the driver can reuse a previously configured checksum
3466 		 * offload context.
3467 		 */
3468 		if (txr->last_hw_offload == offload) {
3469 			if (offload & CSUM_IP) {
3470 				if (txr->last_hw_ipcss == ipcss &&
3471 				    txr->last_hw_ipcso == ipcso &&
3472 				    txr->last_hw_tucss == tucss &&
3473 				    txr->last_hw_tucso == tucso)
3474 					return;
3475 			} else {
3476 				if (txr->last_hw_tucss == tucss &&
3477 				    txr->last_hw_tucso == tucso)
3478 					return;
3479 			}
3480  		}
3481 		txr->last_hw_offload = offload;
3482 		txr->last_hw_tucss = tucss;
3483 		txr->last_hw_tucso = tucso;
3484 		/*
3485 		 * Start offset for payload checksum calculation.
3486 		 * End offset for payload checksum calculation.
3487 		 * Offset of place to put the checksum.
3488 		 */
3489		TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
3490 		TXD->upper_setup.tcp_fields.tucss = hdr_len;
3491 		TXD->upper_setup.tcp_fields.tucse = htole16(0);
3492 		TXD->upper_setup.tcp_fields.tucso = tucso;
3493 		cmd |= E1000_TXD_CMD_TCP;
3494 	} else if (mp->m_pkthdr.csum_flags & CSUM_UDP) {
3495 		*txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
3496 		*txd_upper |= E1000_TXD_POPTS_TXSM << 8;
 		offload |= CSUM_UDP;	/* record the protocol, as the TCP branch does */
3497 		tucss = hdr_len;
3498 		tucso = hdr_len + offsetof(struct udphdr, uh_sum);
3499 		/*
3500 		 * Setting up a new checksum offload context for every frame
3501 		 * takes a lot of processing time for the hardware. This also
3502 		 * reduces performance a lot for small frames, so avoid it
3503 		 * if the driver can reuse a previously configured checksum
3504 		 * offload context.
3505 		 */
3506 		if (txr->last_hw_offload == offload) {
3507 			if (offload & CSUM_IP) {
3508 				if (txr->last_hw_ipcss == ipcss &&
3509 				    txr->last_hw_ipcso == ipcso &&
3510 				    txr->last_hw_tucss == tucss &&
3511 				    txr->last_hw_tucso == tucso)
3512 					return;
3513 			} else {
3514 				if (txr->last_hw_tucss == tucss &&
3515 				    txr->last_hw_tucso == tucso)
3516 					return;
3517 			}
3518 		}
3519 		txr->last_hw_offload = offload;
3520 		txr->last_hw_tucss = tucss;
3521 		txr->last_hw_tucso = tucso;
3522 		/*
3523 		 * Start offset for payload checksum calculation.
3524 		 * End offset for payload checksum calculation.
3525 		 * Offset of place to put the checksum.
3526 		 */
3527		TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
3528 		TXD->upper_setup.tcp_fields.tucss = tucss;
3529 		TXD->upper_setup.tcp_fields.tucse = htole16(0);
3530 		TXD->upper_setup.tcp_fields.tucso = tucso;
3531  	}
3532
3533 	if (offload & CSUM_IP) {
3534 		txr->last_hw_ipcss = ipcss;
3535 		txr->last_hw_ipcso = ipcso;
3536  	}
3537
3538	TXD->tcp_seg_setup.data = htole32(0);
3539	TXD->cmd_and_length =
3540	    htole32(adapter->txd_cmd | E1000_TXD_CMD_DEXT | cmd);
3541	tx_buffer = &txr->tx_buffers[cur];
3542	tx_buffer->m_head = NULL;
3543	tx_buffer->next_eop = -1;
3544
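	/*
	 * The context descriptor just written consumes one slot in the
	 * TX ring, so advance and account for it like a data descriptor.
	 */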
3545	if (++cur == adapter->num_tx_desc)
3546		cur = 0;
3547
3548	txr->tx_avail--;
3549	txr->next_avail_desc = cur;
3550}
3551
3552
3553/**********************************************************************
3554 *
3555 *  Setup work for hardware segmentation offload (TSO)
3556 *
3557 **********************************************************************/
3558static void
3559em_tso_setup(struct tx_ring *txr, struct mbuf *mp, int ip_off,
3560    struct ip *ip, struct tcphdr *tp, u32 *txd_upper, u32 *txd_lower)
3561{
3562	struct adapter			*adapter = txr->adapter;
3563	struct e1000_context_desc	*TXD;
3564	struct em_buffer		*tx_buffer;
3565	int cur, hdr_len;
3566
3567	/*
3568	 * In theory we could reuse the same TSO context if and only if
3569	 * the frame has the same type (IP/TCP) and the same MSS. However,
3570	 * checking whether a frame has the same IP/TCP structure is a
3571	 * hard thing to do, so just ignore that and always establish a
3572	 * new TSO context.
3573	 */
3574	hdr_len = ip_off + (ip->ip_hl << 2) + (tp->th_off << 2);
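	/*
	 * E.g. 14 + 20 + 20 = 54 bytes for an untagged frame with minimal
	 * IP and TCP headers (an illustrative, assumed case).
	 */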
3575	*txd_lower = (E1000_TXD_CMD_DEXT |	/* Extended descr type */
3576		      E1000_TXD_DTYP_D |	/* Data descr type */
3577		      E1000_TXD_CMD_TSE);	/* Do TSE on this packet */
3578
3579	/* IP and/or TCP header checksum calculation and insertion. */
3580	*txd_upper = (E1000_TXD_POPTS_IXSM | E1000_TXD_POPTS_TXSM) << 8;
3581
3582	cur = txr->next_avail_desc;
3583	tx_buffer = &txr->tx_buffers[cur];
3584	TXD = (struct e1000_context_desc *) &txr->tx_base[cur];
3585
3586	/*
3587	 * Start offset for header checksum calculation.
3588	 * End offset for header checksum calculation.
3589	 * Offset of place to put the checksum.
3590	 */
3591	TXD->lower_setup.ip_fields.ipcss = ip_off;
3592	TXD->lower_setup.ip_fields.ipcse =
3593	    htole16(ip_off + (ip->ip_hl << 2) - 1);
3594	TXD->lower_setup.ip_fields.ipcso = ip_off + offsetof(struct ip, ip_sum);
3595	/*
3596	 * Start offset for payload checksum calculation.
3597	 * End offset for payload checksum calculation.
3598	 * Offset of place to put the checksum.
3599	 */
3600	TXD->upper_setup.tcp_fields.tucss = ip_off + (ip->ip_hl << 2);
3601	TXD->upper_setup.tcp_fields.tucse = 0;
3602	TXD->upper_setup.tcp_fields.tucso =
3603	    ip_off + (ip->ip_hl << 2) + offsetof(struct tcphdr, th_sum);
3604	/*
3605	 * Payload size per packet w/o any headers.
3606	 * Length of all headers up to payload.
3607	 */
3608	TXD->tcp_seg_setup.fields.mss = htole16(mp->m_pkthdr.tso_segsz);
3609	TXD->tcp_seg_setup.fields.hdr_len = hdr_len;
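	/*
	 * Given mss and hdr_len the hardware carves the payload into
	 * mss-sized segments and replicates the first hdr_len bytes of
	 * headers in front of each one (a general description of TSO,
	 * not a quote from the datasheet).
	 */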
3610
3611	TXD->cmd_and_length = htole32(adapter->txd_cmd |
3612				E1000_TXD_CMD_DEXT |	/* Extended descr */
3613				E1000_TXD_CMD_TSE |	/* TSE context */
3614				E1000_TXD_CMD_IP |	/* Do IP csum */
3615				E1000_TXD_CMD_TCP |	/* Do TCP checksum */
3616				(mp->m_pkthdr.len - (hdr_len))); /* Total len */
3617
3618	tx_buffer->m_head = NULL;
3619	tx_buffer->next_eop = -1;
3620
3621	if (++cur == adapter->num_tx_desc)
3622		cur = 0;
3623
3624	txr->tx_avail--;
3625	txr->next_avail_desc = cur;
3626	txr->tx_tso = TRUE;
3627}
3628
3629
3630/**********************************************************************
3631 *
3632 *  Examine each tx_buffer in the used queue. If the hardware is done
3633 *  processing the packet then free associated resources. The
3634 *  tx_buffer is put back on the free queue.
3635 *
3636 **********************************************************************/
3637static bool
3638em_txeof(struct tx_ring *txr)
3639{
3640	struct adapter	*adapter = txr->adapter;
3641        int first, last, done, processed;
3642        struct em_buffer *tx_buffer;
3643        struct e1000_tx_desc   *tx_desc, *eop_desc;
3644	struct ifnet   *ifp = adapter->ifp;
3645
3646	EM_TX_LOCK_ASSERT(txr);
3647
3648	/* No work, make sure watchdog is off */
3649        if (txr->tx_avail == adapter->num_tx_desc) {
3650		txr->queue_status = EM_QUEUE_IDLE;
3651                return (FALSE);
3652	}
3653
3654	processed = 0;
3655        first = txr->next_to_clean;
3656        tx_desc = &txr->tx_base[first];
3657        tx_buffer = &txr->tx_buffers[first];
3658	last = tx_buffer->next_eop;
3659        eop_desc = &txr->tx_base[last];
3660
3661	/*
3662	 * Get the index of the first descriptor
3663	 * AFTER the EOP of the first packet, so
3664	 * that we can do a simple comparison in
3665	 * the inner while loop.
3666	 */
3667	if (++last == adapter->num_tx_desc)
3668 		last = 0;
3669	done = last;
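	/*
	 * Example with assumed indices: a three-descriptor packet in
	 * slots 10..12 has next_eop = 12, so done = 13 and the inner
	 * loop below cleans exactly slots 10, 11 and 12.
	 */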
3670
3671        bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3672            BUS_DMASYNC_POSTREAD);
3673
3674        while (eop_desc->upper.fields.status & E1000_TXD_STAT_DD) {
3675		/* We clean the range of the packet */
3676		while (first != done) {
3677                	tx_desc->upper.data = 0;
3678                	tx_desc->lower.data = 0;
3679                	tx_desc->buffer_addr = 0;
3680                	++txr->tx_avail;
3681			++processed;
3682
3683			if (tx_buffer->m_head) {
3684				bus_dmamap_sync(txr->txtag,
3685				    tx_buffer->map,
3686				    BUS_DMASYNC_POSTWRITE);
3687				bus_dmamap_unload(txr->txtag,
3688				    tx_buffer->map);
3689                        	m_freem(tx_buffer->m_head);
3690                        	tx_buffer->m_head = NULL;
3691                	}
3692			tx_buffer->next_eop = -1;
3693			txr->watchdog_time = ticks;
3694
3695	                if (++first == adapter->num_tx_desc)
3696				first = 0;
3697
3698	                tx_buffer = &txr->tx_buffers[first];
3699			tx_desc = &txr->tx_base[first];
3700		}
3701		++ifp->if_opackets;
3702		/* See if we can continue to the next packet */
3703		last = tx_buffer->next_eop;
3704		if (last != -1) {
3705        		eop_desc = &txr->tx_base[last];
3706			/* Get new done point */
3707			if (++last == adapter->num_tx_desc) last = 0;
3708			done = last;
3709		} else
3710			break;
3711        }
3712        bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3713            BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3714
3715        txr->next_to_clean = first;
3716
3717	/*
3718	** Watchdog calculation: we know there's
3719	** work outstanding or the first return
3720	** would have been taken, so nothing processed
3721	** for too long indicates a hang. The local
3722	** timer will examine this and reset if needed.
3723	*/
3724	if ((!processed) && ((ticks - txr->watchdog_time) > EM_WATCHDOG))
3725		txr->queue_status = EM_QUEUE_HUNG;
3726
3727        /*
3728         * If we have enough room, clear IFF_DRV_OACTIVE
3729         * to tell the stack that it is OK to send packets.
3730         */
3731        if (txr->tx_avail > EM_TX_CLEANUP_THRESHOLD) {
3732                ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
3733		/* Disable watchdog if all clean */
3734                if (txr->tx_avail == adapter->num_tx_desc) {
3735			txr->queue_status = EM_QUEUE_IDLE;
3736			return (FALSE);
3737		}
3738        }
3739
3740	return (TRUE);
3741}
3742
3743
3744/*********************************************************************
3745 *
3746 *  Refresh RX descriptor mbufs from system mbuf buffer pool.
3747 *
3748 **********************************************************************/
3749static void
3750em_refresh_mbufs(struct rx_ring *rxr, int limit)
3751{
3752	struct adapter		*adapter = rxr->adapter;
3753	struct mbuf		*m;
3754	bus_dma_segment_t	segs[1];
3755	struct em_buffer	*rxbuf;
3756	int			i, error, nsegs, cleaned;
3757
3758	i = rxr->next_to_refresh;
3759	cleaned = -1;
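	/*
	 * -1 is a sentinel: the tail register is only written at the
	 * update label below if at least one descriptor was refreshed.
	 */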
3760	while (i != limit) {
3761		rxbuf = &rxr->rx_buffers[i];
3762		if (rxbuf->m_head == NULL) {
3763			m = m_getjcl(M_DONTWAIT, MT_DATA,
3764			    M_PKTHDR, adapter->rx_mbuf_sz);
3765			/*
3766			** If we have a temporary resource shortage
3767			** that causes a failure, just abort refresh
3768			** for now, we will return to this point when
3769			** reinvoked from em_rxeof.
3770			*/
3771			if (m == NULL)
3772				goto update;
3773		} else
3774			m = rxbuf->m_head;
3775
3776		m->m_len = m->m_pkthdr.len = adapter->rx_mbuf_sz;
3777		m->m_flags |= M_PKTHDR;
3778		m->m_data = m->m_ext.ext_buf;
3779
3780		/* Use bus_dma machinery to setup the memory mapping  */
3781		error = bus_dmamap_load_mbuf_sg(rxr->rxtag, rxbuf->map,
3782		    m, segs, &nsegs, BUS_DMA_NOWAIT);
3783		if (error != 0) {
3784			printf("Refresh mbufs: hdr dmamap load"
3785			printf("Refresh mbufs: dmamap load"
3786			m_free(m);
3787			rxbuf->m_head = NULL;
3788			goto update;
3789		}
3790		rxbuf->m_head = m;
3791		bus_dmamap_sync(rxr->rxtag,
3792		    rxbuf->map, BUS_DMASYNC_PREREAD);
3793		rxr->rx_base[i].buffer_addr = htole64(segs[0].ds_addr);
3794
3795		cleaned = i;
3796		/* Calculate next index */
3797		if (++i == adapter->num_rx_desc)
3798			i = 0;
3799		rxr->next_to_refresh = i;
3800	}
3801update:
3802	/*
3803	** Update the tail pointer only if, and
3804	** only as far as, we have refreshed.
3805	*/
3806	if (cleaned != -1) /* Update tail index */
3807		E1000_WRITE_REG(&adapter->hw,
3808		    E1000_RDT(rxr->me), cleaned);
3809
3810	return;
3811}
3812
3813
3814/*********************************************************************
3815 *
3816 *  Allocate memory for rx_buffer structures. Since we use one
3817 *  rx_buffer per received packet, the maximum number of rx_buffer's
3818 *  that we'll need is equal to the number of receive descriptors
3819 *  that we've allocated.
3820 *
3821 **********************************************************************/
3822static int
3823em_allocate_receive_buffers(struct rx_ring *rxr)
3824{
3825	struct adapter		*adapter = rxr->adapter;
3826	device_t		dev = adapter->dev;
3827	struct em_buffer	*rxbuf;
3828	int			error;
3829
3830	rxr->rx_buffers = malloc(sizeof(struct em_buffer) *
3831	    adapter->num_rx_desc, M_DEVBUF, M_NOWAIT | M_ZERO);
3832	if (rxr->rx_buffers == NULL) {
3833		device_printf(dev, "Unable to allocate rx_buffer memory\n");
3834		return (ENOMEM);
3835	}
3836
3837	error = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */
3838				1, 0,			/* alignment, bounds */
3839				BUS_SPACE_MAXADDR,	/* lowaddr */
3840				BUS_SPACE_MAXADDR,	/* highaddr */
3841				NULL, NULL,		/* filter, filterarg */
3842				MJUM9BYTES,		/* maxsize */
3843				1,			/* nsegments */
3844				MJUM9BYTES,		/* maxsegsize */
3845				0,			/* flags */
3846				NULL,			/* lockfunc */
3847				NULL,			/* lockarg */
3848				&rxr->rxtag);
3849	if (error) {
3850		device_printf(dev, "%s: bus_dma_tag_create failed %d\n",
3851		    __func__, error);
3852		goto fail;
3853	}
3854
3855	rxbuf = rxr->rx_buffers;
3856	for (int i = 0; i < adapter->num_rx_desc; i++, rxbuf++) {
3858		error = bus_dmamap_create(rxr->rxtag, BUS_DMA_NOWAIT,
3859		    &rxbuf->map);
3860		if (error) {
3861			device_printf(dev, "%s: bus_dmamap_create failed: %d\n",
3862			    __func__, error);
3863			goto fail;
3864		}
3865	}
3866
3867	return (0);
3868
3869fail:
3870	em_free_receive_structures(adapter);
3871	return (error);
3872}
3873
3874
3875/*********************************************************************
3876 *
3877 *  Initialize a receive ring and its buffers.
3878 *
3879 **********************************************************************/
3880static int
3881em_setup_receive_ring(struct rx_ring *rxr)
3882{
3883	struct	adapter 	*adapter = rxr->adapter;
3884	struct em_buffer	*rxbuf;
3885	bus_dma_segment_t	seg[1];
3886	int			rsize, nsegs, error;
3887
3888
3889	/* Clear the ring contents */
3890	EM_RX_LOCK(rxr);
3891	rsize = roundup2(adapter->num_rx_desc *
3892	    sizeof(struct e1000_rx_desc), EM_DBA_ALIGN);
3893	bzero((void *)rxr->rx_base, rsize);
3894
3895	/*
3896	** Free current RX buffer structs and their mbufs
3897	*/
3898	for (int i = 0; i < adapter->num_rx_desc; i++) {
3899		rxbuf = &rxr->rx_buffers[i];
3900		if (rxbuf->m_head != NULL) {
3901			bus_dmamap_sync(rxr->rxtag, rxbuf->map,
3902			    BUS_DMASYNC_POSTREAD);
3903			bus_dmamap_unload(rxr->rxtag, rxbuf->map);
3904			m_freem(rxbuf->m_head);
3905		}
3906	}
3907
3908	/* Now replenish the mbufs */
3909	for (int j = 0; j != adapter->num_rx_desc; ++j) {
3910
3911		rxbuf = &rxr->rx_buffers[j];
3912		rxbuf->m_head = m_getjcl(M_DONTWAIT, MT_DATA,
3913		    M_PKTHDR, adapter->rx_mbuf_sz);
3914		if (rxbuf->m_head == NULL) {
3915			EM_RX_UNLOCK(rxr);	/* drop the ring lock taken above */
			return (ENOBUFS);
		}
3916		rxbuf->m_head->m_len = adapter->rx_mbuf_sz;
3917		rxbuf->m_head->m_flags &= ~M_HASFCS; /* we strip it */
3918		rxbuf->m_head->m_pkthdr.len = adapter->rx_mbuf_sz;
3919
3920		/* Get the memory mapping */
3921		error = bus_dmamap_load_mbuf_sg(rxr->rxtag,
3922		    rxbuf->map, rxbuf->m_head, seg,
3923		    &nsegs, BUS_DMA_NOWAIT);
3924		if (error != 0) {
3925			m_freem(rxbuf->m_head);
3926			rxbuf->m_head = NULL;
3927			EM_RX_UNLOCK(rxr);	/* don't leak the ring lock on error */
			return (error);
3928		}
3929		bus_dmamap_sync(rxr->rxtag,
3930		    rxbuf->map, BUS_DMASYNC_PREREAD);
3931
3932		/* Update descriptor */
3933		rxr->rx_base[j].buffer_addr = htole64(seg[0].ds_addr);
3934	}
3935
3936
3937	/* Setup our descriptor indices */
3938	rxr->next_to_check = 0;
3939	rxr->next_to_refresh = 0;
3940
3941	bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
3942	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3943
3944	EM_RX_UNLOCK(rxr);
3945	return (0);
3946}
3947
3948/*********************************************************************
3949 *
3950 *  Initialize all receive rings.
3951 *
3952 **********************************************************************/
3953static int
3954em_setup_receive_structures(struct adapter *adapter)
3955{
3956	struct rx_ring *rxr = adapter->rx_rings;
3957	int j;
3958
3959	for (j = 0; j < adapter->num_queues; j++, rxr++)
3960		if (em_setup_receive_ring(rxr))
3961			goto fail;
3962
3963	return (0);
3964fail:
3965	/*
3966	 * Free RX buffers allocated so far; we only handle
3967	 * the rings that completed, as the failing case will have
3968	 * cleaned up after itself. Ring 'j' failed, so it's the terminus.
3969	 */
3970	for (int i = 0; i < j; ++i) {
3971		rxr = &adapter->rx_rings[i];
3972		for (int n = 0; n < adapter->num_rx_desc; n++) {
3973			struct em_buffer *rxbuf;
3974			rxbuf = &rxr->rx_buffers[n];
3975			if (rxbuf->m_head != NULL) {
3976				bus_dmamap_sync(rxr->rxtag, rxbuf->map,
3977			  	  BUS_DMASYNC_POSTREAD);
3978				bus_dmamap_unload(rxr->rxtag, rxbuf->map);
3979				m_freem(rxbuf->m_head);
3980				rxbuf->m_head = NULL;
3981			}
3982		}
3983	}
3984
3985	return (ENOBUFS);
3986}
3987
3988/*********************************************************************
3989 *
3990 *  Free all receive rings.
3991 *
3992 **********************************************************************/
3993static void
3994em_free_receive_structures(struct adapter *adapter)
3995{
3996	struct rx_ring *rxr = adapter->rx_rings;
3997
3998	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
3999		em_free_receive_buffers(rxr);
4000		/* Free the ring memory as well */
4001		em_dma_free(adapter, &rxr->rxdma);
4002		EM_RX_LOCK_DESTROY(rxr);
4003	}
4004
4005	free(adapter->rx_rings, M_DEVBUF);
4006}
4007
4008
4009/*********************************************************************
4010 *
4011 *  Free receive ring data structures
4012 *
4013 **********************************************************************/
4014static void
4015em_free_receive_buffers(struct rx_ring *rxr)
4016{
4017	struct adapter		*adapter = rxr->adapter;
4018	struct em_buffer	*rxbuf = NULL;
4019
4020	INIT_DEBUGOUT("free_receive_buffers: begin");
4021
4022	if (rxr->rx_buffers != NULL) {
4023		for (int i = 0; i < adapter->num_rx_desc; i++) {
4024			rxbuf = &rxr->rx_buffers[i];
4025			if (rxbuf->map != NULL) {
4026				bus_dmamap_sync(rxr->rxtag, rxbuf->map,
4027				    BUS_DMASYNC_POSTREAD);
4028				bus_dmamap_unload(rxr->rxtag, rxbuf->map);
4029				bus_dmamap_destroy(rxr->rxtag, rxbuf->map);
4030			}
4031			if (rxbuf->m_head != NULL) {
4032				m_freem(rxbuf->m_head);
4033				rxbuf->m_head = NULL;
4034			}
4035		}
4036		free(rxr->rx_buffers, M_DEVBUF);
4037		rxr->rx_buffers = NULL;
4038	}
4039
4040	if (rxr->rxtag != NULL) {
4041		bus_dma_tag_destroy(rxr->rxtag);
4042		rxr->rxtag = NULL;
4043	}
4044
4045	return;
4046}
4047
4048
4049/*********************************************************************
4050 *
4051 *  Enable receive unit.
4052 *
4053 **********************************************************************/
4054#define MAX_INTS_PER_SEC	8000
4055#define DEFAULT_ITR	(1000000000/(MAX_INTS_PER_SEC * 256))
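/*
 * With MAX_INTS_PER_SEC = 8000 this works out to 1000000000 / 2048000
 * = 488. The ITR register counts 256 ns units, and 488 * 256 ns is
 * roughly a 125 us minimum gap between interrupts.
 */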
4056
4057static void
4058em_initialize_receive_unit(struct adapter *adapter)
4059{
4060	struct rx_ring	*rxr = adapter->rx_rings;
4061	struct ifnet	*ifp = adapter->ifp;
4062	struct e1000_hw	*hw = &adapter->hw;
4063	u64	bus_addr;
4064	u32	rctl, rxcsum;
4065
4066	INIT_DEBUGOUT("em_initialize_receive_units: begin");
4067
4068	/*
4069	 * Make sure receives are disabled while setting
4070	 * up the descriptor ring
4071	 */
4072	rctl = E1000_READ_REG(hw, E1000_RCTL);
4073	E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
4074
4075	E1000_WRITE_REG(&adapter->hw, E1000_RADV,
4076	    adapter->rx_abs_int_delay.value);
4077	/*
4078	 * Set the interrupt throttling rate. Value is calculated
4079	 * as DEFAULT_ITR = 1/(MAX_INTS_PER_SEC * 256ns)
4080	 */
4081	E1000_WRITE_REG(hw, E1000_ITR, DEFAULT_ITR);
4082
4083	/*
4084	** When using MSIX interrupts we need to throttle
4085	** using the EITR register (82574 only)
4086	*/
4087	if (hw->mac.type == e1000_82574)
4088		for (int i = 0; i < 4; i++)
4089			E1000_WRITE_REG(hw, E1000_EITR_82574(i),
4090			    DEFAULT_ITR);
4091
4092	/* Disable accelerated acknowledgment */
4093	if (adapter->hw.mac.type == e1000_82574)
4094		E1000_WRITE_REG(hw, E1000_RFCTL, E1000_RFCTL_ACK_DIS);
4095
4096	if (ifp->if_capenable & IFCAP_RXCSUM) {
4097		rxcsum = E1000_READ_REG(hw, E1000_RXCSUM);
4098		rxcsum |= (E1000_RXCSUM_IPOFL | E1000_RXCSUM_TUOFL);
4099		E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum);
4100	}
4101
4102	/*
4103	** XXX TEMPORARY WORKAROUND: on some systems with 82573
4104	** long latencies are observed, like Lenovo X60. This
4105	** change eliminates the problem, but since having positive
4106	** values in RDTR is a known source of problems on other
4107	** platforms another solution is being sought.
4108	*/
4109	if (hw->mac.type == e1000_82573)
4110		E1000_WRITE_REG(hw, E1000_RDTR, 0x20);
4111
4112	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4113		/* Setup the Base and Length of the Rx Descriptor Ring */
4114		bus_addr = rxr->rxdma.dma_paddr;
4115		E1000_WRITE_REG(hw, E1000_RDLEN(i),
4116		    adapter->num_rx_desc * sizeof(struct e1000_rx_desc));
4117		E1000_WRITE_REG(hw, E1000_RDBAH(i), (u32)(bus_addr >> 32));
4118		E1000_WRITE_REG(hw, E1000_RDBAL(i), (u32)bus_addr);
4119		/* Setup the Head and Tail Descriptor Pointers */
4120		E1000_WRITE_REG(hw, E1000_RDH(i), 0);
4121		E1000_WRITE_REG(hw, E1000_RDT(i), adapter->num_rx_desc - 1);
4122	}
4123
4124	/* Set early receive threshold on appropriate hw */
4125	if (((adapter->hw.mac.type == e1000_ich9lan) ||
4126	    (adapter->hw.mac.type == e1000_pch2lan) ||
4127	    (adapter->hw.mac.type == e1000_ich10lan)) &&
4128	    (ifp->if_mtu > ETHERMTU)) {
4129		u32 rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(0));
4130		E1000_WRITE_REG(hw, E1000_RXDCTL(0), rxdctl | 3);
4131		E1000_WRITE_REG(hw, E1000_ERT, 0x100 | (1 << 13));
4132	}
4133
4134	if (adapter->hw.mac.type == e1000_pch2lan) {
4135		if (ifp->if_mtu > ETHERMTU)
4136			e1000_lv_jumbo_workaround_ich8lan(hw, TRUE);
4137		else
4138			e1000_lv_jumbo_workaround_ich8lan(hw, FALSE);
4139	}
4140
4141	/* Setup the Receive Control Register */
4142	rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
4143	rctl |= E1000_RCTL_EN | E1000_RCTL_BAM |
4144	    E1000_RCTL_LBM_NO | E1000_RCTL_RDMTS_HALF |
4145	    (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
4146
4147        /* Strip the CRC */
4148        rctl |= E1000_RCTL_SECRC;
4149
4150        /* Make sure VLAN Filters are off */
4151        rctl &= ~E1000_RCTL_VFE;
4152	rctl &= ~E1000_RCTL_SBP;
4153
4154	if (adapter->rx_mbuf_sz == MCLBYTES)
4155		rctl |= E1000_RCTL_SZ_2048;
4156	else if (adapter->rx_mbuf_sz == MJUMPAGESIZE)
4157		rctl |= E1000_RCTL_SZ_4096 | E1000_RCTL_BSEX;
4158	else if (adapter->rx_mbuf_sz > MJUMPAGESIZE)
4159		rctl |= E1000_RCTL_SZ_8192 | E1000_RCTL_BSEX;
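	/*
	 * The BSEX bit scales the RCTL buffer-size encoding by 16; that
	 * is how the 4096 and 8192 byte sizes above are expressed.
	 */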
4160
4161	if (ifp->if_mtu > ETHERMTU)
4162		rctl |= E1000_RCTL_LPE;
4163	else
4164		rctl &= ~E1000_RCTL_LPE;
4165
4166	/* Write out the settings */
4167	E1000_WRITE_REG(hw, E1000_RCTL, rctl);
4168
4169	return;
4170}
4171
4172
4173/*********************************************************************
4174 *
4175 *  This routine executes in interrupt context. It replenishes
4176 *  the mbufs in the descriptor ring and sends data which has been
4177 *  dma'ed into host memory up the stack.
4178 *
4179 *  We loop at most count times if count is > 0, or until done if
4180 *  count < 0.
4181 *
4182 *  For polling we also now return the number of cleaned packets
4183 *********************************************************************/
4184static bool
4185em_rxeof(struct rx_ring *rxr, int count, int *done)
4186{
4187	struct adapter		*adapter = rxr->adapter;
4188	struct ifnet		*ifp = adapter->ifp;
4189	struct mbuf		*mp, *sendmp;
4190	u8			status = 0;
4191	u16 			len;
4192	int			i, processed, rxdone = 0;
4193	bool			eop;
4194	struct e1000_rx_desc	*cur;
4195
4196	EM_RX_LOCK(rxr);
4197
4198	for (i = rxr->next_to_check, processed = 0; count != 0;) {
4199
4200		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
4201			break;
4202
4203		bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4204		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
4205
4206		cur = &rxr->rx_base[i];
4207		status = cur->status;
4208		mp = sendmp = NULL;
4209
4210		if ((status & E1000_RXD_STAT_DD) == 0)
4211			break;
4212
4213		len = le16toh(cur->length);
4214		eop = (status & E1000_RXD_STAT_EOP) != 0;
4215
4216		if ((cur->errors & E1000_RXD_ERR_FRAME_ERR_MASK) ||
4217		    (rxr->discard == TRUE)) {
4218			ifp->if_ierrors++;
4219			++rxr->rx_discarded;
4220			if (!eop) /* Catch subsequent segs */
4221				rxr->discard = TRUE;
4222			else
4223				rxr->discard = FALSE;
4224			em_rx_discard(rxr, i);
4225			goto next_desc;
4226		}
4227
4228		/* Assign correct length to the current fragment */
4229		mp = rxr->rx_buffers[i].m_head;
4230		mp->m_len = len;
4231
4232		/* Trigger for refresh */
4233		rxr->rx_buffers[i].m_head = NULL;
4234
4235		/* First segment? */
4236		if (rxr->fmp == NULL) {
4237			mp->m_pkthdr.len = len;
4238			rxr->fmp = rxr->lmp = mp;
4239		} else {
4240			/* Chain mbuf's together */
4241			/* Chain mbufs together */
4242			rxr->lmp->m_next = mp;
4243			rxr->lmp = mp;
4244			rxr->fmp->m_pkthdr.len += len;
4245		}
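		/*
		 * fmp and lmp track the first and last mbuf of the frame
		 * being assembled, which may span several descriptors.
		 */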
4246
4247		if (eop) {
4248			--count;
4249			sendmp = rxr->fmp;
4250			sendmp->m_pkthdr.rcvif = ifp;
4251			ifp->if_ipackets++;
4252			em_receive_checksum(cur, sendmp);
4253#ifndef __NO_STRICT_ALIGNMENT
4254			if (adapter->max_frame_size >
4255			    (MCLBYTES - ETHER_ALIGN) &&
4256			    em_fixup_rx(rxr) != 0)
4257				goto skip;
4258#endif
4259			if (status & E1000_RXD_STAT_VP) {
4260				sendmp->m_pkthdr.ether_vtag =
4261				    (le16toh(cur->special) &
4262				    E1000_RXD_SPC_VLAN_MASK);
4263				sendmp->m_flags |= M_VLANTAG;
4264			}
4265#ifdef EM_MULTIQUEUE
4266			sendmp->m_pkthdr.flowid = rxr->msix;
4267			sendmp->m_flags |= M_FLOWID;
4268#endif
4269#ifndef __NO_STRICT_ALIGNMENT
4270skip:
4271#endif
4272			rxr->fmp = rxr->lmp = NULL;
4273		}
4274next_desc:
4275		/* Zero out the receive descriptors status. */
4276		cur->status = 0;
4277		++rxdone;	/* cumulative for POLL */
4278		++processed;
4279
4280		/* Advance our pointers to the next descriptor. */
4281		if (++i == adapter->num_rx_desc)
4282			i = 0;
4283
4284		/* Send to the stack */
4285		if (sendmp != NULL) {
4286			rxr->next_to_check = i;
4287			EM_RX_UNLOCK(rxr);
4288			(*ifp->if_input)(ifp, sendmp);
4289			EM_RX_LOCK(rxr);
4290			i = rxr->next_to_check;
4291		}
4292
4293		/* Only refresh mbufs every 8 descriptors */
4294		if (processed == 8) {
4295			em_refresh_mbufs(rxr, i);
4296			processed = 0;
4297		}
4298	}
4299
4300	/* Catch any remaining refresh work */
4301	em_refresh_mbufs(rxr, i);
4302
4303	rxr->next_to_check = i;
4304	if (done != NULL)
4305		*done = rxdone;
4306	EM_RX_UNLOCK(rxr);
4307
4308	return ((status & E1000_RXD_STAT_DD) ? TRUE : FALSE);
4309}
4310
4311static __inline void
4312em_rx_discard(struct rx_ring *rxr, int i)
4313{
4314	struct em_buffer	*rbuf;
4315
4316	rbuf = &rxr->rx_buffers[i];
4317	/* Free any previous pieces */
4318	if (rxr->fmp != NULL) {
4319		rxr->fmp->m_flags |= M_PKTHDR;
4320		m_freem(rxr->fmp);
4321		rxr->fmp = NULL;
4322		rxr->lmp = NULL;
4323	}
4324	/*
4325	** Free buffer and allow em_refresh_mbufs()
4326	** to clean up and recharge buffer.
4327	*/
4328	if (rbuf->m_head) {
4329		m_free(rbuf->m_head);
4330		rbuf->m_head = NULL;
4331	}
4332	return;
4333}
4334
4335#ifndef __NO_STRICT_ALIGNMENT
4336/*
4337 * When jumbo frames are enabled we should realign the entire payload on
4338 * architectures with strict alignment. This is a serious design mistake of
4339 * the 8254x, as it nullifies the benefit of DMA. The 8254x only allows RX
4340 * buffer sizes of 2048/4096/8192/16384; what we really want is
4341 * 2048 - ETHER_ALIGN, so the payload ends up aligned. On architectures
4342 * without strict alignment restrictions the 8254x still performs unaligned
4343 * memory accesses, which reduce performance too. To avoid copying an entire
4344 * frame to realign it, we allocate a new mbuf and copy just the ethernet
4345 * header into it. The new mbuf is prepended onto the existing mbuf chain.
4346 *
4347 * Be aware that the best performance of the 8254x is achieved only when
4348 * jumbo frames are not used at all on architectures with strict alignment.
4349 */
4350static int
4351em_fixup_rx(struct rx_ring *rxr)
4352{
4353	struct adapter *adapter = rxr->adapter;
4354	struct mbuf *m, *n;
4355	int error;
4356
4357	error = 0;
4358	m = rxr->fmp;
4359	if (m->m_len <= (MCLBYTES - ETHER_HDR_LEN)) {
4360		bcopy(m->m_data, m->m_data + ETHER_HDR_LEN, m->m_len);
4361		m->m_data += ETHER_HDR_LEN;
4362	} else {
4363		MGETHDR(n, M_DONTWAIT, MT_DATA);
4364		if (n != NULL) {
4365			bcopy(m->m_data, n->m_data, ETHER_HDR_LEN);
4366			m->m_data += ETHER_HDR_LEN;
4367			m->m_len -= ETHER_HDR_LEN;
4368			n->m_len = ETHER_HDR_LEN;
4369			M_MOVE_PKTHDR(n, m);
4370			n->m_next = m;
4371			rxr->fmp = n;
4372		} else {
4373			adapter->dropped_pkts++;
4374			m_freem(rxr->fmp);
4375			rxr->fmp = NULL;
4376			error = ENOMEM;
4377		}
4378	}
4379
4380	return (error);
4381}
4382#endif
4383
4384/*********************************************************************
4385 *
4386 *  Verify that the hardware indicated that the checksum is valid.
4387 *  Inform the stack about the status of checksum so that stack
4388 *  doesn't spend time verifying the checksum.
4389 *
4390 *********************************************************************/
4391static void
4392em_receive_checksum(struct e1000_rx_desc *rx_desc, struct mbuf *mp)
4393{
4394	/* The hardware's Ignore Checksum Indication bit is set; report nothing */
4395	if (rx_desc->status & E1000_RXD_STAT_IXSM) {
4396		mp->m_pkthdr.csum_flags = 0;
4397		return;
4398	}
4399
4400	if (rx_desc->status & E1000_RXD_STAT_IPCS) {
4401		/* Did it pass? */
4402		if (!(rx_desc->errors & E1000_RXD_ERR_IPE)) {
4403			/* IP Checksum Good */
4404			mp->m_pkthdr.csum_flags = CSUM_IP_CHECKED;
4405			mp->m_pkthdr.csum_flags |= CSUM_IP_VALID;
4406
4407		} else {
4408			mp->m_pkthdr.csum_flags = 0;
4409		}
4410	}
4411
4412	if (rx_desc->status & E1000_RXD_STAT_TCPCS) {
4413		/* Did it pass? */
4414		if (!(rx_desc->errors & E1000_RXD_ERR_TCPE)) {
4415			mp->m_pkthdr.csum_flags |=
4416			(CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
4417			mp->m_pkthdr.csum_data = htons(0xffff);
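			/*
			 * csum_data = 0xffff is the conventional way of
			 * telling the stack that the pseudo-header checksum
			 * has already been verified, so it need not
			 * recompute it.
			 */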
4418		}
4419	}
4420}
4421
4422/*
4423 * This routine is run via a vlan
4424 * config EVENT
4425 */
4426static void
4427em_register_vlan(void *arg, struct ifnet *ifp, u16 vtag)
4428{
4429	struct adapter	*adapter = ifp->if_softc;
4430	u32		index, bit;
4431
4432	if (ifp->if_softc !=  arg)   /* Not our event */
4433		return;
4434
4435	if ((vtag == 0) || (vtag > 4095))       /* Invalid ID */
4436                return;
4437
4438	EM_CORE_LOCK(adapter);
4439	index = (vtag >> 5) & 0x7F;
4440	bit = vtag & 0x1F;
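	/*
	 * The VFTA is 128 32-bit words: the upper seven bits of the tag
	 * index a word and the lower five select a bit within it, which
	 * covers all 4096 possible VLAN IDs.
	 */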
4441	adapter->shadow_vfta[index] |= (1 << bit);
4442	++adapter->num_vlans;
4443	/* Re-init to load the changes */
4444	if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
4445		em_init_locked(adapter);
4446	EM_CORE_UNLOCK(adapter);
4447}
4448
4449/*
4450 * This routine is run via a vlan
4451 * unconfig EVENT
4452 */
4453static void
4454em_unregister_vlan(void *arg, struct ifnet *ifp, u16 vtag)
4455{
4456	struct adapter	*adapter = ifp->if_softc;
4457	u32		index, bit;
4458
4459	if (ifp->if_softc !=  arg)
4460		return;
4461
4462	if ((vtag == 0) || (vtag > 4095))       /* Invalid */
4463                return;
4464
4465	EM_CORE_LOCK(adapter);
4466	index = (vtag >> 5) & 0x7F;
4467	bit = vtag & 0x1F;
4468	adapter->shadow_vfta[index] &= ~(1 << bit);
4469	--adapter->num_vlans;
4470	/* Re-init to load the changes */
4471	if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
4472		em_init_locked(adapter);
4473	EM_CORE_UNLOCK(adapter);
4474}
4475
4476static void
4477em_setup_vlan_hw_support(struct adapter *adapter)
4478{
4479	struct e1000_hw *hw = &adapter->hw;
4480	u32             reg;
4481
4482	/*
4483	** We get here through init_locked, meaning
4484	** a soft reset; this has already cleared
4485	** the VFTA and other state, so if no
4486	** vlans have been registered, do nothing.
4487	*/
4488	if (adapter->num_vlans == 0)
4489                return;
4490
4491	/*
4492	** A soft reset zeroes out the VFTA, so
4493	** we need to repopulate it now.
4494	*/
4495	for (int i = 0; i < EM_VFTA_SIZE; i++)
4496                if (adapter->shadow_vfta[i] != 0)
4497			E1000_WRITE_REG_ARRAY(hw, E1000_VFTA,
4498                            i, adapter->shadow_vfta[i]);
4499
4500	reg = E1000_READ_REG(hw, E1000_CTRL);
4501	reg |= E1000_CTRL_VME;
4502	E1000_WRITE_REG(hw, E1000_CTRL, reg);
4503
4504	/* Enable the Filter Table */
4505	reg = E1000_READ_REG(hw, E1000_RCTL);
4506	reg &= ~E1000_RCTL_CFIEN;
4507	reg |= E1000_RCTL_VFE;
4508	E1000_WRITE_REG(hw, E1000_RCTL, reg);
4509}
4510
4511static void
4512em_enable_intr(struct adapter *adapter)
4513{
4514	struct e1000_hw *hw = &adapter->hw;
4515	u32 ims_mask = IMS_ENABLE_MASK;
4516
4517	if (hw->mac.type == e1000_82574) {
4518		E1000_WRITE_REG(hw, EM_EIAC, EM_MSIX_MASK);
4519		ims_mask |= EM_MSIX_MASK;
4520	}
4521	E1000_WRITE_REG(hw, E1000_IMS, ims_mask);
4522}
4523
4524static void
4525em_disable_intr(struct adapter *adapter)
4526{
4527	struct e1000_hw *hw = &adapter->hw;
4528
4529	if (hw->mac.type == e1000_82574)
4530		E1000_WRITE_REG(hw, EM_EIAC, 0);
4531	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
4532}
4533
4534/*
4535 * Bit of a misnomer: what this really means is
4536 * to enable OS management of the system, i.e.
4537 * to disable special hardware management features.
4538 */
4539static void
4540em_init_manageability(struct adapter *adapter)
4541{
4542	/* A shared code workaround */
4543#define E1000_82542_MANC2H E1000_MANC2H
4544	if (adapter->has_manage) {
4545		int manc2h = E1000_READ_REG(&adapter->hw, E1000_MANC2H);
4546		int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
4547
4548		/* disable hardware interception of ARP */
4549		manc &= ~(E1000_MANC_ARP_EN);
4550
4551                /* enable receiving management packets to the host */
4552		manc |= E1000_MANC_EN_MNG2HOST;
4553#define E1000_MNG2HOST_PORT_623 (1 << 5)
4554#define E1000_MNG2HOST_PORT_664 (1 << 6)
4555		manc2h |= E1000_MNG2HOST_PORT_623;
4556		manc2h |= E1000_MNG2HOST_PORT_664;
4557		E1000_WRITE_REG(&adapter->hw, E1000_MANC2H, manc2h);
4558		E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
4559	}
4560}
4561
4562/*
4563 * Give control back to hardware management
4564 * controller if there is one.
4565 */
4566static void
4567em_release_manageability(struct adapter *adapter)
4568{
4569	if (adapter->has_manage) {
4570		int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
4571
4572		/* re-enable hardware interception of ARP */
4573		manc |= E1000_MANC_ARP_EN;
4574		manc &= ~E1000_MANC_EN_MNG2HOST;
4575
4576		E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
4577	}
4578}
4579
4580/*
4581 * em_get_hw_control sets the {CTRL_EXT|FWSM}:DRV_LOAD bit.
4582 * For ASF and Pass Through versions of f/w this means
4583 * that the driver is loaded. For AMT version type f/w
4584 * this means that the network i/f is open.
4585 */
4586static void
4587em_get_hw_control(struct adapter *adapter)
4588{
4589	u32 ctrl_ext, swsm;
4590
4591	if (adapter->hw.mac.type == e1000_82573) {
4592		swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM);
4593		E1000_WRITE_REG(&adapter->hw, E1000_SWSM,
4594		    swsm | E1000_SWSM_DRV_LOAD);
4595		return;
4596	}
4597	/* else */
4598	ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4599	E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
4600	    ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
4601	return;
4602}
4603
4604/*
4605 * em_release_hw_control resets {CTRL_EXT|FWSM}:DRV_LOAD bit.
4606 * For ASF and Pass Through versions of f/w this means that
4607 * the driver is no longer loaded. For AMT versions of the
4608 * f/w this means that the network i/f is closed.
4609 */
4610static void
4611em_release_hw_control(struct adapter *adapter)
4612{
4613	u32 ctrl_ext, swsm;
4614
4615	if (!adapter->has_manage)
4616		return;
4617
4618	if (adapter->hw.mac.type == e1000_82573) {
4619		swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM);
4620		E1000_WRITE_REG(&adapter->hw, E1000_SWSM,
4621		    swsm & ~E1000_SWSM_DRV_LOAD);
4622		return;
4623	}
4624	/* else */
4625	ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4626	E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
4627	    ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
4628	return;
4629}
4630
4631static int
4632em_is_valid_ether_addr(u8 *addr)
4633{
4634	char zero_addr[6] = { 0, 0, 0, 0, 0, 0 };
4635
4636	if ((addr[0] & 1) || (!bcmp(addr, zero_addr, ETHER_ADDR_LEN))) {
4637		return (FALSE);
4638	}
4639
4640	return (TRUE);
4641}
4642
4643/*
4644** Parse the interface capabilities with regard
4645** to both system management and wake-on-lan for
4646** later use.
4647*/
4648static void
4649em_get_wakeup(device_t dev)
4650{
4651	struct adapter	*adapter = device_get_softc(dev);
4652	u16		eeprom_data = 0, device_id, apme_mask;
4653
4654	adapter->has_manage = e1000_enable_mng_pass_thru(&adapter->hw);
4655	apme_mask = EM_EEPROM_APME;
4656
4657	switch (adapter->hw.mac.type) {
4658	case e1000_82573:
4659	case e1000_82583:
4660		adapter->has_amt = TRUE;
4661		/* Falls thru */
4662	case e1000_82571:
4663	case e1000_82572:
4664	case e1000_80003es2lan:
4665		if (adapter->hw.bus.func == 1) {
4666			e1000_read_nvm(&adapter->hw,
4667			    NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data);
4668			break;
4669		} else
4670			e1000_read_nvm(&adapter->hw,
4671			    NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
4672		break;
4673	case e1000_ich8lan:
4674	case e1000_ich9lan:
4675	case e1000_ich10lan:
4676	case e1000_pchlan:
4677	case e1000_pch2lan:
4678		apme_mask = E1000_WUC_APME;
4679		adapter->has_amt = TRUE;
4680		eeprom_data = E1000_READ_REG(&adapter->hw, E1000_WUC);
4681		break;
4682	default:
4683		e1000_read_nvm(&adapter->hw,
4684		    NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
4685		break;
4686	}
4687	if (eeprom_data & apme_mask)
4688		adapter->wol = (E1000_WUFC_MAG | E1000_WUFC_MC);
4689	/*
4690	 * We have the eeprom settings; now apply the special cases
4691	 * where the eeprom may be wrong or the board won't support
4692	 * wake on lan on a particular port.
4693	 */
4694	device_id = pci_get_device(dev);
4695        switch (device_id) {
4696	case E1000_DEV_ID_82571EB_FIBER:
4697		/* Wake events only supported on port A for dual fiber
4698		 * regardless of eeprom setting */
4699		if (E1000_READ_REG(&adapter->hw, E1000_STATUS) &
4700		    E1000_STATUS_FUNC_1)
4701			adapter->wol = 0;
4702		break;
4703	case E1000_DEV_ID_82571EB_QUAD_COPPER:
4704	case E1000_DEV_ID_82571EB_QUAD_FIBER:
4705	case E1000_DEV_ID_82571EB_QUAD_COPPER_LP:
4706                /* if quad port adapter, disable WoL on all but port A */
4707		if (global_quad_port_a != 0)
4708			adapter->wol = 0;
4709		/* Reset for multiple quad port adapters */
4710		if (++global_quad_port_a == 4)
4711			global_quad_port_a = 0;
4712                break;
4713	}
4714	return;
4715}
4716
4717
4718/*
4719 * Enable PCI Wake On Lan capability
4720 */
4721static void
4722em_enable_wakeup(device_t dev)
4723{
4724	struct adapter	*adapter = device_get_softc(dev);
4725	struct ifnet	*ifp = adapter->ifp;
4726	u32		pmc, ctrl, ctrl_ext, rctl;
4727	u16     	status;
4728
4729	if (pci_find_extcap(dev, PCIY_PMG, &pmc) != 0)
4730		return;
4731
4732	/* Advertise the wakeup capability */
4733	ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
4734	ctrl |= (E1000_CTRL_SWDPIN2 | E1000_CTRL_SWDPIN3);
4735	E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
4736	E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
4737
4738	if ((adapter->hw.mac.type == e1000_ich8lan) ||
4739	    (adapter->hw.mac.type == e1000_pchlan) ||
4740	    (adapter->hw.mac.type == e1000_ich9lan) ||
4741	    (adapter->hw.mac.type == e1000_ich10lan)) {
4742		e1000_disable_gig_wol_ich8lan(&adapter->hw);
4743		e1000_hv_phy_powerdown_workaround_ich8lan(&adapter->hw);
4744	}
4745
4746	/* Keep the laser running on Fiber adapters */
4747	if (adapter->hw.phy.media_type == e1000_media_type_fiber ||
4748	    adapter->hw.phy.media_type == e1000_media_type_internal_serdes) {
4749		ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4750		ctrl_ext |= E1000_CTRL_EXT_SDP3_DATA;
4751		E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, ctrl_ext);
4752	}
4753
4754	/*
4755	** Determine type of Wakeup: note that wol
4756	** is set with all bits on by default.
4757	*/
4758	if ((ifp->if_capenable & IFCAP_WOL_MAGIC) == 0)
4759		adapter->wol &= ~E1000_WUFC_MAG;
4760
4761	if ((ifp->if_capenable & IFCAP_WOL_MCAST) == 0)
4762		adapter->wol &= ~E1000_WUFC_MC;
4763	else {
4764		rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
4765		rctl |= E1000_RCTL_MPE;
4766		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, rctl);
4767	}
4768
4769	if ((adapter->hw.mac.type == e1000_pchlan) ||
4770	    (adapter->hw.mac.type == e1000_pch2lan)) {
4771		if (em_enable_phy_wakeup(adapter))
4772			return;
4773	} else {
4774		E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
4775		E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
4776	}
4777
4778	if (adapter->hw.phy.type == e1000_phy_igp_3)
4779		e1000_igp3_phy_powerdown_workaround_ich8lan(&adapter->hw);
4780
4781        /* Request PME */
4782        status = pci_read_config(dev, pmc + PCIR_POWER_STATUS, 2);
4783	status &= ~(PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE);
4784	if (ifp->if_capenable & IFCAP_WOL)
4785		status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE;
4786        pci_write_config(dev, pmc + PCIR_POWER_STATUS, status, 2);
4787
4788	return;
4789}
4790
4791/*
4792** WOL in the newer chipset interfaces (pchlan)
4793** requires things to be copied into the PHY
4794*/
4795static int
4796em_enable_phy_wakeup(struct adapter *adapter)
4797{
4798	struct e1000_hw *hw = &adapter->hw;
4799	u32 mreg, ret = 0;
4800	u16 preg;
4801
4802	/* copy MAC RARs to PHY RARs */
4803	e1000_copy_rx_addrs_to_phy_ich8lan(hw);
4804
4805	/* copy MAC MTA to PHY MTA */
4806	for (int i = 0; i < adapter->hw.mac.mta_reg_count; i++) {
4807		mreg = E1000_READ_REG_ARRAY(hw, E1000_MTA, i);
4808		e1000_write_phy_reg(hw, BM_MTA(i), (u16)(mreg & 0xFFFF));
4809		e1000_write_phy_reg(hw, BM_MTA(i) + 1,
4810		    (u16)((mreg >> 16) & 0xFFFF));
4811	}
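	/*
	 * Each 32-bit MTA word is split across two 16-bit PHY registers:
	 * the low half first, then the high half in the next register.
	 */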
4812
4813	/* configure PHY Rx Control register */
4814	e1000_read_phy_reg(&adapter->hw, BM_RCTL, &preg);
4815	mreg = E1000_READ_REG(hw, E1000_RCTL);
4816	if (mreg & E1000_RCTL_UPE)
4817		preg |= BM_RCTL_UPE;
4818	if (mreg & E1000_RCTL_MPE)
4819		preg |= BM_RCTL_MPE;
4820	preg &= ~(BM_RCTL_MO_MASK);
4821	if (mreg & E1000_RCTL_MO_3)
4822		preg |= (((mreg & E1000_RCTL_MO_3) >> E1000_RCTL_MO_SHIFT)
4823				<< BM_RCTL_MO_SHIFT);
4824	if (mreg & E1000_RCTL_BAM)
4825		preg |= BM_RCTL_BAM;
4826	if (mreg & E1000_RCTL_PMCF)
4827		preg |= BM_RCTL_PMCF;
4828	mreg = E1000_READ_REG(hw, E1000_CTRL);
4829	if (mreg & E1000_CTRL_RFCE)
4830		preg |= BM_RCTL_RFCE;
4831	e1000_write_phy_reg(&adapter->hw, BM_RCTL, preg);
4832
4833	/* enable PHY wakeup in MAC register */
4834	E1000_WRITE_REG(hw, E1000_WUC,
4835	    E1000_WUC_PHY_WAKE | E1000_WUC_PME_EN);
4836	E1000_WRITE_REG(hw, E1000_WUFC, adapter->wol);
4837
4838	/* configure and enable PHY wakeup in PHY registers */
4839	e1000_write_phy_reg(&adapter->hw, BM_WUFC, adapter->wol);
4840	e1000_write_phy_reg(&adapter->hw, BM_WUC, E1000_WUC_PME_EN);
4841
4842	/* activate PHY wakeup */
4843	ret = hw->phy.ops.acquire(hw);
4844	if (ret) {
4845		printf("Could not acquire PHY\n");
4846		return (ret);
4847	}
4848	e1000_write_phy_reg_mdic(hw, IGP01E1000_PHY_PAGE_SELECT,
4849	                         (BM_WUC_ENABLE_PAGE << IGP_PAGE_SHIFT));
4850	ret = e1000_read_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, &preg);
4851	if (ret) {
4852		printf("Could not read PHY page 769\n");
4853		goto out;
4854	}
4855	preg |= BM_WUC_ENABLE_BIT | BM_WUC_HOST_WU_BIT;
4856	ret = e1000_write_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, preg);
4857	if (ret)
4858		printf("Could not set PHY Host Wakeup bit\n");
4859out:
4860	hw->phy.ops.release(hw);
4861
4862	return (ret);
4863}
4864
4865static void
4866em_led_func(void *arg, int onoff)
4867{
4868	struct adapter	*adapter = arg;
4869
4870	EM_CORE_LOCK(adapter);
4871	if (onoff) {
4872		e1000_setup_led(&adapter->hw);
4873		e1000_led_on(&adapter->hw);
4874	} else {
4875		e1000_led_off(&adapter->hw);
4876		e1000_cleanup_led(&adapter->hw);
4877	}
4878	EM_CORE_UNLOCK(adapter);
4879}
4880
4881/*
4882** Disable the L0S and L1 LINK states
4883*/
4884static void
4885em_disable_aspm(struct adapter *adapter)
4886{
4887	int		base, reg;
4888	u16		link_cap, link_ctrl;
4889	device_t	dev = adapter->dev;
4890
4891	switch (adapter->hw.mac.type) {
4892		case e1000_82573:
4893		case e1000_82574:
4894		case e1000_82583:
4895			break;
4896		default:
4897			return;
4898	}
4899	if (pci_find_extcap(dev, PCIY_EXPRESS, &base) != 0)
4900		return;
4901	reg = base + PCIR_EXPRESS_LINK_CAP;
4902	link_cap = pci_read_config(dev, reg, 2);
4903	if ((link_cap & PCIM_LINK_CAP_ASPM) == 0)
4904		return;
4905	reg = base + PCIR_EXPRESS_LINK_CTL;
4906	link_ctrl = pci_read_config(dev, reg, 2);
4907	link_ctrl &= 0xFFFC; /* clear bits 0 and 1, the ASPM L0s/L1 enables */
4908	pci_write_config(dev, reg, link_ctrl, 2);
4909	return;
4910}
4911
4912/**********************************************************************
4913 *
4914 *  Update the board statistics counters.
4915 *
4916 **********************************************************************/
4917static void
4918em_update_stats_counters(struct adapter *adapter)
4919{
4920	struct ifnet   *ifp;
4921
4922	if (adapter->hw.phy.media_type == e1000_media_type_copper ||
4923	   (E1000_READ_REG(&adapter->hw, E1000_STATUS) & E1000_STATUS_LU)) {
4924		adapter->stats.symerrs += E1000_READ_REG(&adapter->hw, E1000_SYMERRS);
4925		adapter->stats.sec += E1000_READ_REG(&adapter->hw, E1000_SEC);
4926	}
4927	adapter->stats.crcerrs += E1000_READ_REG(&adapter->hw, E1000_CRCERRS);
4928	adapter->stats.mpc += E1000_READ_REG(&adapter->hw, E1000_MPC);
4929	adapter->stats.scc += E1000_READ_REG(&adapter->hw, E1000_SCC);
4930	adapter->stats.ecol += E1000_READ_REG(&adapter->hw, E1000_ECOL);
4931
4932	adapter->stats.mcc += E1000_READ_REG(&adapter->hw, E1000_MCC);
4933	adapter->stats.latecol += E1000_READ_REG(&adapter->hw, E1000_LATECOL);
4934	adapter->stats.colc += E1000_READ_REG(&adapter->hw, E1000_COLC);
4935	adapter->stats.dc += E1000_READ_REG(&adapter->hw, E1000_DC);
4936	adapter->stats.rlec += E1000_READ_REG(&adapter->hw, E1000_RLEC);
4937	adapter->stats.xonrxc += E1000_READ_REG(&adapter->hw, E1000_XONRXC);
4938	adapter->stats.xontxc += E1000_READ_REG(&adapter->hw, E1000_XONTXC);
4939	/*
4940	** For watchdog management we need to know if we have been
4941	** paused during the last interval, so capture that here.
4942	*/
4943	adapter->pause_frames = E1000_READ_REG(&adapter->hw, E1000_XOFFRXC);
4944	adapter->stats.xoffrxc += adapter->pause_frames;
4945	adapter->stats.xofftxc += E1000_READ_REG(&adapter->hw, E1000_XOFFTXC);
4946	adapter->stats.fcruc += E1000_READ_REG(&adapter->hw, E1000_FCRUC);
4947	adapter->stats.prc64 += E1000_READ_REG(&adapter->hw, E1000_PRC64);
4948	adapter->stats.prc127 += E1000_READ_REG(&adapter->hw, E1000_PRC127);
4949	adapter->stats.prc255 += E1000_READ_REG(&adapter->hw, E1000_PRC255);
4950	adapter->stats.prc511 += E1000_READ_REG(&adapter->hw, E1000_PRC511);
4951	adapter->stats.prc1023 += E1000_READ_REG(&adapter->hw, E1000_PRC1023);
4952	adapter->stats.prc1522 += E1000_READ_REG(&adapter->hw, E1000_PRC1522);
4953	adapter->stats.gprc += E1000_READ_REG(&adapter->hw, E1000_GPRC);
4954	adapter->stats.bprc += E1000_READ_REG(&adapter->hw, E1000_BPRC);
4955	adapter->stats.mprc += E1000_READ_REG(&adapter->hw, E1000_MPRC);
4956	adapter->stats.gptc += E1000_READ_REG(&adapter->hw, E1000_GPTC);
4957
4958	/* For the 64-bit byte counters the low dword must be read first. */
4959	/* Both registers clear on the read of the high dword */
4960
4961	adapter->stats.gorc += E1000_READ_REG(&adapter->hw, E1000_GORCL) +
4962	    ((u64)E1000_READ_REG(&adapter->hw, E1000_GORCH) << 32);
4963	adapter->stats.gotc += E1000_READ_REG(&adapter->hw, E1000_GOTCL) +
4964	    ((u64)E1000_READ_REG(&adapter->hw, E1000_GOTCH) << 32);
4965
4966	adapter->stats.rnbc += E1000_READ_REG(&adapter->hw, E1000_RNBC);
4967	adapter->stats.ruc += E1000_READ_REG(&adapter->hw, E1000_RUC);
4968	adapter->stats.rfc += E1000_READ_REG(&adapter->hw, E1000_RFC);
4969	adapter->stats.roc += E1000_READ_REG(&adapter->hw, E1000_ROC);
4970	adapter->stats.rjc += E1000_READ_REG(&adapter->hw, E1000_RJC);
4971
4972	adapter->stats.tor += E1000_READ_REG(&adapter->hw, E1000_TORL) +
	    ((u64)E1000_READ_REG(&adapter->hw, E1000_TORH) << 32);
4973	adapter->stats.tot += E1000_READ_REG(&adapter->hw, E1000_TOTL) +
	    ((u64)E1000_READ_REG(&adapter->hw, E1000_TOTH) << 32);
4974
4975	adapter->stats.tpr += E1000_READ_REG(&adapter->hw, E1000_TPR);
4976	adapter->stats.tpt += E1000_READ_REG(&adapter->hw, E1000_TPT);
4977	adapter->stats.ptc64 += E1000_READ_REG(&adapter->hw, E1000_PTC64);
4978	adapter->stats.ptc127 += E1000_READ_REG(&adapter->hw, E1000_PTC127);
4979	adapter->stats.ptc255 += E1000_READ_REG(&adapter->hw, E1000_PTC255);
4980	adapter->stats.ptc511 += E1000_READ_REG(&adapter->hw, E1000_PTC511);
4981	adapter->stats.ptc1023 += E1000_READ_REG(&adapter->hw, E1000_PTC1023);
4982	adapter->stats.ptc1522 += E1000_READ_REG(&adapter->hw, E1000_PTC1522);
4983	adapter->stats.mptc += E1000_READ_REG(&adapter->hw, E1000_MPTC);
4984	adapter->stats.bptc += E1000_READ_REG(&adapter->hw, E1000_BPTC);
4985
4986	/* Interrupt Counts */
4987
4988	adapter->stats.iac += E1000_READ_REG(&adapter->hw, E1000_IAC);
4989	adapter->stats.icrxptc += E1000_READ_REG(&adapter->hw, E1000_ICRXPTC);
4990	adapter->stats.icrxatc += E1000_READ_REG(&adapter->hw, E1000_ICRXATC);
4991	adapter->stats.ictxptc += E1000_READ_REG(&adapter->hw, E1000_ICTXPTC);
4992	adapter->stats.ictxatc += E1000_READ_REG(&adapter->hw, E1000_ICTXATC);
4993	adapter->stats.ictxqec += E1000_READ_REG(&adapter->hw, E1000_ICTXQEC);
4994	adapter->stats.ictxqmtc += E1000_READ_REG(&adapter->hw, E1000_ICTXQMTC);
4995	adapter->stats.icrxdmtc += E1000_READ_REG(&adapter->hw, E1000_ICRXDMTC);
4996	adapter->stats.icrxoc += E1000_READ_REG(&adapter->hw, E1000_ICRXOC);
4997
4998	if (adapter->hw.mac.type >= e1000_82543) {
4999		adapter->stats.algnerrc +=
5000		E1000_READ_REG(&adapter->hw, E1000_ALGNERRC);
5001		adapter->stats.rxerrc +=
5002		E1000_READ_REG(&adapter->hw, E1000_RXERRC);
5003		adapter->stats.tncrs +=
5004		E1000_READ_REG(&adapter->hw, E1000_TNCRS);
5005		adapter->stats.cexterr +=
5006		E1000_READ_REG(&adapter->hw, E1000_CEXTERR);
5007		adapter->stats.tsctc +=
5008		E1000_READ_REG(&adapter->hw, E1000_TSCTC);
5009		adapter->stats.tsctfc +=
5010		E1000_READ_REG(&adapter->hw, E1000_TSCTFC);
5011	}
5012	ifp = adapter->ifp;
5013
5014	ifp->if_collisions = adapter->stats.colc;
5015
5016	/* Rx Errors */
5017	ifp->if_ierrors = adapter->dropped_pkts + adapter->stats.rxerrc +
5018	    adapter->stats.crcerrs + adapter->stats.algnerrc +
5019	    adapter->stats.ruc + adapter->stats.roc +
5020	    adapter->stats.mpc + adapter->stats.cexterr;
5021
5022	/* Tx Errors */
5023	ifp->if_oerrors = adapter->stats.ecol +
5024	    adapter->stats.latecol + adapter->watchdog_events;
5025}
5026
5027/* Export a single 32-bit register via a read-only sysctl. */
5028static int
5029em_sysctl_reg_handler(SYSCTL_HANDLER_ARGS)
5030{
5031	struct adapter *adapter;
5032	u_int val;
5033
5034	adapter = oidp->oid_arg1;
5035	val = E1000_READ_REG(&adapter->hw, oidp->oid_arg2);
5036	return (sysctl_handle_int(oidp, &val, 0, req));
5037}
5038
5039/*
5040 * Add sysctl variables, one per statistic, to the system.
5041 */
5042static void
5043em_add_hw_stats(struct adapter *adapter)
5044{
5045	device_t dev = adapter->dev;
5046
5047	struct tx_ring *txr = adapter->tx_rings;
5048	struct rx_ring *rxr = adapter->rx_rings;
5049
5050	struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev);
5051	struct sysctl_oid *tree = device_get_sysctl_tree(dev);
5052	struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree);
5053	struct e1000_hw_stats *stats = &adapter->stats;
5054
5055	struct sysctl_oid *stat_node, *queue_node, *int_node;
5056	struct sysctl_oid_list *stat_list, *queue_list, *int_list;
5057
5058#define QUEUE_NAME_LEN 32
5059	char namebuf[QUEUE_NAME_LEN];
5060
5061	/* Driver Statistics */
5062	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "link_irq",
5063			CTLFLAG_RD, &adapter->link_irq,
5064			"Link MSIX IRQ Handled");
5065	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "mbuf_alloc_fail",
5066			 CTLFLAG_RD, &adapter->mbuf_alloc_failed,
5067			 "Std mbuf failed");
5068	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "cluster_alloc_fail",
5069			 CTLFLAG_RD, &adapter->mbuf_cluster_failed,
5070			 "Std mbuf cluster failed");
5071	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "dropped",
5072			CTLFLAG_RD, &adapter->dropped_pkts,
5073			"Driver dropped packets");
5074	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_dma_fail",
5075			CTLFLAG_RD, &adapter->no_tx_dma_setup,
5076			"Driver tx dma failure in xmit");
5077	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_overruns",
5078			CTLFLAG_RD, &adapter->rx_overruns,
5079			"RX overruns");
5080	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "watchdog_timeouts",
5081			CTLFLAG_RD, &adapter->watchdog_events,
5082			"Watchdog timeouts");
5083
5084	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "device_control",
5085			CTLFLAG_RD, adapter, E1000_CTRL,
5086			em_sysctl_reg_handler, "IU",
5087			"Device Control Register");
5088	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rx_control",
5089			CTLFLAG_RD, adapter, E1000_RCTL,
5090			em_sysctl_reg_handler, "IU",
5091			"Receiver Control Register");
5092	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_high_water",
5093			CTLFLAG_RD, &adapter->hw.fc.high_water, 0,
5094			"Flow Control High Watermark");
5095	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_low_water",
5096			CTLFLAG_RD, &adapter->hw.fc.low_water, 0,
5097			"Flow Control Low Watermark");
5098
5099	for (int i = 0; i < adapter->num_queues; i++, rxr++, txr++) {
5100		snprintf(namebuf, QUEUE_NAME_LEN, "queue%d", i);
5101		queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
5102					    CTLFLAG_RD, NULL, "Queue Name");
5103		queue_list = SYSCTL_CHILDREN(queue_node);
5104
5105		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_head",
5106				CTLFLAG_RD, adapter, E1000_TDH(txr->me),
5107				em_sysctl_reg_handler, "IU",
5108 				"Transmit Descriptor Head");
5109		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_tail",
5110				CTLFLAG_RD, adapter, E1000_TDT(txr->me),
5111				em_sysctl_reg_handler, "IU",
5112 				"Transmit Descriptor Tail");
5113		SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "tx_irq",
5114				CTLFLAG_RD, &txr->tx_irq,
5115				"Queue MSI-X Transmit Interrupts");
5116		SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "no_desc_avail",
5117				CTLFLAG_RD, &txr->no_desc_avail,
5118				"Queue No Descriptor Available");
5119
5120		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_head",
5121				CTLFLAG_RD, adapter, E1000_RDH(rxr->me),
5122				em_sysctl_reg_handler, "IU",
5123				"Receive Descriptor Head");
5124		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_tail",
5125				CTLFLAG_RD, adapter, E1000_RDT(rxr->me),
5126				em_sysctl_reg_handler, "IU",
5127				"Receive Descriptor Tail");
5128		SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "rx_irq",
5129				CTLFLAG_RD, &rxr->rx_irq,
5130				"Queue MSI-X Receive Interrupts");
5131	}
5132
5133	/* MAC stats get their own sub node */
5134
5135	stat_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "mac_stats",
5136				    CTLFLAG_RD, NULL, "Statistics");
5137	stat_list = SYSCTL_CHILDREN(stat_node);
5138
5139	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "excess_coll",
5140			CTLFLAG_RD, &stats->ecol,
5141			"Excessive collisions");
5142	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "single_coll",
5143			CTLFLAG_RD, &stats->scc,
5144			"Single collisions");
5145	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "multiple_coll",
5146			CTLFLAG_RD, &stats->mcc,
5147			"Multiple collisions");
5148	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "late_coll",
5149			CTLFLAG_RD, &stats->latecol,
5150			"Late collisions");
5151	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "collision_count",
5152			CTLFLAG_RD, &stats->colc,
5153			"Collision Count");
5154	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "symbol_errors",
5155			CTLFLAG_RD, &adapter->stats.symerrs,
5156			"Symbol Errors");
5157	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "sequence_errors",
5158			CTLFLAG_RD, &adapter->stats.sec,
5159			"Sequence Errors");
5160	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "defer_count",
5161			CTLFLAG_RD, &adapter->stats.dc,
5162			"Defer Count");
5163	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "missed_packets",
5164			CTLFLAG_RD, &adapter->stats.mpc,
5165			"Missed Packets");
5166	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_no_buff",
5167			CTLFLAG_RD, &adapter->stats.rnbc,
5168			"Receive No Buffers");
5169	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_undersize",
5170			CTLFLAG_RD, &adapter->stats.ruc,
5171			"Receive Undersize");
5172	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_fragmented",
5173			CTLFLAG_RD, &adapter->stats.rfc,
5174			"Fragmented Packets Received");
5175	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_oversize",
5176			CTLFLAG_RD, &adapter->stats.roc,
5177			"Oversized Packets Received");
5178	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_jabber",
5179			CTLFLAG_RD, &adapter->stats.rjc,
5180			"Received Jabber");
5181	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_errs",
5182			CTLFLAG_RD, &adapter->stats.rxerrc,
5183			"Receive Errors");
5184	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "crc_errs",
5185			CTLFLAG_RD, &adapter->stats.crcerrs,
5186			"CRC errors");
5187	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "alignment_errs",
5188			CTLFLAG_RD, &adapter->stats.algnerrc,
5189			"Alignment Errors");
5190	/* On 82575 these are collision counts */
5191	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "coll_ext_errs",
5192			CTLFLAG_RD, &adapter->stats.cexterr,
5193			"Collision/Carrier extension errors");
5194	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_recvd",
5195			CTLFLAG_RD, &adapter->stats.xonrxc,
5196			"XON Received");
5197	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_txd",
5198			CTLFLAG_RD, &adapter->stats.xontxc,
5199			"XON Transmitted");
5200	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_recvd",
5201			CTLFLAG_RD, &adapter->stats.xoffrxc,
5202			"XOFF Received");
5203	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_txd",
5204			CTLFLAG_RD, &adapter->stats.xofftxc,
5205			"XOFF Transmitted");
5206
5207	/* Packet Reception Stats */
5208	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_recvd",
5209			CTLFLAG_RD, &adapter->stats.tpr,
5210			"Total Packets Received");
5211	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd",
5212			CTLFLAG_RD, &adapter->stats.gprc,
5213			"Good Packets Received");
5214	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_recvd",
5215			CTLFLAG_RD, &adapter->stats.bprc,
5216			"Broadcast Packets Received");
5217	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd",
5218			CTLFLAG_RD, &adapter->stats.mprc,
5219			"Multicast Packets Received");
5220	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_64",
5221			CTLFLAG_RD, &adapter->stats.prc64,
5222			"64 byte frames received");
5223	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_65_127",
5224			CTLFLAG_RD, &adapter->stats.prc127,
5225			"65-127 byte frames received");
5226	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_128_255",
5227			CTLFLAG_RD, &adapter->stats.prc255,
5228			"128-255 byte frames received");
5229	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_256_511",
5230			CTLFLAG_RD, &adapter->stats.prc511,
5231			"256-511 byte frames received");
5232	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_512_1023",
5233			CTLFLAG_RD, &adapter->stats.prc1023,
5234			"512-1023 byte frames received");
5235	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_1024_1522",
5236			CTLFLAG_RD, &adapter->stats.prc1522,
5237			"1024-1522 byte frames received");
5238	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd",
5239			CTLFLAG_RD, &adapter->stats.gorc,
5240			"Good Octets Received");
5241
5242	/* Packet Transmission Stats */
5243	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_txd",
5244			CTLFLAG_RD, &adapter->stats.gotc,
5245			"Good Octets Transmitted");
5246	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_txd",
5247			CTLFLAG_RD, &adapter->stats.tpt,
5248			"Total Packets Transmitted");
5249	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
5250			CTLFLAG_RD, &adapter->stats.gptc,
5251			"Good Packets Transmitted");
5252	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_txd",
5253			CTLFLAG_RD, &adapter->stats.bptc,
5254			"Broadcast Packets Transmitted");
5255	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_txd",
5256			CTLFLAG_RD, &adapter->stats.mptc,
5257			"Multicast Packets Transmitted");
5258	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_64",
5259			CTLFLAG_RD, &adapter->stats.ptc64,
5260			"64 byte frames transmitted");
5261	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_65_127",
5262			CTLFLAG_RD, &adapter->stats.ptc127,
5263			"65-127 byte frames transmitted");
5264	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_128_255",
5265			CTLFLAG_RD, &adapter->stats.ptc255,
5266			"128-255 byte frames transmitted");
5267	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_256_511",
5268			CTLFLAG_RD, &adapter->stats.ptc511,
5269			"256-511 byte frames transmitted");
5270	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_512_1023",
5271			CTLFLAG_RD, &adapter->stats.ptc1023,
5272			"512-1023 byte frames transmitted");
5273	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_1024_1522",
5274			CTLFLAG_RD, &adapter->stats.ptc1522,
5275			"1024-1522 byte frames transmitted");
5276	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tso_txd",
5277			CTLFLAG_RD, &adapter->stats.tsctc,
5278			"TSO Contexts Transmitted");
5279	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tso_ctx_fail",
5280			CTLFLAG_RD, &adapter->stats.tsctfc,
5281			"TSO Contexts Failed");
5282
5283
5284	/* Interrupt Stats */
5285
5286	int_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "interrupts",
5287				    CTLFLAG_RD, NULL, "Interrupt Statistics");
5288	int_list = SYSCTL_CHILDREN(int_node);
5289
5290	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "asserts",
5291			CTLFLAG_RD, &adapter->stats.iac,
5292			"Interrupt Assertion Count");
5293
5294	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_pkt_timer",
5295			CTLFLAG_RD, &adapter->stats.icrxptc,
5296			"Interrupt Cause Rx Pkt Timer Expire Count");
5297
5298	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_abs_timer",
5299			CTLFLAG_RD, &adapter->stats.icrxatc,
5300			"Interrupt Cause Rx Abs Timer Expire Count");
5301
5302	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_pkt_timer",
5303			CTLFLAG_RD, &adapter->stats.ictxptc,
5304			"Interrupt Cause Tx Pkt Timer Expire Count");
5305
5306	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_abs_timer",
5307			CTLFLAG_RD, &adapter->stats.ictxatc,
5308			"Interrupt Cause Tx Abs Timer Expire Count");
5309
5310	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_queue_empty",
5311			CTLFLAG_RD, &adapter->stats.ictxqec,
5312			"Interrupt Cause Tx Queue Empty Count");
5313
5314	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_queue_min_thresh",
5315			CTLFLAG_RD, &adapter->stats.ictxqmtc,
5316			"Interrupt Cause Tx Queue Min Thresh Count");
5317
5318	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_desc_min_thresh",
5319			CTLFLAG_RD, &adapter->stats.icrxdmtc,
5320			"Interrupt Cause Rx Desc Min Thresh Count");
5321
5322	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_overrun",
5323			CTLFLAG_RD, &adapter->stats.icrxoc,
5324			"Interrupt Cause Receiver Overrun Count");
5325}
5326
5327/**********************************************************************
5328 *
5329 *  This routine provides a way to dump out the adapter EEPROM,
5330 *  often a useful debug/service tool. It dumps only the first
5331 *  32 words, since everything of interest lives in that range.
5332 *
5333 **********************************************************************/
5334static int
5335em_sysctl_nvm_info(SYSCTL_HANDLER_ARGS)
5336{
5337	struct adapter *adapter;
5338	int error;
5339	int result;
5340
5341	result = -1;
5342	error = sysctl_handle_int(oidp, &result, 0, req);
5343
5344	if (error || !req->newptr)
5345		return (error);
5346
5347	/*
5348	 * A value of 1 triggers a hex dump of the
5349	 * first 32 16-bit words of the EEPROM to
5350	 * the console.
5351	 */
5352	if (result == 1) {
5353		adapter = (struct adapter *)arg1;
5354		em_print_nvm_info(adapter);
5355	}
5356
5357	return (error);
5358}
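
/*
 * Usage sketch (the OID name and unit number are illustrative; the
 * handler is registered against the device's sysctl tree at attach):
 *
 *	# sysctl dev.em.0.nvm=1
 *
 * Writing 1 invokes em_print_nvm_info() below, dumping the EEPROM
 * contents to the console.
 */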
5359
5360static void
5361em_print_nvm_info(struct adapter *adapter)
5362{
5363	u16	eeprom_data;
5364	int	i, j, row = 0;
5365
5366	/* It's a bit crude, but it gets the job done */
5367	printf("\nInterface EEPROM Dump:\n");
5368	printf("Offset\n0x0000  ");
5369	for (i = 0, j = 0; i < 32; i++, j++) {
5370		if (j == 8) { /* Start a new row of eight words */
5371			j = 0; ++row;
5372			printf("\n0x00%x0  ", row);
5373		}
5374		e1000_read_nvm(&adapter->hw, i, 1, &eeprom_data);
5375		printf("%04x ", eeprom_data);
5376	}
5377	printf("\n");
5378}
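
/*
 * The output resembles the following, eight 16-bit words per row with
 * byte offsets on the left (values here are illustrative only):
 *
 *	Interface EEPROM Dump:
 *	Offset
 *	0x0000  1100 22aa 3344 0000 0000 0000 0000 0000
 *	0x0010  ...
 */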
5379
5380static int
5381em_sysctl_int_delay(SYSCTL_HANDLER_ARGS)
5382{
5383	struct em_int_delay_info *info;
5384	struct adapter *adapter;
5385	u32 regval;
5386	int error, usecs, ticks;
5387
5388	info = (struct em_int_delay_info *)arg1;
5389	usecs = info->value;
5390	error = sysctl_handle_int(oidp, &usecs, 0, req);
5391	if (error != 0 || req->newptr == NULL)
5392		return (error);
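	/* Reject delays that will not fit the 16-bit hardware field. */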
5393	if (usecs < 0 || usecs > EM_TICKS_TO_USECS(65535))
5394		return (EINVAL);
5395	info->value = usecs;
5396	ticks = EM_USECS_TO_TICKS(usecs);
5397
5398	adapter = info->adapter;
5399
5400	EM_CORE_LOCK(adapter);
5401	regval = E1000_READ_OFFSET(&adapter->hw, info->offset);
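	/* The tick count occupies the low 16 bits; keep the bits above it. */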
5402	regval = (regval & ~0xffff) | (ticks & 0xffff);
5403	/* Handle a few special cases. */
5404	switch (info->offset) {
5405	case E1000_RDTR:
5406		break;
5407	case E1000_TIDV:
5408		if (ticks == 0) {
5409			adapter->txd_cmd &= ~E1000_TXD_CMD_IDE;
5410			/* Don't write 0 into the TIDV register. */
5411			regval++;
5412		} else
5413			adapter->txd_cmd |= E1000_TXD_CMD_IDE;
5414		break;
5415	}
5416	E1000_WRITE_OFFSET(&adapter->hw, info->offset, regval);
5417	EM_CORE_UNLOCK(adapter);
5418	return (0);
5419}
5420
5421static void
5422em_add_int_delay_sysctl(struct adapter *adapter, const char *name,
5423	const char *description, struct em_int_delay_info *info,
5424	int offset, int value)
5425{
5426	info->adapter = adapter;
5427	info->offset = offset;
5428	info->value = value;
5429	SYSCTL_ADD_PROC(device_get_sysctl_ctx(adapter->dev),
5430	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
5431	    OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW,
5432	    info, 0, em_sysctl_int_delay, "I", description);
5433}
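
/*
 * A hedged call sketch (the OID name and default value are assumed for
 * illustration; the real registrations happen in the attach path):
 *
 *	em_add_int_delay_sysctl(adapter, "rx_int_delay",
 *	    "receive interrupt delay in usecs",
 *	    &adapter->rx_int_delay,
 *	    E1000_REGISTER(&adapter->hw, E1000_RDTR), 0);
 */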
5434
5435static void
5436em_add_rx_process_limit(struct adapter *adapter, const char *name,
5437	const char *description, int *limit, int value)
5438{
5439	*limit = value;
5440	SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
5441	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
5442	    OID_AUTO, name, CTLFLAG_RW, limit, value, description);
5443}
5444
5445static void
5446em_set_flow_cntrl(struct adapter *adapter, const char *name,
5447	const char *description, int *limit, int value)
5448{
5449	*limit = value;
5450	SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
5451	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
5452	    OID_AUTO, name, CTLFLAG_RW, limit, value, description);
5453}
5454
5455static int
5456em_sysctl_debug_info(SYSCTL_HANDLER_ARGS)
5457{
5458	struct adapter *adapter;
5459	int error;
5460	int result;
5461
5462	result = -1;
5463	error = sysctl_handle_int(oidp, &result, 0, req);
5464
5465	if (error || !req->newptr)
5466		return (error);
5467
5468	if (result == 1) {
5469		adapter = (struct adapter *)arg1;
5470		em_print_debug_info(adapter);
5471	}
5472
5473	return (error);
5474}
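
/*
 * Like the NVM hook above, this is typically reached through a
 * read/write sysctl registered at attach (OID name assumed):
 *
 *	# sysctl dev.em.0.debug=1
 */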
5475
5476/*
5477** This routine is meant to be fluid; add whatever is
5478** needed for debugging a problem.  -jfv
5479*/
5480static void
5481em_print_debug_info(struct adapter *adapter)
5482{
5483	device_t dev = adapter->dev;
5484	struct tx_ring *txr = adapter->tx_rings;
5485	struct rx_ring *rxr = adapter->rx_rings;
5486
5487	if (adapter->ifp->if_drv_flags & IFF_DRV_RUNNING)
5488		printf("Interface is RUNNING ");
5489	else
5490		printf("Interface is NOT RUNNING ");
5491	if (adapter->ifp->if_drv_flags & IFF_DRV_OACTIVE)
5492		printf("and ACTIVE\n");
5493	else
5494		printf("and INACTIVE\n");
5495
5496	device_printf(dev, "hw tdh = %d, hw tdt = %d\n",
5497	    E1000_READ_REG(&adapter->hw, E1000_TDH(0)),
5498	    E1000_READ_REG(&adapter->hw, E1000_TDT(0)));
5499	device_printf(dev, "hw rdh = %d, hw rdt = %d\n",
5500	    E1000_READ_REG(&adapter->hw, E1000_RDH(0)),
5501	    E1000_READ_REG(&adapter->hw, E1000_RDT(0)));
5502	device_printf(dev, "Tx Queue Status = %d\n", txr->queue_status);
5503	device_printf(dev, "TX descriptors avail = %d\n",
5504	    txr->tx_avail);
5505	device_printf(dev, "Tx Descriptors avail failure = %lu\n",
5506	    txr->no_desc_avail);
5507	device_printf(dev, "RX discarded packets = %lu\n",
5508	    rxr->rx_discarded);
5509	device_printf(dev, "RX Next to Check = %d\n", rxr->next_to_check);
5510	device_printf(dev, "RX Next to Refresh = %d\n", rxr->next_to_refresh);
5511}
5512