/******************************************************************************

  Copyright (c) 2001-2011, Intel Corporation
  All rights reserved.

  Redistribution and use in source and binary forms, with or without
  modification, are permitted provided that the following conditions are met:

   1. Redistributions of source code must retain the above copyright notice,
      this list of conditions and the following disclaimer.

   2. Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.

   3. Neither the name of the Intel Corporation nor the names of its
      contributors may be used to endorse or promote products derived from
      this software without specific prior written permission.

  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  POSSIBILITY OF SUCH DAMAGE.

******************************************************************************/
/*$FreeBSD: head/sys/dev/e1000/if_em.c 220251 2011-04-01 18:48:31Z jfv $*/

#ifdef HAVE_KERNEL_OPTION_HEADERS
#include "opt_device_polling.h"
#include "opt_inet.h"
#endif

#include <sys/param.h>
#include <sys/systm.h>
#if __FreeBSD_version >= 800000
#include <sys/buf_ring.h>
#endif
#include <sys/bus.h>
#include <sys/endian.h>
#include <sys/kernel.h>
#include <sys/kthread.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/module.h>
#include <sys/rman.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/sysctl.h>
#include <sys/taskqueue.h>
#include <sys/eventhandler.h>
#include <machine/bus.h>
#include <machine/resource.h>

#include <net/bpf.h>
#include <net/ethernet.h>
#include <net/if.h>
#include <net/if_arp.h>
#include <net/if_dl.h>
#include <net/if_media.h>

#include <net/if_types.h>
#include <net/if_vlan_var.h>

#include <netinet/in_systm.h>
#include <netinet/in.h>
#include <netinet/if_ether.h>
#include <netinet/ip.h>
#include <netinet/ip6.h>
#include <netinet/tcp.h>
#include <netinet/udp.h>

#include <machine/in_cksum.h>
#include <dev/led/led.h>
#include <dev/pci/pcivar.h>
#include <dev/pci/pcireg.h>

#include "e1000_api.h"
#include "e1000_82571.h"
#include "if_em.h"

/*********************************************************************
 *  Set this to one to display debug statistics
 *********************************************************************/
int	em_display_debug_stats = 0;

/*********************************************************************
 *  Driver version:
 *********************************************************************/
char em_driver_version[] = "7.2.3";

/*********************************************************************
 *  PCI Device ID Table
 *
 *  Used by probe to select which devices the driver will load on.
 *  The last field stores an index into e1000_strings.
 *  The last entry must be all 0s.
 *
 *  { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
 *********************************************************************/

static em_vendor_info_t em_vendor_info_array[] =
{
	/* Intel(R) PRO/1000 Network Connection */
	{ 0x8086, E1000_DEV_ID_82571EB_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_SERDES_DUAL,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_SERDES_QUAD,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER_LP,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_FIBER,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571PT_QUAD_COPPER,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82572EI_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82572EI_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82572EI_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82572EI,		PCI_ANY_ID, PCI_ANY_ID, 0},

	{ 0x8086, E1000_DEV_ID_82573E,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82573E_IAMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82573L,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82583V,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_SPT,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_SPT,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_DPT,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_DPT,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IGP_M_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IGP_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IGP_C,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IFE,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IFE_GT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IFE_G,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IGP_M,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_82567V_3,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IGP_M_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IGP_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IGP_C,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IGP_M,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IGP_M_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IFE,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IFE_GT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IFE_G,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_BM,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82574L,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82574LA,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_R_BM_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_R_BM_LF,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_R_BM_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_D_BM_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_D_BM_LF,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_D_BM_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_M_HV_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_M_HV_LC,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_D_HV_DM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_D_HV_DC,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH2_LV_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH2_LV_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
	/* required last entry */
	{ 0, 0, 0, 0, 0}
};

/*********************************************************************
 *  Table of branding strings for all supported NICs.
 *********************************************************************/

static char *em_strings[] = {
	"Intel(R) PRO/1000 Network Connection"
};

/*********************************************************************
 *  Function prototypes
 *********************************************************************/
static int	em_probe(device_t);
static int	em_attach(device_t);
static int	em_detach(device_t);
static int	em_shutdown(device_t);
static int	em_suspend(device_t);
static int	em_resume(device_t);
static void	em_start(struct ifnet *);
static void	em_start_locked(struct ifnet *, struct tx_ring *);
#ifdef EM_MULTIQUEUE
static int	em_mq_start(struct ifnet *, struct mbuf *);
static int	em_mq_start_locked(struct ifnet *,
		    struct tx_ring *, struct mbuf *);
static void	em_qflush(struct ifnet *);
#endif
static int	em_ioctl(struct ifnet *, u_long, caddr_t);
static void	em_init(void *);
static void	em_init_locked(struct adapter *);
static void	em_stop(void *);
static void	em_media_status(struct ifnet *, struct ifmediareq *);
static int	em_media_change(struct ifnet *);
static void	em_identify_hardware(struct adapter *);
static int	em_allocate_pci_resources(struct adapter *);
static int	em_allocate_legacy(struct adapter *);
static int	em_allocate_msix(struct adapter *);
static int	em_allocate_queues(struct adapter *);
static int	em_setup_msix(struct adapter *);
static void	em_free_pci_resources(struct adapter *);
static void	em_local_timer(void *);
static void	em_reset(struct adapter *);
static int	em_setup_interface(device_t, struct adapter *);

static void	em_setup_transmit_structures(struct adapter *);
static void	em_initialize_transmit_unit(struct adapter *);
static int	em_allocate_transmit_buffers(struct tx_ring *);
static void	em_free_transmit_structures(struct adapter *);
static void	em_free_transmit_buffers(struct tx_ring *);

static int	em_setup_receive_structures(struct adapter *);
static int	em_allocate_receive_buffers(struct rx_ring *);
static void	em_initialize_receive_unit(struct adapter *);
static void	em_free_receive_structures(struct adapter *);
static void	em_free_receive_buffers(struct rx_ring *);

static void	em_enable_intr(struct adapter *);
static void	em_disable_intr(struct adapter *);
static void	em_update_stats_counters(struct adapter *);
static void	em_add_hw_stats(struct adapter *adapter);
static bool	em_txeof(struct tx_ring *);
static bool	em_rxeof(struct rx_ring *, int, int *);
#ifndef __NO_STRICT_ALIGNMENT
static int	em_fixup_rx(struct rx_ring *);
#endif
static void	em_receive_checksum(struct e1000_rx_desc *, struct mbuf *);
static void	em_transmit_checksum_setup(struct tx_ring *, struct mbuf *, int,
		    struct ip *, u32 *, u32 *);
static void	em_tso_setup(struct tx_ring *, struct mbuf *, int, struct ip *,
		    struct tcphdr *, u32 *, u32 *);
static void	em_set_promisc(struct adapter *);
static void	em_disable_promisc(struct adapter *);
static void	em_set_multi(struct adapter *);
static void	em_update_link_status(struct adapter *);
static void	em_refresh_mbufs(struct rx_ring *, int);
static void	em_register_vlan(void *, struct ifnet *, u16);
static void	em_unregister_vlan(void *, struct ifnet *, u16);
static void	em_setup_vlan_hw_support(struct adapter *);
static int	em_xmit(struct tx_ring *, struct mbuf **);
static int	em_dma_malloc(struct adapter *, bus_size_t,
		    struct em_dma_alloc *, int);
static void	em_dma_free(struct adapter *, struct em_dma_alloc *);
static int	em_sysctl_nvm_info(SYSCTL_HANDLER_ARGS);
static void	em_print_nvm_info(struct adapter *);
static int	em_sysctl_debug_info(SYSCTL_HANDLER_ARGS);
static void	em_print_debug_info(struct adapter *);
static int	em_is_valid_ether_addr(u8 *);
static int	em_sysctl_int_delay(SYSCTL_HANDLER_ARGS);
static void	em_add_int_delay_sysctl(struct adapter *, const char *,
		    const char *, struct em_int_delay_info *, int, int);
/* Management and WOL Support */
static void	em_init_manageability(struct adapter *);
static void	em_release_manageability(struct adapter *);
static void	em_get_hw_control(struct adapter *);
static void	em_release_hw_control(struct adapter *);
static void	em_get_wakeup(device_t);
static void	em_enable_wakeup(device_t);
static int	em_enable_phy_wakeup(struct adapter *);
static void	em_led_func(void *, int);
static void	em_disable_aspm(struct adapter *);

static int	em_irq_fast(void *);

/* MSIX handlers */
static void	em_msix_tx(void *);
static void	em_msix_rx(void *);
static void	em_msix_link(void *);
static void	em_handle_tx(void *context, int pending);
static void	em_handle_rx(void *context, int pending);
static void	em_handle_link(void *context, int pending);

static void	em_set_sysctl_value(struct adapter *, const char *,
		    const char *, int *, int);

static __inline void em_rx_discard(struct rx_ring *, int);

#ifdef DEVICE_POLLING
static poll_handler_t em_poll;
#endif /* DEVICE_POLLING */

/*********************************************************************
 *  FreeBSD Device Interface Entry Points
 *********************************************************************/

static device_method_t em_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe, em_probe),
	DEVMETHOD(device_attach, em_attach),
	DEVMETHOD(device_detach, em_detach),
	DEVMETHOD(device_shutdown, em_shutdown),
	DEVMETHOD(device_suspend, em_suspend),
	DEVMETHOD(device_resume, em_resume),
	{0, 0}
};

static driver_t em_driver = {
	"em", em_methods, sizeof(struct adapter),
};

devclass_t em_devclass;
DRIVER_MODULE(em, pci, em_driver, em_devclass, 0, 0);
MODULE_DEPEND(em, pci, 1, 1, 1);
MODULE_DEPEND(em, ether, 1, 1, 1);
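/*
 * DRIVER_MODULE() registers the "em" driver with the pci bus, and the
 * MODULE_DEPEND() entries (min/preferred/max version all 1) ensure the
 * pci and ether modules are present before this driver loads.
 */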

/*********************************************************************
 *  Tunable default values.
 *********************************************************************/

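/*
 * The hardware interrupt delay registers count in units of 1.024 usecs,
 * so the macros below convert between register ticks and microseconds
 * with rounding, e.g. EM_USECS_TO_TICKS(100) = (1000*100 + 512) / 1024
 * = 98 ticks.
 */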
#define EM_TICKS_TO_USECS(ticks)	((1024 * (ticks) + 500) / 1000)
#define EM_USECS_TO_TICKS(usecs)	((1000 * (usecs) + 512) / 1024)
#define M_TSO_LEN			66

/* Allow common code without TSO */
#ifndef CSUM_TSO
#define CSUM_TSO	0
#endif

static int em_tx_int_delay_dflt = EM_TICKS_TO_USECS(EM_TIDV);
static int em_rx_int_delay_dflt = EM_TICKS_TO_USECS(EM_RDTR);
TUNABLE_INT("hw.em.tx_int_delay", &em_tx_int_delay_dflt);
TUNABLE_INT("hw.em.rx_int_delay", &em_rx_int_delay_dflt);

static int em_tx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_TADV);
static int em_rx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_RADV);
TUNABLE_INT("hw.em.tx_abs_int_delay", &em_tx_abs_int_delay_dflt);
TUNABLE_INT("hw.em.rx_abs_int_delay", &em_rx_abs_int_delay_dflt);

static int em_rxd = EM_DEFAULT_RXD;
static int em_txd = EM_DEFAULT_TXD;
TUNABLE_INT("hw.em.rxd", &em_rxd);
TUNABLE_INT("hw.em.txd", &em_txd);
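/*
 * All of the above tunables can be set at boot time in loader.conf;
 * the values here are only an illustration:
 *   hw.em.rxd="1024"
 *   hw.em.txd="1024"
 */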

static int em_smart_pwr_down = FALSE;
TUNABLE_INT("hw.em.smart_pwr_down", &em_smart_pwr_down);

/* Controls whether promiscuous mode also shows bad packets */
static int em_debug_sbp = FALSE;
TUNABLE_INT("hw.em.sbp", &em_debug_sbp);

static int em_enable_msix = TRUE;
TUNABLE_INT("hw.em.enable_msix", &em_enable_msix);

/* How many packets rxeof tries to clean at a time */
static int em_rx_process_limit = 100;
TUNABLE_INT("hw.em.rx_process_limit", &em_rx_process_limit);

/* Flow control setting - default to FULL */
static int em_fc_setting = e1000_fc_full;
TUNABLE_INT("hw.em.fc_setting", &em_fc_setting);
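/*
 * The value maps onto the shared-code flow control enum; in that code
 * 0 = none, 1 = rx pause, 2 = tx pause and 3 = full (the default here).
 */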

/* Energy Efficient Ethernet (EEE) - default to OFF */
static int eee_setting = 0;
TUNABLE_INT("hw.em.eee_setting", &eee_setting);

/* Global used in WOL setup with multiport cards */
static int global_quad_port_a = 0;

/*********************************************************************
 *  Device identification routine
 *
 *  em_probe determines if the driver should be loaded on an
 *  adapter based on the PCI vendor/device ID of the adapter.
 *
 *  return BUS_PROBE_DEFAULT on success, positive on failure
 *********************************************************************/

static int
em_probe(device_t dev)
{
	char		adapter_name[60];
	u16		pci_vendor_id = 0;
	u16		pci_device_id = 0;
	u16		pci_subvendor_id = 0;
	u16		pci_subdevice_id = 0;
	em_vendor_info_t *ent;

	INIT_DEBUGOUT("em_probe: begin");

	pci_vendor_id = pci_get_vendor(dev);
	if (pci_vendor_id != EM_VENDOR_ID)
		return (ENXIO);

	pci_device_id = pci_get_device(dev);
	pci_subvendor_id = pci_get_subvendor(dev);
	pci_subdevice_id = pci_get_subdevice(dev);

	ent = em_vendor_info_array;
	while (ent->vendor_id != 0) {
		if ((pci_vendor_id == ent->vendor_id) &&
		    (pci_device_id == ent->device_id) &&

		    ((pci_subvendor_id == ent->subvendor_id) ||
		    (ent->subvendor_id == PCI_ANY_ID)) &&

		    ((pci_subdevice_id == ent->subdevice_id) ||
		    (ent->subdevice_id == PCI_ANY_ID))) {
			sprintf(adapter_name, "%s %s",
				em_strings[ent->index],
				em_driver_version);
			device_set_desc_copy(dev, adapter_name);
			return (BUS_PROBE_DEFAULT);
		}
		ent++;
	}

	return (ENXIO);
}

/*********************************************************************
 *  Device initialization routine
 *
 *  The attach entry point is called when the driver is being loaded.
 *  This routine identifies the type of hardware, allocates all resources
 *  and initializes the hardware.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/

static int
em_attach(device_t dev)
{
	struct adapter	*adapter;
	struct e1000_hw	*hw;
	int		error = 0;

	INIT_DEBUGOUT("em_attach: begin");

	adapter = device_get_softc(dev);
	adapter->dev = adapter->osdep.dev = dev;
	hw = &adapter->hw;
	EM_CORE_LOCK_INIT(adapter, device_get_nameunit(dev));

	/* SYSCTL stuff */
	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
	    OID_AUTO, "nvm", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
	    em_sysctl_nvm_info, "I", "NVM Information");

	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
	    OID_AUTO, "debug", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
	    em_sysctl_debug_info, "I", "Debug Information");

	callout_init_mtx(&adapter->timer, &adapter->core_mtx, 0);

	/* Determine hardware and mac info */
	em_identify_hardware(adapter);

	/* Setup PCI resources */
	if (em_allocate_pci_resources(adapter)) {
		device_printf(dev, "Allocation of PCI resources failed\n");
		error = ENXIO;
		goto err_pci;
	}

	/*
	** For ICH8 and family we need to
	** map the flash memory, and this
	** must happen after the MAC is
	** identified
	*/
	if ((hw->mac.type == e1000_ich8lan) ||
	    (hw->mac.type == e1000_ich9lan) ||
	    (hw->mac.type == e1000_ich10lan) ||
	    (hw->mac.type == e1000_pchlan) ||
	    (hw->mac.type == e1000_pch2lan)) {
		int rid = EM_BAR_TYPE_FLASH;
		adapter->flash = bus_alloc_resource_any(dev,
		    SYS_RES_MEMORY, &rid, RF_ACTIVE);
		if (adapter->flash == NULL) {
			device_printf(dev, "Mapping of Flash failed\n");
			error = ENXIO;
			goto err_pci;
		}
		/* This is used in the shared code */
		hw->flash_address = (u8 *)adapter->flash;
		adapter->osdep.flash_bus_space_tag =
		    rman_get_bustag(adapter->flash);
		adapter->osdep.flash_bus_space_handle =
		    rman_get_bushandle(adapter->flash);
	}

	/* Do Shared Code initialization */
	if (e1000_setup_init_funcs(hw, TRUE)) {
		device_printf(dev, "Setup of Shared code failed\n");
		error = ENXIO;
		goto err_pci;
	}

	e1000_get_bus_info(hw);

	/* Set up some sysctls for the tunable interrupt delays */
	em_add_int_delay_sysctl(adapter, "rx_int_delay",
	    "receive interrupt delay in usecs", &adapter->rx_int_delay,
	    E1000_REGISTER(hw, E1000_RDTR), em_rx_int_delay_dflt);
	em_add_int_delay_sysctl(adapter, "tx_int_delay",
	    "transmit interrupt delay in usecs", &adapter->tx_int_delay,
	    E1000_REGISTER(hw, E1000_TIDV), em_tx_int_delay_dflt);
	em_add_int_delay_sysctl(adapter, "rx_abs_int_delay",
	    "receive interrupt delay limit in usecs",
	    &adapter->rx_abs_int_delay,
	    E1000_REGISTER(hw, E1000_RADV),
	    em_rx_abs_int_delay_dflt);
	em_add_int_delay_sysctl(adapter, "tx_abs_int_delay",
	    "transmit interrupt delay limit in usecs",
	    &adapter->tx_abs_int_delay,
	    E1000_REGISTER(hw, E1000_TADV),
	    em_tx_abs_int_delay_dflt);

	/* Sysctl for limiting the amount of work done in the taskqueue */
	em_set_sysctl_value(adapter, "rx_processing_limit",
	    "max number of rx packets to process", &adapter->rx_process_limit,
	    em_rx_process_limit);

	/* Sysctl for setting the interface flow control */
	em_set_sysctl_value(adapter, "flow_control",
	    "configure flow control",
	    &adapter->fc_setting, em_fc_setting);

	/*
	 * Validate the number of transmit and receive descriptors. They
	 * must not exceed the hardware maximum, and the ring size (at
	 * 16 bytes per descriptor) must be a multiple of EM_DBA_ALIGN.
	 */
	if (((em_txd * sizeof(struct e1000_tx_desc)) % EM_DBA_ALIGN) != 0 ||
	    (em_txd > EM_MAX_TXD) || (em_txd < EM_MIN_TXD)) {
		device_printf(dev, "Using %d TX descriptors instead of %d!\n",
		    EM_DEFAULT_TXD, em_txd);
		adapter->num_tx_desc = EM_DEFAULT_TXD;
	} else
		adapter->num_tx_desc = em_txd;

	if (((em_rxd * sizeof(struct e1000_rx_desc)) % EM_DBA_ALIGN) != 0 ||
	    (em_rxd > EM_MAX_RXD) || (em_rxd < EM_MIN_RXD)) {
		device_printf(dev, "Using %d RX descriptors instead of %d!\n",
		    EM_DEFAULT_RXD, em_rxd);
		adapter->num_rx_desc = EM_DEFAULT_RXD;
	} else
		adapter->num_rx_desc = em_rxd;

	hw->mac.autoneg = DO_AUTO_NEG;
	hw->phy.autoneg_wait_to_complete = FALSE;
	hw->phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;

	/* Copper options */
	if (hw->phy.media_type == e1000_media_type_copper) {
		hw->phy.mdix = AUTO_ALL_MODES;
		hw->phy.disable_polarity_correction = FALSE;
		hw->phy.ms_type = EM_MASTER_SLAVE;
	}

	/*
	 * Set the frame limits assuming
	 * standard ethernet sized frames.
	 */
	adapter->max_frame_size = ETHERMTU + ETHER_HDR_LEN + ETHERNET_FCS_SIZE;
	adapter->min_frame_size = ETH_ZLEN + ETHERNET_FCS_SIZE;
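	/* With the standard 1500-byte MTU these work out to 1518 and 64. */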

	/*
	 * This controls when hardware reports transmit completion
	 * status.
	 */
	hw->mac.report_tx_early = 1;

	/*
	** Get queue/ring memory
	*/
	if (em_allocate_queues(adapter)) {
		error = ENOMEM;
		goto err_pci;
	}

	/* Allocate multicast array memory. */
	adapter->mta = malloc(sizeof(u8) * ETH_ADDR_LEN *
	    MAX_NUM_MULTICAST_ADDRESSES, M_DEVBUF, M_NOWAIT);
	if (adapter->mta == NULL) {
		device_printf(dev, "Can not allocate multicast setup array\n");
		error = ENOMEM;
		goto err_late;
	}

	/* Check SOL/IDER usage */
	if (e1000_check_reset_block(hw))
		device_printf(dev, "PHY reset is blocked"
		    " due to SOL/IDER session.\n");

	/* Sysctl for setting Energy Efficient Ethernet */
	em_set_sysctl_value(adapter, "eee_control",
	    "enable Energy Efficient Ethernet",
	    &hw->dev_spec.ich8lan.eee_disable, eee_setting);

	/*
	** Start from a known state; this is
	** important for reading the NVM and
	** the MAC address from it.
	*/
	e1000_reset_hw(hw);

	/* Make sure we have a good EEPROM before we read from it */
	if (e1000_validate_nvm_checksum(hw) < 0) {
		/*
		** Some PCI-E parts fail the first check due to
		** the link being in a sleep state; call it again,
		** and if it fails a second time it's a real issue.
		*/
		if (e1000_validate_nvm_checksum(hw) < 0) {
			device_printf(dev,
			    "The EEPROM Checksum Is Not Valid\n");
			error = EIO;
			goto err_late;
		}
	}

	/* Copy the permanent MAC address out of the EEPROM */
	if (e1000_read_mac_addr(hw) < 0) {
		device_printf(dev, "EEPROM read error while reading MAC"
		    " address\n");
		error = EIO;
		goto err_late;
	}

	if (!em_is_valid_ether_addr(hw->mac.addr)) {
		device_printf(dev, "Invalid MAC address\n");
		error = EIO;
		goto err_late;
	}

	/*
	**  Do interrupt configuration
	*/
	if (adapter->msix > 1) /* Do MSIX */
		error = em_allocate_msix(adapter);
	else  /* MSI or Legacy */
		error = em_allocate_legacy(adapter);
	if (error)
		goto err_late;

	/*
	 * Get Wake-on-Lan and Management info for later use
	 */
	em_get_wakeup(dev);

	/* Setup OS specific network interface */
	if (em_setup_interface(dev, adapter) != 0)
		goto err_late;

	em_reset(adapter);

	/* Initialize statistics */
	em_update_stats_counters(adapter);

	hw->mac.get_link_status = 1;
	em_update_link_status(adapter);

	/* Register for VLAN events */
	adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
	    em_register_vlan, adapter, EVENTHANDLER_PRI_FIRST);
	adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
	    em_unregister_vlan, adapter, EVENTHANDLER_PRI_FIRST);

	em_add_hw_stats(adapter);

	/* Non-AMT based hardware can now take control from firmware */
	if (adapter->has_manage && !adapter->has_amt)
		em_get_hw_control(adapter);

	/* Tell the stack that the interface is not active */
	adapter->ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);

	adapter->led_dev = led_create(em_led_func, adapter,
	    device_get_nameunit(dev));

	INIT_DEBUGOUT("em_attach: end");

	return (0);

err_late:
	em_free_transmit_structures(adapter);
	em_free_receive_structures(adapter);
	em_release_hw_control(adapter);
	if (adapter->ifp != NULL)
		if_free(adapter->ifp);
err_pci:
	em_free_pci_resources(adapter);
	free(adapter->mta, M_DEVBUF);
	EM_CORE_LOCK_DESTROY(adapter);

	return (error);
}

/*********************************************************************
 *  Device removal routine
 *
 *  The detach entry point is called when the driver is being removed.
 *  This routine stops the adapter and deallocates all the resources
 *  that were allocated for driver operation.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/

static int
em_detach(device_t dev)
{
	struct adapter	*adapter = device_get_softc(dev);
	struct ifnet	*ifp = adapter->ifp;

	INIT_DEBUGOUT("em_detach: begin");

	/* Make sure VLANS are not using driver */
	if (adapter->ifp->if_vlantrunk != NULL) {
		device_printf(dev, "Vlan in use, detach first\n");
		return (EBUSY);
	}

#ifdef DEVICE_POLLING
	if (ifp->if_capenable & IFCAP_POLLING)
		ether_poll_deregister(ifp);
#endif

	if (adapter->led_dev != NULL)
		led_destroy(adapter->led_dev);

	EM_CORE_LOCK(adapter);
	adapter->in_detach = 1;
	em_stop(adapter);
	EM_CORE_UNLOCK(adapter);
	EM_CORE_LOCK_DESTROY(adapter);

	e1000_phy_hw_reset(&adapter->hw);

	em_release_manageability(adapter);
	em_release_hw_control(adapter);

	/* Unregister VLAN events */
	if (adapter->vlan_attach != NULL)
		EVENTHANDLER_DEREGISTER(vlan_config, adapter->vlan_attach);
	if (adapter->vlan_detach != NULL)
		EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach);

	ether_ifdetach(adapter->ifp);
	callout_drain(&adapter->timer);

	em_free_pci_resources(adapter);
	bus_generic_detach(dev);
	if_free(ifp);

	em_free_transmit_structures(adapter);
	em_free_receive_structures(adapter);

	em_release_hw_control(adapter);
	free(adapter->mta, M_DEVBUF);

	return (0);
}

/*********************************************************************
 *
 *  Shutdown entry point
 *
 **********************************************************************/

static int
em_shutdown(device_t dev)
{
	return em_suspend(dev);
}

/*
 * Suspend/resume device methods.
 */
static int
em_suspend(device_t dev)
{
	struct adapter *adapter = device_get_softc(dev);

	EM_CORE_LOCK(adapter);

	em_release_manageability(adapter);
	em_release_hw_control(adapter);
	em_enable_wakeup(dev);

	EM_CORE_UNLOCK(adapter);

	return bus_generic_suspend(dev);
}

static int
em_resume(device_t dev)
{
	struct adapter *adapter = device_get_softc(dev);
	struct ifnet *ifp = adapter->ifp;

	EM_CORE_LOCK(adapter);
	em_init_locked(adapter);
	em_init_manageability(adapter);
	EM_CORE_UNLOCK(adapter);
	em_start(ifp);

	return bus_generic_resume(dev);
}

819/*********************************************************************
820 *  Transmit entry point
821 *
822 *  em_start is called by the stack to initiate a transmit.
823 *  The driver will remain in this routine as long as there are
824 *  packets to transmit and transmit resources are available.
825 *  In case resources are not available stack is notified and
826 *  the packet is requeued.
827 **********************************************************************/

#ifdef EM_MULTIQUEUE
static int
em_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr, struct mbuf *m)
{
	struct adapter	*adapter = txr->adapter;
	struct mbuf	*next;
	int		err = 0, enq = 0;

	if ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) !=
	    IFF_DRV_RUNNING || adapter->link_active == 0) {
		if (m != NULL)
			err = drbr_enqueue(ifp, txr->br, m);
		return (err);
	}

	/* Call cleanup if number of TX descriptors low */
	if (txr->tx_avail <= EM_TX_CLEANUP_THRESHOLD)
		em_txeof(txr);

	enq = 0;
	if (m == NULL) {
		next = drbr_dequeue(ifp, txr->br);
	} else if (drbr_needs_enqueue(ifp, txr->br)) {
		if ((err = drbr_enqueue(ifp, txr->br, m)) != 0)
			return (err);
		next = drbr_dequeue(ifp, txr->br);
	} else
		next = m;

	/* Process the queue */
	while (next != NULL) {
		if ((err = em_xmit(txr, &next)) != 0) {
			if (next != NULL)
				err = drbr_enqueue(ifp, txr->br, next);
			break;
		}
		enq++;
		drbr_stats_update(ifp, next->m_pkthdr.len, next->m_flags);
		ETHER_BPF_MTAP(ifp, next);
		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
			break;
		if (txr->tx_avail < EM_MAX_SCATTER) {
			ifp->if_drv_flags |= IFF_DRV_OACTIVE;
			break;
		}
		next = drbr_dequeue(ifp, txr->br);
	}

	if (enq > 0) {
		/* Set the watchdog */
		txr->queue_status = EM_QUEUE_WORKING;
		txr->watchdog_time = ticks;
	}
	return (err);
}

/*
** Multiqueue capable stack interface
*/
static int
em_mq_start(struct ifnet *ifp, struct mbuf *m)
{
	struct adapter	*adapter = ifp->if_softc;
	struct tx_ring	*txr = adapter->tx_rings;
	int		error;

	if (EM_TX_TRYLOCK(txr)) {
		error = em_mq_start_locked(ifp, txr, m);
		EM_TX_UNLOCK(txr);
	} else
		error = drbr_enqueue(ifp, txr->br, m);

	return (error);
}

/*
** Flush all ring buffers
*/
static void
em_qflush(struct ifnet *ifp)
{
	struct adapter	*adapter = ifp->if_softc;
	struct tx_ring	*txr = adapter->tx_rings;
	struct mbuf	*m;

	for (int i = 0; i < adapter->num_queues; i++, txr++) {
		EM_TX_LOCK(txr);
		while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
			m_freem(m);
		EM_TX_UNLOCK(txr);
	}
	if_qflush(ifp);
}

#endif /* EM_MULTIQUEUE */

static void
em_start_locked(struct ifnet *ifp, struct tx_ring *txr)
{
	struct adapter	*adapter = ifp->if_softc;
	struct mbuf	*m_head;

	EM_TX_LOCK_ASSERT(txr);

	if ((ifp->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
	    IFF_DRV_RUNNING)
		return;

	if (!adapter->link_active)
		return;

	while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
		/* Call cleanup if number of TX descriptors low */
		if (txr->tx_avail <= EM_TX_CLEANUP_THRESHOLD)
			em_txeof(txr);
		if (txr->tx_avail < EM_MAX_SCATTER) {
			ifp->if_drv_flags |= IFF_DRV_OACTIVE;
			break;
		}
		IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
		if (m_head == NULL)
			break;
		/*
		 *  Encapsulation can modify our pointer, and/or make it
		 *  NULL on failure.  In that event, we can't requeue.
		 */
		if (em_xmit(txr, &m_head)) {
			if (m_head == NULL)
				break;
			ifp->if_drv_flags |= IFF_DRV_OACTIVE;
			IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
			break;
		}

		/* Send a copy of the frame to the BPF listener */
		ETHER_BPF_MTAP(ifp, m_head);

		/* Set timeout in case hardware has problems transmitting. */
		txr->watchdog_time = ticks;
		txr->queue_status = EM_QUEUE_WORKING;
	}

	return;
}

static void
em_start(struct ifnet *ifp)
{
	struct adapter	*adapter = ifp->if_softc;
	struct tx_ring	*txr = adapter->tx_rings;

	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
		EM_TX_LOCK(txr);
		em_start_locked(ifp, txr);
		EM_TX_UNLOCK(txr);
	}
	return;
}

/*********************************************************************
 *  Ioctl entry point
 *
 *  em_ioctl is called when the user wants to configure the
 *  interface.
 *
 *  return 0 on success, positive on failure
 **********************************************************************/

static int
em_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
{
	struct adapter	*adapter = ifp->if_softc;
	struct ifreq *ifr = (struct ifreq *)data;
#ifdef INET
	struct ifaddr *ifa = (struct ifaddr *)data;
#endif
	int error = 0;

	if (adapter->in_detach)
		return (error);

	switch (command) {
	case SIOCSIFADDR:
#ifdef INET
		if (ifa->ifa_addr->sa_family == AF_INET) {
			/*
			 * XXX
			 * Since resetting hardware takes a very long time
			 * and results in link renegotiation, we only
			 * initialize the hardware when it is absolutely
			 * required.
			 */
			ifp->if_flags |= IFF_UP;
			if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) {
				EM_CORE_LOCK(adapter);
				em_init_locked(adapter);
				EM_CORE_UNLOCK(adapter);
			}
			arp_ifinit(ifp, ifa);
		} else
#endif
			error = ether_ioctl(ifp, command, data);
		break;
	case SIOCSIFMTU:
	    {
		int max_frame_size;

		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)");

		EM_CORE_LOCK(adapter);
		switch (adapter->hw.mac.type) {
		case e1000_82571:
		case e1000_82572:
		case e1000_ich9lan:
		case e1000_ich10lan:
		case e1000_pch2lan:
		case e1000_82574:
		case e1000_80003es2lan:	/* 9K Jumbo Frame size */
			max_frame_size = 9234;
			break;
		case e1000_pchlan:
			max_frame_size = 4096;
			break;
			/* Adapters that do not support jumbo frames */
		case e1000_82583:
		case e1000_ich8lan:
			max_frame_size = ETHER_MAX_LEN;
			break;
		default:
			max_frame_size = MAX_JUMBO_FRAME_SIZE;
		}
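		/*
		 * The MTU excludes the Ethernet header and CRC, e.g. a
		 * 9234-byte max frame size allows an MTU of at most
		 * 9234 - 14 - 4 = 9216 bytes.
		 */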
		if (ifr->ifr_mtu > max_frame_size - ETHER_HDR_LEN -
		    ETHER_CRC_LEN) {
			EM_CORE_UNLOCK(adapter);
			error = EINVAL;
			break;
		}

		ifp->if_mtu = ifr->ifr_mtu;
		adapter->max_frame_size =
		    ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
		em_init_locked(adapter);
		EM_CORE_UNLOCK(adapter);
		break;
	    }
	case SIOCSIFFLAGS:
		IOCTL_DEBUGOUT("ioctl rcv'd:\
		    SIOCSIFFLAGS (Set Interface Flags)");
		EM_CORE_LOCK(adapter);
		if (ifp->if_flags & IFF_UP) {
			if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) {
				if ((ifp->if_flags ^ adapter->if_flags) &
				    (IFF_PROMISC | IFF_ALLMULTI)) {
					em_disable_promisc(adapter);
					em_set_promisc(adapter);
				}
			} else
				em_init_locked(adapter);
		} else
			if (ifp->if_drv_flags & IFF_DRV_RUNNING)
				em_stop(adapter);
		adapter->if_flags = ifp->if_flags;
		EM_CORE_UNLOCK(adapter);
		break;
	case SIOCADDMULTI:
	case SIOCDELMULTI:
		IOCTL_DEBUGOUT("ioctl rcv'd: SIOC(ADD|DEL)MULTI");
		if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
			EM_CORE_LOCK(adapter);
			em_disable_intr(adapter);
			em_set_multi(adapter);
#ifdef DEVICE_POLLING
			if (!(ifp->if_capenable & IFCAP_POLLING))
#endif
				em_enable_intr(adapter);
			EM_CORE_UNLOCK(adapter);
		}
		break;
	case SIOCSIFMEDIA:
		/*
		** As the speed/duplex settings are being
		** changed, we need to reset the PHY.
		*/
		adapter->hw.phy.reset_disable = FALSE;
		/* Check SOL/IDER usage */
		EM_CORE_LOCK(adapter);
		if (e1000_check_reset_block(&adapter->hw)) {
			EM_CORE_UNLOCK(adapter);
			device_printf(adapter->dev, "Media change is"
			    " blocked due to SOL/IDER session.\n");
			break;
		}
		EM_CORE_UNLOCK(adapter);
		/* falls thru */
	case SIOCGIFMEDIA:
		IOCTL_DEBUGOUT("ioctl rcv'd: \
		    SIOCxIFMEDIA (Get/Set Interface Media)");
		error = ifmedia_ioctl(ifp, ifr, &adapter->media, command);
		break;
	case SIOCSIFCAP:
	    {
		int mask, reinit;

		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFCAP (Set Capabilities)");
		reinit = 0;
		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
#ifdef DEVICE_POLLING
		if (mask & IFCAP_POLLING) {
			if (ifr->ifr_reqcap & IFCAP_POLLING) {
				error = ether_poll_register(em_poll, ifp);
				if (error)
					return (error);
				EM_CORE_LOCK(adapter);
				em_disable_intr(adapter);
				ifp->if_capenable |= IFCAP_POLLING;
				EM_CORE_UNLOCK(adapter);
			} else {
				error = ether_poll_deregister(ifp);
				/* Enable interrupt even in error case */
				EM_CORE_LOCK(adapter);
				em_enable_intr(adapter);
				ifp->if_capenable &= ~IFCAP_POLLING;
				EM_CORE_UNLOCK(adapter);
			}
		}
#endif
		if (mask & IFCAP_HWCSUM) {
			ifp->if_capenable ^= IFCAP_HWCSUM;
			reinit = 1;
		}
		if (mask & IFCAP_TSO4) {
			ifp->if_capenable ^= IFCAP_TSO4;
			reinit = 1;
		}
		if (mask & IFCAP_VLAN_HWTAGGING) {
			ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
			reinit = 1;
		}
		if (mask & IFCAP_VLAN_HWFILTER) {
			ifp->if_capenable ^= IFCAP_VLAN_HWFILTER;
			reinit = 1;
		}
		if ((mask & IFCAP_WOL) &&
		    (ifp->if_capabilities & IFCAP_WOL) != 0) {
			if (mask & IFCAP_WOL_MCAST)
				ifp->if_capenable ^= IFCAP_WOL_MCAST;
			if (mask & IFCAP_WOL_MAGIC)
				ifp->if_capenable ^= IFCAP_WOL_MAGIC;
		}
		if (reinit && (ifp->if_drv_flags & IFF_DRV_RUNNING))
			em_init(adapter);
		VLAN_CAPABILITIES(ifp);
		break;
	    }

	default:
		error = ether_ioctl(ifp, command, data);
		break;
	}

	return (error);
}

/*********************************************************************
 *  Init entry point
 *
 *  This routine is used in two ways. It is used by the stack as
 *  the init entry point in the network interface structure. It is
 *  also used by the driver as a hw/sw initialization routine to
 *  get to a consistent state.
 *
 **********************************************************************/

static void
em_init_locked(struct adapter *adapter)
{
	struct ifnet	*ifp = adapter->ifp;
	device_t	dev = adapter->dev;
	u32		pba;

	INIT_DEBUGOUT("em_init: begin");

	EM_CORE_LOCK_ASSERT(adapter);

	em_disable_intr(adapter);
	callout_stop(&adapter->timer);

	/*
	 * Packet Buffer Allocation (PBA)
	 * Writing PBA sets the receive portion of the buffer;
	 * the remainder is used for the transmit buffer.
	 */
	switch (adapter->hw.mac.type) {
	/* Total Packet Buffer on these is 48K */
	case e1000_82571:
	case e1000_82572:
	case e1000_80003es2lan:
		pba = E1000_PBA_32K; /* 32K for Rx, 16K for Tx */
		break;
	case e1000_82573: /* 82573: Total Packet Buffer is 32K */
		pba = E1000_PBA_12K; /* 12K for Rx, 20K for Tx */
		break;
	case e1000_82574:
	case e1000_82583:
		pba = E1000_PBA_20K; /* 20K for Rx, 20K for Tx */
		break;
	case e1000_ich8lan:
		pba = E1000_PBA_8K;
		break;
	case e1000_ich9lan:
	case e1000_ich10lan:
		pba = E1000_PBA_10K;
		break;
	case e1000_pchlan:
	case e1000_pch2lan:
		pba = E1000_PBA_26K;
		break;
	default:
		if (adapter->max_frame_size > 8192)
			pba = E1000_PBA_40K; /* 40K for Rx, 24K for Tx */
		else
			pba = E1000_PBA_48K; /* 48K for Rx, 16K for Tx */
	}

	INIT_DEBUGOUT1("em_init: pba=%dK", pba);
	E1000_WRITE_REG(&adapter->hw, E1000_PBA, pba);

	/* Get the latest MAC address; the user may have set an LAA */
	bcopy(IF_LLADDR(adapter->ifp), adapter->hw.mac.addr,
	    ETHER_ADDR_LEN);

	/* Put the address into the Receive Address Array */
	e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);

	/*
	 * With the 82571 adapter, RAR[0] may be overwritten
	 * when the other port is reset, so we make a duplicate
	 * in the last RAR entry for that eventuality; this
	 * assures the interface continues to function.
	 */
	if (adapter->hw.mac.type == e1000_82571) {
		e1000_set_laa_state_82571(&adapter->hw, TRUE);
		e1000_rar_set(&adapter->hw, adapter->hw.mac.addr,
		    E1000_RAR_ENTRIES - 1);
	}

	/* Initialize the hardware */
	em_reset(adapter);
	em_update_link_status(adapter);

	/* Setup VLAN support, basic and offload if available */
	E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);

	/* Set hardware offload abilities */
	ifp->if_hwassist = 0;
	if (ifp->if_capenable & IFCAP_TXCSUM)
		ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
	if (ifp->if_capenable & IFCAP_TSO4)
		ifp->if_hwassist |= CSUM_TSO;

	/* Configure for OS presence */
	em_init_manageability(adapter);

	/* Prepare transmit descriptors and buffers */
	em_setup_transmit_structures(adapter);
	em_initialize_transmit_unit(adapter);

	/* Setup Multicast table */
	em_set_multi(adapter);

	/*
	** Figure out the desired mbuf
	** pool for doing jumbos
	*/
	if (adapter->max_frame_size <= 2048)
		adapter->rx_mbuf_sz = MCLBYTES;
	else if (adapter->max_frame_size <= 4096)
		adapter->rx_mbuf_sz = MJUMPAGESIZE;
	else
		adapter->rx_mbuf_sz = MJUM9BYTES;
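	/*
	 * For example, a 9000-byte MTU gives a max_frame_size of 9018
	 * bytes, which selects the 9KB jumbo cluster pool above.
	 */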

	/* Prepare receive descriptors and buffers */
	if (em_setup_receive_structures(adapter)) {
		device_printf(dev, "Could not setup receive structures\n");
		em_stop(adapter);
		return;
	}
	em_initialize_receive_unit(adapter);

	/* Use real VLAN Filter support? */
	if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) {
		if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
			/* Use real VLAN Filter support */
			em_setup_vlan_hw_support(adapter);
		else {
			u32 ctrl;
			ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
			ctrl |= E1000_CTRL_VME;
			E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
		}
	}

	/* Don't lose promiscuous settings */
	em_set_promisc(adapter);

	ifp->if_drv_flags |= IFF_DRV_RUNNING;
	ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;

	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
	e1000_clear_hw_cntrs_base_generic(&adapter->hw);

	/* MSI/X configuration for 82574 */
	if (adapter->hw.mac.type == e1000_82574) {
		int tmp;
		tmp = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
		tmp |= E1000_CTRL_EXT_PBA_CLR;
		E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, tmp);
		/* Set the IVAR - interrupt vector routing. */
		E1000_WRITE_REG(&adapter->hw, E1000_IVAR, adapter->ivars);
	}

#ifdef DEVICE_POLLING
	/*
	 * Only enable interrupts if we are not polling, make sure
	 * they are off otherwise.
	 */
	if (ifp->if_capenable & IFCAP_POLLING)
		em_disable_intr(adapter);
	else
#endif /* DEVICE_POLLING */
		em_enable_intr(adapter);

	/* AMT based hardware can now take control from firmware */
	if (adapter->has_manage && adapter->has_amt)
		em_get_hw_control(adapter);

	/* Don't reset the phy next time init gets called */
	adapter->hw.phy.reset_disable = TRUE;
}

static void
em_init(void *arg)
{
	struct adapter *adapter = arg;

	EM_CORE_LOCK(adapter);
	em_init_locked(adapter);
	EM_CORE_UNLOCK(adapter);
}

#ifdef DEVICE_POLLING
/*********************************************************************
 *
 *  Legacy polling routine: note this only works with a single queue
 *
 *********************************************************************/
static int
em_poll(struct ifnet *ifp, enum poll_cmd cmd, int count)
{
	struct adapter *adapter = ifp->if_softc;
	struct tx_ring	*txr = adapter->tx_rings;
	struct rx_ring	*rxr = adapter->rx_rings;
	u32		reg_icr;
	int		rx_done;

	EM_CORE_LOCK(adapter);
	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
		EM_CORE_UNLOCK(adapter);
		return (0);
	}

	if (cmd == POLL_AND_CHECK_STATUS) {
		reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
		if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
			callout_stop(&adapter->timer);
			adapter->hw.mac.get_link_status = 1;
			em_update_link_status(adapter);
			callout_reset(&adapter->timer, hz,
			    em_local_timer, adapter);
		}
	}
	EM_CORE_UNLOCK(adapter);

	em_rxeof(rxr, count, &rx_done);

	EM_TX_LOCK(txr);
	em_txeof(txr);
#ifdef EM_MULTIQUEUE
	if (!drbr_empty(ifp, txr->br))
		em_mq_start_locked(ifp, txr, NULL);
#else
	em_start_locked(ifp, txr);
#endif
	EM_TX_UNLOCK(txr);

	return (rx_done);
}
#endif /* DEVICE_POLLING */

/*********************************************************************
 *
 *  Fast Legacy/MSI Combined Interrupt Service routine
 *
 *********************************************************************/
static int
em_irq_fast(void *arg)
{
	struct adapter	*adapter = arg;
	struct ifnet	*ifp;
	u32		reg_icr;

	ifp = adapter->ifp;

	reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);

	/* Hot eject?  */
	if (reg_icr == 0xffffffff)
		return FILTER_STRAY;

	/* Definitely not our interrupt.  */
	if (reg_icr == 0x0)
		return FILTER_STRAY;

	/*
	 * Starting with the 82571 chip, bit 31 should be used to
	 * determine whether the interrupt belongs to us.
	 */
	if (adapter->hw.mac.type >= e1000_82571 &&
	    (reg_icr & E1000_ICR_INT_ASSERTED) == 0)
		return FILTER_STRAY;

	em_disable_intr(adapter);
	taskqueue_enqueue(adapter->tq, &adapter->que_task);

	/* Link status change */
	if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
		adapter->hw.mac.get_link_status = 1;
		taskqueue_enqueue(taskqueue_fast, &adapter->link_task);
	}

	if (reg_icr & E1000_ICR_RXO)
		adapter->rx_overruns++;
	return FILTER_HANDLED;
}

/* Combined RX/TX handler, used by Legacy and MSI */
static void
em_handle_que(void *context, int pending)
{
	struct adapter	*adapter = context;
	struct ifnet	*ifp = adapter->ifp;
	struct tx_ring	*txr = adapter->tx_rings;
	struct rx_ring	*rxr = adapter->rx_rings;

	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
		bool more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
		EM_TX_LOCK(txr);
		em_txeof(txr);
#ifdef EM_MULTIQUEUE
		if (!drbr_empty(ifp, txr->br))
			em_mq_start_locked(ifp, txr, NULL);
#else
		em_start_locked(ifp, txr);
#endif
		EM_TX_UNLOCK(txr);
		if (more || (ifp->if_drv_flags & IFF_DRV_OACTIVE)) {
			taskqueue_enqueue(adapter->tq, &adapter->que_task);
			return;
		}
	}

	em_enable_intr(adapter);
	return;
}

/*********************************************************************
 *
 *  MSIX Interrupt Service Routines
 *
 **********************************************************************/
static void
em_msix_tx(void *arg)
{
	struct tx_ring *txr = arg;
	struct adapter *adapter = txr->adapter;
	bool		more;

	++txr->tx_irq;
	EM_TX_LOCK(txr);
	more = em_txeof(txr);
	EM_TX_UNLOCK(txr);
	if (more)
		taskqueue_enqueue(txr->tq, &txr->tx_task);
	else
		/* Reenable this interrupt */
		E1000_WRITE_REG(&adapter->hw, E1000_IMS, txr->ims);
	return;
}

/*********************************************************************
 *
 *  MSIX RX Interrupt Service routine
 *
 **********************************************************************/

static void
em_msix_rx(void *arg)
{
	struct rx_ring	*rxr = arg;
	struct adapter	*adapter = rxr->adapter;
	bool		more;

	++rxr->rx_irq;
	more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
	if (more)
		taskqueue_enqueue(rxr->tq, &rxr->rx_task);
	else
		/* Reenable this interrupt */
		E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxr->ims);
	return;
}

/*********************************************************************
 *
 *  MSIX Link Fast Interrupt Service routine
 *
 **********************************************************************/
static void
em_msix_link(void *arg)
{
	struct adapter	*adapter = arg;
	u32		reg_icr;

	++adapter->link_irq;
	reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);

	if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
		adapter->hw.mac.get_link_status = 1;
		em_handle_link(adapter, 0);
	} else
		E1000_WRITE_REG(&adapter->hw, E1000_IMS,
		    EM_MSIX_LINK | E1000_IMS_LSC);
	return;
}

static void
em_handle_rx(void *context, int pending)
{
	struct rx_ring	*rxr = context;
	struct adapter	*adapter = rxr->adapter;
	bool		more;

	more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
	if (more)
		taskqueue_enqueue(rxr->tq, &rxr->rx_task);
	else
		/* Reenable this interrupt */
		E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxr->ims);
}

static void
em_handle_tx(void *context, int pending)
{
	struct tx_ring	*txr = context;
	struct adapter	*adapter = txr->adapter;
	struct ifnet	*ifp = adapter->ifp;

	EM_TX_LOCK(txr);
	em_txeof(txr);
#ifdef EM_MULTIQUEUE
	if (!drbr_empty(ifp, txr->br))
		em_mq_start_locked(ifp, txr, NULL);
#else
	em_start_locked(ifp, txr);
#endif
	E1000_WRITE_REG(&adapter->hw, E1000_IMS, txr->ims);
	EM_TX_UNLOCK(txr);
}

static void
em_handle_link(void *context, int pending)
{
	struct adapter	*adapter = context;
	struct ifnet *ifp = adapter->ifp;

	if (!(ifp->if_drv_flags & IFF_DRV_RUNNING))
		return;

	EM_CORE_LOCK(adapter);
	callout_stop(&adapter->timer);
	em_update_link_status(adapter);
	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
	E1000_WRITE_REG(&adapter->hw, E1000_IMS,
	    EM_MSIX_LINK | E1000_IMS_LSC);
	EM_CORE_UNLOCK(adapter);
}

/*********************************************************************
 *
 *  Media Ioctl callback
 *
 *  This routine is called whenever the user queries the status of
 *  the interface using ifconfig.
 *
 **********************************************************************/
static void
em_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
{
	struct adapter *adapter = ifp->if_softc;
	u_char fiber_type = IFM_1000_SX;

	INIT_DEBUGOUT("em_media_status: begin");

	EM_CORE_LOCK(adapter);
	em_update_link_status(adapter);

	ifmr->ifm_status = IFM_AVALID;
	ifmr->ifm_active = IFM_ETHER;

	if (!adapter->link_active) {
		EM_CORE_UNLOCK(adapter);
		return;
	}

	ifmr->ifm_status |= IFM_ACTIVE;

	if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
	    (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
		ifmr->ifm_active |= fiber_type | IFM_FDX;
	} else {
		switch (adapter->link_speed) {
		case 10:
			ifmr->ifm_active |= IFM_10_T;
			break;
		case 100:
			ifmr->ifm_active |= IFM_100_TX;
			break;
		case 1000:
			ifmr->ifm_active |= IFM_1000_T;
			break;
		}
		if (adapter->link_duplex == FULL_DUPLEX)
			ifmr->ifm_active |= IFM_FDX;
		else
			ifmr->ifm_active |= IFM_HDX;
	}
	EM_CORE_UNLOCK(adapter);
}

/*********************************************************************
 *
 *  Media Ioctl callback
 *
 *  This routine is called when the user changes speed/duplex using
 *  the media/mediaopt option with ifconfig.
 *
 **********************************************************************/
static int
em_media_change(struct ifnet *ifp)
{
	struct adapter *adapter = ifp->if_softc;
	struct ifmedia  *ifm = &adapter->media;

	INIT_DEBUGOUT("em_media_change: begin");

	if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
		return (EINVAL);

	EM_CORE_LOCK(adapter);
	switch (IFM_SUBTYPE(ifm->ifm_media)) {
	case IFM_AUTO:
		adapter->hw.mac.autoneg = DO_AUTO_NEG;
		adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
		break;
	case IFM_1000_LX:
	case IFM_1000_SX:
	case IFM_1000_T:
		adapter->hw.mac.autoneg = DO_AUTO_NEG;
		adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
		break;
	case IFM_100_TX:
		adapter->hw.mac.autoneg = FALSE;
		adapter->hw.phy.autoneg_advertised = 0;
		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
			adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_FULL;
		else
			adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_HALF;
		break;
	case IFM_10_T:
		adapter->hw.mac.autoneg = FALSE;
		adapter->hw.phy.autoneg_advertised = 0;
		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
			adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_FULL;
		else
			adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_HALF;
		break;
	default:
		device_printf(adapter->dev, "Unsupported media type\n");
	}

	em_init_locked(adapter);
	EM_CORE_UNLOCK(adapter);

	return (0);
}

/*********************************************************************
 *
 *  This routine maps the mbufs to tx descriptors.
 *
 *  return 0 on success, positive on failure
 **********************************************************************/

static int
em_xmit(struct tx_ring *txr, struct mbuf **m_headp)
{
	struct adapter		*adapter = txr->adapter;
	bus_dma_segment_t	segs[EM_MAX_SCATTER];
	bus_dmamap_t		map;
	struct em_buffer	*tx_buffer, *tx_buffer_mapped;
	struct e1000_tx_desc	*ctxd = NULL;
	struct mbuf		*m_head;
	struct ether_header	*eh;
	struct ip		*ip = NULL;
	struct tcphdr		*tp = NULL;
	u32			txd_upper, txd_lower, txd_used, txd_saved;
	int			ip_off, poff;
	int			nsegs, i, j, first, last = 0;
	int			error, do_tso, tso_desc = 0;

	m_head = *m_headp;
	txd_upper = txd_lower = txd_used = txd_saved = 0;
	do_tso = ((m_head->m_pkthdr.csum_flags & CSUM_TSO) != 0);
	ip_off = poff = 0;

1771	/*
1772	 * Intel recommends entire IP/TCP header length reside in a single
1773	 * buffer. If multiple descriptors are used to describe the IP and
1774	 * TCP header, each descriptor should describe one or more
1775	 * complete headers; descriptors referencing only parts of headers
1776	 * are not supported. If all layer headers are not coalesced into
1777	 * a single buffer, each buffer should not cross a 4KB boundary,
1778	 * or be larger than the maximum read request size.
1779	 * The controller also requires the IP/TCP header to be modified for
1780	 * TSO to work, so we first get a writable mbuf chain and then
1781	 * coalesce the ethernet/IP/TCP headers into a single buffer to meet
1782	 * the controller's requirement. This also simplifies IP/TCP/UDP
1783	 * checksum offloading, which has similar restrictions.
1784	 */
1785	if (do_tso || m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD) {
1786		if (do_tso || (m_head->m_next != NULL &&
1787		    m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD)) {
1788			if (M_WRITABLE(*m_headp) == 0) {
1789				m_head = m_dup(*m_headp, M_DONTWAIT);
1790				m_freem(*m_headp);
1791				if (m_head == NULL) {
1792					*m_headp = NULL;
1793					return (ENOBUFS);
1794				}
1795				*m_headp = m_head;
1796			}
1797		}
1798		/*
1799		 * XXX
1800		 * Assume IPv4, we don't have TSO/checksum offload support
1801		 * for IPv6 yet.
1802		 */
1803		ip_off = sizeof(struct ether_header);
1804		m_head = m_pullup(m_head, ip_off);
1805		if (m_head == NULL) {
1806			*m_headp = NULL;
1807			return (ENOBUFS);
1808		}
1809		eh = mtod(m_head, struct ether_header *);
1810		if (eh->ether_type == htons(ETHERTYPE_VLAN)) {
1811			ip_off = sizeof(struct ether_vlan_header);
1812			m_head = m_pullup(m_head, ip_off);
1813			if (m_head == NULL) {
1814				*m_headp = NULL;
1815				return (ENOBUFS);
1816			}
1817		}
1818		m_head = m_pullup(m_head, ip_off + sizeof(struct ip));
1819		if (m_head == NULL) {
1820			*m_headp = NULL;
1821			return (ENOBUFS);
1822		}
1823		ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1824		poff = ip_off + (ip->ip_hl << 2);
1825		if (do_tso) {
1826			m_head = m_pullup(m_head, poff + sizeof(struct tcphdr));
1827			if (m_head == NULL) {
1828				*m_headp = NULL;
1829				return (ENOBUFS);
1830			}
1831			tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1832			/*
1833			 * TSO workaround: pull 4 more bytes of payload into the
1834			 * chain so the final descriptor can be split into a sentinel.
1835			 */
1836			m_head = m_pullup(m_head, poff + (tp->th_off << 2) + 4);
1837			if (m_head == NULL) {
1838				*m_headp = NULL;
1839				return (ENOBUFS);
1840			}
1841			ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1842			ip->ip_len = 0;
1843			ip->ip_sum = 0;
1844			/*
1845			 * The pseudo TCP checksum does not include the TCP payload
1846			 * length, so the driver must recompute it here in the form
1847			 * the hardware expects to see. This is in adherence to
1848			 * Microsoft's Large Send specification.
1849			 */
1850			tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1851			tp->th_sum = in_pseudo(ip->ip_src.s_addr,
1852			    ip->ip_dst.s_addr, htons(IPPROTO_TCP));
1853		} else if (m_head->m_pkthdr.csum_flags & CSUM_TCP) {
1854			m_head = m_pullup(m_head, poff + sizeof(struct tcphdr));
1855			if (m_head == NULL) {
1856				*m_headp = NULL;
1857				return (ENOBUFS);
1858			}
1859			tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1860			m_head = m_pullup(m_head, poff + (tp->th_off << 2));
1861			if (m_head == NULL) {
1862				*m_headp = NULL;
1863				return (ENOBUFS);
1864			}
1865			ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1866			tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1867		} else if (m_head->m_pkthdr.csum_flags & CSUM_UDP) {
1868			m_head = m_pullup(m_head, poff + sizeof(struct udphdr));
1869			if (m_head == NULL) {
1870				*m_headp = NULL;
1871				return (ENOBUFS);
1872			}
1873			ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1874		}
1875		*m_headp = m_head;
1876	}
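	/*
	 * Layout after coalescing (a sketch, assuming an untagged IPv4
	 * frame): the first mbuf now holds the ether + IP (+ TCP/UDP)
	 * headers contiguously, e.g. ip_off = 14 and, with a 20-byte IP
	 * header, poff = 34 where the transport header begins.
	 */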
1877
1878	/*
1879	 * Map the packet for DMA
1880	 *
1881	 * Capture the first descriptor index,
1882	 * this descriptor will have the index
1883	 * of the EOP which is the only one that
1884	 * now gets a DONE bit writeback.
1885	 */
1886	first = txr->next_avail_desc;
1887	tx_buffer = &txr->tx_buffers[first];
1888	tx_buffer_mapped = tx_buffer;
1889	map = tx_buffer->map;
1890
1891	error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
1892	    *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
1893
1894	/*
1895	 * There are two types of errors we can (try) to handle:
1896	 * - EFBIG means the mbuf chain was too long and bus_dma ran
1897	 *   out of segments.  Defragment the mbuf chain and try again.
1898	 * - ENOMEM means bus_dma could not obtain enough bounce buffers
1899	 *   at this point in time.  Defer sending and try again later.
1900	 * All other errors, in particular EINVAL, are fatal and prevent the
1901	 * mbuf chain from ever going through.  Drop it and report error.
1902	 */
1903	if (error == EFBIG) {
1904		struct mbuf *m;
1905
1906		m = m_defrag(*m_headp, M_DONTWAIT);
1907		if (m == NULL) {
1908			adapter->mbuf_alloc_failed++;
1909			m_freem(*m_headp);
1910			*m_headp = NULL;
1911			return (ENOBUFS);
1912		}
1913		*m_headp = m;
1914
1915		/* Try it again */
1916		error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
1917		    *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
1918
1919		if (error == ENOMEM) {
1920			adapter->no_tx_dma_setup++;
1921			return (error);
1922		} else if (error != 0) {
1923			adapter->no_tx_dma_setup++;
1924			m_freem(*m_headp);
1925			*m_headp = NULL;
1926			return (error);
1927		}
1928
1929	} else if (error == ENOMEM) {
1930		adapter->no_tx_dma_setup++;
1931		return (error);
1932	} else if (error != 0) {
1933		adapter->no_tx_dma_setup++;
1934		m_freem(*m_headp);
1935		*m_headp = NULL;
1936		return (error);
1937	}
1938
1939	/*
1940	 * TSO Hardware workaround, if this packet is not
1941	 * TSO, and is only a single descriptor long, and
1942	 * it follows a TSO burst, then we need to add a
1943	 * sentinel descriptor to prevent premature writeback.
1944	 */
1945	if ((do_tso == 0) && (txr->tx_tso == TRUE)) {
1946		if (nsegs == 1)
1947			tso_desc = TRUE;
1948		txr->tx_tso = FALSE;
1949	}
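	/*
	 * Sentinel example (illustrative): a single-segment packet
	 * following a TSO burst is later split into its payload minus
	 * 4 bytes plus a 4-byte sentinel descriptor, so the DONE
	 * writeback lands on the sentinel instead of firing early.
	 */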
1950
1951	if (nsegs > (txr->tx_avail - 2)) {
1952		txr->no_desc_avail++;
1953		bus_dmamap_unload(txr->txtag, map);
1954		return (ENOBUFS);
1955	}
1956	m_head = *m_headp;
1957
1958	/* Do hardware assists */
1959	if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
1960		em_tso_setup(txr, m_head, ip_off, ip, tp,
1961		    &txd_upper, &txd_lower);
1962		/* we need to make a final sentinel transmit desc */
1963		tso_desc = TRUE;
1964	} else if (m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD)
1965		em_transmit_checksum_setup(txr, m_head,
1966		    ip_off, ip, &txd_upper, &txd_lower);
1967
1968	i = txr->next_avail_desc;
1969
1970	/* Set up our transmit descriptors */
1971	for (j = 0; j < nsegs; j++) {
1972		bus_size_t seg_len;
1973		bus_addr_t seg_addr;
1974
1975		tx_buffer = &txr->tx_buffers[i];
1976		ctxd = &txr->tx_base[i];
1977		seg_addr = segs[j].ds_addr;
1978		seg_len  = segs[j].ds_len;
1979		/*
1980		** TSO Workaround:
1981		** If this is the last descriptor, we want to
1982		** split it so we have a small final sentinel
1983		*/
1984		if (tso_desc && (j == (nsegs -1)) && (seg_len > 8)) {
1985			seg_len -= 4;
1986			ctxd->buffer_addr = htole64(seg_addr);
1987			ctxd->lower.data = htole32(
1988			adapter->txd_cmd | txd_lower | seg_len);
1989			ctxd->upper.data =
1990			    htole32(txd_upper);
1991			if (++i == adapter->num_tx_desc)
1992				i = 0;
1993			/* Now make the sentinel */
1994			++txd_used; /* using an extra txd */
1995			ctxd = &txr->tx_base[i];
1996			tx_buffer = &txr->tx_buffers[i];
1997			ctxd->buffer_addr =
1998			    htole64(seg_addr + seg_len);
1999			ctxd->lower.data = htole32(
2000			adapter->txd_cmd | txd_lower | 4);
2001			ctxd->upper.data =
2002			    htole32(txd_upper);
2003			last = i;
2004			if (++i == adapter->num_tx_desc)
2005				i = 0;
2006		} else {
2007			ctxd->buffer_addr = htole64(seg_addr);
2008			ctxd->lower.data = htole32(
2009			adapter->txd_cmd | txd_lower | seg_len);
2010			ctxd->upper.data =
2011			    htole32(txd_upper);
2012			last = i;
2013			if (++i == adapter->num_tx_desc)
2014				i = 0;
2015		}
2016		tx_buffer->m_head = NULL;
2017		tx_buffer->next_eop = -1;
2018	}
2019
2020	txr->next_avail_desc = i;
2021	txr->tx_avail -= nsegs;
2022	if (tso_desc) /* TSO used an extra for sentinel */
2023		txr->tx_avail -= txd_used;
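	/*
	 * Accounting example (illustrative): a TSO packet mapped into 3
	 * segments consumes nsegs (3) descriptors plus the sentinel
	 * added above, so tx_avail drops by 4 in total here.
	 */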
2024
2025	if (m_head->m_flags & M_VLANTAG) {
2026		/* Set the vlan id. */
2027		ctxd->upper.fields.special =
2028		    htole16(m_head->m_pkthdr.ether_vtag);
2029		/* Tell hardware to add tag */
2030		ctxd->lower.data |= htole32(E1000_TXD_CMD_VLE);
2031	}
2032
2033	tx_buffer->m_head = m_head;
2034	tx_buffer_mapped->map = tx_buffer->map;
2035	tx_buffer->map = map;
2036	bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);
2037
2038	/*
2039	 * The last descriptor of the packet needs
2040	 * End Of Packet (EOP) and
2041	 * Report Status (RS) set.
2042	 */
2043	ctxd->lower.data |=
2044	    htole32(E1000_TXD_CMD_EOP | E1000_TXD_CMD_RS);
2045	/*
2046	 * Keep track in the first buffer which
2047	 * descriptor will be written back
2048	 */
2049	tx_buffer = &txr->tx_buffers[first];
2050	tx_buffer->next_eop = last;
2051	/* Update the watchdog time early and often */
2052	txr->watchdog_time = ticks;
2053
2054	/*
2055	 * Advance the Transmit Descriptor Tail (TDT); this tells the E1000
2056	 * that this frame is available to transmit.
2057	 */
2058	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
2059	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
2060	E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), i);
2061
2062	return (0);
2063}
2064
2065static void
2066em_set_promisc(struct adapter *adapter)
2067{
2068	struct ifnet	*ifp = adapter->ifp;
2069	u32		reg_rctl;
2070
2071	reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2072
2073	if (ifp->if_flags & IFF_PROMISC) {
2074		reg_rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
2075		/* Turn this on if you want to see bad packets */
2076		if (em_debug_sbp)
2077			reg_rctl |= E1000_RCTL_SBP;
2078		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2079	} else if (ifp->if_flags & IFF_ALLMULTI) {
2080		reg_rctl |= E1000_RCTL_MPE;
2081		reg_rctl &= ~E1000_RCTL_UPE;
2082		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2083	}
2084}
2085
2086static void
2087em_disable_promisc(struct adapter *adapter)
2088{
2089	u32	reg_rctl;
2090
2091	reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2092
2093	reg_rctl &=  (~E1000_RCTL_UPE);
2094	reg_rctl &=  (~E1000_RCTL_MPE);
2095	reg_rctl &=  (~E1000_RCTL_SBP);
2096	E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2097}
2098
2099
2100/*********************************************************************
2101 *  Multicast Update
2102 *
2103 *  This routine is called whenever the multicast address list is updated.
2104 *
2105 **********************************************************************/
2106
2107static void
2108em_set_multi(struct adapter *adapter)
2109{
2110	struct ifnet	*ifp = adapter->ifp;
2111	struct ifmultiaddr *ifma;
2112	u32 reg_rctl = 0;
2113	u8  *mta; /* Multicast array memory */
2114	int mcnt = 0;
2115
2116	IOCTL_DEBUGOUT("em_set_multi: begin");
2117
2118	mta = adapter->mta;
2119	bzero(mta, sizeof(u8) * ETH_ADDR_LEN * MAX_NUM_MULTICAST_ADDRESSES);
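	/*
	 * mta is a flat byte array: entry i occupies bytes
	 * [i * ETH_ADDR_LEN, i * ETH_ADDR_LEN + 5], matching the
	 * bcopy() into &mta[mcnt * ETH_ADDR_LEN] in the loop below.
	 */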
2120
2121	if (adapter->hw.mac.type == e1000_82542 &&
2122	    adapter->hw.revision_id == E1000_REVISION_2) {
2123		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2124		if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
2125			e1000_pci_clear_mwi(&adapter->hw);
2126		reg_rctl |= E1000_RCTL_RST;
2127		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2128		msec_delay(5);
2129	}
2130
2131#if __FreeBSD_version < 800000
2132	IF_ADDR_LOCK(ifp);
2133#else
2134	if_maddr_rlock(ifp);
2135#endif
2136	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
2137		if (ifma->ifma_addr->sa_family != AF_LINK)
2138			continue;
2139
2140		if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
2141			break;
2142
2143		bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
2144		    &mta[mcnt * ETH_ADDR_LEN], ETH_ADDR_LEN);
2145		mcnt++;
2146	}
2147#if __FreeBSD_version < 800000
2148	IF_ADDR_UNLOCK(ifp);
2149#else
2150	if_maddr_runlock(ifp);
2151#endif
2152	if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) {
2153		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2154		reg_rctl |= E1000_RCTL_MPE;
2155		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2156	} else
2157		e1000_update_mc_addr_list(&adapter->hw, mta, mcnt);
2158
2159	if (adapter->hw.mac.type == e1000_82542 &&
2160	    adapter->hw.revision_id == E1000_REVISION_2) {
2161		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2162		reg_rctl &= ~E1000_RCTL_RST;
2163		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2164		msec_delay(5);
2165		if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
2166			e1000_pci_set_mwi(&adapter->hw);
2167	}
2168}
2169
2170
2171/*********************************************************************
2172 *  Timer routine
2173 *
2174 *  This routine checks for link status and updates statistics.
2175 *
2176 **********************************************************************/
2177
2178static void
2179em_local_timer(void *arg)
2180{
2181	struct adapter	*adapter = arg;
2182	struct ifnet	*ifp = adapter->ifp;
2183	struct tx_ring	*txr = adapter->tx_rings;
2184	struct rx_ring	*rxr = adapter->rx_rings;
2185	u32		trigger;
2186
2187	EM_CORE_LOCK_ASSERT(adapter);
2188
2189	em_update_link_status(adapter);
2190	em_update_stats_counters(adapter);
2191
2192	/* Reset LAA into RAR[0] on 82571 */
2193	if ((adapter->hw.mac.type == e1000_82571) &&
2194	    e1000_get_laa_state_82571(&adapter->hw))
2195		e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);
2196
2197	/* Mask to use in the irq trigger */
2198	if (adapter->msix_mem)
2199		trigger = rxr->ims; /* RX for 82574 */
2200	else
2201		trigger = E1000_ICS_RXDMT0;
2202
2203	/*
2204	** Don't do TX watchdog check if we've been paused
2205	*/
2206	if (adapter->pause_frames) {
2207		adapter->pause_frames = 0;
2208		goto out;
2209	}
2210	/*
2211	** Check on the state of the TX queue(s); this
2212	** can be done without the lock because it's read-only
2213	** here and the HUNG state will be static once set.
2214	*/
2215	for (int i = 0; i < adapter->num_queues; i++, txr++)
2216		if (txr->queue_status == EM_QUEUE_HUNG)
2217			goto hung;
2218out:
2219	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
2220#ifndef DEVICE_POLLING
2221	/* Trigger an RX interrupt to guarantee mbuf refresh */
2222	E1000_WRITE_REG(&adapter->hw, E1000_ICS, trigger);
2223#endif
2224	return;
2225hung:
2226	/* Looks like we're hung */
2227	device_printf(adapter->dev, "Watchdog timeout -- resetting\n");
2228	device_printf(adapter->dev,
2229	    "Queue(%d) tdh = %d, hw tdt = %d\n", txr->me,
2230	    E1000_READ_REG(&adapter->hw, E1000_TDH(txr->me)),
2231	    E1000_READ_REG(&adapter->hw, E1000_TDT(txr->me)));
2232	device_printf(adapter->dev, "TX(%d) desc avail = %d, "
2233	    "Next TX to Clean = %d\n",
2234	    txr->me, txr->tx_avail, txr->next_to_clean);
2235	ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2236	adapter->watchdog_events++;
2237	em_init_locked(adapter);
2238}
2239
2240
2241static void
2242em_update_link_status(struct adapter *adapter)
2243{
2244	struct e1000_hw *hw = &adapter->hw;
2245	struct ifnet *ifp = adapter->ifp;
2246	device_t dev = adapter->dev;
2247	struct tx_ring *txr = adapter->tx_rings;
2248	u32 link_check = 0;
2249
2250	/* Get the cached link value or read phy for real */
2251	switch (hw->phy.media_type) {
2252	case e1000_media_type_copper:
2253		if (hw->mac.get_link_status) {
2254			/* Do the work to read phy */
2255			e1000_check_for_link(hw);
2256			link_check = !hw->mac.get_link_status;
2257			if (link_check) /* ESB2 fix */
2258				e1000_cfg_on_link_up(hw);
2259		} else
2260			link_check = TRUE;
2261		break;
2262	case e1000_media_type_fiber:
2263		e1000_check_for_link(hw);
2264		link_check = (E1000_READ_REG(hw, E1000_STATUS) &
2265                                 E1000_STATUS_LU);
2266		break;
2267	case e1000_media_type_internal_serdes:
2268		e1000_check_for_link(hw);
2269		link_check = adapter->hw.mac.serdes_has_link;
2270		break;
2271	default:
2272	case e1000_media_type_unknown:
2273		break;
2274	}
2275
2276	/* Now check for a transition */
2277	if (link_check && (adapter->link_active == 0)) {
2278		e1000_get_speed_and_duplex(hw, &adapter->link_speed,
2279		    &adapter->link_duplex);
2280		/* Check if we must disable SPEED_MODE bit on PCI-E */
2281		if ((adapter->link_speed != SPEED_1000) &&
2282		    ((hw->mac.type == e1000_82571) ||
2283		    (hw->mac.type == e1000_82572))) {
2284			int tarc0;
2285			tarc0 = E1000_READ_REG(hw, E1000_TARC(0));
2286			tarc0 &= ~SPEED_MODE_BIT;
2287			E1000_WRITE_REG(hw, E1000_TARC(0), tarc0);
2288		}
2289		if (bootverbose)
2290			device_printf(dev, "Link is up %d Mbps %s\n",
2291			    adapter->link_speed,
2292			    ((adapter->link_duplex == FULL_DUPLEX) ?
2293			    "Full Duplex" : "Half Duplex"));
2294		adapter->link_active = 1;
2295		adapter->smartspeed = 0;
2296		ifp->if_baudrate = adapter->link_speed * 1000000;
2297		if_link_state_change(ifp, LINK_STATE_UP);
2298	} else if (!link_check && (adapter->link_active == 1)) {
2299		ifp->if_baudrate = adapter->link_speed = 0;
2300		adapter->link_duplex = 0;
2301		if (bootverbose)
2302			device_printf(dev, "Link is Down\n");
2303		adapter->link_active = 0;
2304		/* Link down, disable watchdog */
2305		for (int i = 0; i < adapter->num_queues; i++, txr++)
2306			txr->queue_status = EM_QUEUE_IDLE;
2307		if_link_state_change(ifp, LINK_STATE_DOWN);
2308	}
2309}
2310
2311/*********************************************************************
2312 *
2313 *  This routine disables all traffic on the adapter by issuing a
2314 *  global reset on the MAC and deallocates TX/RX buffers.
2315 *
2316 *  This routine should always be called with BOTH the CORE
2317 *  and TX locks.
2318 **********************************************************************/
2319
2320static void
2321em_stop(void *arg)
2322{
2323	struct adapter	*adapter = arg;
2324	struct ifnet	*ifp = adapter->ifp;
2325	struct tx_ring	*txr = adapter->tx_rings;
2326
2327	EM_CORE_LOCK_ASSERT(adapter);
2328
2329	INIT_DEBUGOUT("em_stop: begin");
2330
2331	em_disable_intr(adapter);
2332	callout_stop(&adapter->timer);
2333
2334	/* Tell the stack that the interface is no longer active */
2335	ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
2336
2337        /* Unarm watchdog timer. */
2338	for (int i = 0; i < adapter->num_queues; i++, txr++) {
2339		EM_TX_LOCK(txr);
2340		txr->queue_status = EM_QUEUE_IDLE;
2341		EM_TX_UNLOCK(txr);
2342	}
2343
2344	e1000_reset_hw(&adapter->hw);
2345	E1000_WRITE_REG(&adapter->hw, E1000_WUC, 0);
2346
2347	e1000_led_off(&adapter->hw);
2348	e1000_cleanup_led(&adapter->hw);
2349}
2350
2351
2352/*********************************************************************
2353 *
2354 *  Determine hardware revision.
2355 *
2356 **********************************************************************/
2357static void
2358em_identify_hardware(struct adapter *adapter)
2359{
2360	device_t dev = adapter->dev;
2361
2362	/* Make sure our PCI config space has the necessary stuff set */
2363	adapter->hw.bus.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2);
2364	if (!((adapter->hw.bus.pci_cmd_word & PCIM_CMD_BUSMASTEREN) &&
2365	    (adapter->hw.bus.pci_cmd_word & PCIM_CMD_MEMEN))) {
2366		device_printf(dev, "Memory Access and/or Bus Master bits "
2367		    "were not set!\n");
2368		adapter->hw.bus.pci_cmd_word |=
2369		(PCIM_CMD_BUSMASTEREN | PCIM_CMD_MEMEN);
2370		pci_write_config(dev, PCIR_COMMAND,
2371		    adapter->hw.bus.pci_cmd_word, 2);
2372	}
2373
2374	/* Save off the information about this board */
2375	adapter->hw.vendor_id = pci_get_vendor(dev);
2376	adapter->hw.device_id = pci_get_device(dev);
2377	adapter->hw.revision_id = pci_read_config(dev, PCIR_REVID, 1);
2378	adapter->hw.subsystem_vendor_id =
2379	    pci_read_config(dev, PCIR_SUBVEND_0, 2);
2380	adapter->hw.subsystem_device_id =
2381	    pci_read_config(dev, PCIR_SUBDEV_0, 2);
2382
2383	/* Do Shared Code Init and Setup */
2384	if (e1000_set_mac_type(&adapter->hw)) {
2385		device_printf(dev, "Setup init failure\n");
2386		return;
2387	}
2388}
2389
2390static int
2391em_allocate_pci_resources(struct adapter *adapter)
2392{
2393	device_t	dev = adapter->dev;
2394	int		rid;
2395
2396	rid = PCIR_BAR(0);
2397	adapter->memory = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
2398	    &rid, RF_ACTIVE);
2399	if (adapter->memory == NULL) {
2400		device_printf(dev, "Unable to allocate bus resource: memory\n");
2401		return (ENXIO);
2402	}
2403	adapter->osdep.mem_bus_space_tag =
2404	    rman_get_bustag(adapter->memory);
2405	adapter->osdep.mem_bus_space_handle =
2406	    rman_get_bushandle(adapter->memory);
2407	adapter->hw.hw_addr = (u8 *)&adapter->osdep.mem_bus_space_handle;
2408
2409	/* Default to a single queue */
2410	adapter->num_queues = 1;
2411
2412	/*
2413	 * Setup MSI/X or MSI if PCI Express
2414	 */
2415	adapter->msix = em_setup_msix(adapter);
2416
2417	adapter->hw.back = &adapter->osdep;
2418
2419	return (0);
2420}
2421
2422/*********************************************************************
2423 *
2424 *  Setup the Legacy or MSI Interrupt handler
2425 *
2426 **********************************************************************/
2427int
2428em_allocate_legacy(struct adapter *adapter)
2429{
2430	device_t dev = adapter->dev;
2431	int error, rid = 0;
2432
2433	/* Manually turn off all interrupts */
2434	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2435
2436	if (adapter->msix == 1) /* using MSI */
2437		rid = 1;
2438	/* We allocate a single interrupt resource */
2439	adapter->res = bus_alloc_resource_any(dev,
2440	    SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2441	if (adapter->res == NULL) {
2442		device_printf(dev, "Unable to allocate bus resource: "
2443		    "interrupt\n");
2444		return (ENXIO);
2445	}
2446
2447	/*
2448	 * Allocate a fast interrupt and the associated
2449	 * deferred processing contexts.
2450	 */
2451	TASK_INIT(&adapter->que_task, 0, em_handle_que, adapter);
2452	TASK_INIT(&adapter->link_task, 0, em_handle_link, adapter);
2453	adapter->tq = taskqueue_create_fast("em_taskq", M_NOWAIT,
2454	    taskqueue_thread_enqueue, &adapter->tq);
2455	taskqueue_start_threads(&adapter->tq, 1, PI_NET, "%s taskq",
2456	    device_get_nameunit(adapter->dev));
2457	if ((error = bus_setup_intr(dev, adapter->res, INTR_TYPE_NET,
2458	    em_irq_fast, NULL, adapter, &adapter->tag)) != 0) {
2459		device_printf(dev, "Failed to register fast interrupt "
2460			    "handler: %d\n", error);
2461		taskqueue_free(adapter->tq);
2462		adapter->tq = NULL;
2463		return (error);
2464	}
2465
2466	return (0);
2467}
2468
2469/*********************************************************************
2470 *
2471 *  Setup the MSIX Interrupt handlers
2472 *   This is not really Multiqueue, rather
2473 *   This is not really multiqueue, rather
2474 *   it's just multiple interrupt vectors.
2475 **********************************************************************/
2476int
2477em_allocate_msix(struct adapter *adapter)
2478{
2479	device_t	dev = adapter->dev;
2480	struct		tx_ring *txr = adapter->tx_rings;
2481	struct		rx_ring *rxr = adapter->rx_rings;
2482	int		error, rid, vector = 0;
2483
2484
2485	/* Make sure all interrupts are disabled */
2486	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2487
2488	/* First set up ring resources */
2489	for (int i = 0; i < adapter->num_queues; i++, txr++, rxr++) {
2490
2491		/* RX ring */
2492		rid = vector + 1;
2493
2494		rxr->res = bus_alloc_resource_any(dev,
2495		    SYS_RES_IRQ, &rid, RF_ACTIVE);
2496		if (rxr->res == NULL) {
2497			device_printf(dev,
2498			    "Unable to allocate bus resource: "
2499			    "RX MSIX Interrupt %d\n", i);
2500			return (ENXIO);
2501		}
2502		if ((error = bus_setup_intr(dev, rxr->res,
2503		    INTR_TYPE_NET | INTR_MPSAFE, NULL, em_msix_rx,
2504		    rxr, &rxr->tag)) != 0) {
2505			device_printf(dev, "Failed to register RX handler\n");
2506			return (error);
2507		}
2508#if __FreeBSD_version >= 800504
2509		bus_describe_intr(dev, rxr->res, rxr->tag, "rx %d", i);
2510#endif
2511		rxr->msix = vector++; /* NOTE increment vector for TX */
2512		TASK_INIT(&rxr->rx_task, 0, em_handle_rx, rxr);
2513		rxr->tq = taskqueue_create_fast("em_rxq", M_NOWAIT,
2514		    taskqueue_thread_enqueue, &rxr->tq);
2515		taskqueue_start_threads(&rxr->tq, 1, PI_NET, "%s rxq",
2516		    device_get_nameunit(adapter->dev));
2517		/*
2518		** Set the bit to enable interrupt
2519		** in E1000_IMS -- bits 20 and 21
2520		** are for RX0 and RX1, note this has
2521		** NOTHING to do with the MSIX vector
2522		*/
2523		rxr->ims = 1 << (20 + i);
2524		adapter->ivars |= (8 | rxr->msix) << (i * 4);
2525
2526		/* TX ring */
2527		rid = vector + 1;
2528		txr->res = bus_alloc_resource_any(dev,
2529		    SYS_RES_IRQ, &rid, RF_ACTIVE);
2530		if (txr->res == NULL) {
2531			device_printf(dev,
2532			    "Unable to allocate bus resource: "
2533			    "TX MSIX Interrupt %d\n", i);
2534			return (ENXIO);
2535		}
2536		if ((error = bus_setup_intr(dev, txr->res,
2537		    INTR_TYPE_NET | INTR_MPSAFE, NULL, em_msix_tx,
2538		    txr, &txr->tag)) != 0) {
2539			device_printf(dev, "Failed to register TX handler\n");
2540			return (error);
2541		}
2542#if __FreeBSD_version >= 800504
2543		bus_describe_intr(dev, txr->res, txr->tag, "tx %d", i);
2544#endif
2545		txr->msix = vector++; /* Increment vector for next pass */
2546		TASK_INIT(&txr->tx_task, 0, em_handle_tx, txr);
2547		txr->tq = taskqueue_create_fast("em_txq", M_NOWAIT,
2548		    taskqueue_thread_enqueue, &txr->tq);
2549		taskqueue_start_threads(&txr->tq, 1, PI_NET, "%s txq",
2550		    device_get_nameunit(adapter->dev));
2551		/*
2552		** Set the bit to enable interrupt
2553		** in E1000_IMS -- bits 22 and 23
2554		** are for TX0 and TX1, note this has
2555		** NOTHING to do with the MSIX vector
2556		*/
2557		txr->ims = 1 << (22 + i);
2558		adapter->ivars |= (8 | txr->msix) << (8 + (i * 4));
2559	}
2560
2561	/* Link interrupt */
2562	++rid;
2563	adapter->res = bus_alloc_resource_any(dev,
2564	    SYS_RES_IRQ, &rid, RF_ACTIVE);
2565	if (!adapter->res) {
2566		device_printf(dev, "Unable to allocate "
2567		    "bus resource: Link interrupt [%d]\n", rid);
2568		return (ENXIO);
2569	}
2570	/* Set the link handler function */
2571	error = bus_setup_intr(dev, adapter->res,
2572	    INTR_TYPE_NET | INTR_MPSAFE, NULL,
2573	    em_msix_link, adapter, &adapter->tag);
2574	if (error) {
2575		adapter->res = NULL;
2576		device_printf(dev, "Failed to register LINK handler\n");
2577		return (error);
2578	}
2579#if __FreeBSD_version >= 800504
2580	bus_describe_intr(dev, adapter->res, adapter->tag, "link");
2581#endif
2582	adapter->linkvec = vector;
2583	adapter->ivars |=  (8 | vector) << 16;
2584	adapter->ivars |= 0x80000000;
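	/*
	 * Illustrative result for the single-queue case built above
	 * (vector 0 = RX0, vector 1 = TX0, vector 2 = link): each 4-bit
	 * IVAR field is (0x8 | vector), giving
	 * ivars = 0x80000000 | (0xA << 16) | (0x9 << 8) | 0x8.
	 */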
2585
2586	return (0);
2587}
2588
2589
2590static void
2591em_free_pci_resources(struct adapter *adapter)
2592{
2593	device_t	dev = adapter->dev;
2594	struct tx_ring	*txr;
2595	struct rx_ring	*rxr;
2596	int		rid;
2597
2598
2599	/*
2600	** Release all the queue interrupt resources:
2601	*/
2602	for (int i = 0; i < adapter->num_queues; i++) {
2603		txr = &adapter->tx_rings[i];
2604		rxr = &adapter->rx_rings[i];
2605		/* an early abort? */
2606		if ((txr == NULL) || (rxr == NULL))
2607			break;
2608		rid = txr->msix +1;
2609		if (txr->tag != NULL) {
2610			bus_teardown_intr(dev, txr->res, txr->tag);
2611			txr->tag = NULL;
2612		}
2613		if (txr->res != NULL)
2614			bus_release_resource(dev, SYS_RES_IRQ,
2615			    rid, txr->res);
2616		rid = rxr->msix +1;
2617		if (rxr->tag != NULL) {
2618			bus_teardown_intr(dev, rxr->res, rxr->tag);
2619			rxr->tag = NULL;
2620		}
2621		if (rxr->res != NULL)
2622			bus_release_resource(dev, SYS_RES_IRQ,
2623			    rid, rxr->res);
2624	}
2625
2626	if (adapter->linkvec) /* we are doing MSIX */
2627		rid = adapter->linkvec + 1;
2628	else
2629		rid = (adapter->msix != 0) ? 1 : 0;
2630
2631	if (adapter->tag != NULL) {
2632		bus_teardown_intr(dev, adapter->res, adapter->tag);
2633		adapter->tag = NULL;
2634	}
2635
2636	if (adapter->res != NULL)
2637		bus_release_resource(dev, SYS_RES_IRQ, rid, adapter->res);
2638
2639
2640	if (adapter->msix)
2641		pci_release_msi(dev);
2642
2643	if (adapter->msix_mem != NULL)
2644		bus_release_resource(dev, SYS_RES_MEMORY,
2645		    PCIR_BAR(EM_MSIX_BAR), adapter->msix_mem);
2646
2647	if (adapter->memory != NULL)
2648		bus_release_resource(dev, SYS_RES_MEMORY,
2649		    PCIR_BAR(0), adapter->memory);
2650
2651	if (adapter->flash != NULL)
2652		bus_release_resource(dev, SYS_RES_MEMORY,
2653		    EM_FLASH, adapter->flash);
2654}
2655
2656/*
2657 * Setup MSI or MSI/X
2658 */
2659static int
2660em_setup_msix(struct adapter *adapter)
2661{
2662	device_t dev = adapter->dev;
2663	int val = 0;
2664
2665
2666	/*
2667	** Setup MSI/X for Hartwell: tests have shown
2668	** use of two queues to be unstable, and to
2669	** provide no great gain anyway, so we simply
2670	** separate the interrupts and use a single queue.
2671	*/
2672	if ((adapter->hw.mac.type == e1000_82574) &&
2673	    (em_enable_msix == TRUE)) {
2674		/* Map the MSIX BAR */
2675		int rid = PCIR_BAR(EM_MSIX_BAR);
2676		adapter->msix_mem = bus_alloc_resource_any(dev,
2677		    SYS_RES_MEMORY, &rid, RF_ACTIVE);
2678		if (!adapter->msix_mem) {
2679			/* May not be enabled */
2680			device_printf(adapter->dev,
2681			    "Unable to map MSIX table\n");
2682			goto msi;
2683		}
2684		val = pci_msix_count(dev);
2685		if (val < 3) {
2686			bus_release_resource(dev, SYS_RES_MEMORY,
2687			    PCIR_BAR(EM_MSIX_BAR), adapter->msix_mem);
2688			adapter->msix_mem = NULL;
2689			device_printf(adapter->dev,
2690			    "MSIX: insufficient vectors, using MSI\n");
2691			goto msi;
2692		}
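		/*
		 * 3 vectors here means one RX, one TX and one link
		 * interrupt, matching the assignments made in
		 * em_allocate_msix() above.
		 */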
2693		val = 3;
2694		adapter->num_queues = 1;
2695		if (pci_alloc_msix(dev, &val) == 0) {
2696			device_printf(adapter->dev,
2697			    "Using MSIX interrupts "
2698			    "with %d vectors\n", val);
2699		}
2700
2701		return (val);
2702	}
2703msi:
2704	val = pci_msi_count(dev);
2705	if (val == 1 && pci_alloc_msi(dev, &val) == 0) {
2706		adapter->msix = 1;
2707		device_printf(adapter->dev, "Using an MSI interrupt\n");
2708		return (val);
2709	}
2710	/* Should only happen due to manual configuration */
2711	device_printf(adapter->dev, "No MSI/MSIX, using a Legacy IRQ\n");
2712	return (0);
2713}
2714
2715
2716/*********************************************************************
2717 *
2718 *  Initialize the hardware to a configuration
2719 *  as specified by the adapter structure.
2720 *
2721 **********************************************************************/
2722static void
2723em_reset(struct adapter *adapter)
2724{
2725	device_t	dev = adapter->dev;
2726	struct ifnet	*ifp = adapter->ifp;
2727	struct e1000_hw	*hw = &adapter->hw;
2728	u16		rx_buffer_size;
2729
2730	INIT_DEBUGOUT("em_reset: begin");
2731
2732	/* Set up smart power down as default off on newer adapters. */
2733	if (!em_smart_pwr_down && (hw->mac.type == e1000_82571 ||
2734	    hw->mac.type == e1000_82572)) {
2735		u16 phy_tmp = 0;
2736
2737		/* Speed up time to link by disabling smart power down. */
2738		e1000_read_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, &phy_tmp);
2739		phy_tmp &= ~IGP02E1000_PM_SPD;
2740		e1000_write_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, phy_tmp);
2741	}
2742
2743	/*
2744	 * These parameters control the automatic generation (Tx) and
2745	 * response (Rx) to Ethernet PAUSE frames.
2746	 * - High water mark should allow for at least two frames to be
2747	 *   received after sending an XOFF.
2748	 * - Low water mark works best when it is very near the high water mark.
2749	 *   This allows the receiver to restart by sending XON when it has
2750	 *   drained a bit. Here we use an arbitrary value of 1500 which will
2751	 *   restart after one full frame is pulled from the buffer. There
2752	 *   could be several smaller frames in the buffer and if so they will
2753	 *   not trigger the XON until their total number reduces the buffer
2754	 *   by 1500.
2755	 * - The pause time is fairly large at 1000 x 512ns = 512 usec.
2756	 */
2757	rx_buffer_size = ((E1000_READ_REG(hw, E1000_PBA) & 0xffff) << 10 );
2758
2759	hw->fc.high_water = rx_buffer_size -
2760	    roundup2(adapter->max_frame_size, 1024);
2761	hw->fc.low_water = hw->fc.high_water - 1500;
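	/*
	 * Worked example (illustrative): a 48KB packet buffer reads
	 * back as rx_buffer_size = 48 * 1024 = 49152; with a 1518-byte
	 * max frame, roundup2(1518, 1024) = 2048, so high_water = 47104
	 * and low_water = 45604.
	 */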
2762
2763	if (hw->mac.type == e1000_80003es2lan)
2764		hw->fc.pause_time = 0xFFFF;
2765	else
2766		hw->fc.pause_time = EM_FC_PAUSE_TIME;
2767
2768	hw->fc.send_xon = TRUE;
2769
2770	/* Set flow control; use the value from the tunable if sane */
2771	hw->fc.requested_mode = adapter->fc_setting;
2772
2773	/* Workaround: no TX flow ctrl for PCH */
2774	if (hw->mac.type == e1000_pchlan)
2775		hw->fc.requested_mode = e1000_fc_rx_pause;
2776
2777	/* Override: settings for PCH2LAN; yes, it's magic :) */
2778	if (hw->mac.type == e1000_pch2lan) {
2779		hw->fc.high_water = 0x5C20;
2780		hw->fc.low_water = 0x5048;
2781		hw->fc.pause_time = 0x0650;
2782		hw->fc.refresh_time = 0x0400;
2783		/* Jumbos need adjusted PBA */
2784		if (ifp->if_mtu > ETHERMTU)
2785			E1000_WRITE_REG(hw, E1000_PBA, 12);
2786		else
2787			E1000_WRITE_REG(hw, E1000_PBA, 26);
2788	}
2789
2790	/* Issue a global reset */
2791	e1000_reset_hw(hw);
2792	E1000_WRITE_REG(hw, E1000_WUC, 0);
2793	em_disable_aspm(adapter);
2794
2795	if (e1000_init_hw(hw) < 0) {
2796		device_printf(dev, "Hardware Initialization Failed\n");
2797		return;
2798	}
2799
2800	E1000_WRITE_REG(hw, E1000_VET, ETHERTYPE_VLAN);
2801	e1000_get_phy_info(hw);
2802	e1000_check_for_link(hw);
2803	return;
2804}
2805
2806/*********************************************************************
2807 *
2808 *  Setup networking device structure and register an interface.
2809 *
2810 **********************************************************************/
2811static int
2812em_setup_interface(device_t dev, struct adapter *adapter)
2813{
2814	struct ifnet   *ifp;
2815
2816	INIT_DEBUGOUT("em_setup_interface: begin");
2817
2818	ifp = adapter->ifp = if_alloc(IFT_ETHER);
2819	if (ifp == NULL) {
2820		device_printf(dev, "can not allocate ifnet structure\n");
2821		return (-1);
2822	}
2823	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
2824	ifp->if_mtu = ETHERMTU;
2825	ifp->if_init =  em_init;
2826	ifp->if_softc = adapter;
2827	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
2828	ifp->if_ioctl = em_ioctl;
2829	ifp->if_start = em_start;
2830	IFQ_SET_MAXLEN(&ifp->if_snd, adapter->num_tx_desc - 1);
2831	ifp->if_snd.ifq_drv_maxlen = adapter->num_tx_desc - 1;
2832	IFQ_SET_READY(&ifp->if_snd);
2833
2834	ether_ifattach(ifp, adapter->hw.mac.addr);
2835
2836	ifp->if_capabilities = ifp->if_capenable = 0;
2837
2838#ifdef EM_MULTIQUEUE
2839	/* Multiqueue tx functions */
2840	ifp->if_transmit = em_mq_start;
2841	ifp->if_qflush = em_qflush;
2842#endif
2843
2844	ifp->if_capabilities |= IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM;
2845	ifp->if_capenable |= IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM;
2846
2847	/* Enable TSO by default, can disable with ifconfig */
2848	ifp->if_capabilities |= IFCAP_TSO4;
2849	ifp->if_capenable |= IFCAP_TSO4;
2850
2851	/*
2852	 * Tell the upper layer(s) we
2853	 * support full VLAN capability
2854	 */
2855	ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);
2856	ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU;
2857	ifp->if_capenable |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU;
2858
2859	/*
2860	** Don't turn this on by default: if vlans are
2861	** created on another pseudo device (e.g. lagg),
2862	** vlan events are not passed through, breaking
2863	** operation, but with HW FILTER off it works. If
2864	** using vlans directly on the em driver you can
2865	** enable this and get full hardware tag filtering.
2866	*/
2867	ifp->if_capabilities |= IFCAP_VLAN_HWFILTER;
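	/*
	 * e.g. (illustrative): "ifconfig em0 vlanhwfilter" enables it at
	 * runtime when vlans hang directly off the em interface.
	 */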
2868
2869#ifdef DEVICE_POLLING
2870	ifp->if_capabilities |= IFCAP_POLLING;
2871#endif
2872
2873	/* Enable only WOL MAGIC by default */
2874	if (adapter->wol) {
2875		ifp->if_capabilities |= IFCAP_WOL;
2876		ifp->if_capenable |= IFCAP_WOL_MAGIC;
2877	}
2878
2879	/*
2880	 * Specify the media types supported by this adapter and register
2881	 * callbacks to update media and link information
2882	 */
2883	ifmedia_init(&adapter->media, IFM_IMASK,
2884	    em_media_change, em_media_status);
2885	if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
2886	    (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
2887		u_char fiber_type = IFM_1000_SX;	/* default type */
2888
2889		ifmedia_add(&adapter->media, IFM_ETHER | fiber_type | IFM_FDX,
2890			    0, NULL);
2891		ifmedia_add(&adapter->media, IFM_ETHER | fiber_type, 0, NULL);
2892	} else {
2893		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T, 0, NULL);
2894		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T | IFM_FDX,
2895			    0, NULL);
2896		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX,
2897			    0, NULL);
2898		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
2899			    0, NULL);
2900		if (adapter->hw.phy.type != e1000_phy_ife) {
2901			ifmedia_add(&adapter->media,
2902				IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
2903			ifmedia_add(&adapter->media,
2904				IFM_ETHER | IFM_1000_T, 0, NULL);
2905		}
2906	}
2907	ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
2908	ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
2909	return (0);
2910}
2911
2912
2913/*
2914 * Manage DMA'able memory.
2915 */
2916static void
2917em_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
2918{
2919	if (error)
2920		return;
2921	*(bus_addr_t *) arg = segs[0].ds_addr;
2922}
2923
2924static int
2925em_dma_malloc(struct adapter *adapter, bus_size_t size,
2926        struct em_dma_alloc *dma, int mapflags)
2927{
2928	int error;
2929
2930	error = bus_dma_tag_create(bus_get_dma_tag(adapter->dev), /* parent */
2931				EM_DBA_ALIGN, 0,	/* alignment, bounds */
2932				BUS_SPACE_MAXADDR,	/* lowaddr */
2933				BUS_SPACE_MAXADDR,	/* highaddr */
2934				NULL, NULL,		/* filter, filterarg */
2935				size,			/* maxsize */
2936				1,			/* nsegments */
2937				size,			/* maxsegsize */
2938				0,			/* flags */
2939				NULL,			/* lockfunc */
2940				NULL,			/* lockarg */
2941				&dma->dma_tag);
2942	if (error) {
2943		device_printf(adapter->dev,
2944		    "%s: bus_dma_tag_create failed: %d\n",
2945		    __func__, error);
2946		goto fail_0;
2947	}
2948
2949	error = bus_dmamem_alloc(dma->dma_tag, (void**) &dma->dma_vaddr,
2950	    BUS_DMA_NOWAIT | BUS_DMA_COHERENT, &dma->dma_map);
2951	if (error) {
2952		device_printf(adapter->dev,
2953		    "%s: bus_dmamem_alloc(%ju) failed: %d\n",
2954		    __func__, (uintmax_t)size, error);
2955		goto fail_2;
2956	}
2957
2958	dma->dma_paddr = 0;
2959	error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
2960	    size, em_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT);
2961	if (error || dma->dma_paddr == 0) {
2962		device_printf(adapter->dev,
2963		    "%s: bus_dmamap_load failed: %d\n",
2964		    __func__, error);
2965		goto fail_3;
2966	}
2967
2968	return (0);
2969
2970fail_3:
2971	bus_dmamap_unload(dma->dma_tag, dma->dma_map);
2972fail_2:
2973	bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
2974	bus_dma_tag_destroy(dma->dma_tag);
2975fail_0:
2976	dma->dma_map = NULL;
2977	dma->dma_tag = NULL;
2978
2979	return (error);
2980}
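/*
 * Typical usage (as in em_allocate_queues() below):
 *	if (em_dma_malloc(adapter, tsize, &txr->txdma, BUS_DMA_NOWAIT))
 *		error = ENOMEM;
 * with the paired release done through em_dma_free().
 */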
2981
2982static void
2983em_dma_free(struct adapter *adapter, struct em_dma_alloc *dma)
2984{
2985	if (dma->dma_tag == NULL)
2986		return;
2987	if (dma->dma_map != NULL) {
2988		bus_dmamap_sync(dma->dma_tag, dma->dma_map,
2989		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
2990		bus_dmamap_unload(dma->dma_tag, dma->dma_map);
2991		bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
2992		dma->dma_map = NULL;
2993	}
2994	bus_dma_tag_destroy(dma->dma_tag);
2995	dma->dma_tag = NULL;
2996}
2997
2998
2999/*********************************************************************
3000 *
3001 *  Allocate memory for the transmit and receive rings, and then
3002 *  the descriptors associated with each, called only once at attach.
3003 *
3004 **********************************************************************/
3005static int
3006em_allocate_queues(struct adapter *adapter)
3007{
3008	device_t		dev = adapter->dev;
3009	struct tx_ring		*txr = NULL;
3010	struct rx_ring		*rxr = NULL;
3011	int rsize, tsize, error = E1000_SUCCESS;
3012	int txconf = 0, rxconf = 0;
3013
3014
3015	/* Allocate the TX ring struct memory */
3016	if (!(adapter->tx_rings =
3017	    (struct tx_ring *) malloc(sizeof(struct tx_ring) *
3018	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3019		device_printf(dev, "Unable to allocate TX ring memory\n");
3020		error = ENOMEM;
3021		goto fail;
3022	}
3023
3024	/* Now allocate the RX */
3025	if (!(adapter->rx_rings =
3026	    (struct rx_ring *) malloc(sizeof(struct rx_ring) *
3027	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3028		device_printf(dev, "Unable to allocate RX ring memory\n");
3029		error = ENOMEM;
3030		goto rx_fail;
3031	}
3032
3033	tsize = roundup2(adapter->num_tx_desc *
3034	    sizeof(struct e1000_tx_desc), EM_DBA_ALIGN);
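	/*
	 * E.g. (illustrative): 256 descriptors at 16 bytes each
	 * (sizeof(struct e1000_tx_desc)) gives tsize = 4096, which is
	 * already a multiple of EM_DBA_ALIGN.
	 */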
3035	/*
3036	 * Now set up the TX queues, txconf is needed to handle the
3037	 * possibility that things fail midcourse and we need to
3038	 * undo memory gracefully
3039	 */
3040	for (int i = 0; i < adapter->num_queues; i++, txconf++) {
3041		/* Set up some basics */
3042		txr = &adapter->tx_rings[i];
3043		txr->adapter = adapter;
3044		txr->me = i;
3045
3046		/* Initialize the TX lock */
3047		snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
3048		    device_get_nameunit(dev), txr->me);
3049		mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF);
3050
3051		if (em_dma_malloc(adapter, tsize,
3052			&txr->txdma, BUS_DMA_NOWAIT)) {
3053			device_printf(dev,
3054			    "Unable to allocate TX Descriptor memory\n");
3055			error = ENOMEM;
3056			goto err_tx_desc;
3057		}
3058		txr->tx_base = (struct e1000_tx_desc *)txr->txdma.dma_vaddr;
3059		bzero((void *)txr->tx_base, tsize);
3060
3061		if (em_allocate_transmit_buffers(txr)) {
3062			device_printf(dev,
3063			    "Critical Failure setting up transmit buffers\n");
3064			error = ENOMEM;
3065			goto err_tx_desc;
3066		}
3067#if __FreeBSD_version >= 800000
3068		/* Allocate a buf ring */
3069		txr->br = buf_ring_alloc(4096, M_DEVBUF,
3070		    M_WAITOK, &txr->tx_mtx);
3071#endif
3072	}
3073
3074	/*
3075	 * Next the RX queues...
3076	 */
3077	rsize = roundup2(adapter->num_rx_desc *
3078	    sizeof(struct e1000_rx_desc), EM_DBA_ALIGN);
3079	for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
3080		rxr = &adapter->rx_rings[i];
3081		rxr->adapter = adapter;
3082		rxr->me = i;
3083
3084		/* Initialize the RX lock */
3085		snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
3086		    device_get_nameunit(dev), rxr->me);
3087		mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF);
3088
3089		if (em_dma_malloc(adapter, rsize,
3090			&rxr->rxdma, BUS_DMA_NOWAIT)) {
3091			device_printf(dev,
3092			    "Unable to allocate RX Descriptor memory\n");
3093			error = ENOMEM;
3094			goto err_rx_desc;
3095		}
3096		rxr->rx_base = (struct e1000_rx_desc *)rxr->rxdma.dma_vaddr;
3097		bzero((void *)rxr->rx_base, rsize);
3098
3099		/* Allocate receive buffers for the ring */
3100		if (em_allocate_receive_buffers(rxr)) {
3101			device_printf(dev,
3102			    "Critical Failure setting up receive buffers\n");
3103			error = ENOMEM;
3104			goto err_rx_desc;
3105		}
3106	}
3107
3108	return (0);
3109
3110err_rx_desc:
3111	for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
3112		em_dma_free(adapter, &rxr->rxdma);
3113err_tx_desc:
3114	for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
3115		em_dma_free(adapter, &txr->txdma);
3116	free(adapter->rx_rings, M_DEVBUF);
3117rx_fail:
3118#if __FreeBSD_version >= 800000
3119	buf_ring_free(txr->br, M_DEVBUF);
3120#endif
3121	free(adapter->tx_rings, M_DEVBUF);
3122fail:
3123	return (error);
3124}
3125
3126
3127/*********************************************************************
3128 *
3129 *  Allocate memory for tx_buffer structures. The tx_buffer stores all
3130 *  the information needed to transmit a packet on the wire. This is
3131 *  called only once at attach, setup is done every reset.
3132 *
3133 **********************************************************************/
3134static int
3135em_allocate_transmit_buffers(struct tx_ring *txr)
3136{
3137	struct adapter *adapter = txr->adapter;
3138	device_t dev = adapter->dev;
3139	struct em_buffer *txbuf;
3140	int error, i;
3141
3142	/*
3143	 * Setup DMA descriptor areas.
3144	 */
3145	if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
3146			       1, 0,			/* alignment, bounds */
3147			       BUS_SPACE_MAXADDR,	/* lowaddr */
3148			       BUS_SPACE_MAXADDR,	/* highaddr */
3149			       NULL, NULL,		/* filter, filterarg */
3150			       EM_TSO_SIZE,		/* maxsize */
3151			       EM_MAX_SCATTER,		/* nsegments */
3152			       PAGE_SIZE,		/* maxsegsize */
3153			       0,			/* flags */
3154			       NULL,			/* lockfunc */
3155			       NULL,			/* lockfuncarg */
3156			       &txr->txtag))) {
3157		device_printf(dev,"Unable to allocate TX DMA tag\n");
3158		goto fail;
3159	}
3160
3161	if (!(txr->tx_buffers =
3162	    (struct em_buffer *) malloc(sizeof(struct em_buffer) *
3163	    adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3164		device_printf(dev, "Unable to allocate tx_buffer memory\n");
3165		error = ENOMEM;
3166		goto fail;
3167	}
3168
3169	/* Create the descriptor buffer dma maps */
3170	txbuf = txr->tx_buffers;
3171	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3172		error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
3173		if (error != 0) {
3174			device_printf(dev, "Unable to create TX DMA map\n");
3175			goto fail;
3176		}
3177	}
3178
3179	return (0);
3180fail:
3181	/* We free all, it handles case where we are in the middle */
3182	em_free_transmit_structures(adapter);
3183	return (error);
3184}
3185
3186/*********************************************************************
3187 *
3188 *  Initialize a transmit ring.
3189 *
3190 **********************************************************************/
3191static void
3192em_setup_transmit_ring(struct tx_ring *txr)
3193{
3194	struct adapter *adapter = txr->adapter;
3195	struct em_buffer *txbuf;
3196	int i;
3197
3198	/* Clear the old descriptor contents */
3199	EM_TX_LOCK(txr);
3200	bzero((void *)txr->tx_base,
3201	      (sizeof(struct e1000_tx_desc)) * adapter->num_tx_desc);
3202	/* Reset indices */
3203	txr->next_avail_desc = 0;
3204	txr->next_to_clean = 0;
3205
3206	/* Free any existing tx buffers. */
3207	txbuf = txr->tx_buffers;
3208	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3209		if (txbuf->m_head != NULL) {
3210			bus_dmamap_sync(txr->txtag, txbuf->map,
3211			    BUS_DMASYNC_POSTWRITE);
3212			bus_dmamap_unload(txr->txtag, txbuf->map);
3213			m_freem(txbuf->m_head);
3214			txbuf->m_head = NULL;
3215		}
3216		/* clear the watch index */
3217		txbuf->next_eop = -1;
3218	}
3219
3220	/* Set number of descriptors available */
3221	txr->tx_avail = adapter->num_tx_desc;
3222	txr->queue_status = EM_QUEUE_IDLE;
3223
3224	/* Clear checksum offload context. */
3225	txr->last_hw_offload = 0;
3226	txr->last_hw_ipcss = 0;
3227	txr->last_hw_ipcso = 0;
3228	txr->last_hw_tucss = 0;
3229	txr->last_hw_tucso = 0;
3230
3231	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3232	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3233	EM_TX_UNLOCK(txr);
3234}
3235
3236/*********************************************************************
3237 *
3238 *  Initialize all transmit rings.
3239 *
3240 **********************************************************************/
3241static void
3242em_setup_transmit_structures(struct adapter *adapter)
3243{
3244	struct tx_ring *txr = adapter->tx_rings;
3245
3246	for (int i = 0; i < adapter->num_queues; i++, txr++)
3247		em_setup_transmit_ring(txr);
3248
3249	return;
3250}
3251
3252/*********************************************************************
3253 *
3254 *  Enable transmit unit.
3255 *
3256 **********************************************************************/
3257static void
3258em_initialize_transmit_unit(struct adapter *adapter)
3259{
3260	struct tx_ring	*txr = adapter->tx_rings;
3261	struct e1000_hw	*hw = &adapter->hw;
3262	u32	tctl, tarc, tipg = 0;
3263
3264	INIT_DEBUGOUT("em_initialize_transmit_unit: begin");
3265
3266	for (int i = 0; i < adapter->num_queues; i++, txr++) {
3267		u64 bus_addr = txr->txdma.dma_paddr;
3268		/* Base and Len of TX Ring */
3269		E1000_WRITE_REG(hw, E1000_TDLEN(i),
3270	    	    adapter->num_tx_desc * sizeof(struct e1000_tx_desc));
3271		E1000_WRITE_REG(hw, E1000_TDBAH(i),
3272	    	    (u32)(bus_addr >> 32));
3273		E1000_WRITE_REG(hw, E1000_TDBAL(i),
3274	    	    (u32)bus_addr);
3275		/* Init the HEAD/TAIL indices */
3276		E1000_WRITE_REG(hw, E1000_TDT(i), 0);
3277		E1000_WRITE_REG(hw, E1000_TDH(i), 0);
3278
3279		HW_DEBUGOUT2("Base = %x, Length = %x\n",
3280		    E1000_READ_REG(&adapter->hw, E1000_TDBAL(i)),
3281		    E1000_READ_REG(&adapter->hw, E1000_TDLEN(i)));
3282
3283		txr->queue_status = EM_QUEUE_IDLE;
3284	}
3285
3286	/* Set the default values for the Tx Inter Packet Gap timer */
3287	switch (adapter->hw.mac.type) {
3288	case e1000_82542:
3289		tipg = DEFAULT_82542_TIPG_IPGT;
3290		tipg |= DEFAULT_82542_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT;
3291		tipg |= DEFAULT_82542_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT;
3292		break;
3293	case e1000_80003es2lan:
3294		tipg = DEFAULT_82543_TIPG_IPGR1;
3295		tipg |= DEFAULT_80003ES2LAN_TIPG_IPGR2 <<
3296		    E1000_TIPG_IPGR2_SHIFT;
3297		break;
3298	default:
3299		if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
3300		    (adapter->hw.phy.media_type ==
3301		    e1000_media_type_internal_serdes))
3302			tipg = DEFAULT_82543_TIPG_IPGT_FIBER;
3303		else
3304			tipg = DEFAULT_82543_TIPG_IPGT_COPPER;
3305		tipg |= DEFAULT_82543_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT;
3306		tipg |= DEFAULT_82543_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT;
3307	}
3308
3309	E1000_WRITE_REG(&adapter->hw, E1000_TIPG, tipg);
3310	E1000_WRITE_REG(&adapter->hw, E1000_TIDV, adapter->tx_int_delay.value);
3311
3312	if (adapter->hw.mac.type >= e1000_82540)
3313		E1000_WRITE_REG(&adapter->hw, E1000_TADV,
3314		    adapter->tx_abs_int_delay.value);
3315
3316	if ((adapter->hw.mac.type == e1000_82571) ||
3317	    (adapter->hw.mac.type == e1000_82572)) {
3318		tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
3319		tarc |= SPEED_MODE_BIT;
3320		E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3321	} else if (adapter->hw.mac.type == e1000_80003es2lan) {
3322		tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
3323		tarc |= 1;
3324		E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3325		tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(1));
3326		tarc |= 1;
3327		E1000_WRITE_REG(&adapter->hw, E1000_TARC(1), tarc);
3328	}
3329
3330	adapter->txd_cmd = E1000_TXD_CMD_IFCS;
3331	if (adapter->tx_int_delay.value > 0)
3332		adapter->txd_cmd |= E1000_TXD_CMD_IDE;
3333
3334	/* Program the Transmit Control Register */
3335	tctl = E1000_READ_REG(&adapter->hw, E1000_TCTL);
3336	tctl &= ~E1000_TCTL_CT;
3337	tctl |= (E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN |
3338		   (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT));
3339
3340	if (adapter->hw.mac.type >= e1000_82571)
3341		tctl |= E1000_TCTL_MULR;
3342
3343	/* This write will effectively turn on the transmit unit. */
3344	E1000_WRITE_REG(&adapter->hw, E1000_TCTL, tctl);
3345
3346}
3347
3348
3349/*********************************************************************
3350 *
3351 *  Free all transmit rings.
3352 *
3353 **********************************************************************/
3354static void
3355em_free_transmit_structures(struct adapter *adapter)
3356{
3357	struct tx_ring *txr = adapter->tx_rings;
3358
3359	for (int i = 0; i < adapter->num_queues; i++, txr++) {
3360		EM_TX_LOCK(txr);
3361		em_free_transmit_buffers(txr);
3362		em_dma_free(adapter, &txr->txdma);
3363		EM_TX_UNLOCK(txr);
3364		EM_TX_LOCK_DESTROY(txr);
3365	}
3366
3367	free(adapter->tx_rings, M_DEVBUF);
3368}
3369
3370/*********************************************************************
3371 *
3372 *  Free transmit ring related data structures.
3373 *
3374 **********************************************************************/
3375static void
3376em_free_transmit_buffers(struct tx_ring *txr)
3377{
3378	struct adapter		*adapter = txr->adapter;
3379	struct em_buffer	*txbuf;
3380
3381	INIT_DEBUGOUT("free_transmit_ring: begin");
3382
3383	if (txr->tx_buffers == NULL)
3384		return;
3385
3386	for (int i = 0; i < adapter->num_tx_desc; i++) {
3387		txbuf = &txr->tx_buffers[i];
3388		if (txbuf->m_head != NULL) {
3389			bus_dmamap_sync(txr->txtag, txbuf->map,
3390			    BUS_DMASYNC_POSTWRITE);
3391			bus_dmamap_unload(txr->txtag,
3392			    txbuf->map);
3393			m_freem(txbuf->m_head);
3394			txbuf->m_head = NULL;
3395			if (txbuf->map != NULL) {
3396				bus_dmamap_destroy(txr->txtag,
3397				    txbuf->map);
3398				txbuf->map = NULL;
3399			}
3400		} else if (txbuf->map != NULL) {
3401			bus_dmamap_unload(txr->txtag,
3402			    txbuf->map);
3403			bus_dmamap_destroy(txr->txtag,
3404			    txbuf->map);
3405			txbuf->map = NULL;
3406		}
3407	}
3408#if __FreeBSD_version >= 800000
3409	if (txr->br != NULL)
3410		buf_ring_free(txr->br, M_DEVBUF);
3411#endif
3412	if (txr->tx_buffers != NULL) {
3413		free(txr->tx_buffers, M_DEVBUF);
3414		txr->tx_buffers = NULL;
3415	}
3416	if (txr->txtag != NULL) {
3417		bus_dma_tag_destroy(txr->txtag);
3418		txr->txtag = NULL;
3419	}
3420	return;
3421}
3422
3423
3424/*********************************************************************
3425 *  The offload context is protocol specific (TCP/UDP) and thus
3426 *  only needs to be set when the protocol changes. A context
3427 *  change, however, can be a performance detriment and so
3428 *  might be better just disabled. The reason arises in the way
3429 *  in which the controller supports pipelined requests from the
3430 *  Tx data DMA. Up to four requests can be pipelined, and they may
3431 *  belong to the same packet or to multiple packets. However all
3432 *  requests for one packet are issued before a request is issued
3433 *  for a subsequent packet and if a request for the next packet
3434 *  requires a context change, that request will be stalled
3435 *  until the previous request completes. This means setting up
3436 *  a new context effectively disables pipelined Tx data DMA which
3437 *  in turn greatly slows down performance when sending small
3438 *  frames.
3439 **********************************************************************/
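/*
 * Concretely: the txr->last_hw_* reuse checks below skip writing a
 * new context descriptor when consecutive packets share the same
 * offload layout (e.g. back-to-back TCP segments), so the stall
 * described above is paid only when the protocol actually changes.
 */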
3440static void
3441em_transmit_checksum_setup(struct tx_ring *txr, struct mbuf *mp, int ip_off,
3442    struct ip *ip, u32 *txd_upper, u32 *txd_lower)
3443{
3444	struct adapter			*adapter = txr->adapter;
3445	struct e1000_context_desc	*TXD = NULL;
3446	struct em_buffer		*tx_buffer;
3447	int				cur, hdr_len;
3448	u32				cmd = 0;
3449	u16				offload = 0;
3450	u8				ipcso, ipcss, tucso, tucss;
3451
3452	ipcss = ipcso = tucss = tucso = 0;
3453	hdr_len = ip_off + (ip->ip_hl << 2);
3454	cur = txr->next_avail_desc;
3455
3456	/* Setup of IP header checksum. */
3457	if (mp->m_pkthdr.csum_flags & CSUM_IP) {
3458		*txd_upper |= E1000_TXD_POPTS_IXSM << 8;
3459		offload |= CSUM_IP;
3460		ipcss = ip_off;
3461		ipcso = ip_off + offsetof(struct ip, ip_sum);
3462		/*
3463		 * Start offset for header checksum calculation.
3464		 * End offset for header checksum calculation.
3465		 * Offset of place to put the checksum.
3466		 */
3467		TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
3468		TXD->lower_setup.ip_fields.ipcss = ipcss;
3469		TXD->lower_setup.ip_fields.ipcse = htole16(hdr_len);
3470		TXD->lower_setup.ip_fields.ipcso = ipcso;
3471		cmd |= E1000_TXD_CMD_IP;
3472	}
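	/*
	 * Illustrative offsets for an untagged IPv4 frame: ip_off = 14,
	 * so ipcss = 14 and ipcso = 14 + offsetof(struct ip, ip_sum) =
	 * 14 + 10 = 24; hdr_len then marks the end of the IP header.
	 */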
3473
3474	if (mp->m_pkthdr.csum_flags & CSUM_TCP) {
3475 		*txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
3476 		*txd_upper |= E1000_TXD_POPTS_TXSM << 8;
3477 		offload |= CSUM_TCP;
3478 		tucss = hdr_len;
3479 		tucso = hdr_len + offsetof(struct tcphdr, th_sum);
3480 		/*
3481 		 * Setting up a new checksum offload context for every frame
3482 		 * takes a lot of processing time for the hardware. This also
3483 		 * reduces performance a lot for small frames, so avoid it if
3484 		 * the driver can reuse a previously configured checksum
3485 		 * offload context.
3486 		 */
3487 		if (txr->last_hw_offload == offload) {
3488 			if (offload & CSUM_IP) {
3489 				if (txr->last_hw_ipcss == ipcss &&
3490 				    txr->last_hw_ipcso == ipcso &&
3491 				    txr->last_hw_tucss == tucss &&
3492 				    txr->last_hw_tucso == tucso)
3493 					return;
3494 			} else {
3495 				if (txr->last_hw_tucss == tucss &&
3496 				    txr->last_hw_tucso == tucso)
3497 					return;
3498 			}
3499  		}
3500 		txr->last_hw_offload = offload;
3501 		txr->last_hw_tucss = tucss;
3502 		txr->last_hw_tucso = tucso;
3503 		/*
3504 		 * Start offset for payload checksum calculation.
3505 		 * End offset for payload checksum calculation.
3506 		 * Offset of place to put the checksum.
3507 		 */
3508		TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
3509 		TXD->upper_setup.tcp_fields.tucss = hdr_len;
3510 		TXD->upper_setup.tcp_fields.tucse = htole16(0);
3511 		TXD->upper_setup.tcp_fields.tucso = tucso;
3512 		cmd |= E1000_TXD_CMD_TCP;
3513 	} else if (mp->m_pkthdr.csum_flags & CSUM_UDP) {
3514 		*txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
3515 		*txd_upper |= E1000_TXD_POPTS_TXSM << 8;
3516 		tucss = hdr_len;
3517 		tucso = hdr_len + offsetof(struct udphdr, uh_sum);
 		offload |= CSUM_UDP;	/* record protocol for context reuse */
3518 		/*
3519 		 * Setting up a new checksum offload context for every frame
3520 		 * takes a lot of processing time in hardware. It also
3521 		 * hurts performance badly for small-sized frames, so avoid
3522 		 * it if the driver can reuse the previously configured
3523 		 * checksum offload context.
3524 		 */
3525 		if (txr->last_hw_offload == offload) {
3526 			if (offload & CSUM_IP) {
3527 				if (txr->last_hw_ipcss == ipcss &&
3528 				    txr->last_hw_ipcso == ipcso &&
3529 				    txr->last_hw_tucss == tucss &&
3530 				    txr->last_hw_tucso == tucso)
3531 					return;
3532 			} else {
3533 				if (txr->last_hw_tucss == tucss &&
3534 				    txr->last_hw_tucso == tucso)
3535 					return;
3536 			}
3537 		}
3538 		txr->last_hw_offload = offload;
3539 		txr->last_hw_tucss = tucss;
3540 		txr->last_hw_tucso = tucso;
3541 		/*
3542 		 * Start offset for payload checksum calculation.
3543 		 * End offset for payload checksum calculation.
3544 		 * Offset of place to put the checksum.
3545 		 */
3546		TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
3547 		TXD->upper_setup.tcp_fields.tucss = tucss;
3548 		TXD->upper_setup.tcp_fields.tucse = htole16(0);
3549 		TXD->upper_setup.tcp_fields.tucso = tucso;
3550  	}
3551
3552 	if (offload & CSUM_IP) {
3553 		txr->last_hw_ipcss = ipcss;
3554 		txr->last_hw_ipcso = ipcso;
3555  	}
3556
3557	TXD->tcp_seg_setup.data = htole32(0);
3558	TXD->cmd_and_length =
3559	    htole32(adapter->txd_cmd | E1000_TXD_CMD_DEXT | cmd);
3560	tx_buffer = &txr->tx_buffers[cur];
3561	tx_buffer->m_head = NULL;
3562	tx_buffer->next_eop = -1;
3563
3564	if (++cur == adapter->num_tx_desc)
3565		cur = 0;
3566
3567	txr->tx_avail--;
3568	txr->next_avail_desc = cur;
3569}
3570
3571
3572/**********************************************************************
3573 *
3574 *  Setup work for hardware segmentation offload (TSO)
3575 *
3576 **********************************************************************/
3577static void
3578em_tso_setup(struct tx_ring *txr, struct mbuf *mp, int ip_off,
3579    struct ip *ip, struct tcphdr *tp, u32 *txd_upper, u32 *txd_lower)
3580{
3581	struct adapter			*adapter = txr->adapter;
3582	struct e1000_context_desc	*TXD;
3583	struct em_buffer		*tx_buffer;
3584	int cur, hdr_len;
3585
3586	/*
3587	 * In theory we can use the same TSO context if and only if the
3588	 * frame is the same type (IP/TCP) and has the same MSS. However,
3589	 * checking whether a frame has the same IP/TCP structure is a
3590	 * hard thing to do, so just ignore that and always establish a
3591	 * new TSO context.
3592	 */
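	/*
	 * Example (illustrative): for Ethernet + IPv4 + TCP with no
	 * options, ip_off = 14, ip->ip_hl = 5 and tp->th_off = 5, giving
	 * hdr_len = 14 + 20 + 20 = 54; with a 1500-byte MTU the stack
	 * would typically hand down a tso_segsz (MSS) of 1460.
	 */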
3593	hdr_len = ip_off + (ip->ip_hl << 2) + (tp->th_off << 2);
3594	*txd_lower = (E1000_TXD_CMD_DEXT |	/* Extended descr type */
3595		      E1000_TXD_DTYP_D |	/* Data descr type */
3596		      E1000_TXD_CMD_TSE);	/* Do TSE on this packet */
3597
3598	/* IP and/or TCP header checksum calculation and insertion. */
3599	*txd_upper = (E1000_TXD_POPTS_IXSM | E1000_TXD_POPTS_TXSM) << 8;
3600
3601	cur = txr->next_avail_desc;
3602	tx_buffer = &txr->tx_buffers[cur];
3603	TXD = (struct e1000_context_desc *) &txr->tx_base[cur];
3604
3605	/*
3606	 * Start offset for header checksum calculation.
3607	 * End offset for header checksum calculation.
3608	 * Offset of place put the checksum.
3609	 */
3610	TXD->lower_setup.ip_fields.ipcss = ip_off;
3611	TXD->lower_setup.ip_fields.ipcse =
3612	    htole16(ip_off + (ip->ip_hl << 2) - 1);
3613	TXD->lower_setup.ip_fields.ipcso = ip_off + offsetof(struct ip, ip_sum);
3614	/*
3615	 * Start offset for payload checksum calculation.
3616	 * End offset for payload checksum calculation.
3617	 * Offset of place to put the checksum.
3618	 */
3619	TXD->upper_setup.tcp_fields.tucss = ip_off + (ip->ip_hl << 2);
3620	TXD->upper_setup.tcp_fields.tucse = 0;
3621	TXD->upper_setup.tcp_fields.tucso =
3622	    ip_off + (ip->ip_hl << 2) + offsetof(struct tcphdr, th_sum);
3623	/*
3624	 * Payload size per packet w/o any headers.
3625	 * Length of all headers up to payload.
3626	 */
3627	TXD->tcp_seg_setup.fields.mss = htole16(mp->m_pkthdr.tso_segsz);
3628	TXD->tcp_seg_setup.fields.hdr_len = hdr_len;
3629
3630	TXD->cmd_and_length = htole32(adapter->txd_cmd |
3631				E1000_TXD_CMD_DEXT |	/* Extended descr */
3632				E1000_TXD_CMD_TSE |	/* TSE context */
3633				E1000_TXD_CMD_IP |	/* Do IP csum */
3634				E1000_TXD_CMD_TCP |	/* Do TCP checksum */
3635				(mp->m_pkthdr.len - (hdr_len))); /* Total len */
3636
3637	tx_buffer->m_head = NULL;
3638	tx_buffer->next_eop = -1;
3639
3640	if (++cur == adapter->num_tx_desc)
3641		cur = 0;
3642
3643	txr->tx_avail--;
3644	txr->next_avail_desc = cur;
3645	txr->tx_tso = TRUE;
3646}
3647
3648
3649/**********************************************************************
3650 *
3651 *  Examine each tx_buffer in the used queue. If the hardware is done
3652 *  processing the packet then free associated resources. The
3653 *  tx_buffer is put back on the free queue.
3654 *
3655 **********************************************************************/
3656static bool
3657em_txeof(struct tx_ring *txr)
3658{
3659	struct adapter	*adapter = txr->adapter;
3660        int first, last, done, processed;
3661        struct em_buffer *tx_buffer;
3662        struct e1000_tx_desc   *tx_desc, *eop_desc;
3663	struct ifnet   *ifp = adapter->ifp;
3664
3665	EM_TX_LOCK_ASSERT(txr);
3666
3667	/* No work, make sure watchdog is off */
3668        if (txr->tx_avail == adapter->num_tx_desc) {
3669		txr->queue_status = EM_QUEUE_IDLE;
3670                return (FALSE);
3671	}
3672
3673	processed = 0;
3674        first = txr->next_to_clean;
3675        tx_desc = &txr->tx_base[first];
3676        tx_buffer = &txr->tx_buffers[first];
3677	last = tx_buffer->next_eop;
3678        eop_desc = &txr->tx_base[last];
3679
3680	/*
3681	 * What this does is get the index of the
3682	 * first descriptor AFTER the EOP of the
3683	 * first packet, so that we can do a
3684	 * simple comparison in the inner while loop.
3685	 */
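	/*
	 * E.g. (illustrative): with a 1024-entry ring and the EOP at
	 * descriptor 1023, 'done' wraps to 0, so the inner loop stops
	 * exactly one slot past the packet's last descriptor.
	 */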
3686	if (++last == adapter->num_tx_desc)
3687 		last = 0;
3688	done = last;
3689
3690        bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3691            BUS_DMASYNC_POSTREAD);
3692
3693        while (eop_desc->upper.fields.status & E1000_TXD_STAT_DD) {
3694		/* We clean the range of the packet */
3695		while (first != done) {
3696                	tx_desc->upper.data = 0;
3697                	tx_desc->lower.data = 0;
3698                	tx_desc->buffer_addr = 0;
3699                	++txr->tx_avail;
3700			++processed;
3701
3702			if (tx_buffer->m_head) {
3703				bus_dmamap_sync(txr->txtag,
3704				    tx_buffer->map,
3705				    BUS_DMASYNC_POSTWRITE);
3706				bus_dmamap_unload(txr->txtag,
3707				    tx_buffer->map);
3708                        	m_freem(tx_buffer->m_head);
3709                        	tx_buffer->m_head = NULL;
3710                	}
3711			tx_buffer->next_eop = -1;
3712			txr->watchdog_time = ticks;
3713
3714	                if (++first == adapter->num_tx_desc)
3715				first = 0;
3716
3717	                tx_buffer = &txr->tx_buffers[first];
3718			tx_desc = &txr->tx_base[first];
3719		}
3720		++ifp->if_opackets;
3721		/* See if we can continue to the next packet */
3722		last = tx_buffer->next_eop;
3723		if (last != -1) {
3724        		eop_desc = &txr->tx_base[last];
3725			/* Get new done point */
3726			if (++last == adapter->num_tx_desc) last = 0;
3727			done = last;
3728		} else
3729			break;
3730        }
3731        bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3732            BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3733
3734        txr->next_to_clean = first;
3735
3736	/*
3737	** Watchdog calculation: we know there's
3738	** work outstanding or the first return
3739	** would have been taken, so nothing processed
3740	** for too long indicates a hang. The local timer
3741	** will examine this and do a reset if needed.
3742	*/
3743	if ((!processed) && ((ticks - txr->watchdog_time) > EM_WATCHDOG))
3744		txr->queue_status = EM_QUEUE_HUNG;
3745
3746        /*
3747         * If we have a minimum free, clear IFF_DRV_OACTIVE
3748         * to tell the stack that it is OK to send packets.
3749         */
3750        if (txr->tx_avail > EM_MAX_SCATTER)
3751                ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
3752
3753	/* Disable watchdog if all clean */
3754	if (txr->tx_avail == adapter->num_tx_desc) {
3755		txr->queue_status = EM_QUEUE_IDLE;
3756		return (FALSE);
3757	}
3758
3759	return (TRUE);
3760}
3761
3762
3763/*********************************************************************
3764 *
3765 *  Refresh RX descriptor mbufs from system mbuf buffer pool.
3766 *
3767 **********************************************************************/
3768static void
3769em_refresh_mbufs(struct rx_ring *rxr, int limit)
3770{
3771	struct adapter		*adapter = rxr->adapter;
3772	struct mbuf		*m;
3773	bus_dma_segment_t	segs[1];
3774	struct em_buffer	*rxbuf;
3775	int			i, j, error, nsegs;
3776	bool			cleaned = FALSE;
3777
3778	i = j = rxr->next_to_refresh;
3779	/*
3780	** Get one descriptor beyond
3781	** our work mark to control
3782	** the loop.
3783	*/
3784	if (++j == adapter->num_rx_desc)
3785		j = 0;
3786
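	/*
	 * E.g. (illustrative): with a 1024-entry ring and
	 * next_to_refresh == 1023, j wraps to 0; the loop below then
	 * refreshes slot 1023, 0, 1, ... and exits when j catches up
	 * with 'limit'.
	 */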
3787	while (j != limit) {
3788		rxbuf = &rxr->rx_buffers[i];
3789		if (rxbuf->m_head == NULL) {
3790			m = m_getjcl(M_DONTWAIT, MT_DATA,
3791			    M_PKTHDR, adapter->rx_mbuf_sz);
3792			/*
3793			** If we have a temporary resource shortage
3794			** that causes a failure, just abort refresh
3795			** for now, we will return to this point when
3796			** reinvoked from em_rxeof.
3797			*/
3798			if (m == NULL)
3799				goto update;
3800		} else
3801			m = rxbuf->m_head;
3802
3803		m->m_len = m->m_pkthdr.len = adapter->rx_mbuf_sz;
3804		m->m_flags |= M_PKTHDR;
3805		m->m_data = m->m_ext.ext_buf;
3806
3807		/* Use bus_dma machinery to setup the memory mapping  */
3808		error = bus_dmamap_load_mbuf_sg(rxr->rxtag, rxbuf->map,
3809		    m, segs, &nsegs, BUS_DMA_NOWAIT);
3810		if (error != 0) {
3811			printf("Refresh mbufs: hdr dmamap load"
3812			    " failure - %d\n", error);
3813			m_free(m);
3814			rxbuf->m_head = NULL;
3815			goto update;
3816		}
3817		rxbuf->m_head = m;
3818		bus_dmamap_sync(rxr->rxtag,
3819		    rxbuf->map, BUS_DMASYNC_PREREAD);
3820		rxr->rx_base[i].buffer_addr = htole64(segs[0].ds_addr);
3821		cleaned = TRUE;
3822
3823		i = j; /* Next is precalculated for us */
3824		rxr->next_to_refresh = i;
3825		/* Calculate next controlling index */
3826		if (++j == adapter->num_rx_desc)
3827			j = 0;
3828	}
3829update:
3830	/*
3831	** Update the tail pointer only if,
3832	** and as far as we have refreshed.
3833	*/
3834	if (cleaned)
3835		E1000_WRITE_REG(&adapter->hw,
3836		    E1000_RDT(rxr->me), rxr->next_to_refresh);
3837
3838	return;
3839}
3840
3841
3842/*********************************************************************
3843 *
3844 *  Allocate memory for rx_buffer structures. Since we use one
3845 *  rx_buffer per received packet, the maximum number of rx_buffer's
3846 *  that we'll need is equal to the number of receive descriptors
3847 *  that we've allocated.
3848 *
3849 **********************************************************************/
3850static int
3851em_allocate_receive_buffers(struct rx_ring *rxr)
3852{
3853	struct adapter		*adapter = rxr->adapter;
3854	device_t		dev = adapter->dev;
3855	struct em_buffer	*rxbuf;
3856	int			error;
3857
3858	rxr->rx_buffers = malloc(sizeof(struct em_buffer) *
3859	    adapter->num_rx_desc, M_DEVBUF, M_NOWAIT | M_ZERO);
3860	if (rxr->rx_buffers == NULL) {
3861		device_printf(dev, "Unable to allocate rx_buffer memory\n");
3862		return (ENOMEM);
3863	}
3864
3865	error = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */
3866				1, 0,			/* alignment, bounds */
3867				BUS_SPACE_MAXADDR,	/* lowaddr */
3868				BUS_SPACE_MAXADDR,	/* highaddr */
3869				NULL, NULL,		/* filter, filterarg */
3870				MJUM9BYTES,		/* maxsize */
3871				1,			/* nsegments */
3872				MJUM9BYTES,		/* maxsegsize */
3873				0,			/* flags */
3874				NULL,			/* lockfunc */
3875				NULL,			/* lockarg */
3876				&rxr->rxtag);
3877	if (error) {
3878		device_printf(dev, "%s: bus_dma_tag_create failed %d\n",
3879		    __func__, error);
3880		goto fail;
3881	}
3882
3883	rxbuf = rxr->rx_buffers;
3884	for (int i = 0; i < adapter->num_rx_desc; i++, rxbuf++) {
3886		error = bus_dmamap_create(rxr->rxtag, BUS_DMA_NOWAIT,
3887		    &rxbuf->map);
3888		if (error) {
3889			device_printf(dev, "%s: bus_dmamap_create failed: %d\n",
3890			    __func__, error);
3891			goto fail;
3892		}
3893	}
3894
3895	return (0);
3896
3897fail:
3898	em_free_receive_structures(adapter);
3899	return (error);
3900}
3901
3902
3903/*********************************************************************
3904 *
3905 *  Initialize a receive ring and its buffers.
3906 *
3907 **********************************************************************/
3908static int
3909em_setup_receive_ring(struct rx_ring *rxr)
3910{
3911	struct	adapter 	*adapter = rxr->adapter;
3912	struct em_buffer	*rxbuf;
3913	bus_dma_segment_t	seg[1];
3914	int			i, j, nsegs, error;
3915
3916
3917	/* Clear the ring contents */
3918	EM_RX_LOCK(rxr);
3919
3920	/* Invalidate all descriptors */
3921	for (i = 0; i < adapter->num_rx_desc; i++) {
3922		struct e1000_rx_desc* cur;
3923		cur = &rxr->rx_base[i];
3924		cur->status = 0;
3925	}
3926
3927	/* Now replenish the mbufs */
3928	i = j = rxr->next_to_refresh;
3929	if (++j == adapter->num_rx_desc)
3930		j = 0;
3931
3932	while (j != rxr->next_to_check) {
3933		rxbuf = &rxr->rx_buffers[i];
3934		rxbuf->m_head = m_getjcl(M_DONTWAIT, MT_DATA,
3935		    M_PKTHDR, adapter->rx_mbuf_sz);
3936		if (rxbuf->m_head == NULL) {
3937			error = ENOBUFS;
3938			goto fail;
3939		}
3940		rxbuf->m_head->m_len = adapter->rx_mbuf_sz;
3941		rxbuf->m_head->m_flags &= ~M_HASFCS; /* we strip it */
3942		rxbuf->m_head->m_pkthdr.len = adapter->rx_mbuf_sz;
3943
3944		/* Get the memory mapping */
3945		error = bus_dmamap_load_mbuf_sg(rxr->rxtag,
3946		    rxbuf->map, rxbuf->m_head, seg,
3947		    &nsegs, BUS_DMA_NOWAIT);
3948		if (error != 0) {
3949			m_freem(rxbuf->m_head);
3950			rxbuf->m_head = NULL;
3951			goto fail;
3952		}
3953		bus_dmamap_sync(rxr->rxtag,
3954		    rxbuf->map, BUS_DMASYNC_PREREAD);
3955
3956		/* Update descriptor */
3957		rxr->rx_base[i].buffer_addr = htole64(seg[0].ds_addr);
3958		i = j;
3959		if (++j == adapter->num_rx_desc)
3960			j = 0;
3961	}
3962
3963fail:
3964	rxr->next_to_refresh = i;
3965	bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
3966	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3967	EM_RX_UNLOCK(rxr);
3968	return (error);
3969}
3970
3971/*********************************************************************
3972 *
3973 *  Initialize all receive rings.
3974 *
3975 **********************************************************************/
3976static int
3977em_setup_receive_structures(struct adapter *adapter)
3978{
3979	struct rx_ring *rxr = adapter->rx_rings;
3980	int q;
3981
3982	for (q = 0; q < adapter->num_queues; q++, rxr++)
3983		if (em_setup_receive_ring(rxr))
3984			goto fail;
3985
3986	return (0);
3987fail:
3988	/*
3989	 * Free the RX buffers allocated so far; we only handle
3990	 * the rings that completed, since the failing case will have
3991	 * cleaned up after itself. 'q' failed, so it's the terminus.
3992	 */
3993	for (int i = 0, n = 0; i < q; ++i) {
3994		rxr = &adapter->rx_rings[i];
3995		n = rxr->next_to_check;
3996		while (n != rxr->next_to_refresh) {
3997			struct em_buffer *rxbuf;
3998			rxbuf = &rxr->rx_buffers[n];
3999			if (rxbuf->m_head != NULL) {
4000				bus_dmamap_sync(rxr->rxtag, rxbuf->map,
4001			  	  BUS_DMASYNC_POSTREAD);
4002				bus_dmamap_unload(rxr->rxtag, rxbuf->map);
4003				m_freem(rxbuf->m_head);
4004				rxbuf->m_head = NULL;
4005			}
4006			if (++n == adapter->num_rx_desc)
4007				n = 0;
4008		}
4009		rxr->next_to_check = 0;
4010		rxr->next_to_refresh = 0;
4011	}
4012
4013	return (ENOBUFS);
4014}
4015
4016/*********************************************************************
4017 *
4018 *  Free all receive rings.
4019 *
4020 **********************************************************************/
4021static void
4022em_free_receive_structures(struct adapter *adapter)
4023{
4024	struct rx_ring *rxr = adapter->rx_rings;
4025
4026	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4027		em_free_receive_buffers(rxr);
4028		/* Free the ring memory as well */
4029		em_dma_free(adapter, &rxr->rxdma);
4030		EM_RX_LOCK_DESTROY(rxr);
4031	}
4032
4033	free(adapter->rx_rings, M_DEVBUF);
4034}
4035
4036
4037/*********************************************************************
4038 *
4039 *  Free receive ring data structures
4040 *
4041 **********************************************************************/
4042static void
4043em_free_receive_buffers(struct rx_ring *rxr)
4044{
4045	struct adapter		*adapter = rxr->adapter;
4046	struct em_buffer	*rxbuf = NULL;
4047
4048	INIT_DEBUGOUT("free_receive_buffers: begin");
4049
4050	if (rxr->rx_buffers != NULL) {
4051		int i = rxr->next_to_check;
4052		while (i != rxr->next_to_refresh) {
4053			rxbuf = &rxr->rx_buffers[i];
4054			if (rxbuf->map != NULL) {
4055				bus_dmamap_sync(rxr->rxtag, rxbuf->map,
4056				    BUS_DMASYNC_POSTREAD);
4057				bus_dmamap_unload(rxr->rxtag, rxbuf->map);
4058				bus_dmamap_destroy(rxr->rxtag, rxbuf->map);
4059			}
4060			if (rxbuf->m_head != NULL) {
4061				m_freem(rxbuf->m_head);
4062				rxbuf->m_head = NULL;
4063			}
4064			if (++i == adapter->num_rx_desc)
4065				i = 0;
4066		}
4067		free(rxr->rx_buffers, M_DEVBUF);
4068		rxr->rx_buffers = NULL;
4069		rxr->next_to_check = 0;
4070		rxr->next_to_refresh = 0;
4071	}
4072
4073	if (rxr->rxtag != NULL) {
4074		bus_dma_tag_destroy(rxr->rxtag);
4075		rxr->rxtag = NULL;
4076	}
4077
4078	return;
4079}
4080
4081
4082/*********************************************************************
4083 *
4084 *  Enable receive unit.
4085 *
4086 **********************************************************************/
4087#define MAX_INTS_PER_SEC	8000
4088#define DEFAULT_ITR	(1000000000/(MAX_INTS_PER_SEC * 256))
4089
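/*
 * Worked example: 1000000000 / (8000 * 256) = 488; the ITR register
 * counts in 256ns units, so 488 * 256ns is roughly 125us between
 * interrupts, i.e. at most about 8000 interrupts per second.
 */
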
4090static void
4091em_initialize_receive_unit(struct adapter *adapter)
4092{
4093	struct rx_ring	*rxr = adapter->rx_rings;
4094	struct ifnet	*ifp = adapter->ifp;
4095	struct e1000_hw	*hw = &adapter->hw;
4096	u64	bus_addr;
4097	u32	rctl, rxcsum;
4098
4099	INIT_DEBUGOUT("em_initialize_receive_unit: begin");
4100
4101	/*
4102	 * Make sure receives are disabled while setting
4103	 * up the descriptor ring
4104	 */
4105	rctl = E1000_READ_REG(hw, E1000_RCTL);
4106	E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
4107
4108	E1000_WRITE_REG(&adapter->hw, E1000_RADV,
4109	    adapter->rx_abs_int_delay.value);
4110	/*
4111	 * Set the interrupt throttling rate. Value is calculated
4112	 * as DEFAULT_ITR = 1/(MAX_INTS_PER_SEC * 256ns)
4113	 */
4114	E1000_WRITE_REG(hw, E1000_ITR, DEFAULT_ITR);
4115
4116	/*
4117	** When using MSIX interrupts we need to throttle
4118	** using the EITR register (82574 only)
4119	*/
4120	if (hw->mac.type == e1000_82574)
4121		for (int i = 0; i < 4; i++)
4122			E1000_WRITE_REG(hw, E1000_EITR_82574(i),
4123			    DEFAULT_ITR);
4124
4125	/* Disable accelerated acknowledgement */
4126	if (adapter->hw.mac.type == e1000_82574)
4127		E1000_WRITE_REG(hw, E1000_RFCTL, E1000_RFCTL_ACK_DIS);
4128
4129	if (ifp->if_capenable & IFCAP_RXCSUM) {
4130		rxcsum = E1000_READ_REG(hw, E1000_RXCSUM);
4131		rxcsum |= (E1000_RXCSUM_IPOFL | E1000_RXCSUM_TUOFL);
4132		E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum);
4133	}
4134
4135	/*
4136	** XXX TEMPORARY WORKAROUND: on some systems with 82573
4137	** long latencies are observed, like Lenovo X60. This
4138	** change eliminates the problem, but since having positive
4139	** values in RDTR is a known source of problems on other
4140	** platforms another solution is being sought.
4141	*/
4142	if (hw->mac.type == e1000_82573)
4143		E1000_WRITE_REG(hw, E1000_RDTR, 0x20);
4144
4145	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4146		/* Setup the Base and Length of the Rx Descriptor Ring */
4147		bus_addr = rxr->rxdma.dma_paddr;
4148		E1000_WRITE_REG(hw, E1000_RDLEN(i),
4149		    adapter->num_rx_desc * sizeof(struct e1000_rx_desc));
4150		E1000_WRITE_REG(hw, E1000_RDBAH(i), (u32)(bus_addr >> 32));
4151		E1000_WRITE_REG(hw, E1000_RDBAL(i), (u32)bus_addr);
4152		/* Setup the Head and Tail Descriptor Pointers */
4153		E1000_WRITE_REG(hw, E1000_RDH(i), rxr->next_to_check);
4154		E1000_WRITE_REG(hw, E1000_RDT(i), rxr->next_to_refresh);
4155	}
4156
4157	/* Set early receive threshold on appropriate hw */
4158	if (((adapter->hw.mac.type == e1000_ich9lan) ||
4159	    (adapter->hw.mac.type == e1000_pch2lan) ||
4160	    (adapter->hw.mac.type == e1000_ich10lan)) &&
4161	    (ifp->if_mtu > ETHERMTU)) {
4162		u32 rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(0));
4163		E1000_WRITE_REG(hw, E1000_RXDCTL(0), rxdctl | 3);
4164		E1000_WRITE_REG(hw, E1000_ERT, 0x100 | (1 << 13));
4165	}
4166
4167	if (adapter->hw.mac.type == e1000_pch2lan) {
4168		if (ifp->if_mtu > ETHERMTU)
4169			e1000_lv_jumbo_workaround_ich8lan(hw, TRUE);
4170		else
4171			e1000_lv_jumbo_workaround_ich8lan(hw, FALSE);
4172	}
4173
4174	/* Setup the Receive Control Register */
4175	rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
4176	rctl |= E1000_RCTL_EN | E1000_RCTL_BAM |
4177	    E1000_RCTL_LBM_NO | E1000_RCTL_RDMTS_HALF |
4178	    (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
4179
4180        /* Strip the CRC */
4181        rctl |= E1000_RCTL_SECRC;
4182
4183        /* Make sure VLAN Filters are off */
4184        rctl &= ~E1000_RCTL_VFE;
4185	rctl &= ~E1000_RCTL_SBP;
4186
4187	if (adapter->rx_mbuf_sz == MCLBYTES)
4188		rctl |= E1000_RCTL_SZ_2048;
4189	else if (adapter->rx_mbuf_sz == MJUMPAGESIZE)
4190		rctl |= E1000_RCTL_SZ_4096 | E1000_RCTL_BSEX;
4191	else if (adapter->rx_mbuf_sz > MJUMPAGESIZE)
4192		rctl |= E1000_RCTL_SZ_8192 | E1000_RCTL_BSEX;
4193
4194	if (ifp->if_mtu > ETHERMTU)
4195		rctl |= E1000_RCTL_LPE;
4196	else
4197		rctl &= ~E1000_RCTL_LPE;
4198
4199	/* Write out the settings */
4200	E1000_WRITE_REG(hw, E1000_RCTL, rctl);
4201
4202	return;
4203}
4204
4205
4206/*********************************************************************
4207 *
4208 *  This routine executes in interrupt context. It replenishes
4209 *  the mbufs in the descriptor ring and sends data which has
4210 *  been dma'ed into host memory to the upper layer.
4211 *
4212 *  We loop at most count times if count is > 0, or until done if
4213 *  count < 0.
4214 *
4215 *  For polling we also now return the number of cleaned packets
4216 *********************************************************************/
4217static bool
4218em_rxeof(struct rx_ring *rxr, int count, int *done)
4219{
4220	struct adapter		*adapter = rxr->adapter;
4221	struct ifnet		*ifp = adapter->ifp;
4222	struct mbuf		*mp, *sendmp;
4223	u8			status = 0;
4224	u16 			len;
4225	int			i, processed, rxdone = 0;
4226	bool			eop;
4227	struct e1000_rx_desc	*cur;
4228
4229	EM_RX_LOCK(rxr);
4230
4231	for (i = rxr->next_to_check, processed = 0; count != 0;) {
4232
4233		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
4234			break;
4235
4236		bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4237		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
4238
4239		cur = &rxr->rx_base[i];
4240		status = cur->status;
4241		mp = sendmp = NULL;
4242
4243		if ((status & E1000_RXD_STAT_DD) == 0)
4244			break;
4245
4246		len = le16toh(cur->length);
4247		eop = (status & E1000_RXD_STAT_EOP) != 0;
4248
4249		if ((cur->errors & E1000_RXD_ERR_FRAME_ERR_MASK) ||
4250		    (rxr->discard == TRUE)) {
4251			ifp->if_ierrors++;
4252			++rxr->rx_discarded;
4253			if (!eop) /* Catch subsequent segs */
4254				rxr->discard = TRUE;
4255			else
4256				rxr->discard = FALSE;
4257			em_rx_discard(rxr, i);
4258			goto next_desc;
4259		}
4260
4261		/* Assign correct length to the current fragment */
4262		mp = rxr->rx_buffers[i].m_head;
4263		mp->m_len = len;
4264
4265		/* Trigger for refresh */
4266		rxr->rx_buffers[i].m_head = NULL;
4267
4268		/* First segment? */
4269		if (rxr->fmp == NULL) {
4270			mp->m_pkthdr.len = len;
4271			rxr->fmp = rxr->lmp = mp;
4272		} else {
4273			/* Chain mbuf's together */
4274			mp->m_flags &= ~M_PKTHDR;
4275			rxr->lmp->m_next = mp;
4276			rxr->lmp = mp;
4277			rxr->fmp->m_pkthdr.len += len;
4278		}
4279
4280		if (eop) {
4281			--count;
4282			sendmp = rxr->fmp;
4283			sendmp->m_pkthdr.rcvif = ifp;
4284			ifp->if_ipackets++;
4285			em_receive_checksum(cur, sendmp);
4286#ifndef __NO_STRICT_ALIGNMENT
4287			if (adapter->max_frame_size >
4288			    (MCLBYTES - ETHER_ALIGN) &&
4289			    em_fixup_rx(rxr) != 0)
4290				goto skip;
4291#endif
4292			if (status & E1000_RXD_STAT_VP) {
4293				sendmp->m_pkthdr.ether_vtag =
4294				    (le16toh(cur->special) &
4295				    E1000_RXD_SPC_VLAN_MASK);
4296				sendmp->m_flags |= M_VLANTAG;
4297			}
4298#ifdef EM_MULTIQUEUE
4299			sendmp->m_pkthdr.flowid = rxr->msix;
4300			sendmp->m_flags |= M_FLOWID;
4301#endif
4302#ifndef __NO_STRICT_ALIGNMENT
4303skip:
4304#endif
4305			rxr->fmp = rxr->lmp = NULL;
4306		}
4307next_desc:
4308		/* Zero out the receive descriptors status. */
4309		cur->status = 0;
4310		++rxdone;	/* cumulative for POLL */
4311		++processed;
4312
4313		/* Advance our pointers to the next descriptor. */
4314		if (++i == adapter->num_rx_desc)
4315			i = 0;
4316
4317		/* Send to the stack */
4318		if (sendmp != NULL) {
4319			rxr->next_to_check = i;
4320			EM_RX_UNLOCK(rxr);
4321			(*ifp->if_input)(ifp, sendmp);
4322			EM_RX_LOCK(rxr);
4323			i = rxr->next_to_check;
4324		}
4325
4326		/* Only refresh mbufs every 8 descriptors */
4327		if (processed == 8) {
4328			em_refresh_mbufs(rxr, i);
4329			processed = 0;
4330		}
4331	}
4332
4333	/* Catch any remaining refresh work */
4334	if (e1000_rx_unrefreshed(rxr))
4335		em_refresh_mbufs(rxr, i);
4336
4337	rxr->next_to_check = i;
4338	if (done != NULL)
4339		*done = rxdone;
4340	EM_RX_UNLOCK(rxr);
4341
4342	return ((status & E1000_RXD_STAT_DD) ? TRUE : FALSE);
4343}
4344
4345static __inline void
4346em_rx_discard(struct rx_ring *rxr, int i)
4347{
4348	struct em_buffer	*rbuf;
4349
4350	rbuf = &rxr->rx_buffers[i];
4351	/* Free any previous pieces */
4352	if (rxr->fmp != NULL) {
4353		rxr->fmp->m_flags |= M_PKTHDR;
4354		m_freem(rxr->fmp);
4355		rxr->fmp = NULL;
4356		rxr->lmp = NULL;
4357	}
4358	/*
4359	** Free buffer and allow em_refresh_mbufs()
4360	** to clean up and recharge buffer.
4361	*/
4362	if (rbuf->m_head) {
4363		m_free(rbuf->m_head);
4364		rbuf->m_head = NULL;
4365	}
4366	return;
4367}
4368
4369#ifndef __NO_STRICT_ALIGNMENT
4370/*
4371 * When jumbo frames are enabled we should realign the entire payload on
4372 * architectures with strict alignment; this is a serious design mistake
4373 * of the 8254x, as it nullifies the benefit of DMA. The chip only allows
4374 * RX buffer sizes of 2048/4096/8192/16384, while what we really want is
4375 * 2048 - ETHER_ALIGN so that the payload would come out aligned (even on
4376 * architectures without strict alignment the unaligned accesses cost
4377 * some performance). To avoid copying an entire frame just to align it,
4378 * we allocate a new mbuf and copy only the ethernet header into it. The
4379 * new mbuf is then prepended onto the existing mbuf chain.
4380 *
4381 * Be aware, the best performance of the 8254x is achieved only when
4382 * jumbo frames are not used at all on architectures with strict alignment.
4383 */
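/*
 * Worked example for the in-place branch below (illustrative): with the
 * frame starting at an aligned cluster address, the IP header initially
 * sits at offset ETHER_HDR_LEN = 14 (14 % 4 == 2, misaligned). Copying
 * the whole frame forward by 14 bytes and advancing m_data by 14 leaves
 * the IP header at offset 28 (28 % 4 == 0, aligned).
 */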
4384static int
4385em_fixup_rx(struct rx_ring *rxr)
4386{
4387	struct adapter *adapter = rxr->adapter;
4388	struct mbuf *m, *n;
4389	int error;
4390
4391	error = 0;
4392	m = rxr->fmp;
4393	if (m->m_len <= (MCLBYTES - ETHER_HDR_LEN)) {
4394		bcopy(m->m_data, m->m_data + ETHER_HDR_LEN, m->m_len);
4395		m->m_data += ETHER_HDR_LEN;
4396	} else {
4397		MGETHDR(n, M_DONTWAIT, MT_DATA);
4398		if (n != NULL) {
4399			bcopy(m->m_data, n->m_data, ETHER_HDR_LEN);
4400			m->m_data += ETHER_HDR_LEN;
4401			m->m_len -= ETHER_HDR_LEN;
4402			n->m_len = ETHER_HDR_LEN;
4403			M_MOVE_PKTHDR(n, m);
4404			n->m_next = m;
4405			rxr->fmp = n;
4406		} else {
4407			adapter->dropped_pkts++;
4408			m_freem(rxr->fmp);
4409			rxr->fmp = NULL;
4410			error = ENOMEM;
4411		}
4412	}
4413
4414	return (error);
4415}
4416#endif
4417
4418/*********************************************************************
4419 *
4420 *  Verify that the hardware indicated that the checksum is valid.
4421 *  Inform the stack about the status of checksum so that stack
4422 *  doesn't spend time verifying the checksum.
4423 *
4424 *********************************************************************/
4425static void
4426em_receive_checksum(struct e1000_rx_desc *rx_desc, struct mbuf *mp)
4427{
4428	/* Ignore Checksum bit is set */
4429	if (rx_desc->status & E1000_RXD_STAT_IXSM) {
4430		mp->m_pkthdr.csum_flags = 0;
4431		return;
4432	}
4433
4434	if (rx_desc->status & E1000_RXD_STAT_IPCS) {
4435		/* Did it pass? */
4436		if (!(rx_desc->errors & E1000_RXD_ERR_IPE)) {
4437			/* IP Checksum Good */
4438			mp->m_pkthdr.csum_flags = CSUM_IP_CHECKED;
4439			mp->m_pkthdr.csum_flags |= CSUM_IP_VALID;
4440
4441		} else {
4442			mp->m_pkthdr.csum_flags = 0;
4443		}
4444	}
4445
4446	if (rx_desc->status & E1000_RXD_STAT_TCPCS) {
4447		/* Did it pass? */
4448		if (!(rx_desc->errors & E1000_RXD_ERR_TCPE)) {
4449			mp->m_pkthdr.csum_flags |=
4450			(CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
4451			mp->m_pkthdr.csum_data = htons(0xffff);
4452		}
4453	}
4454}
4455
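/*
 * Example (illustrative): a descriptor with both STAT_IPCS and STAT_TCPCS
 * set and no error bits yields csum_flags = CSUM_IP_CHECKED |
 * CSUM_IP_VALID | CSUM_DATA_VALID | CSUM_PSEUDO_HDR with csum_data of
 * 0xffff, which tells the stack to skip software checksum verification.
 */
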
4456/*
4457 * This routine is run via a vlan
4458 * config EVENT.
4459 */
4460static void
4461em_register_vlan(void *arg, struct ifnet *ifp, u16 vtag)
4462{
4463	struct adapter	*adapter = ifp->if_softc;
4464	u32		index, bit;
4465
4466	if (ifp->if_softc !=  arg)   /* Not our event */
4467		return;
4468
4469	if ((vtag == 0) || (vtag > 4095))       /* Invalid ID */
4470                return;
4471
4472	EM_CORE_LOCK(adapter);
4473	index = (vtag >> 5) & 0x7F;
4474	bit = vtag & 0x1F;
4475	adapter->shadow_vfta[index] |= (1 << bit);
4476	++adapter->num_vlans;
4477	/* Re-init to load the changes */
4478	if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
4479		em_init_locked(adapter);
4480	EM_CORE_UNLOCK(adapter);
4481}
4482
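/*
 * Worked example (illustrative): vtag 100 gives index = (100 >> 5) & 0x7F
 * = 3 and bit = 100 & 0x1F = 4, so VLAN 100 is tracked by bit 4 of
 * shadow_vfta[3]; 128 words x 32 bits cover all 4096 possible VLAN IDs.
 */
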
4483/*
4484 * This routine is run via a vlan
4485 * unconfig EVENT.
4486 */
4487static void
4488em_unregister_vlan(void *arg, struct ifnet *ifp, u16 vtag)
4489{
4490	struct adapter	*adapter = ifp->if_softc;
4491	u32		index, bit;
4492
4493	if (ifp->if_softc !=  arg)
4494		return;
4495
4496	if ((vtag == 0) || (vtag > 4095))       /* Invalid */
4497                return;
4498
4499	EM_CORE_LOCK(adapter);
4500	index = (vtag >> 5) & 0x7F;
4501	bit = vtag & 0x1F;
4502	adapter->shadow_vfta[index] &= ~(1 << bit);
4503	--adapter->num_vlans;
4504	/* Re-init to load the changes */
4505	if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
4506		em_init_locked(adapter);
4507	EM_CORE_UNLOCK(adapter);
4508}
4509
4510static void
4511em_setup_vlan_hw_support(struct adapter *adapter)
4512{
4513	struct e1000_hw *hw = &adapter->hw;
4514	u32             reg;
4515
4516	/*
4517	** We get here thru init_locked, meaning
4518	** a soft reset which has already cleared
4519	** the VFTA and other state, so if no
4520	** vlans have been registered, do nothing.
4521	*/
4522	if (adapter->num_vlans == 0)
4523                return;
4524
4525	/*
4526	** A soft reset zeroes out the VFTA, so
4527	** we need to repopulate it now.
4528	*/
4529	for (int i = 0; i < EM_VFTA_SIZE; i++)
4530                if (adapter->shadow_vfta[i] != 0)
4531			E1000_WRITE_REG_ARRAY(hw, E1000_VFTA,
4532                            i, adapter->shadow_vfta[i]);
4533
4534	reg = E1000_READ_REG(hw, E1000_CTRL);
4535	reg |= E1000_CTRL_VME;
4536	E1000_WRITE_REG(hw, E1000_CTRL, reg);
4537
4538	/* Enable the Filter Table */
4539	reg = E1000_READ_REG(hw, E1000_RCTL);
4540	reg &= ~E1000_RCTL_CFIEN;
4541	reg |= E1000_RCTL_VFE;
4542	E1000_WRITE_REG(hw, E1000_RCTL, reg);
4543}
4544
4545static void
4546em_enable_intr(struct adapter *adapter)
4547{
4548	struct e1000_hw *hw = &adapter->hw;
4549	u32 ims_mask = IMS_ENABLE_MASK;
4550
4551	if (hw->mac.type == e1000_82574) {
4552		E1000_WRITE_REG(hw, EM_EIAC, EM_MSIX_MASK);
4553		ims_mask |= EM_MSIX_MASK;
4554	}
4555	E1000_WRITE_REG(hw, E1000_IMS, ims_mask);
4556}
4557
4558static void
4559em_disable_intr(struct adapter *adapter)
4560{
4561	struct e1000_hw *hw = &adapter->hw;
4562
4563	if (hw->mac.type == e1000_82574)
4564		E1000_WRITE_REG(hw, EM_EIAC, 0);
4565	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
4566}
4567
4568/*
4569 * Bit of a misnomer, what this really means is
4570 * to enable OS management of the system... aka
4571 * to disable special hardware management features
4572 */
4573static void
4574em_init_manageability(struct adapter *adapter)
4575{
4576	/* A shared code workaround */
4577#define E1000_82542_MANC2H E1000_MANC2H
4578	if (adapter->has_manage) {
4579		int manc2h = E1000_READ_REG(&adapter->hw, E1000_MANC2H);
4580		int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
4581
4582		/* disable hardware interception of ARP */
4583		manc &= ~(E1000_MANC_ARP_EN);
4584
4585                /* enable receiving management packets to the host */
4586		manc |= E1000_MANC_EN_MNG2HOST;
4587#define E1000_MNG2HOST_PORT_623 (1 << 5)
4588#define E1000_MNG2HOST_PORT_664 (1 << 6)
4589		manc2h |= E1000_MNG2HOST_PORT_623;
4590		manc2h |= E1000_MNG2HOST_PORT_664;
4591		E1000_WRITE_REG(&adapter->hw, E1000_MANC2H, manc2h);
4592		E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
4593	}
4594}
4595
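/*
 * Note (informational): per the defines above, MANC2H bits 5 and 6 select
 * UDP ports 623 and 664, the standard ASF/RMCP remote-management ports,
 * so matching management traffic is passed up to the host.
 */
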
4596/*
4597 * Give control back to hardware management
4598 * controller if there is one.
4599 */
4600static void
4601em_release_manageability(struct adapter *adapter)
4602{
4603	if (adapter->has_manage) {
4604		int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
4605
4606		/* re-enable hardware interception of ARP */
4607		manc |= E1000_MANC_ARP_EN;
4608		manc &= ~E1000_MANC_EN_MNG2HOST;
4609
4610		E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
4611	}
4612}
4613
4614/*
4615 * em_get_hw_control sets the {CTRL_EXT|FWSM}:DRV_LOAD bit.
4616 * For ASF and Pass Through versions of f/w this means
4617 * that the driver is loaded. For AMT version type f/w
4618 * this means that the network i/f is open.
4619 */
4620static void
4621em_get_hw_control(struct adapter *adapter)
4622{
4623	u32 ctrl_ext, swsm;
4624
4625	if (adapter->hw.mac.type == e1000_82573) {
4626		swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM);
4627		E1000_WRITE_REG(&adapter->hw, E1000_SWSM,
4628		    swsm | E1000_SWSM_DRV_LOAD);
4629		return;
4630	}
4631	/* else */
4632	ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4633	E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
4634	    ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
4635	return;
4636}
4637
4638/*
4639 * em_release_hw_control resets {CTRL_EXT|FWSM}:DRV_LOAD bit.
4640 * For ASF and Pass Through versions of f/w this means that
4641 * the driver is no longer loaded. For AMT versions of the
4642 * f/w this means that the network i/f is closed.
4643 */
4644static void
4645em_release_hw_control(struct adapter *adapter)
4646{
4647	u32 ctrl_ext, swsm;
4648
4649	if (!adapter->has_manage)
4650		return;
4651
4652	if (adapter->hw.mac.type == e1000_82573) {
4653		swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM);
4654		E1000_WRITE_REG(&adapter->hw, E1000_SWSM,
4655		    swsm & ~E1000_SWSM_DRV_LOAD);
4656		return;
4657	}
4658	/* else */
4659	ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4660	E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
4661	    ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
4662	return;
4663}
4664
4665static int
4666em_is_valid_ether_addr(u8 *addr)
4667{
4668	char zero_addr[6] = { 0, 0, 0, 0, 0, 0 };
4669
4670	if ((addr[0] & 1) || (!bcmp(addr, zero_addr, ETHER_ADDR_LEN))) {
4671		return (FALSE);
4672	}
4673
4674	return (TRUE);
4675}
4676
4677/*
4678** Parse the interface capabilities with regard
4679** to both system management and wake-on-lan for
4680** later use.
4681*/
4682static void
4683em_get_wakeup(device_t dev)
4684{
4685	struct adapter	*adapter = device_get_softc(dev);
4686	u16		eeprom_data = 0, device_id, apme_mask;
4687
4688	adapter->has_manage = e1000_enable_mng_pass_thru(&adapter->hw);
4689	apme_mask = EM_EEPROM_APME;
4690
4691	switch (adapter->hw.mac.type) {
4692	case e1000_82573:
4693	case e1000_82583:
4694		adapter->has_amt = TRUE;
4695		/* Falls thru */
4696	case e1000_82571:
4697	case e1000_82572:
4698	case e1000_80003es2lan:
4699		if (adapter->hw.bus.func == 1) {
4700			e1000_read_nvm(&adapter->hw,
4701			    NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data);
4702			break;
4703		} else
4704			e1000_read_nvm(&adapter->hw,
4705			    NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
4706		break;
4707	case e1000_ich8lan:
4708	case e1000_ich9lan:
4709	case e1000_ich10lan:
4710	case e1000_pchlan:
4711	case e1000_pch2lan:
4712		apme_mask = E1000_WUC_APME;
4713		adapter->has_amt = TRUE;
4714		eeprom_data = E1000_READ_REG(&adapter->hw, E1000_WUC);
4715		break;
4716	default:
4717		e1000_read_nvm(&adapter->hw,
4718		    NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
4719		break;
4720	}
4721	if (eeprom_data & apme_mask)
4722		adapter->wol = (E1000_WUFC_MAG | E1000_WUFC_MC);
4723	/*
4724         * We have the eeprom settings, now apply the special cases
4725         * where the eeprom may be wrong or the board won't support
4726         * wake on lan on a particular port
4727	 */
4728	device_id = pci_get_device(dev);
4729        switch (device_id) {
4730	case E1000_DEV_ID_82571EB_FIBER:
4731		/* Wake events only supported on port A for dual fiber
4732		 * regardless of eeprom setting */
4733		if (E1000_READ_REG(&adapter->hw, E1000_STATUS) &
4734		    E1000_STATUS_FUNC_1)
4735			adapter->wol = 0;
4736		break;
4737	case E1000_DEV_ID_82571EB_QUAD_COPPER:
4738	case E1000_DEV_ID_82571EB_QUAD_FIBER:
4739	case E1000_DEV_ID_82571EB_QUAD_COPPER_LP:
4740                /* if quad port adapter, disable WoL on all but port A */
4741		if (global_quad_port_a != 0)
4742			adapter->wol = 0;
4743		/* Reset for multiple quad port adapters */
4744		if (++global_quad_port_a == 4)
4745			global_quad_port_a = 0;
4746                break;
4747	}
4748	return;
4749}
4750
4751
4752/*
4753 * Enable PCI Wake On Lan capability
4754 */
4755static void
4756em_enable_wakeup(device_t dev)
4757{
4758	struct adapter	*adapter = device_get_softc(dev);
4759	struct ifnet	*ifp = adapter->ifp;
4760	u32		pmc, ctrl, ctrl_ext, rctl;
4761	u16     	status;
4762
4763	if ((pci_find_cap(dev, PCIY_PMG, &pmc) != 0))
4764		return;
4765
4766	/* Advertise the wakeup capability */
4767	ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
4768	ctrl |= (E1000_CTRL_SWDPIN2 | E1000_CTRL_SWDPIN3);
4769	E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
4770	E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
4771
4772	if ((adapter->hw.mac.type == e1000_ich8lan) ||
4773	    (adapter->hw.mac.type == e1000_pchlan) ||
4774	    (adapter->hw.mac.type == e1000_ich9lan) ||
4775	    (adapter->hw.mac.type == e1000_ich10lan))
4776		e1000_disable_gig_wol_ich8lan(&adapter->hw);
4777
4778	/* Keep the laser running on Fiber adapters */
4779	if (adapter->hw.phy.media_type == e1000_media_type_fiber ||
4780	    adapter->hw.phy.media_type == e1000_media_type_internal_serdes) {
4781		ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4782		ctrl_ext |= E1000_CTRL_EXT_SDP3_DATA;
4783		E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, ctrl_ext);
4784	}
4785
4786	/*
4787	** Determine type of Wakeup: note that wol
4788	** is set with all bits on by default.
4789	*/
4790	if ((ifp->if_capenable & IFCAP_WOL_MAGIC) == 0)
4791		adapter->wol &= ~E1000_WUFC_MAG;
4792
4793	if ((ifp->if_capenable & IFCAP_WOL_MCAST) == 0)
4794		adapter->wol &= ~E1000_WUFC_MC;
4795	else {
4796		rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
4797		rctl |= E1000_RCTL_MPE;
4798		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, rctl);
4799	}
4800
4801	if ((adapter->hw.mac.type == e1000_pchlan) ||
4802	    (adapter->hw.mac.type == e1000_pch2lan)) {
4803		if (em_enable_phy_wakeup(adapter))
4804			return;
4805	} else {
4806		E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
4807		E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
4808	}
4809
4810	if (adapter->hw.phy.type == e1000_phy_igp_3)
4811		e1000_igp3_phy_powerdown_workaround_ich8lan(&adapter->hw);
4812
4813        /* Request PME */
4814        status = pci_read_config(dev, pmc + PCIR_POWER_STATUS, 2);
4815	status &= ~(PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE);
4816	if (ifp->if_capenable & IFCAP_WOL)
4817		status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE;
4818        pci_write_config(dev, pmc + PCIR_POWER_STATUS, status, 2);
4819
4820	return;
4821}
4822
4823/*
4824** WOL in the newer chipset interfaces (pchlan)
4825** requires things to be copied into the PHY.
4826*/
4827static int
4828em_enable_phy_wakeup(struct adapter *adapter)
4829{
4830	struct e1000_hw *hw = &adapter->hw;
4831	u32 mreg, ret = 0;
4832	u16 preg;
4833
4834	/* copy MAC RARs to PHY RARs */
4835	e1000_copy_rx_addrs_to_phy_ich8lan(hw);
4836
4837	/* copy MAC MTA to PHY MTA */
4838	for (int i = 0; i < adapter->hw.mac.mta_reg_count; i++) {
4839		mreg = E1000_READ_REG_ARRAY(hw, E1000_MTA, i);
4840		e1000_write_phy_reg(hw, BM_MTA(i), (u16)(mreg & 0xFFFF));
4841		e1000_write_phy_reg(hw, BM_MTA(i) + 1,
4842		    (u16)((mreg >> 16) & 0xFFFF));
4843	}
4844
4845	/* configure PHY Rx Control register */
4846	e1000_read_phy_reg(&adapter->hw, BM_RCTL, &preg);
4847	mreg = E1000_READ_REG(hw, E1000_RCTL);
4848	if (mreg & E1000_RCTL_UPE)
4849		preg |= BM_RCTL_UPE;
4850	if (mreg & E1000_RCTL_MPE)
4851		preg |= BM_RCTL_MPE;
4852	preg &= ~(BM_RCTL_MO_MASK);
4853	if (mreg & E1000_RCTL_MO_3)
4854		preg |= (((mreg & E1000_RCTL_MO_3) >> E1000_RCTL_MO_SHIFT)
4855				<< BM_RCTL_MO_SHIFT);
4856	if (mreg & E1000_RCTL_BAM)
4857		preg |= BM_RCTL_BAM;
4858	if (mreg & E1000_RCTL_PMCF)
4859		preg |= BM_RCTL_PMCF;
4860	mreg = E1000_READ_REG(hw, E1000_CTRL);
4861	if (mreg & E1000_CTRL_RFCE)
4862		preg |= BM_RCTL_RFCE;
4863	e1000_write_phy_reg(&adapter->hw, BM_RCTL, preg);
4864
4865	/* enable PHY wakeup in MAC register */
4866	E1000_WRITE_REG(hw, E1000_WUC,
4867	    E1000_WUC_PHY_WAKE | E1000_WUC_PME_EN);
4868	E1000_WRITE_REG(hw, E1000_WUFC, adapter->wol);
4869
4870	/* configure and enable PHY wakeup in PHY registers */
4871	e1000_write_phy_reg(&adapter->hw, BM_WUFC, adapter->wol);
4872	e1000_write_phy_reg(&adapter->hw, BM_WUC, E1000_WUC_PME_EN);
4873
4874	/* activate PHY wakeup */
4875	ret = hw->phy.ops.acquire(hw);
4876	if (ret) {
4877		printf("Could not acquire PHY\n");
4878		return ret;
4879	}
4880	e1000_write_phy_reg_mdic(hw, IGP01E1000_PHY_PAGE_SELECT,
4881	                         (BM_WUC_ENABLE_PAGE << IGP_PAGE_SHIFT));
4882	ret = e1000_read_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, &preg);
4883	if (ret) {
4884		printf("Could not read PHY page 769\n");
4885		goto out;
4886	}
4887	preg |= BM_WUC_ENABLE_BIT | BM_WUC_HOST_WU_BIT;
4888	ret = e1000_write_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, preg);
4889	if (ret)
4890		printf("Could not set PHY Host Wakeup bit\n");
4891out:
4892	hw->phy.ops.release(hw);
4893
4894	return ret;
4895}
4896
4897static void
4898em_led_func(void *arg, int onoff)
4899{
4900	struct adapter	*adapter = arg;
4901
4902	EM_CORE_LOCK(adapter);
4903	if (onoff) {
4904		e1000_setup_led(&adapter->hw);
4905		e1000_led_on(&adapter->hw);
4906	} else {
4907		e1000_led_off(&adapter->hw);
4908		e1000_cleanup_led(&adapter->hw);
4909	}
4910	EM_CORE_UNLOCK(adapter);
4911}
4912
4913/*
4914** Disable the L0S and L1 LINK states
4915*/
4916static void
4917em_disable_aspm(struct adapter *adapter)
4918{
4919	int		base, reg;
4920	u16		link_cap,link_ctrl;
4921	device_t	dev = adapter->dev;
4922
4923	switch (adapter->hw.mac.type) {
4924		case e1000_82573:
4925		case e1000_82574:
4926		case e1000_82583:
4927			break;
4928		default:
4929			return;
4930	}
4931	if (pci_find_cap(dev, PCIY_EXPRESS, &base) != 0)
4932		return;
4933	reg = base + PCIR_EXPRESS_LINK_CAP;
4934	link_cap = pci_read_config(dev, reg, 2);
4935	if ((link_cap & PCIM_LINK_CAP_ASPM) == 0)
4936		return;
4937	reg = base + PCIR_EXPRESS_LINK_CTL;
4938	link_ctrl = pci_read_config(dev, reg, 2);
4939	link_ctrl &= 0xFFFC; /* clear the ASPM L0s and L1 enables (bits 0 and 1) */
4940	pci_write_config(dev, reg, link_ctrl, 2);
4941	return;
4942}
4943
4944/**********************************************************************
4945 *
4946 *  Update the board statistics counters.
4947 *
4948 **********************************************************************/
4949static void
4950em_update_stats_counters(struct adapter *adapter)
4951{
4952	struct ifnet   *ifp;
4953
4954	if (adapter->hw.phy.media_type == e1000_media_type_copper ||
4955	   (E1000_READ_REG(&adapter->hw, E1000_STATUS) & E1000_STATUS_LU)) {
4956		adapter->stats.symerrs += E1000_READ_REG(&adapter->hw, E1000_SYMERRS);
4957		adapter->stats.sec += E1000_READ_REG(&adapter->hw, E1000_SEC);
4958	}
4959	adapter->stats.crcerrs += E1000_READ_REG(&adapter->hw, E1000_CRCERRS);
4960	adapter->stats.mpc += E1000_READ_REG(&adapter->hw, E1000_MPC);
4961	adapter->stats.scc += E1000_READ_REG(&adapter->hw, E1000_SCC);
4962	adapter->stats.ecol += E1000_READ_REG(&adapter->hw, E1000_ECOL);
4963
4964	adapter->stats.mcc += E1000_READ_REG(&adapter->hw, E1000_MCC);
4965	adapter->stats.latecol += E1000_READ_REG(&adapter->hw, E1000_LATECOL);
4966	adapter->stats.colc += E1000_READ_REG(&adapter->hw, E1000_COLC);
4967	adapter->stats.dc += E1000_READ_REG(&adapter->hw, E1000_DC);
4968	adapter->stats.rlec += E1000_READ_REG(&adapter->hw, E1000_RLEC);
4969	adapter->stats.xonrxc += E1000_READ_REG(&adapter->hw, E1000_XONRXC);
4970	adapter->stats.xontxc += E1000_READ_REG(&adapter->hw, E1000_XONTXC);
4971	/*
4972	** For watchdog management we need to know if we have been
4973	** paused during the last interval, so capture that here.
4974	*/
4975	adapter->pause_frames = E1000_READ_REG(&adapter->hw, E1000_XOFFRXC);
4976	adapter->stats.xoffrxc += adapter->pause_frames;
4977	adapter->stats.xofftxc += E1000_READ_REG(&adapter->hw, E1000_XOFFTXC);
4978	adapter->stats.fcruc += E1000_READ_REG(&adapter->hw, E1000_FCRUC);
4979	adapter->stats.prc64 += E1000_READ_REG(&adapter->hw, E1000_PRC64);
4980	adapter->stats.prc127 += E1000_READ_REG(&adapter->hw, E1000_PRC127);
4981	adapter->stats.prc255 += E1000_READ_REG(&adapter->hw, E1000_PRC255);
4982	adapter->stats.prc511 += E1000_READ_REG(&adapter->hw, E1000_PRC511);
4983	adapter->stats.prc1023 += E1000_READ_REG(&adapter->hw, E1000_PRC1023);
4984	adapter->stats.prc1522 += E1000_READ_REG(&adapter->hw, E1000_PRC1522);
4985	adapter->stats.gprc += E1000_READ_REG(&adapter->hw, E1000_GPRC);
4986	adapter->stats.bprc += E1000_READ_REG(&adapter->hw, E1000_BPRC);
4987	adapter->stats.mprc += E1000_READ_REG(&adapter->hw, E1000_MPRC);
4988	adapter->stats.gptc += E1000_READ_REG(&adapter->hw, E1000_GPTC);
4989
4990	/* For the 64-bit byte counters the low dword must be read first. */
4991	/* Both registers clear on the read of the high dword */
4992
4993	adapter->stats.gorc += E1000_READ_REG(&adapter->hw, E1000_GORCL) +
4994	    ((u64)E1000_READ_REG(&adapter->hw, E1000_GORCH) << 32);
4995	adapter->stats.gotc += E1000_READ_REG(&adapter->hw, E1000_GOTCL) +
4996	    ((u64)E1000_READ_REG(&adapter->hw, E1000_GOTCH) << 32);
4997
4998	adapter->stats.rnbc += E1000_READ_REG(&adapter->hw, E1000_RNBC);
4999	adapter->stats.ruc += E1000_READ_REG(&adapter->hw, E1000_RUC);
5000	adapter->stats.rfc += E1000_READ_REG(&adapter->hw, E1000_RFC);
5001	adapter->stats.roc += E1000_READ_REG(&adapter->hw, E1000_ROC);
5002	adapter->stats.rjc += E1000_READ_REG(&adapter->hw, E1000_RJC);
5003
5004	adapter->stats.tor += E1000_READ_REG(&adapter->hw, E1000_TORL) +
	    ((u64)E1000_READ_REG(&adapter->hw, E1000_TORH) << 32);
5005	adapter->stats.tot += E1000_READ_REG(&adapter->hw, E1000_TOTL) +
	    ((u64)E1000_READ_REG(&adapter->hw, E1000_TOTH) << 32);
5006
5007	adapter->stats.tpr += E1000_READ_REG(&adapter->hw, E1000_TPR);
5008	adapter->stats.tpt += E1000_READ_REG(&adapter->hw, E1000_TPT);
5009	adapter->stats.ptc64 += E1000_READ_REG(&adapter->hw, E1000_PTC64);
5010	adapter->stats.ptc127 += E1000_READ_REG(&adapter->hw, E1000_PTC127);
5011	adapter->stats.ptc255 += E1000_READ_REG(&adapter->hw, E1000_PTC255);
5012	adapter->stats.ptc511 += E1000_READ_REG(&adapter->hw, E1000_PTC511);
5013	adapter->stats.ptc1023 += E1000_READ_REG(&adapter->hw, E1000_PTC1023);
5014	adapter->stats.ptc1522 += E1000_READ_REG(&adapter->hw, E1000_PTC1522);
5015	adapter->stats.mptc += E1000_READ_REG(&adapter->hw, E1000_MPTC);
5016	adapter->stats.bptc += E1000_READ_REG(&adapter->hw, E1000_BPTC);
5017
5018	/* Interrupt Counts */
5019
5020	adapter->stats.iac += E1000_READ_REG(&adapter->hw, E1000_IAC);
5021	adapter->stats.icrxptc += E1000_READ_REG(&adapter->hw, E1000_ICRXPTC);
5022	adapter->stats.icrxatc += E1000_READ_REG(&adapter->hw, E1000_ICRXATC);
5023	adapter->stats.ictxptc += E1000_READ_REG(&adapter->hw, E1000_ICTXPTC);
5024	adapter->stats.ictxatc += E1000_READ_REG(&adapter->hw, E1000_ICTXATC);
5025	adapter->stats.ictxqec += E1000_READ_REG(&adapter->hw, E1000_ICTXQEC);
5026	adapter->stats.ictxqmtc += E1000_READ_REG(&adapter->hw, E1000_ICTXQMTC);
5027	adapter->stats.icrxdmtc += E1000_READ_REG(&adapter->hw, E1000_ICRXDMTC);
5028	adapter->stats.icrxoc += E1000_READ_REG(&adapter->hw, E1000_ICRXOC);
5029
5030	if (adapter->hw.mac.type >= e1000_82543) {
5031		adapter->stats.algnerrc +=
5032		E1000_READ_REG(&adapter->hw, E1000_ALGNERRC);
5033		adapter->stats.rxerrc +=
5034		E1000_READ_REG(&adapter->hw, E1000_RXERRC);
5035		adapter->stats.tncrs +=
5036		E1000_READ_REG(&adapter->hw, E1000_TNCRS);
5037		adapter->stats.cexterr +=
5038		E1000_READ_REG(&adapter->hw, E1000_CEXTERR);
5039		adapter->stats.tsctc +=
5040		E1000_READ_REG(&adapter->hw, E1000_TSCTC);
5041		adapter->stats.tsctfc +=
5042		E1000_READ_REG(&adapter->hw, E1000_TSCTFC);
5043	}
5044	ifp = adapter->ifp;
5045
5046	ifp->if_collisions = adapter->stats.colc;
5047
5048	/* Rx Errors */
5049	ifp->if_ierrors = adapter->dropped_pkts + adapter->stats.rxerrc +
5050	    adapter->stats.crcerrs + adapter->stats.algnerrc +
5051	    adapter->stats.ruc + adapter->stats.roc +
5052	    adapter->stats.mpc + adapter->stats.cexterr;
5053
5054	/* Tx Errors */
5055	ifp->if_oerrors = adapter->stats.ecol +
5056	    adapter->stats.latecol + adapter->watchdog_events;
5057}
5058
5059/* Export a single 32-bit register via a read-only sysctl. */
5060static int
5061em_sysctl_reg_handler(SYSCTL_HANDLER_ARGS)
5062{
5063	struct adapter *adapter;
5064	u_int val;
5065
5066	adapter = oidp->oid_arg1;
5067	val = E1000_READ_REG(&adapter->hw, oidp->oid_arg2);
5068	return (sysctl_handle_int(oidp, &val, 0, req));
5069}
5070
5071/*
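/*
 * Usage note: this handler is registered below via SYSCTL_ADD_PROC with
 * the adapter as arg1 and a register offset (e.g. E1000_CTRL) as arg2;
 * each read of the sysctl performs a live register read.
 */
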
5072 * Add sysctl variables, one per statistic, to the system.
5073 */
5074static void
5075em_add_hw_stats(struct adapter *adapter)
5076{
5077	device_t dev = adapter->dev;
5078
5079	struct tx_ring *txr = adapter->tx_rings;
5080	struct rx_ring *rxr = adapter->rx_rings;
5081
5082	struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev);
5083	struct sysctl_oid *tree = device_get_sysctl_tree(dev);
5084	struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree);
5085	struct e1000_hw_stats *stats = &adapter->stats;
5086
5087	struct sysctl_oid *stat_node, *queue_node, *int_node;
5088	struct sysctl_oid_list *stat_list, *queue_list, *int_list;
5089
5090#define QUEUE_NAME_LEN 32
5091	char namebuf[QUEUE_NAME_LEN];
5092
5093	/* Driver Statistics */
5094	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "link_irq",
5095			CTLFLAG_RD, &adapter->link_irq,
5096			"Link MSIX IRQ Handled");
5097	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "mbuf_alloc_fail",
5098			 CTLFLAG_RD, &adapter->mbuf_alloc_failed,
5099			 "Std mbuf failed");
5100	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "cluster_alloc_fail",
5101			 CTLFLAG_RD, &adapter->mbuf_cluster_failed,
5102			 "Std mbuf cluster failed");
5103	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "dropped",
5104			CTLFLAG_RD, &adapter->dropped_pkts,
5105			"Driver dropped packets");
5106	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_dma_fail",
5107			CTLFLAG_RD, &adapter->no_tx_dma_setup,
5108			"Driver tx dma failure in xmit");
5109	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_overruns",
5110			CTLFLAG_RD, &adapter->rx_overruns,
5111			"RX overruns");
5112	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "watchdog_timeouts",
5113			CTLFLAG_RD, &adapter->watchdog_events,
5114			"Watchdog timeouts");
5115
5116	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "device_control",
5117			CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_CTRL,
5118			em_sysctl_reg_handler, "IU",
5119			"Device Control Register");
5120	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rx_control",
5121			CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_RCTL,
5122			em_sysctl_reg_handler, "IU",
5123			"Receiver Control Register");
5124	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_high_water",
5125			CTLFLAG_RD, &adapter->hw.fc.high_water, 0,
5126			"Flow Control High Watermark");
5127	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_low_water",
5128			CTLFLAG_RD, &adapter->hw.fc.low_water, 0,
5129			"Flow Control Low Watermark");
5130
5131	for (int i = 0; i < adapter->num_queues; i++, rxr++, txr++) {
5132		snprintf(namebuf, QUEUE_NAME_LEN, "queue%d", i);
5133		queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
5134					    CTLFLAG_RD, NULL, "Queue Name");
5135		queue_list = SYSCTL_CHILDREN(queue_node);
5136
5137		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_head",
5138				CTLTYPE_UINT | CTLFLAG_RD, adapter,
5139				E1000_TDH(txr->me),
5140				em_sysctl_reg_handler, "IU",
5141 				"Transmit Descriptor Head");
5142		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_tail",
5143				CTLTYPE_UINT | CTLFLAG_RD, adapter,
5144				E1000_TDT(txr->me),
5145				em_sysctl_reg_handler, "IU",
5146 				"Transmit Descriptor Tail");
5147		SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "tx_irq",
5148				CTLFLAG_RD, &txr->tx_irq,
5149				"Queue MSI-X Transmit Interrupts");
5150		SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "no_desc_avail",
5151				CTLFLAG_RD, &txr->no_desc_avail,
5152				"Queue No Descriptor Available");
5153
5154		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_head",
5155				CTLTYPE_UINT | CTLFLAG_RD, adapter,
5156				E1000_RDH(rxr->me),
5157				em_sysctl_reg_handler, "IU",
5158				"Receive Descriptor Head");
5159		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_tail",
5160				CTLTYPE_UINT | CTLFLAG_RD, adapter,
5161				E1000_RDT(rxr->me),
5162				em_sysctl_reg_handler, "IU",
5163				"Receive Descriptor Tail");
5164		SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "rx_irq",
5165				CTLFLAG_RD, &rxr->rx_irq,
5166				"Queue MSI-X Receive Interrupts");
5167	}
5168
5169	/* MAC stats get their own sub node */
5170
5171	stat_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "mac_stats",
5172				    CTLFLAG_RD, NULL, "Statistics");
5173	stat_list = SYSCTL_CHILDREN(stat_node);
5174
5175	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "excess_coll",
5176			CTLFLAG_RD, &stats->ecol,
5177			"Excessive collisions");
5178	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "single_coll",
5179			CTLFLAG_RD, &stats->scc,
5180			"Single collisions");
5181	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "multiple_coll",
5182			CTLFLAG_RD, &stats->mcc,
5183			"Multiple collisions");
5184	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "late_coll",
5185			CTLFLAG_RD, &stats->latecol,
5186			"Late collisions");
5187	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "collision_count",
5188			CTLFLAG_RD, &stats->colc,
5189			"Collision Count");
5190	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "symbol_errors",
5191			CTLFLAG_RD, &adapter->stats.symerrs,
5192			"Symbol Errors");
5193	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "sequence_errors",
5194			CTLFLAG_RD, &adapter->stats.sec,
5195			"Sequence Errors");
5196	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "defer_count",
5197			CTLFLAG_RD, &adapter->stats.dc,
5198			"Defer Count");
5199	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "missed_packets",
5200			CTLFLAG_RD, &adapter->stats.mpc,
5201			"Missed Packets");
5202	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_no_buff",
5203			CTLFLAG_RD, &adapter->stats.rnbc,
5204			"Receive No Buffers");
5205	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_undersize",
5206			CTLFLAG_RD, &adapter->stats.ruc,
5207			"Receive Undersize");
5208	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_fragmented",
5209			CTLFLAG_RD, &adapter->stats.rfc,
5210			"Fragmented Packets Received");
5211	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_oversize",
5212			CTLFLAG_RD, &adapter->stats.roc,
5213			"Oversized Packets Received");
5214	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_jabber",
5215			CTLFLAG_RD, &adapter->stats.rjc,
5216			"Received Jabber");
5217	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_errs",
5218			CTLFLAG_RD, &adapter->stats.rxerrc,
5219			"Receive Errors");
5220	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "crc_errs",
5221			CTLFLAG_RD, &adapter->stats.crcerrs,
5222			"CRC errors");
5223	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "alignment_errs",
5224			CTLFLAG_RD, &adapter->stats.algnerrc,
5225			"Alignment Errors");
5226	/* On 82575 these are collision counts */
5227	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "coll_ext_errs",
5228			CTLFLAG_RD, &adapter->stats.cexterr,
5229			"Collision/Carrier extension errors");
5230	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_recvd",
5231			CTLFLAG_RD, &adapter->stats.xonrxc,
5232			"XON Received");
5233	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_txd",
5234			CTLFLAG_RD, &adapter->stats.xontxc,
5235			"XON Transmitted");
5236	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_recvd",
5237			CTLFLAG_RD, &adapter->stats.xoffrxc,
5238			"XOFF Received");
5239	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_txd",
5240			CTLFLAG_RD, &adapter->stats.xofftxc,
5241			"XOFF Transmitted");
5242
5243	/* Packet Reception Stats */
5244	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_recvd",
5245			CTLFLAG_RD, &adapter->stats.tpr,
5246			"Total Packets Received");
5247	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd",
5248			CTLFLAG_RD, &adapter->stats.gprc,
5249			"Good Packets Received");
5250	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_recvd",
5251			CTLFLAG_RD, &adapter->stats.bprc,
5252			"Broadcast Packets Received");
5253	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd",
5254			CTLFLAG_RD, &adapter->stats.mprc,
5255			"Multicast Packets Received");
5256	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_64",
5257			CTLFLAG_RD, &adapter->stats.prc64,
5258			"64 byte frames received");
5259	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_65_127",
5260			CTLFLAG_RD, &adapter->stats.prc127,
5261			"65-127 byte frames received");
5262	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_128_255",
5263			CTLFLAG_RD, &adapter->stats.prc255,
5264			"128-255 byte frames received");
5265	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_256_511",
5266			CTLFLAG_RD, &adapter->stats.prc511,
5267			"256-511 byte frames received");
5268	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_512_1023",
5269			CTLFLAG_RD, &adapter->stats.prc1023,
5270			"512-1023 byte frames received");
5271	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_1024_1522",
5272			CTLFLAG_RD, &adapter->stats.prc1522,
5273			"1024-1522 byte frames received");
5274	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd",
5275			CTLFLAG_RD, &adapter->stats.gorc,
5276			"Good Octets Received");
5277
5278	/* Packet Transmission Stats */
5279	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_txd",
5280			CTLFLAG_RD, &adapter->stats.gotc,
5281			"Good Octets Transmitted");
5282	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_txd",
5283			CTLFLAG_RD, &adapter->stats.tpt,
5284			"Total Packets Transmitted");
5285	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
5286			CTLFLAG_RD, &adapter->stats.gptc,
5287			"Good Packets Transmitted");
5288	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_txd",
5289			CTLFLAG_RD, &adapter->stats.bptc,
5290			"Broadcast Packets Transmitted");
5291	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_txd",
5292			CTLFLAG_RD, &adapter->stats.mptc,
5293			"Multicast Packets Transmitted");
5294	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_64",
5295			CTLFLAG_RD, &adapter->stats.ptc64,
5296			"64 byte frames transmitted");
5297	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_65_127",
5298			CTLFLAG_RD, &adapter->stats.ptc127,
5299			"65-127 byte frames transmitted");
5300	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_128_255",
5301			CTLFLAG_RD, &adapter->stats.ptc255,
5302			"128-255 byte frames transmitted");
5303	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_256_511",
5304			CTLFLAG_RD, &adapter->stats.ptc511,
5305			"256-511 byte frames transmitted");
5306	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_512_1023",
5307			CTLFLAG_RD, &adapter->stats.ptc1023,
5308			"512-1023 byte frames transmitted");
5309	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_1024_1522",
5310			CTLFLAG_RD, &adapter->stats.ptc1522,
5311			"1024-1522 byte frames transmitted");
5312	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tso_txd",
5313			CTLFLAG_RD, &adapter->stats.tsctc,
5314			"TSO Contexts Transmitted");
5315	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tso_ctx_fail",
5316			CTLFLAG_RD, &adapter->stats.tsctfc,
5317			"TSO Contexts Failed");
5318
5319
5320	/* Interrupt Stats */
5321
5322	int_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "interrupts",
5323				    CTLFLAG_RD, NULL, "Interrupt Statistics");
5324	int_list = SYSCTL_CHILDREN(int_node);
5325
5326	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "asserts",
5327			CTLFLAG_RD, &adapter->stats.iac,
5328			"Interrupt Assertion Count");
5329
5330	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_pkt_timer",
5331			CTLFLAG_RD, &adapter->stats.icrxptc,
5332			"Interrupt Cause Rx Pkt Timer Expire Count");
5333
5334	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_abs_timer",
5335			CTLFLAG_RD, &adapter->stats.icrxatc,
5336			"Interrupt Cause Rx Abs Timer Expire Count");
5337
5338	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_pkt_timer",
5339			CTLFLAG_RD, &adapter->stats.ictxptc,
5340			"Interrupt Cause Tx Pkt Timer Expire Count");
5341
5342	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_abs_timer",
5343			CTLFLAG_RD, &adapter->stats.ictxatc,
5344			"Interrupt Cause Tx Abs Timer Expire Count");
5345
5346	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_queue_empty",
5347			CTLFLAG_RD, &adapter->stats.ictxqec,
5348			"Interrupt Cause Tx Queue Empty Count");
5349
5350	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_queue_min_thresh",
5351			CTLFLAG_RD, &adapter->stats.ictxqmtc,
5352			"Interrupt Cause Tx Queue Min Thresh Count");
5353
5354	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_desc_min_thresh",
5355			CTLFLAG_RD, &adapter->stats.icrxdmtc,
5356			"Interrupt Cause Rx Desc Min Thresh Count");
5357
5358	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_overrun",
5359			CTLFLAG_RD, &adapter->stats.icrxoc,
5360			"Interrupt Cause Receiver Overrun Count");
5361}
5362
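/*
 * All of the nodes added above hang off this device's sysctl tree
 * and are readable from userland; a sketch, assuming unit 0:
 *
 *	# sysctl dev.em.0.mac_stats.missed_packets
 *	# sysctl dev.em.0.queue0.txd_head
 */
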
5363/**********************************************************************
5364 *
5365 *  This routine provides a way to dump out the adapter EEPROM,
5366 *  often a useful debug/service tool. Only the first 32 words are
5367 *  dumped; everything that matters lives in that range.
5368 *
5369 **********************************************************************/
5370static int
5371em_sysctl_nvm_info(SYSCTL_HANDLER_ARGS)
5372{
5373	struct adapter *adapter;
5374	int error;
5375	int result;
5376
5377	result = -1;
5378	error = sysctl_handle_int(oidp, &result, 0, req);
5379
5380	if (error || !req->newptr)
5381		return (error);
5382
5383	/*
5384	 * This value will cause a hex dump of the
5385	 * first 32 16-bit words of the EEPROM to
5386	 * the screen.
5387	 */
5388	if (result == 1) {
5389		adapter = (struct adapter *)arg1;
5390		em_print_nvm_info(adapter);
5391	}
5392
5393	return (error);
5394}
5395
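/*
 * Example use, assuming the handler above was attached earlier in
 * this file under a node named "nvm" (unit number illustrative):
 *
 *	# sysctl dev.em.0.nvm=1
 *
 * Any write of 1 dumps the first 32 EEPROM words to the console
 * via em_print_nvm_info().
 */
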
5396static void
5397em_print_nvm_info(struct adapter *adapter)
5398{
5399	u16	eeprom_data;
5400	int	i, j, row = 0;
5401
5402	/* It's a bit crude, but it gets the job done */
5403	printf("\nInterface EEPROM Dump:\n");
5404	printf("Offset\n0x0000  ");
5405	for (i = 0, j = 0; i < 32; i++, j++) {
5406		if (j == 8) { /* Make the offset block */
5407			j = 0; ++row;
5408			printf("\n0x00%x0  ", row);
5409		}
5410		e1000_read_nvm(&adapter->hw, i, 1, &eeprom_data);
5411		printf("%04x ", eeprom_data);
5412	}
5413	printf("\n");
5414}
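
/*
 * A sketch of the console output produced by em_print_nvm_info()
 * (word values are device-specific, shown here as xxxx):
 *
 *	Interface EEPROM Dump:
 *	Offset
 *	0x0000  xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx
 *	0x0010  xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx
 *	...
 */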
5415
5416static int
5417em_sysctl_int_delay(SYSCTL_HANDLER_ARGS)
5418{
5419	struct em_int_delay_info *info;
5420	struct adapter *adapter;
5421	u32 regval;
5422	int error, usecs, ticks;
5423
5424	info = (struct em_int_delay_info *)arg1;
5425	usecs = info->value;
5426	error = sysctl_handle_int(oidp, &usecs, 0, req);
5427	if (error != 0 || req->newptr == NULL)
5428		return (error);
5429	if (usecs < 0 || usecs > EM_TICKS_TO_USECS(65535))
5430		return (EINVAL);
5431	info->value = usecs;
5432	ticks = EM_USECS_TO_TICKS(usecs);
5433
5434	adapter = info->adapter;
5435
5436	EM_CORE_LOCK(adapter);
5437	regval = E1000_READ_OFFSET(&adapter->hw, info->offset);
5438	regval = (regval & ~0xffff) | (ticks & 0xffff);
5439	/* Handle a few special cases. */
5440	switch (info->offset) {
5441	case E1000_RDTR:
5442		break;
5443	case E1000_TIDV:
5444		if (ticks == 0) {
5445			adapter->txd_cmd &= ~E1000_TXD_CMD_IDE;
5446			/* Don't write 0 into the TIDV register. */
5447			regval++;
5448		} else
5449			adapter->txd_cmd |= E1000_TXD_CMD_IDE;
5450		break;
5451	}
5452	E1000_WRITE_OFFSET(&adapter->hw, info->offset, regval);
5453	EM_CORE_UNLOCK(adapter);
5454	return (0);
5455}
5456
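/*
 * A worked example of the conversion done in em_sysctl_int_delay()
 * above, assuming the EM_USECS_TO_TICKS() definition in if_em.h is
 * roughly ticks = (1000 * usecs + 512) / 1024 (one tick ~ 1.024us):
 *
 *	usecs = 128  ->  ticks = (128000 + 512) / 1024 = 125
 *
 * Only the low 16 bits of the delay register carry the timer value,
 * which is why the read-modify-write above masks with 0xffff.
 */
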
5457static void
5458em_add_int_delay_sysctl(struct adapter *adapter, const char *name,
5459	const char *description, struct em_int_delay_info *info,
5460	int offset, int value)
5461{
5462	info->adapter = adapter;
5463	info->offset = offset;
5464	info->value = value;
5465	SYSCTL_ADD_PROC(device_get_sysctl_ctx(adapter->dev),
5466	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
5467	    OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW,
5468	    info, 0, em_sysctl_int_delay, "I", description);
5469}
5470
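/*
 * A usage sketch for em_add_int_delay_sysctl() above; the driver's
 * real calls are made at attach time with its tunable defaults, so
 * the names and values here are only illustrative:
 *
 *	em_add_int_delay_sysctl(adapter, "rx_int_delay",
 *	    "receive interrupt delay in usecs",
 *	    &adapter->rx_int_delay,
 *	    E1000_REGISTER(&adapter->hw, E1000_RDTR),
 *	    em_rx_int_delay_dflt);
 */
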
5471static void
5472em_set_sysctl_value(struct adapter *adapter, const char *name,
5473	const char *description, int *limit, int value)
5474{
5475	*limit = value;
5476	SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
5477	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
5478	    OID_AUTO, name, CTLFLAG_RW, limit, value, description);
5479}
5480
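/*
 * A usage sketch for em_set_sysctl_value() above (illustrative;
 * the real calls with the driver's defaults are made at attach time):
 *
 *	em_set_sysctl_value(adapter, "rx_processing_limit",
 *	    "max number of rx packets to process",
 *	    &adapter->rx_process_limit, em_rx_process_limit);
 */
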
5481static int
5482em_sysctl_debug_info(SYSCTL_HANDLER_ARGS)
5483{
5484	struct adapter *adapter;
5485	int error;
5486	int result;
5487
5488	result = -1;
5489	error = sysctl_handle_int(oidp, &result, 0, req);
5490
5491	if (error || !req->newptr)
5492		return (error);
5493
5494	if (result == 1) {
5495		adapter = (struct adapter *)arg1;
5496		em_print_debug_info(adapter);
5497	}
5498
5499	return (error);
5500}
5501
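/*
 * Like the nvm handler, em_sysctl_debug_info() above is driven from
 * userland by writing 1 to the node it was attached under; assuming
 * it was registered as "debug" (unit number illustrative):
 *
 *	# sysctl dev.em.0.debug=1
 */
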
5502/*
5503** This routine is meant to be fluid; add whatever is
5504** needed for debugging a problem.  -jfv
5505*/
5506static void
5507em_print_debug_info(struct adapter *adapter)
5508{
5509	device_t dev = adapter->dev;
5510	struct tx_ring *txr = adapter->tx_rings;
5511	struct rx_ring *rxr = adapter->rx_rings;
5512
5513	if (adapter->ifp->if_drv_flags & IFF_DRV_RUNNING)
5514		printf("Interface is RUNNING ");
5515	else
5516		printf("Interface is NOT RUNNING ");
5517	if (adapter->ifp->if_drv_flags & IFF_DRV_OACTIVE)
5518		printf("and ACTIVE\n");
5519	else
5520		printf("and INACTIVE\n");
5521
5522	device_printf(dev, "hw tdh = %d, hw tdt = %d\n",
5523	    E1000_READ_REG(&adapter->hw, E1000_TDH(0)),
5524	    E1000_READ_REG(&adapter->hw, E1000_TDT(0)));
5525	device_printf(dev, "hw rdh = %d, hw rdt = %d\n",
5526	    E1000_READ_REG(&adapter->hw, E1000_RDH(0)),
5527	    E1000_READ_REG(&adapter->hw, E1000_RDT(0)));
5528	device_printf(dev, "Tx Queue Status = %d\n", txr->queue_status);
5529	device_printf(dev, "TX descriptors avail = %d\n",
5530	    txr->tx_avail);
5531	device_printf(dev, "TX descriptor avail failures = %ld\n",
5532	    txr->no_desc_avail);
5533	device_printf(dev, "RX discarded packets = %ld\n",
5534	    rxr->rx_discarded);
5535	device_printf(dev, "RX Next to Check = %d\n", rxr->next_to_check);
5536	device_printf(dev, "RX Next to Refresh = %d\n", rxr->next_to_refresh);
5537}
5538