/******************************************************************************

  Copyright (c) 2001-2011, Intel Corporation
  All rights reserved.

  Redistribution and use in source and binary forms, with or without
  modification, are permitted provided that the following conditions are met:

   1. Redistributions of source code must retain the above copyright notice,
      this list of conditions and the following disclaimer.

   2. Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.

   3. Neither the name of the Intel Corporation nor the names of its
      contributors may be used to endorse or promote products derived from
      this software without specific prior written permission.

  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  POSSIBILITY OF SUCH DAMAGE.

******************************************************************************/
/*$FreeBSD: head/sys/dev/e1000/if_em.c 220254 2011-04-01 20:24:51Z jfv $*/

#ifdef HAVE_KERNEL_OPTION_HEADERS
#include "opt_device_polling.h"
#include "opt_inet.h"
#endif

#include <sys/param.h>
#include <sys/systm.h>
#if __FreeBSD_version >= 800000
#include <sys/buf_ring.h>
#endif
#include <sys/bus.h>
#include <sys/endian.h>
#include <sys/kernel.h>
#include <sys/kthread.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/module.h>
#include <sys/rman.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/sysctl.h>
#include <sys/taskqueue.h>
#include <sys/eventhandler.h>
#include <machine/bus.h>
#include <machine/resource.h>

#include <net/bpf.h>
#include <net/ethernet.h>
#include <net/if.h>
#include <net/if_arp.h>
#include <net/if_dl.h>
#include <net/if_media.h>

#include <net/if_types.h>
#include <net/if_vlan_var.h>

#include <netinet/in_systm.h>
#include <netinet/in.h>
#include <netinet/if_ether.h>
#include <netinet/ip.h>
#include <netinet/ip6.h>
#include <netinet/tcp.h>
#include <netinet/udp.h>

#include <machine/in_cksum.h>
#include <dev/led/led.h>
#include <dev/pci/pcivar.h>
#include <dev/pci/pcireg.h>

#include "e1000_api.h"
#include "e1000_82571.h"
#include "if_em.h"

/*********************************************************************
 *  Set this to one to display debug statistics
 *********************************************************************/
int	em_display_debug_stats = 0;

/*********************************************************************
 *  Driver version:
 *********************************************************************/
char em_driver_version[] = "7.2.3";

/*********************************************************************
 *  PCI Device ID Table
 *
 *  Used by probe to select which devices to load on.
 *  The last field stores an index into em_strings.
 *  The last entry must be all 0s.
 *
 *  { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
 *********************************************************************/

static em_vendor_info_t em_vendor_info_array[] =
{
	/* Intel(R) PRO/1000 Network Connection */
	{ 0x8086, E1000_DEV_ID_82571EB_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_SERDES_DUAL,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_SERDES_QUAD,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER_LP,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_FIBER,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571PT_QUAD_COPPER,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82572EI_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82572EI_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82572EI_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82572EI,		PCI_ANY_ID, PCI_ANY_ID, 0},

	{ 0x8086, E1000_DEV_ID_82573E,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82573E_IAMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82573L,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82583V,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_SPT,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_SPT,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_DPT,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_DPT,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IGP_M_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IGP_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IGP_C,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IFE,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IFE_GT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IFE_G,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IGP_M,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_82567V_3,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IGP_M_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IGP_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IGP_C,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IGP_M,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IGP_M_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IFE,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IFE_GT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IFE_G,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_BM,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82574L,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82574LA,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_R_BM_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_R_BM_LF,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_R_BM_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_D_BM_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_D_BM_LF,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_D_BM_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_M_HV_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_M_HV_LC,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_D_HV_DM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_D_HV_DC,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH2_LV_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH2_LV_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
	/* required last entry */
	{ 0, 0, 0, 0, 0}
};
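
/*
 * Note: the entries above use PCI_ANY_ID for the subsystem IDs, so
 * em_probe() below matches on vendor/device ID alone; the probe loop
 * returns on the first match, so a subsystem-specific entry would only
 * take precedence by appearing earlier in the table.
 */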

/*********************************************************************
 *  Table of branding strings for all supported NICs.
 *********************************************************************/

static char *em_strings[] = {
	"Intel(R) PRO/1000 Network Connection"
};

/*********************************************************************
 *  Function prototypes
 *********************************************************************/
static int	em_probe(device_t);
static int	em_attach(device_t);
static int	em_detach(device_t);
static int	em_shutdown(device_t);
static int	em_suspend(device_t);
static int	em_resume(device_t);
static void	em_start(struct ifnet *);
static void	em_start_locked(struct ifnet *, struct tx_ring *);
#ifdef EM_MULTIQUEUE
static int	em_mq_start(struct ifnet *, struct mbuf *);
static int	em_mq_start_locked(struct ifnet *,
		    struct tx_ring *, struct mbuf *);
static void	em_qflush(struct ifnet *);
#endif
static int	em_ioctl(struct ifnet *, u_long, caddr_t);
static void	em_init(void *);
static void	em_init_locked(struct adapter *);
static void	em_stop(void *);
static void	em_media_status(struct ifnet *, struct ifmediareq *);
static int	em_media_change(struct ifnet *);
static void	em_identify_hardware(struct adapter *);
static int	em_allocate_pci_resources(struct adapter *);
static int	em_allocate_legacy(struct adapter *);
static int	em_allocate_msix(struct adapter *);
static int	em_allocate_queues(struct adapter *);
static int	em_setup_msix(struct adapter *);
static void	em_free_pci_resources(struct adapter *);
static void	em_local_timer(void *);
static void	em_reset(struct adapter *);
static int	em_setup_interface(device_t, struct adapter *);

static void	em_setup_transmit_structures(struct adapter *);
static void	em_initialize_transmit_unit(struct adapter *);
static int	em_allocate_transmit_buffers(struct tx_ring *);
static void	em_free_transmit_structures(struct adapter *);
static void	em_free_transmit_buffers(struct tx_ring *);

static int	em_setup_receive_structures(struct adapter *);
static int	em_allocate_receive_buffers(struct rx_ring *);
static void	em_initialize_receive_unit(struct adapter *);
static void	em_free_receive_structures(struct adapter *);
static void	em_free_receive_buffers(struct rx_ring *);

static void	em_enable_intr(struct adapter *);
static void	em_disable_intr(struct adapter *);
static void	em_update_stats_counters(struct adapter *);
static void	em_add_hw_stats(struct adapter *adapter);
static bool	em_txeof(struct tx_ring *);
static bool	em_rxeof(struct rx_ring *, int, int *);
#ifndef __NO_STRICT_ALIGNMENT
static int	em_fixup_rx(struct rx_ring *);
#endif
static void	em_receive_checksum(struct e1000_rx_desc *, struct mbuf *);
static void	em_transmit_checksum_setup(struct tx_ring *, struct mbuf *, int,
		    struct ip *, u32 *, u32 *);
static void	em_tso_setup(struct tx_ring *, struct mbuf *, int, struct ip *,
		    struct tcphdr *, u32 *, u32 *);
static void	em_set_promisc(struct adapter *);
static void	em_disable_promisc(struct adapter *);
static void	em_set_multi(struct adapter *);
static void	em_update_link_status(struct adapter *);
static void	em_refresh_mbufs(struct rx_ring *, int);
static void	em_register_vlan(void *, struct ifnet *, u16);
static void	em_unregister_vlan(void *, struct ifnet *, u16);
static void	em_setup_vlan_hw_support(struct adapter *);
static int	em_xmit(struct tx_ring *, struct mbuf **);
static int	em_dma_malloc(struct adapter *, bus_size_t,
		    struct em_dma_alloc *, int);
static void	em_dma_free(struct adapter *, struct em_dma_alloc *);
static int	em_sysctl_nvm_info(SYSCTL_HANDLER_ARGS);
static void	em_print_nvm_info(struct adapter *);
static int	em_sysctl_debug_info(SYSCTL_HANDLER_ARGS);
static void	em_print_debug_info(struct adapter *);
static int	em_is_valid_ether_addr(u8 *);
static int	em_sysctl_int_delay(SYSCTL_HANDLER_ARGS);
static void	em_add_int_delay_sysctl(struct adapter *, const char *,
		    const char *, struct em_int_delay_info *, int, int);
/* Management and WOL Support */
static void	em_init_manageability(struct adapter *);
static void	em_release_manageability(struct adapter *);
static void	em_get_hw_control(struct adapter *);
static void	em_release_hw_control(struct adapter *);
static void	em_get_wakeup(device_t);
static void	em_enable_wakeup(device_t);
static int	em_enable_phy_wakeup(struct adapter *);
static void	em_led_func(void *, int);
static void	em_disable_aspm(struct adapter *);

static int	em_irq_fast(void *);

/* MSIX handlers */
static void	em_msix_tx(void *);
static void	em_msix_rx(void *);
static void	em_msix_link(void *);
static void	em_handle_tx(void *context, int pending);
static void	em_handle_rx(void *context, int pending);
static void	em_handle_link(void *context, int pending);

static void	em_set_sysctl_value(struct adapter *, const char *,
		    const char *, int *, int);

static __inline void em_rx_discard(struct rx_ring *, int);

#ifdef DEVICE_POLLING
static poll_handler_t em_poll;
#endif /* DEVICE_POLLING */

/*********************************************************************
 *  FreeBSD Device Interface Entry Points
 *********************************************************************/

static device_method_t em_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe, em_probe),
	DEVMETHOD(device_attach, em_attach),
	DEVMETHOD(device_detach, em_detach),
	DEVMETHOD(device_shutdown, em_shutdown),
	DEVMETHOD(device_suspend, em_suspend),
	DEVMETHOD(device_resume, em_resume),
	{0, 0}
};

static driver_t em_driver = {
	"em", em_methods, sizeof(struct adapter),
};

devclass_t em_devclass;
DRIVER_MODULE(em, pci, em_driver, em_devclass, 0, 0);
MODULE_DEPEND(em, pci, 1, 1, 1);
MODULE_DEPEND(em, ether, 1, 1, 1);

/*********************************************************************
 *  Tunable default values.
 *********************************************************************/

#define EM_TICKS_TO_USECS(ticks)	((1024 * (ticks) + 500) / 1000)
#define EM_USECS_TO_TICKS(usecs)	((1000 * (usecs) + 512) / 1024)
#define M_TSO_LEN			66
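
/*
 * Worked example (illustrative): the hardware counts these delays in
 * units of 1.024 usecs, so for a register value of 100 ticks,
 * EM_TICKS_TO_USECS(100) = (1024 * 100 + 500) / 1000 = 102 usecs;
 * the +500 and +512 terms round to the nearest unit in each direction.
 */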

/* Allow common code without TSO */
#ifndef CSUM_TSO
#define CSUM_TSO	0
#endif

static int em_tx_int_delay_dflt = EM_TICKS_TO_USECS(EM_TIDV);
static int em_rx_int_delay_dflt = EM_TICKS_TO_USECS(EM_RDTR);
TUNABLE_INT("hw.em.tx_int_delay", &em_tx_int_delay_dflt);
TUNABLE_INT("hw.em.rx_int_delay", &em_rx_int_delay_dflt);

static int em_tx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_TADV);
static int em_rx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_RADV);
TUNABLE_INT("hw.em.tx_abs_int_delay", &em_tx_abs_int_delay_dflt);
TUNABLE_INT("hw.em.rx_abs_int_delay", &em_rx_abs_int_delay_dflt);

static int em_rxd = EM_DEFAULT_RXD;
static int em_txd = EM_DEFAULT_TXD;
TUNABLE_INT("hw.em.rxd", &em_rxd);
TUNABLE_INT("hw.em.txd", &em_txd);

static int em_smart_pwr_down = FALSE;
TUNABLE_INT("hw.em.smart_pwr_down", &em_smart_pwr_down);

/* Controls whether promiscuous also shows bad packets */
static int em_debug_sbp = FALSE;
TUNABLE_INT("hw.em.sbp", &em_debug_sbp);

static int em_enable_msix = TRUE;
TUNABLE_INT("hw.em.enable_msix", &em_enable_msix);

/* How many packets rxeof tries to clean at a time */
static int em_rx_process_limit = 100;
TUNABLE_INT("hw.em.rx_process_limit", &em_rx_process_limit);

/* Flow control setting - default to FULL */
static int em_fc_setting = e1000_fc_full;
TUNABLE_INT("hw.em.fc_setting", &em_fc_setting);

/* Energy Efficient Ethernet - default to OFF */
static int eee_setting = 0;
TUNABLE_INT("hw.em.eee_setting", &eee_setting);

/* Global used in WOL setup with multiport cards */
static int global_quad_port_a = 0;

/*********************************************************************
 *  Device identification routine
 *
 *  em_probe determines if the driver should be loaded on an
 *  adapter, based on the PCI vendor/device ID of the adapter.
 *
 *  return BUS_PROBE_DEFAULT on success, positive on failure
 *********************************************************************/

static int
em_probe(device_t dev)
{
	char		adapter_name[60];
	u16		pci_vendor_id = 0;
	u16		pci_device_id = 0;
	u16		pci_subvendor_id = 0;
	u16		pci_subdevice_id = 0;
	em_vendor_info_t *ent;

	INIT_DEBUGOUT("em_probe: begin");

	pci_vendor_id = pci_get_vendor(dev);
	if (pci_vendor_id != EM_VENDOR_ID)
		return (ENXIO);

	pci_device_id = pci_get_device(dev);
	pci_subvendor_id = pci_get_subvendor(dev);
	pci_subdevice_id = pci_get_subdevice(dev);

	ent = em_vendor_info_array;
	while (ent->vendor_id != 0) {
		if ((pci_vendor_id == ent->vendor_id) &&
		    (pci_device_id == ent->device_id) &&
		    ((pci_subvendor_id == ent->subvendor_id) ||
		    (ent->subvendor_id == PCI_ANY_ID)) &&
		    ((pci_subdevice_id == ent->subdevice_id) ||
		    (ent->subdevice_id == PCI_ANY_ID))) {
			sprintf(adapter_name, "%s %s",
				em_strings[ent->index],
				em_driver_version);
			device_set_desc_copy(dev, adapter_name);
			return (BUS_PROBE_DEFAULT);
		}
		ent++;
	}

	return (ENXIO);
}

/*********************************************************************
 *  Device initialization routine
 *
 *  The attach entry point is called when the driver is being loaded.
 *  This routine identifies the type of hardware, allocates all resources
 *  and initializes the hardware.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/

static int
em_attach(device_t dev)
{
	struct adapter	*adapter;
	struct e1000_hw	*hw;
	int		error = 0;

	INIT_DEBUGOUT("em_attach: begin");

	adapter = device_get_softc(dev);
	adapter->dev = adapter->osdep.dev = dev;
	hw = &adapter->hw;
	EM_CORE_LOCK_INIT(adapter, device_get_nameunit(dev));

	/* SYSCTL stuff */
	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
	    OID_AUTO, "nvm", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
	    em_sysctl_nvm_info, "I", "NVM Information");

	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
	    OID_AUTO, "debug", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
	    em_sysctl_debug_info, "I", "Debug Information");

	callout_init_mtx(&adapter->timer, &adapter->core_mtx, 0);

	/* Determine hardware and mac info */
	em_identify_hardware(adapter);

	/* Setup PCI resources */
	if (em_allocate_pci_resources(adapter)) {
		device_printf(dev, "Allocation of PCI resources failed\n");
		error = ENXIO;
		goto err_pci;
	}

	/*
	** For ICH8 and family we need to
	** map the flash memory, and this
	** must happen after the MAC is
	** identified
	*/
	if ((hw->mac.type == e1000_ich8lan) ||
	    (hw->mac.type == e1000_ich9lan) ||
	    (hw->mac.type == e1000_ich10lan) ||
	    (hw->mac.type == e1000_pchlan) ||
	    (hw->mac.type == e1000_pch2lan)) {
		int rid = EM_BAR_TYPE_FLASH;
		adapter->flash = bus_alloc_resource_any(dev,
		    SYS_RES_MEMORY, &rid, RF_ACTIVE);
		if (adapter->flash == NULL) {
			device_printf(dev, "Mapping of Flash failed\n");
			error = ENXIO;
			goto err_pci;
		}
		/* This is used in the shared code */
		hw->flash_address = (u8 *)adapter->flash;
		adapter->osdep.flash_bus_space_tag =
		    rman_get_bustag(adapter->flash);
		adapter->osdep.flash_bus_space_handle =
		    rman_get_bushandle(adapter->flash);
	}

	/* Do Shared Code initialization */
	if (e1000_setup_init_funcs(hw, TRUE)) {
		device_printf(dev, "Setup of Shared code failed\n");
		error = ENXIO;
		goto err_pci;
	}

	e1000_get_bus_info(hw);

	/* Set up some sysctls for the tunable interrupt delays */
	em_add_int_delay_sysctl(adapter, "rx_int_delay",
	    "receive interrupt delay in usecs", &adapter->rx_int_delay,
	    E1000_REGISTER(hw, E1000_RDTR), em_rx_int_delay_dflt);
	em_add_int_delay_sysctl(adapter, "tx_int_delay",
	    "transmit interrupt delay in usecs", &adapter->tx_int_delay,
	    E1000_REGISTER(hw, E1000_TIDV), em_tx_int_delay_dflt);
	em_add_int_delay_sysctl(adapter, "rx_abs_int_delay",
	    "receive interrupt delay limit in usecs",
	    &adapter->rx_abs_int_delay,
	    E1000_REGISTER(hw, E1000_RADV),
	    em_rx_abs_int_delay_dflt);
	em_add_int_delay_sysctl(adapter, "tx_abs_int_delay",
	    "transmit interrupt delay limit in usecs",
	    &adapter->tx_abs_int_delay,
	    E1000_REGISTER(hw, E1000_TADV),
	    em_tx_abs_int_delay_dflt);

	/* Sysctl for limiting the amount of work done in the taskqueue */
	em_set_sysctl_value(adapter, "rx_processing_limit",
	    "max number of rx packets to process", &adapter->rx_process_limit,
	    em_rx_process_limit);

	/* Sysctl for setting the interface flow control */
	em_set_sysctl_value(adapter, "flow_control",
	    "configure flow control",
	    &adapter->fc_setting, em_fc_setting);

	/*
	 * Validate the number of transmit and receive descriptors. It
	 * must not exceed the hardware maximum, and must be a multiple
	 * of EM_DBA_ALIGN.
	 */
	if (((em_txd * sizeof(struct e1000_tx_desc)) % EM_DBA_ALIGN) != 0 ||
	    (em_txd > EM_MAX_TXD) || (em_txd < EM_MIN_TXD)) {
		device_printf(dev, "Using %d TX descriptors instead of %d!\n",
		    EM_DEFAULT_TXD, em_txd);
		adapter->num_tx_desc = EM_DEFAULT_TXD;
	} else
		adapter->num_tx_desc = em_txd;

	if (((em_rxd * sizeof(struct e1000_rx_desc)) % EM_DBA_ALIGN) != 0 ||
	    (em_rxd > EM_MAX_RXD) || (em_rxd < EM_MIN_RXD)) {
		device_printf(dev, "Using %d RX descriptors instead of %d!\n",
		    EM_DEFAULT_RXD, em_rxd);
		adapter->num_rx_desc = EM_DEFAULT_RXD;
	} else
		adapter->num_rx_desc = em_rxd;
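
	/*
	 * Example (illustrative, assuming the definitions in if_em.h): a
	 * struct e1000_tx_desc is 16 bytes, so a ring of 1024 descriptors
	 * occupies 16384 bytes, a multiple of EM_DBA_ALIGN (128), and so
	 * passes the check above.
	 */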

	hw->mac.autoneg = DO_AUTO_NEG;
	hw->phy.autoneg_wait_to_complete = FALSE;
	hw->phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;

	/* Copper options */
	if (hw->phy.media_type == e1000_media_type_copper) {
		hw->phy.mdix = AUTO_ALL_MODES;
		hw->phy.disable_polarity_correction = FALSE;
		hw->phy.ms_type = EM_MASTER_SLAVE;
	}

	/*
	 * Set the frame limits assuming
	 * standard ethernet sized frames.
	 */
	adapter->max_frame_size = ETHERMTU + ETHER_HDR_LEN + ETHERNET_FCS_SIZE;
	adapter->min_frame_size = ETH_ZLEN + ETHERNET_FCS_SIZE;

	/*
	 * This controls when hardware reports transmit completion
	 * status.
	 */
	hw->mac.report_tx_early = 1;

	/*
	** Get queue/ring memory
	*/
	if (em_allocate_queues(adapter)) {
		error = ENOMEM;
		goto err_pci;
	}

	/* Allocate multicast array memory. */
	adapter->mta = malloc(sizeof(u8) * ETH_ADDR_LEN *
	    MAX_NUM_MULTICAST_ADDRESSES, M_DEVBUF, M_NOWAIT);
	if (adapter->mta == NULL) {
		device_printf(dev, "Can not allocate multicast setup array\n");
		error = ENOMEM;
		goto err_late;
	}

	/* Check SOL/IDER usage */
	if (e1000_check_reset_block(hw))
		device_printf(dev, "PHY reset is blocked"
		    " due to SOL/IDER session.\n");

	/* Sysctl for setting Energy Efficient Ethernet */
	em_set_sysctl_value(adapter, "eee_control",
	    "enable Energy Efficient Ethernet",
	    &hw->dev_spec.ich8lan.eee_disable, eee_setting);

	/*
	** Start from a known state; this is
	** important for reading the nvm and
	** mac address from it.
	*/
	e1000_reset_hw(hw);

	/* Make sure we have a good EEPROM before we read from it */
	if (e1000_validate_nvm_checksum(hw) < 0) {
		/*
		** Some PCI-E parts fail the first check due to
		** the link being in a sleep state; call it again,
		** and if it fails a second time it's a real issue.
		*/
		if (e1000_validate_nvm_checksum(hw) < 0) {
			device_printf(dev,
			    "The EEPROM Checksum Is Not Valid\n");
			error = EIO;
			goto err_late;
		}
	}

	/* Copy the permanent MAC address out of the EEPROM */
	if (e1000_read_mac_addr(hw) < 0) {
		device_printf(dev, "EEPROM read error while reading MAC"
		    " address\n");
		error = EIO;
		goto err_late;
	}

	if (!em_is_valid_ether_addr(hw->mac.addr)) {
		device_printf(dev, "Invalid MAC address\n");
		error = EIO;
		goto err_late;
	}

	/*
	**  Do interrupt configuration
	*/
	if (adapter->msix > 1) /* Do MSIX */
		error = em_allocate_msix(adapter);
	else  /* MSI or Legacy */
		error = em_allocate_legacy(adapter);
	if (error)
		goto err_late;

	/*
	 * Get Wake-on-Lan and Management info for later use
	 */
	em_get_wakeup(dev);

	/* Setup OS specific network interface */
	if (em_setup_interface(dev, adapter) != 0)
		goto err_late;

	em_reset(adapter);

	/* Initialize statistics */
	em_update_stats_counters(adapter);

	hw->mac.get_link_status = 1;
	em_update_link_status(adapter);

	/* Register for VLAN events */
	adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
	    em_register_vlan, adapter, EVENTHANDLER_PRI_FIRST);
	adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
	    em_unregister_vlan, adapter, EVENTHANDLER_PRI_FIRST);

	em_add_hw_stats(adapter);

	/* Non-AMT based hardware can now take control from firmware */
	if (adapter->has_manage && !adapter->has_amt)
		em_get_hw_control(adapter);

	/* Tell the stack that the interface is not active */
	adapter->ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);

	adapter->led_dev = led_create(em_led_func, adapter,
	    device_get_nameunit(dev));

	INIT_DEBUGOUT("em_attach: end");

	return (0);

err_late:
	em_free_transmit_structures(adapter);
	em_free_receive_structures(adapter);
	em_release_hw_control(adapter);
	if (adapter->ifp != NULL)
		if_free(adapter->ifp);
err_pci:
	em_free_pci_resources(adapter);
	free(adapter->mta, M_DEVBUF);
	EM_CORE_LOCK_DESTROY(adapter);

	return (error);
}

/*********************************************************************
 *  Device removal routine
 *
 *  The detach entry point is called when the driver is being removed.
 *  This routine stops the adapter and deallocates all the resources
 *  that were allocated for driver operation.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/

static int
em_detach(device_t dev)
{
	struct adapter	*adapter = device_get_softc(dev);
	struct ifnet	*ifp = adapter->ifp;

	INIT_DEBUGOUT("em_detach: begin");

	/* Make sure VLANs are not using the driver */
	if (adapter->ifp->if_vlantrunk != NULL) {
		device_printf(dev,"Vlan in use, detach first\n");
		return (EBUSY);
	}

#ifdef DEVICE_POLLING
	if (ifp->if_capenable & IFCAP_POLLING)
		ether_poll_deregister(ifp);
#endif

	if (adapter->led_dev != NULL)
		led_destroy(adapter->led_dev);

	EM_CORE_LOCK(adapter);
	adapter->in_detach = 1;
	em_stop(adapter);
	EM_CORE_UNLOCK(adapter);
	EM_CORE_LOCK_DESTROY(adapter);

	e1000_phy_hw_reset(&adapter->hw);

	em_release_manageability(adapter);
	em_release_hw_control(adapter);

	/* Unregister VLAN events */
	if (adapter->vlan_attach != NULL)
		EVENTHANDLER_DEREGISTER(vlan_config, adapter->vlan_attach);
	if (adapter->vlan_detach != NULL)
		EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach);

	ether_ifdetach(adapter->ifp);
	callout_drain(&adapter->timer);

	em_free_pci_resources(adapter);
	bus_generic_detach(dev);
	if_free(ifp);

	em_free_transmit_structures(adapter);
	em_free_receive_structures(adapter);

	em_release_hw_control(adapter);
	free(adapter->mta, M_DEVBUF);

	return (0);
}

/*********************************************************************
 *
 *  Shutdown entry point
 *
 **********************************************************************/

static int
em_shutdown(device_t dev)
{
	return em_suspend(dev);
}

/*
 * Suspend/resume device methods.
 */
static int
em_suspend(device_t dev)
{
	struct adapter *adapter = device_get_softc(dev);

	EM_CORE_LOCK(adapter);

	em_release_manageability(adapter);
	em_release_hw_control(adapter);
	em_enable_wakeup(dev);

	EM_CORE_UNLOCK(adapter);

	return bus_generic_suspend(dev);
}

static int
em_resume(device_t dev)
{
	struct adapter *adapter = device_get_softc(dev);
	struct ifnet *ifp = adapter->ifp;

	EM_CORE_LOCK(adapter);
	em_init_locked(adapter);
	em_init_manageability(adapter);
	EM_CORE_UNLOCK(adapter);
	em_start(ifp);

	return bus_generic_resume(dev);
}

/*********************************************************************
 *  Transmit entry point
 *
 *  em_start is called by the stack to initiate a transmit.
 *  The driver will remain in this routine as long as there are
 *  packets to transmit and transmit resources are available.
 *  In case resources are not available, the stack is notified
 *  and the packet is requeued.
 *********************************************************************/

#ifdef EM_MULTIQUEUE
static int
em_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr, struct mbuf *m)
{
	struct adapter	*adapter = txr->adapter;
	struct mbuf	*next;
	int		err = 0, enq = 0;

	if ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) !=
	    IFF_DRV_RUNNING || adapter->link_active == 0) {
		if (m != NULL)
			err = drbr_enqueue(ifp, txr->br, m);
		return (err);
	}

	/* Call cleanup if number of TX descriptors low */
	if (txr->tx_avail <= EM_TX_CLEANUP_THRESHOLD)
		em_txeof(txr);

	enq = 0;
	if (m == NULL) {
		next = drbr_dequeue(ifp, txr->br);
	} else if (drbr_needs_enqueue(ifp, txr->br)) {
		if ((err = drbr_enqueue(ifp, txr->br, m)) != 0)
			return (err);
		next = drbr_dequeue(ifp, txr->br);
	} else
		next = m;

	/* Process the queue */
	while (next != NULL) {
		if ((err = em_xmit(txr, &next)) != 0) {
			if (next != NULL)
				err = drbr_enqueue(ifp, txr->br, next);
			break;
		}
		enq++;
		drbr_stats_update(ifp, next->m_pkthdr.len, next->m_flags);
		ETHER_BPF_MTAP(ifp, next);
		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
			break;
		if (txr->tx_avail < EM_MAX_SCATTER) {
			ifp->if_drv_flags |= IFF_DRV_OACTIVE;
			break;
		}
		next = drbr_dequeue(ifp, txr->br);
	}

	if (enq > 0) {
		/* Set the watchdog */
		txr->queue_status = EM_QUEUE_WORKING;
		txr->watchdog_time = ticks;
	}
	return (err);
}

/*
** Multiqueue capable stack interface
*/
static int
em_mq_start(struct ifnet *ifp, struct mbuf *m)
{
	struct adapter	*adapter = ifp->if_softc;
	struct tx_ring	*txr = adapter->tx_rings;
	int		error;

	if (EM_TX_TRYLOCK(txr)) {
		error = em_mq_start_locked(ifp, txr, m);
		EM_TX_UNLOCK(txr);
	} else
		error = drbr_enqueue(ifp, txr->br, m);

	return (error);
}
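
/*
 * Design note: the trylock above keeps em_mq_start() from blocking; when
 * the TX lock is busy the mbuf is queued to the buf_ring instead, to be
 * drained by a later em_mq_start_locked() pass (from the lock holder or
 * from the TX taskqueue via em_handle_tx()).
 */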

/*
** Flush all ring buffers
*/
static void
em_qflush(struct ifnet *ifp)
{
	struct adapter	*adapter = ifp->if_softc;
	struct tx_ring	*txr = adapter->tx_rings;
	struct mbuf	*m;

	for (int i = 0; i < adapter->num_queues; i++, txr++) {
		EM_TX_LOCK(txr);
		while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
			m_freem(m);
		EM_TX_UNLOCK(txr);
	}
	if_qflush(ifp);
}

#endif /* EM_MULTIQUEUE */

static void
em_start_locked(struct ifnet *ifp, struct tx_ring *txr)
{
	struct adapter	*adapter = ifp->if_softc;
	struct mbuf	*m_head;

	EM_TX_LOCK_ASSERT(txr);

	if ((ifp->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
	    IFF_DRV_RUNNING)
		return;

	if (!adapter->link_active)
		return;

	while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
		/* Call cleanup if number of TX descriptors low */
		if (txr->tx_avail <= EM_TX_CLEANUP_THRESHOLD)
			em_txeof(txr);
		if (txr->tx_avail < EM_MAX_SCATTER) {
			ifp->if_drv_flags |= IFF_DRV_OACTIVE;
			break;
		}
		IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
		if (m_head == NULL)
			break;
		/*
		 *  Encapsulation can modify our pointer, and/or make it
		 *  NULL on failure.  In that event, we can't requeue.
		 */
		if (em_xmit(txr, &m_head)) {
			if (m_head == NULL)
				break;
			ifp->if_drv_flags |= IFF_DRV_OACTIVE;
			IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
			break;
		}

		/* Send a copy of the frame to the BPF listener */
		ETHER_BPF_MTAP(ifp, m_head);

		/* Set timeout in case hardware has problems transmitting. */
		txr->watchdog_time = ticks;
		txr->queue_status = EM_QUEUE_WORKING;
	}

	return;
}

static void
em_start(struct ifnet *ifp)
{
	struct adapter	*adapter = ifp->if_softc;
	struct tx_ring	*txr = adapter->tx_rings;

	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
		EM_TX_LOCK(txr);
		em_start_locked(ifp, txr);
		EM_TX_UNLOCK(txr);
	}
	return;
}

/*********************************************************************
 *  Ioctl entry point
 *
 *  em_ioctl is called when the user wants to configure the
 *  interface.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/

static int
em_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
{
	struct adapter	*adapter = ifp->if_softc;
	struct ifreq *ifr = (struct ifreq *)data;
#ifdef INET
	struct ifaddr *ifa = (struct ifaddr *)data;
#endif
	int error = 0;

	if (adapter->in_detach)
		return (error);

	switch (command) {
	case SIOCSIFADDR:
#ifdef INET
		if (ifa->ifa_addr->sa_family == AF_INET) {
			/*
			 * XXX
			 * Since resetting hardware takes a very long time
			 * and results in link renegotiation, we only
			 * initialize the hardware when it is absolutely
			 * required.
			 */
			ifp->if_flags |= IFF_UP;
			if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) {
				EM_CORE_LOCK(adapter);
				em_init_locked(adapter);
				EM_CORE_UNLOCK(adapter);
			}
			arp_ifinit(ifp, ifa);
		} else
#endif
			error = ether_ioctl(ifp, command, data);
		break;
	case SIOCSIFMTU:
	    {
		int max_frame_size;

		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)");

		EM_CORE_LOCK(adapter);
		switch (adapter->hw.mac.type) {
		case e1000_82571:
		case e1000_82572:
		case e1000_ich9lan:
		case e1000_ich10lan:
		case e1000_pch2lan:
		case e1000_82574:
		case e1000_80003es2lan:	/* 9K Jumbo Frame size */
			max_frame_size = 9234;
			break;
		case e1000_pchlan:
			max_frame_size = 4096;
			break;
		/* Adapters that do not support jumbo frames */
		case e1000_82583:
		case e1000_ich8lan:
			max_frame_size = ETHER_MAX_LEN;
			break;
		default:
			max_frame_size = MAX_JUMBO_FRAME_SIZE;
		}
		if (ifr->ifr_mtu > max_frame_size - ETHER_HDR_LEN -
		    ETHER_CRC_LEN) {
			EM_CORE_UNLOCK(adapter);
			error = EINVAL;
			break;
		}

		ifp->if_mtu = ifr->ifr_mtu;
		adapter->max_frame_size =
		    ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
		em_init_locked(adapter);
		EM_CORE_UNLOCK(adapter);
		break;
	    }
	case SIOCSIFFLAGS:
		IOCTL_DEBUGOUT("ioctl rcv'd:\
		    SIOCSIFFLAGS (Set Interface Flags)");
		EM_CORE_LOCK(adapter);
		if (ifp->if_flags & IFF_UP) {
			if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) {
				if ((ifp->if_flags ^ adapter->if_flags) &
				    (IFF_PROMISC | IFF_ALLMULTI)) {
					em_disable_promisc(adapter);
					em_set_promisc(adapter);
				}
			} else
				em_init_locked(adapter);
		} else
			if (ifp->if_drv_flags & IFF_DRV_RUNNING)
				em_stop(adapter);
		adapter->if_flags = ifp->if_flags;
		EM_CORE_UNLOCK(adapter);
		break;
	case SIOCADDMULTI:
	case SIOCDELMULTI:
		IOCTL_DEBUGOUT("ioctl rcv'd: SIOC(ADD|DEL)MULTI");
		if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
			EM_CORE_LOCK(adapter);
			em_disable_intr(adapter);
			em_set_multi(adapter);
#ifdef DEVICE_POLLING
			if (!(ifp->if_capenable & IFCAP_POLLING))
#endif
				em_enable_intr(adapter);
			EM_CORE_UNLOCK(adapter);
		}
		break;
	case SIOCSIFMEDIA:
		/*
		** As the speed/duplex settings are being
		** changed, we need to reset the PHY.
		*/
		adapter->hw.phy.reset_disable = FALSE;
		/* Check SOL/IDER usage */
		EM_CORE_LOCK(adapter);
		if (e1000_check_reset_block(&adapter->hw)) {
			EM_CORE_UNLOCK(adapter);
			device_printf(adapter->dev, "Media change is"
			    " blocked due to SOL/IDER session.\n");
			break;
		}
		EM_CORE_UNLOCK(adapter);
		/* falls thru */
	case SIOCGIFMEDIA:
		IOCTL_DEBUGOUT("ioctl rcv'd: \
		    SIOCxIFMEDIA (Get/Set Interface Media)");
		error = ifmedia_ioctl(ifp, ifr, &adapter->media, command);
		break;
	case SIOCSIFCAP:
	    {
		int mask, reinit;

		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFCAP (Set Capabilities)");
		reinit = 0;
		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
#ifdef DEVICE_POLLING
		if (mask & IFCAP_POLLING) {
			if (ifr->ifr_reqcap & IFCAP_POLLING) {
				error = ether_poll_register(em_poll, ifp);
				if (error)
					return (error);
				EM_CORE_LOCK(adapter);
				em_disable_intr(adapter);
				ifp->if_capenable |= IFCAP_POLLING;
				EM_CORE_UNLOCK(adapter);
			} else {
				error = ether_poll_deregister(ifp);
				/* Enable interrupt even in error case */
				EM_CORE_LOCK(adapter);
				em_enable_intr(adapter);
				ifp->if_capenable &= ~IFCAP_POLLING;
				EM_CORE_UNLOCK(adapter);
			}
		}
#endif
		if (mask & IFCAP_HWCSUM) {
			ifp->if_capenable ^= IFCAP_HWCSUM;
			reinit = 1;
		}
		if (mask & IFCAP_TSO4) {
			ifp->if_capenable ^= IFCAP_TSO4;
			reinit = 1;
		}
		if (mask & IFCAP_VLAN_HWTAGGING) {
			ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
			reinit = 1;
		}
		if (mask & IFCAP_VLAN_HWFILTER) {
			ifp->if_capenable ^= IFCAP_VLAN_HWFILTER;
			reinit = 1;
		}
		if ((mask & IFCAP_WOL) &&
		    (ifp->if_capabilities & IFCAP_WOL) != 0) {
			if (mask & IFCAP_WOL_MCAST)
				ifp->if_capenable ^= IFCAP_WOL_MCAST;
			if (mask & IFCAP_WOL_MAGIC)
				ifp->if_capenable ^= IFCAP_WOL_MAGIC;
		}
		if (reinit && (ifp->if_drv_flags & IFF_DRV_RUNNING))
			em_init(adapter);
		VLAN_CAPABILITIES(ifp);
		break;
	    }

	default:
		error = ether_ioctl(ifp, command, data);
		break;
	}

	return (error);
}

/*********************************************************************
 *  Init entry point
 *
 *  This routine is used in two ways. It is used by the stack as
 *  the init entry point in the network interface structure. It is
 *  also used by the driver as a hw/sw initialization routine to
 *  get to a consistent state.
 *********************************************************************/

static void
em_init_locked(struct adapter *adapter)
{
	struct ifnet	*ifp = adapter->ifp;
	device_t	dev = adapter->dev;
	u32		pba;

	INIT_DEBUGOUT("em_init: begin");

	EM_CORE_LOCK_ASSERT(adapter);

	em_disable_intr(adapter);
	callout_stop(&adapter->timer);

	/*
	 * Packet Buffer Allocation (PBA)
	 * Writing PBA sets the receive portion of the buffer;
	 * the remainder is used for the transmit buffer.
	 */
	switch (adapter->hw.mac.type) {
	/* Total Packet Buffer on these is 48K */
	case e1000_82571:
	case e1000_82572:
	case e1000_80003es2lan:
		pba = E1000_PBA_32K; /* 32K for Rx, 16K for Tx */
		break;
	case e1000_82573: /* 82573: Total Packet Buffer is 32K */
		pba = E1000_PBA_12K; /* 12K for Rx, 20K for Tx */
		break;
	case e1000_82574:
	case e1000_82583:
		pba = E1000_PBA_20K; /* 20K for Rx, 20K for Tx */
		break;
	case e1000_ich8lan:
		pba = E1000_PBA_8K;
		break;
	case e1000_ich9lan:
	case e1000_ich10lan:
		pba = E1000_PBA_10K;
		break;
	case e1000_pchlan:
	case e1000_pch2lan:
		pba = E1000_PBA_26K;
		break;
	default:
		if (adapter->max_frame_size > 8192)
			pba = E1000_PBA_40K; /* 40K for Rx, 24K for Tx */
		else
			pba = E1000_PBA_48K; /* 48K for Rx, 16K for Tx */
	}
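
	/*
	 * Example (illustrative): the 82571/82572 parts above have 48K of
	 * total packet buffer, so writing E1000_PBA_32K reserves 32K for
	 * receive and leaves the remaining 16K for transmit.
	 */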

	INIT_DEBUGOUT1("em_init: pba=%dK",pba);
	E1000_WRITE_REG(&adapter->hw, E1000_PBA, pba);

	/* Get the latest mac address, user can use a LAA */
	bcopy(IF_LLADDR(adapter->ifp), adapter->hw.mac.addr,
	    ETHER_ADDR_LEN);

	/* Put the address into the Receive Address Array */
	e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);

	/*
	 * With the 82571 adapter, RAR[0] may be overwritten
	 * when the other port is reset, so we make a duplicate
	 * in RAR[14] for that eventuality; this assures
	 * the interface continues to function.
	 */
	if (adapter->hw.mac.type == e1000_82571) {
		e1000_set_laa_state_82571(&adapter->hw, TRUE);
		e1000_rar_set(&adapter->hw, adapter->hw.mac.addr,
		    E1000_RAR_ENTRIES - 1);
	}

	/* Initialize the hardware */
	em_reset(adapter);
	em_update_link_status(adapter);

	/* Setup VLAN support, basic and offload if available */
	E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);

	/* Set hardware offload abilities */
	ifp->if_hwassist = 0;
	if (ifp->if_capenable & IFCAP_TXCSUM)
		ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
	if (ifp->if_capenable & IFCAP_TSO4)
		ifp->if_hwassist |= CSUM_TSO;

	/* Configure for OS presence */
	em_init_manageability(adapter);

	/* Prepare transmit descriptors and buffers */
	em_setup_transmit_structures(adapter);
	em_initialize_transmit_unit(adapter);

	/* Setup Multicast table */
	em_set_multi(adapter);

	/*
	** Figure out the desired mbuf
	** pool for doing jumbos
	*/
	if (adapter->max_frame_size <= 2048)
		adapter->rx_mbuf_sz = MCLBYTES;
	else if (adapter->max_frame_size <= 4096)
		adapter->rx_mbuf_sz = MJUMPAGESIZE;
	else
		adapter->rx_mbuf_sz = MJUM9BYTES;

	/* Prepare receive descriptors and buffers */
	if (em_setup_receive_structures(adapter)) {
		device_printf(dev, "Could not setup receive structures\n");
		em_stop(adapter);
		return;
	}
	em_initialize_receive_unit(adapter);

	/* Use real VLAN Filter support? */
	if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) {
		if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
			/* Use real VLAN Filter support */
			em_setup_vlan_hw_support(adapter);
		else {
			u32 ctrl;
			ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
			ctrl |= E1000_CTRL_VME;
			E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
		}
	}

	/* Don't lose promiscuous settings */
	em_set_promisc(adapter);

	ifp->if_drv_flags |= IFF_DRV_RUNNING;
	ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;

	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
	e1000_clear_hw_cntrs_base_generic(&adapter->hw);

	/* MSI/X configuration for 82574 */
	if (adapter->hw.mac.type == e1000_82574) {
		int tmp;
		tmp = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
		tmp |= E1000_CTRL_EXT_PBA_CLR;
		E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, tmp);
		/* Set the IVAR - interrupt vector routing. */
		E1000_WRITE_REG(&adapter->hw, E1000_IVAR, adapter->ivars);
	}

#ifdef DEVICE_POLLING
	/*
	 * Only enable interrupts if we are not polling; make sure
	 * they are off otherwise.
	 */
	if (ifp->if_capenable & IFCAP_POLLING)
		em_disable_intr(adapter);
	else
#endif /* DEVICE_POLLING */
		em_enable_intr(adapter);

	/* AMT based hardware can now take control from firmware */
	if (adapter->has_manage && adapter->has_amt)
		em_get_hw_control(adapter);

	/* Don't reset the phy next time init gets called */
	adapter->hw.phy.reset_disable = TRUE;
}

static void
em_init(void *arg)
{
	struct adapter *adapter = arg;

	EM_CORE_LOCK(adapter);
	em_init_locked(adapter);
	EM_CORE_UNLOCK(adapter);
}

#ifdef DEVICE_POLLING
/*********************************************************************
 *
 *  Legacy polling routine: note this only works with a single queue
 *
 *********************************************************************/
static int
em_poll(struct ifnet *ifp, enum poll_cmd cmd, int count)
{
	struct adapter *adapter = ifp->if_softc;
	struct tx_ring	*txr = adapter->tx_rings;
	struct rx_ring	*rxr = adapter->rx_rings;
	u32		reg_icr;
	int		rx_done;

	EM_CORE_LOCK(adapter);
	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
		EM_CORE_UNLOCK(adapter);
		return (0);
	}

	if (cmd == POLL_AND_CHECK_STATUS) {
		reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
		if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
			callout_stop(&adapter->timer);
			adapter->hw.mac.get_link_status = 1;
			em_update_link_status(adapter);
			callout_reset(&adapter->timer, hz,
			    em_local_timer, adapter);
		}
	}
	EM_CORE_UNLOCK(adapter);

	em_rxeof(rxr, count, &rx_done);

	EM_TX_LOCK(txr);
	em_txeof(txr);
#ifdef EM_MULTIQUEUE
	if (!drbr_empty(ifp, txr->br))
		em_mq_start_locked(ifp, txr, NULL);
#else
	em_start_locked(ifp, txr);
#endif
	EM_TX_UNLOCK(txr);

	return (rx_done);
}
#endif /* DEVICE_POLLING */

/*********************************************************************
 *
 *  Fast Legacy/MSI Combined Interrupt Service routine
 *
 *********************************************************************/
static int
em_irq_fast(void *arg)
{
	struct adapter	*adapter = arg;
	struct ifnet	*ifp;
	u32		reg_icr;

	ifp = adapter->ifp;

	reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);

	/* Hot eject? */
	if (reg_icr == 0xffffffff)
		return FILTER_STRAY;

	/* Definitely not our interrupt. */
	if (reg_icr == 0x0)
		return FILTER_STRAY;

	/*
	 * Starting with the 82571 chip, bit 31 should be used to
	 * determine whether the interrupt belongs to us.
	 */
	if (adapter->hw.mac.type >= e1000_82571 &&
	    (reg_icr & E1000_ICR_INT_ASSERTED) == 0)
		return FILTER_STRAY;

	em_disable_intr(adapter);
	taskqueue_enqueue(adapter->tq, &adapter->que_task);

	/* Link status change */
	if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
		adapter->hw.mac.get_link_status = 1;
		taskqueue_enqueue(taskqueue_fast, &adapter->link_task);
	}

	if (reg_icr & E1000_ICR_RXO)
		adapter->rx_overruns++;
	return FILTER_HANDLED;
}
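
/*
 * Note: em_irq_fast() runs as an interrupt filter, so it must not sleep
 * or take sleepable locks; it only classifies and acknowledges the
 * interrupt and defers the actual RX/TX work to the que_task taskqueue,
 * serviced by em_handle_que() below.
 */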

/* Combined RX/TX handler, used by Legacy and MSI */
static void
em_handle_que(void *context, int pending)
{
	struct adapter	*adapter = context;
	struct ifnet	*ifp = adapter->ifp;
	struct tx_ring	*txr = adapter->tx_rings;
	struct rx_ring	*rxr = adapter->rx_rings;

	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
		bool more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
		EM_TX_LOCK(txr);
		em_txeof(txr);
#ifdef EM_MULTIQUEUE
		if (!drbr_empty(ifp, txr->br))
			em_mq_start_locked(ifp, txr, NULL);
#else
		em_start_locked(ifp, txr);
#endif
		EM_TX_UNLOCK(txr);
		if (more || (ifp->if_drv_flags & IFF_DRV_OACTIVE)) {
			taskqueue_enqueue(adapter->tq, &adapter->que_task);
			return;
		}
	}

	em_enable_intr(adapter);
	return;
}

/*********************************************************************
 *
 *  MSIX Interrupt Service Routines
 *
 **********************************************************************/
static void
em_msix_tx(void *arg)
{
	struct tx_ring *txr = arg;
	struct adapter *adapter = txr->adapter;
	bool		more;

	++txr->tx_irq;
	EM_TX_LOCK(txr);
	more = em_txeof(txr);
	EM_TX_UNLOCK(txr);
	if (more)
		taskqueue_enqueue(txr->tq, &txr->tx_task);
	else
		/* Reenable this interrupt */
		E1000_WRITE_REG(&adapter->hw, E1000_IMS, txr->ims);
	return;
}

/*********************************************************************
 *
 *  MSIX RX Interrupt Service routine
 *
 **********************************************************************/

static void
em_msix_rx(void *arg)
{
	struct rx_ring	*rxr = arg;
	struct adapter	*adapter = rxr->adapter;
	bool		more;

	++rxr->rx_irq;
	more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
	if (more)
		taskqueue_enqueue(rxr->tq, &rxr->rx_task);
	else
		/* Reenable this interrupt */
		E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxr->ims);
	return;
}

/*********************************************************************
 *
 *  MSIX Link Fast Interrupt Service routine
 *
 **********************************************************************/
static void
em_msix_link(void *arg)
{
	struct adapter	*adapter = arg;
	u32		reg_icr;

	++adapter->link_irq;
	reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);

	if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
		adapter->hw.mac.get_link_status = 1;
		em_handle_link(adapter, 0);
	} else
		E1000_WRITE_REG(&adapter->hw, E1000_IMS,
		    EM_MSIX_LINK | E1000_IMS_LSC);
	return;
}

static void
em_handle_rx(void *context, int pending)
{
	struct rx_ring	*rxr = context;
	struct adapter	*adapter = rxr->adapter;
	bool		more;

	more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
	if (more)
		taskqueue_enqueue(rxr->tq, &rxr->rx_task);
	else
		/* Reenable this interrupt */
		E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxr->ims);
}

static void
em_handle_tx(void *context, int pending)
{
	struct tx_ring	*txr = context;
	struct adapter	*adapter = txr->adapter;
	struct ifnet	*ifp = adapter->ifp;

	EM_TX_LOCK(txr);
	em_txeof(txr);
#ifdef EM_MULTIQUEUE
	if (!drbr_empty(ifp, txr->br))
		em_mq_start_locked(ifp, txr, NULL);
#else
	em_start_locked(ifp, txr);
#endif
	E1000_WRITE_REG(&adapter->hw, E1000_IMS, txr->ims);
	EM_TX_UNLOCK(txr);
}

static void
em_handle_link(void *context, int pending)
{
	struct adapter	*adapter = context;
	struct ifnet *ifp = adapter->ifp;

	if (!(ifp->if_drv_flags & IFF_DRV_RUNNING))
		return;

	EM_CORE_LOCK(adapter);
	callout_stop(&adapter->timer);
	em_update_link_status(adapter);
	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
	E1000_WRITE_REG(&adapter->hw, E1000_IMS,
	    EM_MSIX_LINK | E1000_IMS_LSC);
	EM_CORE_UNLOCK(adapter);
}

/*********************************************************************
 *
 *  Media Ioctl callback
 *
 *  This routine is called whenever the user queries the status of
 *  the interface using ifconfig.
 *
 **********************************************************************/
static void
em_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
{
	struct adapter *adapter = ifp->if_softc;
	u_char fiber_type = IFM_1000_SX;

	INIT_DEBUGOUT("em_media_status: begin");

	EM_CORE_LOCK(adapter);
	em_update_link_status(adapter);

	ifmr->ifm_status = IFM_AVALID;
	ifmr->ifm_active = IFM_ETHER;

	if (!adapter->link_active) {
		EM_CORE_UNLOCK(adapter);
		return;
	}

	ifmr->ifm_status |= IFM_ACTIVE;

	if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
	    (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
		ifmr->ifm_active |= fiber_type | IFM_FDX;
	} else {
		switch (adapter->link_speed) {
		case 10:
			ifmr->ifm_active |= IFM_10_T;
			break;
		case 100:
			ifmr->ifm_active |= IFM_100_TX;
			break;
		case 1000:
			ifmr->ifm_active |= IFM_1000_T;
			break;
		}
		if (adapter->link_duplex == FULL_DUPLEX)
			ifmr->ifm_active |= IFM_FDX;
		else
			ifmr->ifm_active |= IFM_HDX;
	}
	EM_CORE_UNLOCK(adapter);
}

/*********************************************************************
 *
 *  Media Ioctl callback
 *
 *  This routine is called when the user changes speed/duplex using
 *  the media/mediaopt options with ifconfig.
 *
 **********************************************************************/
static int
em_media_change(struct ifnet *ifp)
{
	struct adapter *adapter = ifp->if_softc;
	struct ifmedia  *ifm = &adapter->media;

	INIT_DEBUGOUT("em_media_change: begin");

	if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
		return (EINVAL);

	EM_CORE_LOCK(adapter);
	switch (IFM_SUBTYPE(ifm->ifm_media)) {
	case IFM_AUTO:
		adapter->hw.mac.autoneg = DO_AUTO_NEG;
		adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
		break;
	case IFM_1000_LX:
	case IFM_1000_SX:
	case IFM_1000_T:
		adapter->hw.mac.autoneg = DO_AUTO_NEG;
		adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
		break;
	case IFM_100_TX:
		adapter->hw.mac.autoneg = FALSE;
		adapter->hw.phy.autoneg_advertised = 0;
		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
			adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_FULL;
		else
			adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_HALF;
		break;
	case IFM_10_T:
		adapter->hw.mac.autoneg = FALSE;
		adapter->hw.phy.autoneg_advertised = 0;
		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
			adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_FULL;
		else
			adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_HALF;
		break;
	default:
		device_printf(adapter->dev, "Unsupported media type\n");
	}

	em_init_locked(adapter);
	EM_CORE_UNLOCK(adapter);

	return (0);
}

/*********************************************************************
 *
 *  This routine maps the mbufs to tx descriptors.
 *
 *  return 0 on success, positive on failure
 **********************************************************************/

static int
em_xmit(struct tx_ring *txr, struct mbuf **m_headp)
{
	struct adapter		*adapter = txr->adapter;
	bus_dma_segment_t	segs[EM_MAX_SCATTER];
	bus_dmamap_t		map;
	struct em_buffer	*tx_buffer, *tx_buffer_mapped;
	struct e1000_tx_desc	*ctxd = NULL;
	struct mbuf		*m_head;
	struct ether_header	*eh;
	struct ip		*ip = NULL;
	struct tcphdr		*tp = NULL;
	u32			txd_upper, txd_lower, txd_used, txd_saved;
	int			ip_off, poff;
	int			nsegs, i, j, first, last = 0;
	int			error, do_tso, tso_desc = 0, remap = 1;

retry:
	m_head = *m_headp;
	txd_upper = txd_lower = txd_used = txd_saved = 0;
	do_tso = ((m_head->m_pkthdr.csum_flags & CSUM_TSO) != 0);
	ip_off = poff = 0;
1771
1772	/*
1773	 * Intel recommends entire IP/TCP header length reside in a single
1774	 * buffer. If multiple descriptors are used to describe the IP and
1775	 * TCP header, each descriptor should describe one or more
1776	 * complete headers; descriptors referencing only parts of headers
1777	 * are not supported. If all layer headers are not coalesced into
1778	 * a single buffer, each buffer should not cross a 4KB boundary,
1779	 * or be larger than the maximum read request size.
1780	 * The controller also requires modifying the IP/TCP header to make
1781	 * TSO work, so we first get a writable mbuf chain, then coalesce the
1782	 * ethernet/IP/TCP header into a single buffer to meet the
1783	 * controller's requirement. This also simplifies IP/TCP/UDP checksum
1784	 * offloading, which has similar restrictions.
1785	 */
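	/*
	 * Illustrative layout after the pullups below (a hedged example,
	 * assuming an untagged IPv4/TCP frame with no IP or TCP options):
	 *
	 *	ip_off = sizeof(struct ether_header) = 14
	 *	poff   = ip_off + (ip->ip_hl << 2)   = 14 + 20 = 34
	 *
	 * so the first mbuf must end up with at least 34 +
	 * sizeof(struct tcphdr) = 54 contiguous bytes, or 58 in the TSO
	 * case, which pulls 4 extra payload bytes as a workaround.
	 */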
1786	if (do_tso || m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD) {
1787		if (do_tso || (m_head->m_next != NULL &&
1788		    m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD)) {
1789			if (M_WRITABLE(*m_headp) == 0) {
1790				m_head = m_dup(*m_headp, M_DONTWAIT);
1791				m_freem(*m_headp);
1792				if (m_head == NULL) {
1793					*m_headp = NULL;
1794					return (ENOBUFS);
1795				}
1796				*m_headp = m_head;
1797			}
1798		}
1799		/*
1800		 * XXX
1801		 * Assume IPv4, we don't have TSO/checksum offload support
1802		 * for IPv6 yet.
1803		 */
1804		ip_off = sizeof(struct ether_header);
1805		m_head = m_pullup(m_head, ip_off);
1806		if (m_head == NULL) {
1807			*m_headp = NULL;
1808			return (ENOBUFS);
1809		}
1810		eh = mtod(m_head, struct ether_header *);
1811		if (eh->ether_type == htons(ETHERTYPE_VLAN)) {
1812			ip_off = sizeof(struct ether_vlan_header);
1813			m_head = m_pullup(m_head, ip_off);
1814			if (m_head == NULL) {
1815				*m_headp = NULL;
1816				return (ENOBUFS);
1817			}
1818		}
1819		m_head = m_pullup(m_head, ip_off + sizeof(struct ip));
1820		if (m_head == NULL) {
1821			*m_headp = NULL;
1822			return (ENOBUFS);
1823		}
1824		ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1825		poff = ip_off + (ip->ip_hl << 2);
1826		if (do_tso) {
1827			m_head = m_pullup(m_head, poff + sizeof(struct tcphdr));
1828			if (m_head == NULL) {
1829				*m_headp = NULL;
1830				return (ENOBUFS);
1831			}
1832			tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1833			/*
1834			 * TSO workaround:
1835			 *   pull 4 more bytes of data into it.
1836			 */
1837			m_head = m_pullup(m_head, poff + (tp->th_off << 2) + 4);
1838			if (m_head == NULL) {
1839				*m_headp = NULL;
1840				return (ENOBUFS);
1841			}
1842			ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1843			ip->ip_len = 0;
1844			ip->ip_sum = 0;
1845			/*
1846			 * The pseudo TCP checksum does not include TCP payload
1847			 * length, so the driver must recompute it here as the
1848			 * hardware expects to see it, in adherence to
1849			 * Microsoft's Large Send specification.
1850			 */
1851			tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1852			tp->th_sum = in_pseudo(ip->ip_src.s_addr,
1853			    ip->ip_dst.s_addr, htons(IPPROTO_TCP));
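			/*
			 * That is (hedged summary): th_sum now holds only
			 * the pseudo-header sum of ip_src, ip_dst and
			 * htons(IPPROTO_TCP); the hardware folds in the
			 * per-segment TCP length during segmentation.
			 */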
1854		} else if (m_head->m_pkthdr.csum_flags & CSUM_TCP) {
1855			m_head = m_pullup(m_head, poff + sizeof(struct tcphdr));
1856			if (m_head == NULL) {
1857				*m_headp = NULL;
1858				return (ENOBUFS);
1859			}
1860			tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1861			m_head = m_pullup(m_head, poff + (tp->th_off << 2));
1862			if (m_head == NULL) {
1863				*m_headp = NULL;
1864				return (ENOBUFS);
1865			}
1866			ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1867			tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1868		} else if (m_head->m_pkthdr.csum_flags & CSUM_UDP) {
1869			m_head = m_pullup(m_head, poff + sizeof(struct udphdr));
1870			if (m_head == NULL) {
1871				*m_headp = NULL;
1872				return (ENOBUFS);
1873			}
1874			ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1875		}
1876		*m_headp = m_head;
1877	}
1878
1879	/*
1880	 * Map the packet for DMA
1881	 *
1882	 * Capture the first descriptor index,
1883	 * this descriptor will have the index
1884	 * of the EOP which is the only one that
1885	 * now gets a DONE bit writeback.
1886	 */
1887	first = txr->next_avail_desc;
1888	tx_buffer = &txr->tx_buffers[first];
1889	tx_buffer_mapped = tx_buffer;
1890	map = tx_buffer->map;
1891
1892	error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
1893	    *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
1894
1895	/*
1896	 * There are two types of errors we can (try) to handle:
1897	 * - EFBIG means the mbuf chain was too long and bus_dma ran
1898	 *   out of segments.  Defragment the mbuf chain and try again.
1899	 * - ENOMEM means bus_dma could not obtain enough bounce buffers
1900	 *   at this point in time.  Defer sending and try again later.
1901	 * All other errors, in particular EINVAL, are fatal and prevent the
1902	 * mbuf chain from ever going through.  Drop it and report error.
1903	 */
1904	if (error == EFBIG && remap) {
1905		struct mbuf *m;
1906
1907		m = m_defrag(*m_headp, M_DONTWAIT);
1908		if (m == NULL) {
1909			adapter->mbuf_alloc_failed++;
1910			m_freem(*m_headp);
1911			*m_headp = NULL;
1912			return (ENOBUFS);
1913		}
1914		*m_headp = m;
1915
1916		/* Try it again, but only once */
1917		remap = 0;
1918		goto retry;
1919	} else if (error == ENOMEM) {
1920		adapter->no_tx_dma_setup++;
1921		return (error);
1922	} else if (error != 0) {
1923		adapter->no_tx_dma_setup++;
1924		m_freem(*m_headp);
1925		*m_headp = NULL;
1926		return (error);
1927	}
1928
1929	/*
1930	 * TSO Hardware workaround, if this packet is not
1931	 * TSO, and is only a single descriptor long, and
1932	 * it follows a TSO burst, then we need to add a
1933	 * sentinel descriptor to prevent premature writeback.
1934	 */
1935	if ((do_tso == 0) && (txr->tx_tso == TRUE)) {
1936		if (nsegs == 1)
1937			tso_desc = TRUE;
1938		txr->tx_tso = FALSE;
1939	}
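	/*
	 * Illustrative example of the sentinel split done in the loop
	 * below (hedged; the segment size is hypothetical): a final
	 * 60-byte segment at address A becomes one 56-byte descriptor
	 * for A plus a 4-byte sentinel descriptor for A + 56, so the
	 * DONE writeback cannot land before the payload DMA completes.
	 */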
1940
1941	if (nsegs > (txr->tx_avail - 2)) {
1942		txr->no_desc_avail++;
1943		bus_dmamap_unload(txr->txtag, map);
1944		return (ENOBUFS);
1945	}
1946	m_head = *m_headp;
1947
1948	/* Do hardware assists */
1949	if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
1950		em_tso_setup(txr, m_head, ip_off, ip, tp,
1951		    &txd_upper, &txd_lower);
1952		/* we need to make a final sentinel transmit desc */
1953		tso_desc = TRUE;
1954	} else if (m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD)
1955		em_transmit_checksum_setup(txr, m_head,
1956		    ip_off, ip, &txd_upper, &txd_lower);
1957
1958	i = txr->next_avail_desc;
1959
1960	/* Set up our transmit descriptors */
1961	for (j = 0; j < nsegs; j++) {
1962		bus_size_t seg_len;
1963		bus_addr_t seg_addr;
1964
1965		tx_buffer = &txr->tx_buffers[i];
1966		ctxd = &txr->tx_base[i];
1967		seg_addr = segs[j].ds_addr;
1968		seg_len  = segs[j].ds_len;
1969		/*
1970		** TSO Workaround:
1971		** If this is the last descriptor, we want to
1972		** split it so we have a small final sentinel
1973		*/
1974		if (tso_desc && (j == (nsegs -1)) && (seg_len > 8)) {
1975			seg_len -= 4;
1976			ctxd->buffer_addr = htole64(seg_addr);
1977			ctxd->lower.data = htole32(
1978			adapter->txd_cmd | txd_lower | seg_len);
1979			ctxd->upper.data =
1980			    htole32(txd_upper);
1981			if (++i == adapter->num_tx_desc)
1982				i = 0;
1983			/* Now make the sentinel */
1984			++txd_used; /* using an extra txd */
1985			ctxd = &txr->tx_base[i];
1986			tx_buffer = &txr->tx_buffers[i];
1987			ctxd->buffer_addr =
1988			    htole64(seg_addr + seg_len);
1989			ctxd->lower.data = htole32(
1990			adapter->txd_cmd | txd_lower | 4);
1991			ctxd->upper.data =
1992			    htole32(txd_upper);
1993			last = i;
1994			if (++i == adapter->num_tx_desc)
1995				i = 0;
1996		} else {
1997			ctxd->buffer_addr = htole64(seg_addr);
1998			ctxd->lower.data = htole32(
1999			adapter->txd_cmd | txd_lower | seg_len);
2000			ctxd->upper.data =
2001			    htole32(txd_upper);
2002			last = i;
2003			if (++i == adapter->num_tx_desc)
2004				i = 0;
2005		}
2006		tx_buffer->m_head = NULL;
2007		tx_buffer->next_eop = -1;
2008	}
2009
2010	txr->next_avail_desc = i;
2011	txr->tx_avail -= nsegs;
2012	if (tso_desc) /* TSO used an extra for sentinel */
2013		txr->tx_avail -= txd_used;
2014
2015	if (m_head->m_flags & M_VLANTAG) {
2016		/* Set the vlan id. */
2017		ctxd->upper.fields.special =
2018		    htole16(m_head->m_pkthdr.ether_vtag);
2019		/* Tell hardware to add tag */
2020		ctxd->lower.data |= htole32(E1000_TXD_CMD_VLE);
2021	}
2022
2023	tx_buffer->m_head = m_head;
2024	tx_buffer_mapped->map = tx_buffer->map;
2025	tx_buffer->map = map;
2026	bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);
2027
2028	/*
2029	 * Last Descriptor of Packet
2030	 * needs End Of Packet (EOP)
2031	 * and Report Status (RS)
2032	 */
2033	ctxd->lower.data |=
2034	    htole32(E1000_TXD_CMD_EOP | E1000_TXD_CMD_RS);
2035	/*
2036	 * Keep track in the first buffer which
2037	 * descriptor will be written back
2038	 */
2039	tx_buffer = &txr->tx_buffers[first];
2040	tx_buffer->next_eop = last;
2041	/* Update the watchdog time early and often */
2042	txr->watchdog_time = ticks;
2043
2044	/*
2045	 * Advance the Transmit Descriptor Tail (TDT), this tells the E1000
2046	 * that this frame is available to transmit.
2047	 */
2048	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
2049	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
2050	E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), i);
2051
2052	return (0);
2053}
2054
2055static void
2056em_set_promisc(struct adapter *adapter)
2057{
2058	struct ifnet	*ifp = adapter->ifp;
2059	u32		reg_rctl;
2060
2061	reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2062
2063	if (ifp->if_flags & IFF_PROMISC) {
2064		reg_rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
2065		/* Turn this on if you want to see bad packets */
2066		if (em_debug_sbp)
2067			reg_rctl |= E1000_RCTL_SBP;
2068		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2069	} else if (ifp->if_flags & IFF_ALLMULTI) {
2070		reg_rctl |= E1000_RCTL_MPE;
2071		reg_rctl &= ~E1000_RCTL_UPE;
2072		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2073	}
2074}
2075
2076static void
2077em_disable_promisc(struct adapter *adapter)
2078{
2079	u32	reg_rctl;
2080
2081	reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2082
2083	reg_rctl &=  (~E1000_RCTL_UPE);
2084	reg_rctl &=  (~E1000_RCTL_MPE);
2085	reg_rctl &=  (~E1000_RCTL_SBP);
2086	E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2087}
2088
2089
2090/*********************************************************************
2091 *  Multicast Update
2092 *
2093 *  This routine is called whenever multicast address list is updated.
2094 *
2095 **********************************************************************/
2096
2097static void
2098em_set_multi(struct adapter *adapter)
2099{
2100	struct ifnet	*ifp = adapter->ifp;
2101	struct ifmultiaddr *ifma;
2102	u32 reg_rctl = 0;
2103	u8  *mta; /* Multicast array memory */
2104	int mcnt = 0;
2105
2106	IOCTL_DEBUGOUT("em_set_multi: begin");
2107
2108	mta = adapter->mta;
2109	bzero(mta, sizeof(u8) * ETH_ADDR_LEN * MAX_NUM_MULTICAST_ADDRESSES);
2110
2111	if (adapter->hw.mac.type == e1000_82542 &&
2112	    adapter->hw.revision_id == E1000_REVISION_2) {
2113		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2114		if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
2115			e1000_pci_clear_mwi(&adapter->hw);
2116		reg_rctl |= E1000_RCTL_RST;
2117		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2118		msec_delay(5);
2119	}
2120
2121#if __FreeBSD_version < 800000
2122	IF_ADDR_LOCK(ifp);
2123#else
2124	if_maddr_rlock(ifp);
2125#endif
2126	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
2127		if (ifma->ifma_addr->sa_family != AF_LINK)
2128			continue;
2129
2130		if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
2131			break;
2132
2133		bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
2134		    &mta[mcnt * ETH_ADDR_LEN], ETH_ADDR_LEN);
2135		mcnt++;
2136	}
2137#if __FreeBSD_version < 800000
2138	IF_ADDR_UNLOCK(ifp);
2139#else
2140	if_maddr_runlock(ifp);
2141#endif
2142	if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) {
2143		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2144		reg_rctl |= E1000_RCTL_MPE;
2145		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2146	} else
2147		e1000_update_mc_addr_list(&adapter->hw, mta, mcnt);
2148
2149	if (adapter->hw.mac.type == e1000_82542 &&
2150	    adapter->hw.revision_id == E1000_REVISION_2) {
2151		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2152		reg_rctl &= ~E1000_RCTL_RST;
2153		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2154		msec_delay(5);
2155		if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
2156			e1000_pci_set_mwi(&adapter->hw);
2157	}
2158}
2159
2160
2161/*********************************************************************
2162 *  Timer routine
2163 *
2164 *  This routine checks for link status and updates statistics.
2165 *
2166 **********************************************************************/
2167
2168static void
2169em_local_timer(void *arg)
2170{
2171	struct adapter	*adapter = arg;
2172	struct ifnet	*ifp = adapter->ifp;
2173	struct tx_ring	*txr = adapter->tx_rings;
2174	struct rx_ring	*rxr = adapter->rx_rings;
2175	u32		trigger;
2176
2177	EM_CORE_LOCK_ASSERT(adapter);
2178
2179	em_update_link_status(adapter);
2180	em_update_stats_counters(adapter);
2181
2182	/* Reset LAA into RAR[0] on 82571 */
2183	if ((adapter->hw.mac.type == e1000_82571) &&
2184	    e1000_get_laa_state_82571(&adapter->hw))
2185		e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);
2186
2187	/* Mask to use in the irq trigger */
2188	if (adapter->msix_mem)
2189		trigger = rxr->ims; /* RX for 82574 */
2190	else
2191		trigger = E1000_ICS_RXDMT0;
2192
2193	/*
2194	** Don't do TX watchdog check if we've been paused
2195	*/
2196	if (adapter->pause_frames) {
2197		adapter->pause_frames = 0;
2198		goto out;
2199	}
2200	/*
2201	** Check on the state of the TX queue(s); this
2202	** can be done without the lock because it is read-only
2203	** and the HUNG state will be static if set.
2204	*/
2205	for (int i = 0; i < adapter->num_queues; i++, txr++)
2206		if (txr->queue_status == EM_QUEUE_HUNG)
2207			goto hung;
2208out:
2209	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
2210#ifndef DEVICE_POLLING
2211	/* Trigger an RX interrupt to guarantee mbuf refresh */
2212	E1000_WRITE_REG(&adapter->hw, E1000_ICS, trigger);
2213#endif
2214	return;
2215hung:
2216	/* Looks like we're hung */
2217	device_printf(adapter->dev, "Watchdog timeout -- resetting\n");
2218	device_printf(adapter->dev,
2219	    "Queue(%d) tdh = %d, hw tdt = %d\n", txr->me,
2220	    E1000_READ_REG(&adapter->hw, E1000_TDH(txr->me)),
2221	    E1000_READ_REG(&adapter->hw, E1000_TDT(txr->me)));
2222	device_printf(adapter->dev, "TX(%d) desc avail = %d, "
2223	    "Next TX to Clean = %d\n",
2224	    txr->me, txr->tx_avail, txr->next_to_clean);
2225	ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2226	adapter->watchdog_events++;
2227	em_init_locked(adapter);
2228}
2229
2230
2231static void
2232em_update_link_status(struct adapter *adapter)
2233{
2234	struct e1000_hw *hw = &adapter->hw;
2235	struct ifnet *ifp = adapter->ifp;
2236	device_t dev = adapter->dev;
2237	struct tx_ring *txr = adapter->tx_rings;
2238	u32 link_check = 0;
2239
2240	/* Get the cached link value or read phy for real */
2241	switch (hw->phy.media_type) {
2242	case e1000_media_type_copper:
2243		if (hw->mac.get_link_status) {
2244			/* Do the work to read phy */
2245			e1000_check_for_link(hw);
2246			link_check = !hw->mac.get_link_status;
2247			if (link_check) /* ESB2 fix */
2248				e1000_cfg_on_link_up(hw);
2249		} else
2250			link_check = TRUE;
2251		break;
2252	case e1000_media_type_fiber:
2253		e1000_check_for_link(hw);
2254		link_check = (E1000_READ_REG(hw, E1000_STATUS) &
2255                                 E1000_STATUS_LU);
2256		break;
2257	case e1000_media_type_internal_serdes:
2258		e1000_check_for_link(hw);
2259		link_check = adapter->hw.mac.serdes_has_link;
2260		break;
2261	default:
2262	case e1000_media_type_unknown:
2263		break;
2264	}
2265
2266	/* Now check for a transition */
2267	if (link_check && (adapter->link_active == 0)) {
2268		e1000_get_speed_and_duplex(hw, &adapter->link_speed,
2269		    &adapter->link_duplex);
2270		/* Check if we must disable SPEED_MODE bit on PCI-E */
2271		if ((adapter->link_speed != SPEED_1000) &&
2272		    ((hw->mac.type == e1000_82571) ||
2273		    (hw->mac.type == e1000_82572))) {
2274			int tarc0;
2275			tarc0 = E1000_READ_REG(hw, E1000_TARC(0));
2276			tarc0 &= ~SPEED_MODE_BIT;
2277			E1000_WRITE_REG(hw, E1000_TARC(0), tarc0);
2278		}
2279		if (bootverbose)
2280			device_printf(dev, "Link is up %d Mbps %s\n",
2281			    adapter->link_speed,
2282			    ((adapter->link_duplex == FULL_DUPLEX) ?
2283			    "Full Duplex" : "Half Duplex"));
2284		adapter->link_active = 1;
2285		adapter->smartspeed = 0;
2286		ifp->if_baudrate = adapter->link_speed * 1000000;
2287		if_link_state_change(ifp, LINK_STATE_UP);
2288	} else if (!link_check && (adapter->link_active == 1)) {
2289		ifp->if_baudrate = adapter->link_speed = 0;
2290		adapter->link_duplex = 0;
2291		if (bootverbose)
2292			device_printf(dev, "Link is Down\n");
2293		adapter->link_active = 0;
2294		/* Link down, disable watchdog */
2295		for (int i = 0; i < adapter->num_queues; i++, txr++)
2296			txr->queue_status = EM_QUEUE_IDLE;
2297		if_link_state_change(ifp, LINK_STATE_DOWN);
2298	}
2299}
2300
2301/*********************************************************************
2302 *
2303 *  This routine disables all traffic on the adapter by issuing a
2304 *  global reset on the MAC and deallocates TX/RX buffers.
2305 *
2306 *  This routine should always be called with BOTH the CORE
2307 *  and TX locks.
2308 **********************************************************************/
2309
2310static void
2311em_stop(void *arg)
2312{
2313	struct adapter	*adapter = arg;
2314	struct ifnet	*ifp = adapter->ifp;
2315	struct tx_ring	*txr = adapter->tx_rings;
2316
2317	EM_CORE_LOCK_ASSERT(adapter);
2318
2319	INIT_DEBUGOUT("em_stop: begin");
2320
2321	em_disable_intr(adapter);
2322	callout_stop(&adapter->timer);
2323
2324	/* Tell the stack that the interface is no longer active */
2325	ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
2326
2327        /* Unarm watchdog timer. */
2328	for (int i = 0; i < adapter->num_queues; i++, txr++) {
2329		EM_TX_LOCK(txr);
2330		txr->queue_status = EM_QUEUE_IDLE;
2331		EM_TX_UNLOCK(txr);
2332	}
2333
2334	e1000_reset_hw(&adapter->hw);
2335	E1000_WRITE_REG(&adapter->hw, E1000_WUC, 0);
2336
2337	e1000_led_off(&adapter->hw);
2338	e1000_cleanup_led(&adapter->hw);
2339}
2340
2341
2342/*********************************************************************
2343 *
2344 *  Determine hardware revision.
2345 *
2346 **********************************************************************/
2347static void
2348em_identify_hardware(struct adapter *adapter)
2349{
2350	device_t dev = adapter->dev;
2351
2352	/* Make sure our PCI config space has the necessary stuff set */
2353	adapter->hw.bus.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2);
2354	if (!((adapter->hw.bus.pci_cmd_word & PCIM_CMD_BUSMASTEREN) &&
2355	    (adapter->hw.bus.pci_cmd_word & PCIM_CMD_MEMEN))) {
2356		device_printf(dev, "Memory Access and/or Bus Master bits "
2357		    "were not set!\n");
2358		adapter->hw.bus.pci_cmd_word |=
2359		(PCIM_CMD_BUSMASTEREN | PCIM_CMD_MEMEN);
2360		pci_write_config(dev, PCIR_COMMAND,
2361		    adapter->hw.bus.pci_cmd_word, 2);
2362	}
2363
2364	/* Save off the information about this board */
2365	adapter->hw.vendor_id = pci_get_vendor(dev);
2366	adapter->hw.device_id = pci_get_device(dev);
2367	adapter->hw.revision_id = pci_read_config(dev, PCIR_REVID, 1);
2368	adapter->hw.subsystem_vendor_id =
2369	    pci_read_config(dev, PCIR_SUBVEND_0, 2);
2370	adapter->hw.subsystem_device_id =
2371	    pci_read_config(dev, PCIR_SUBDEV_0, 2);
2372
2373	/* Do Shared Code Init and Setup */
2374	if (e1000_set_mac_type(&adapter->hw)) {
2375		device_printf(dev, "Setup init failure\n");
2376		return;
2377	}
2378}
2379
2380static int
2381em_allocate_pci_resources(struct adapter *adapter)
2382{
2383	device_t	dev = adapter->dev;
2384	int		rid;
2385
2386	rid = PCIR_BAR(0);
2387	adapter->memory = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
2388	    &rid, RF_ACTIVE);
2389	if (adapter->memory == NULL) {
2390		device_printf(dev, "Unable to allocate bus resource: memory\n");
2391		return (ENXIO);
2392	}
2393	adapter->osdep.mem_bus_space_tag =
2394	    rman_get_bustag(adapter->memory);
2395	adapter->osdep.mem_bus_space_handle =
2396	    rman_get_bushandle(adapter->memory);
2397	adapter->hw.hw_addr = (u8 *)&adapter->osdep.mem_bus_space_handle;
2398
2399	/* Default to a single queue */
2400	adapter->num_queues = 1;
2401
2402	/*
2403	 * Setup MSI/X or MSI if PCI Express
2404	 */
2405	adapter->msix = em_setup_msix(adapter);
2406
2407	adapter->hw.back = &adapter->osdep;
2408
2409	return (0);
2410}
2411
2412/*********************************************************************
2413 *
2414 *  Setup the Legacy or MSI Interrupt handler
2415 *
2416 **********************************************************************/
2417int
2418em_allocate_legacy(struct adapter *adapter)
2419{
2420	device_t dev = adapter->dev;
2421	int error, rid = 0;
2422
2423	/* Manually turn off all interrupts */
2424	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2425
2426	if (adapter->msix == 1) /* using MSI */
2427		rid = 1;
2428	/* We allocate a single interrupt resource */
2429	adapter->res = bus_alloc_resource_any(dev,
2430	    SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2431	if (adapter->res == NULL) {
2432		device_printf(dev, "Unable to allocate bus resource: "
2433		    "interrupt\n");
2434		return (ENXIO);
2435	}
2436
2437	/*
2438	 * Allocate a fast interrupt and the associated
2439	 * deferred processing contexts.
2440	 */
2441	TASK_INIT(&adapter->que_task, 0, em_handle_que, adapter);
2442	TASK_INIT(&adapter->link_task, 0, em_handle_link, adapter);
2443	adapter->tq = taskqueue_create_fast("em_taskq", M_NOWAIT,
2444	    taskqueue_thread_enqueue, &adapter->tq);
2445	taskqueue_start_threads(&adapter->tq, 1, PI_NET, "%s taskq",
2446	    device_get_nameunit(adapter->dev));
2447	if ((error = bus_setup_intr(dev, adapter->res, INTR_TYPE_NET,
2448	    em_irq_fast, NULL, adapter, &adapter->tag)) != 0) {
2449		device_printf(dev, "Failed to register fast interrupt "
2450			    "handler: %d\n", error);
2451		taskqueue_free(adapter->tq);
2452		adapter->tq = NULL;
2453		return (error);
2454	}
2455
2456	return (0);
2457}
2458
2459/*********************************************************************
2460 *
2461 *  Setup the MSIX Interrupt handlers
2462 *   This is not really Multiqueue, rather
2463 *   it's just multiple interrupt vectors.
2464 *
2465 **********************************************************************/
2466int
2467em_allocate_msix(struct adapter *adapter)
2468{
2469	device_t	dev = adapter->dev;
2470	struct		tx_ring *txr = adapter->tx_rings;
2471	struct		rx_ring *rxr = adapter->rx_rings;
2472	int		error, rid, vector = 0;
2473
2474
2475	/* Make sure all interrupts are disabled */
2476	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2477
2478	/* First set up ring resources */
2479	for (int i = 0; i < adapter->num_queues; i++, txr++, rxr++) {
2480
2481		/* RX ring */
2482		rid = vector + 1;
2483
2484		rxr->res = bus_alloc_resource_any(dev,
2485		    SYS_RES_IRQ, &rid, RF_ACTIVE);
2486		if (rxr->res == NULL) {
2487			device_printf(dev,
2488			    "Unable to allocate bus resource: "
2489			    "RX MSIX Interrupt %d\n", i);
2490			return (ENXIO);
2491		}
2492		if ((error = bus_setup_intr(dev, rxr->res,
2493		    INTR_TYPE_NET | INTR_MPSAFE, NULL, em_msix_rx,
2494		    rxr, &rxr->tag)) != 0) {
2495			device_printf(dev, "Failed to register RX handler\n");
2496			return (error);
2497		}
2498#if __FreeBSD_version >= 800504
2499		bus_describe_intr(dev, rxr->res, rxr->tag, "rx %d", i);
2500#endif
2501		rxr->msix = vector++; /* NOTE increment vector for TX */
2502		TASK_INIT(&rxr->rx_task, 0, em_handle_rx, rxr);
2503		rxr->tq = taskqueue_create_fast("em_rxq", M_NOWAIT,
2504		    taskqueue_thread_enqueue, &rxr->tq);
2505		taskqueue_start_threads(&rxr->tq, 1, PI_NET, "%s rxq",
2506		    device_get_nameunit(adapter->dev));
2507		/*
2508		** Set the bit to enable interrupt
2509		** in E1000_IMS -- bits 20 and 21
2510		** are for RX0 and RX1, note this has
2511		** NOTHING to do with the MSIX vector
2512		*/
2513		rxr->ims = 1 << (20 + i);
2514		adapter->ivars |= (8 | rxr->msix) << (i * 4);
2515
2516		/* TX ring */
2517		rid = vector + 1;
2518		txr->res = bus_alloc_resource_any(dev,
2519		    SYS_RES_IRQ, &rid, RF_ACTIVE);
2520		if (txr->res == NULL) {
2521			device_printf(dev,
2522			    "Unable to allocate bus resource: "
2523			    "TX MSIX Interrupt %d\n", i);
2524			return (ENXIO);
2525		}
2526		if ((error = bus_setup_intr(dev, txr->res,
2527		    INTR_TYPE_NET | INTR_MPSAFE, NULL, em_msix_tx,
2528		    txr, &txr->tag)) != 0) {
2529			device_printf(dev, "Failed to register TX handler\n");
2530			return (error);
2531		}
2532#if __FreeBSD_version >= 800504
2533		bus_describe_intr(dev, txr->res, txr->tag, "tx %d", i);
2534#endif
2535		txr->msix = vector++; /* Increment vector for next pass */
2536		TASK_INIT(&txr->tx_task, 0, em_handle_tx, txr);
2537		txr->tq = taskqueue_create_fast("em_txq", M_NOWAIT,
2538		    taskqueue_thread_enqueue, &txr->tq);
2539		taskqueue_start_threads(&txr->tq, 1, PI_NET, "%s txq",
2540		    device_get_nameunit(adapter->dev));
2541		/*
2542		** Set the bit to enable interrupt
2543		** in E1000_IMS -- bits 22 and 23
2544		** are for TX0 and TX1, note this has
2545		** NOTHING to do with the MSIX vector
2546		*/
2547		txr->ims = 1 << (22 + i);
2548		adapter->ivars |= (8 | txr->msix) << (8 + (i * 4));
2549	}
2550
2551	/* Link interrupt */
2552	++rid;
2553	adapter->res = bus_alloc_resource_any(dev,
2554	    SYS_RES_IRQ, &rid, RF_ACTIVE);
2555	if (!adapter->res) {
2556		device_printf(dev, "Unable to allocate "
2557		    "bus resource: Link interrupt [%d]\n", rid);
2558		return (ENXIO);
2559	}
2560	/* Set the link handler function */
2561	error = bus_setup_intr(dev, adapter->res,
2562	    INTR_TYPE_NET | INTR_MPSAFE, NULL,
2563	    em_msix_link, adapter, &adapter->tag);
2564	if (error) {
2565		adapter->res = NULL;
2566		device_printf(dev, "Failed to register LINK handler\n");
2567		return (error);
2568	}
2569#if __FreeBSD_version >= 800504
2570	bus_describe_intr(dev, adapter->res, adapter->tag, "link");
2571#endif
2572	adapter->linkvec = vector;
2573	adapter->ivars |=  (8 | vector) << 16;
2574	adapter->ivars |= 0x80000000;
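	/*
	 * Worked example (hedged; this is the single-queue 82574 layout
	 * set up above): with rxr->msix = 0, txr->msix = 1 and link
	 * vector 2, the accumulated ivars value is
	 *
	 *	(8 | 0) << 0	= 0x00000008	RX0  -> vector 0, valid
	 *	(8 | 1) << 8	= 0x00000900	TX0  -> vector 1, valid
	 *	(8 | 2) << 16	= 0x000a0000	link -> vector 2, valid
	 *	other-cause bit	= 0x80000000
	 *	total		= 0x800a0908
	 */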
2575
2576	return (0);
2577}
2578
2579
2580static void
2581em_free_pci_resources(struct adapter *adapter)
2582{
2583	device_t	dev = adapter->dev;
2584	struct tx_ring	*txr;
2585	struct rx_ring	*rxr;
2586	int		rid;
2587
2588
2589	/*
2590	** Release all the queue interrupt resources:
2591	*/
2592	for (int i = 0; i < adapter->num_queues; i++) {
2593		txr = &adapter->tx_rings[i];
2594		rxr = &adapter->rx_rings[i];
2595		/* an early abort? */
2596		if ((txr == NULL) || (rxr == NULL))
2597			break;
2598		rid = txr->msix + 1;
2599		if (txr->tag != NULL) {
2600			bus_teardown_intr(dev, txr->res, txr->tag);
2601			txr->tag = NULL;
2602		}
2603		if (txr->res != NULL)
2604			bus_release_resource(dev, SYS_RES_IRQ,
2605			    rid, txr->res);
2606		rid = rxr->msix + 1;
2607		if (rxr->tag != NULL) {
2608			bus_teardown_intr(dev, rxr->res, rxr->tag);
2609			rxr->tag = NULL;
2610		}
2611		if (rxr->res != NULL)
2612			bus_release_resource(dev, SYS_RES_IRQ,
2613			    rid, rxr->res);
2614	}
2615
2616	if (adapter->linkvec) /* we are doing MSIX */
2617		rid = adapter->linkvec + 1;
2618	else
2619		rid = (adapter->msix != 0) ? 1 : 0;
2620
2621	if (adapter->tag != NULL) {
2622		bus_teardown_intr(dev, adapter->res, adapter->tag);
2623		adapter->tag = NULL;
2624	}
2625
2626	if (adapter->res != NULL)
2627		bus_release_resource(dev, SYS_RES_IRQ, rid, adapter->res);
2628
2629
2630	if (adapter->msix)
2631		pci_release_msi(dev);
2632
2633	if (adapter->msix_mem != NULL)
2634		bus_release_resource(dev, SYS_RES_MEMORY,
2635		    PCIR_BAR(EM_MSIX_BAR), adapter->msix_mem);
2636
2637	if (adapter->memory != NULL)
2638		bus_release_resource(dev, SYS_RES_MEMORY,
2639		    PCIR_BAR(0), adapter->memory);
2640
2641	if (adapter->flash != NULL)
2642		bus_release_resource(dev, SYS_RES_MEMORY,
2643		    EM_FLASH, adapter->flash);
2644}
2645
2646/*
2647 * Setup MSI or MSI/X
2648 */
2649static int
2650em_setup_msix(struct adapter *adapter)
2651{
2652	device_t dev = adapter->dev;
2653	int val = 0;
2654
2655
2656	/*
2657	** Setup MSI/X for Hartwell: tests have shown
2658	** use of two queues to be unstable, and to
2659	** provide no great gain anyway, so we simply
2660	** separate the interrupts and use a single queue.
2661	*/
2662	if ((adapter->hw.mac.type == e1000_82574) &&
2663	    (em_enable_msix == TRUE)) {
2664		/* Map the MSIX BAR */
2665		int rid = PCIR_BAR(EM_MSIX_BAR);
2666		adapter->msix_mem = bus_alloc_resource_any(dev,
2667		    SYS_RES_MEMORY, &rid, RF_ACTIVE);
2668		if (!adapter->msix_mem) {
2669			/* May not be enabled */
2670			device_printf(adapter->dev,
2671			    "Unable to map MSIX table\n");
2672			goto msi;
2673		}
2674		val = pci_msix_count(dev);
2675		if (val < 3) {
2676			bus_release_resource(dev, SYS_RES_MEMORY,
2677			    PCIR_BAR(EM_MSIX_BAR), adapter->msix_mem);
2678			adapter->msix_mem = NULL;
2679			device_printf(adapter->dev,
2680			    "MSIX: insufficient vectors, using MSI\n");
2681			goto msi;
2682		}
2683		val = 3;
2684		adapter->num_queues = 1;
2685		if (pci_alloc_msix(dev, &val) == 0) {
2686			device_printf(adapter->dev,
2687			    "Using MSIX interrupts "
2688			    "with %d vectors\n", val);
2689		}
2690
2691		return (val);
2692	}
2693msi:
2694	val = pci_msi_count(dev);
2695	if (val == 1 && pci_alloc_msi(dev, &val) == 0) {
2696		adapter->msix = 1;
2697		device_printf(adapter->dev, "Using an MSI interrupt\n");
2698		return (val);
2699	}
2700	/* Should only happen due to manual configuration */
2701	device_printf(adapter->dev, "No MSI/MSIX, using a Legacy IRQ\n");
2702	return (0);
2703}
2704
2705
2706/*********************************************************************
2707 *
2708 *  Initialize the hardware to a configuration
2709 *  as specified by the adapter structure.
2710 *
2711 **********************************************************************/
2712static void
2713em_reset(struct adapter *adapter)
2714{
2715	device_t	dev = adapter->dev;
2716	struct ifnet	*ifp = adapter->ifp;
2717	struct e1000_hw	*hw = &adapter->hw;
2718	u16		rx_buffer_size;
2719
2720	INIT_DEBUGOUT("em_reset: begin");
2721
2722	/* Set up smart power down as default off on newer adapters. */
2723	if (!em_smart_pwr_down && (hw->mac.type == e1000_82571 ||
2724	    hw->mac.type == e1000_82572)) {
2725		u16 phy_tmp = 0;
2726
2727		/* Speed up time to link by disabling smart power down. */
2728		e1000_read_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, &phy_tmp);
2729		phy_tmp &= ~IGP02E1000_PM_SPD;
2730		e1000_write_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, phy_tmp);
2731	}
2732
2733	/*
2734	 * These parameters control the automatic generation (Tx) and
2735	 * response (Rx) to Ethernet PAUSE frames.
2736	 * - High water mark should allow for at least two frames to be
2737	 *   received after sending an XOFF.
2738	 * - Low water mark works best when it is very near the high water mark.
2739	 *   This allows the receiver to restart by sending XON when it has
2740	 *   drained a bit. Here we use an arbitrary value of 1500 which will
2741	 *   restart after one full frame is pulled from the buffer. There
2742	 *   could be several smaller frames in the buffer and if so they will
2743	 *   not trigger the XON until their total number reduces the buffer
2744	 *   by 1500.
2745	 * - The pause time is fairly large at 1000 x 512ns = 512 usec.
2746	 */
2747	rx_buffer_size = ((E1000_READ_REG(hw, E1000_PBA) & 0xffff) << 10);
2748
2749	hw->fc.high_water = rx_buffer_size -
2750	    roundup2(adapter->max_frame_size, 1024);
2751	hw->fc.low_water = hw->fc.high_water - 1500;
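	/*
	 * Worked numbers (hedged; actual PBA contents vary by MAC type):
	 * if the PBA register reports 48 (KB), rx_buffer_size =
	 * 48 << 10 = 49152 bytes. With a standard 1518-byte max frame
	 * rounded up to 2048, high_water = 49152 - 2048 = 47104 and
	 * low_water = 47104 - 1500 = 45604.
	 */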
2752
2753	if (hw->mac.type == e1000_80003es2lan)
2754		hw->fc.pause_time = 0xFFFF;
2755	else
2756		hw->fc.pause_time = EM_FC_PAUSE_TIME;
2757
2758	hw->fc.send_xon = TRUE;
2759
2760	/* Set flow control, using the tunable value if sane */
2761	hw->fc.requested_mode = adapter->fc_setting;
2762
2763	/* Workaround: no TX flow ctrl for PCH */
2764	if (hw->mac.type == e1000_pchlan)
2765		hw->fc.requested_mode = e1000_fc_rx_pause;
2766
2767	/* Override settings for PCH2LAN; these watermark values are magic */
2768	if (hw->mac.type == e1000_pch2lan) {
2769		hw->fc.high_water = 0x5C20;
2770		hw->fc.low_water = 0x5048;
2771		hw->fc.pause_time = 0x0650;
2772		hw->fc.refresh_time = 0x0400;
2773		/* Jumbos need adjusted PBA */
2774		if (ifp->if_mtu > ETHERMTU)
2775			E1000_WRITE_REG(hw, E1000_PBA, 12);
2776		else
2777			E1000_WRITE_REG(hw, E1000_PBA, 26);
2778	}
2779
2780	/* Issue a global reset */
2781	e1000_reset_hw(hw);
2782	E1000_WRITE_REG(hw, E1000_WUC, 0);
2783	em_disable_aspm(adapter);
2784
2785	if (e1000_init_hw(hw) < 0) {
2786		device_printf(dev, "Hardware Initialization Failed\n");
2787		return;
2788	}
2789
2790	E1000_WRITE_REG(hw, E1000_VET, ETHERTYPE_VLAN);
2791	e1000_get_phy_info(hw);
2792	e1000_check_for_link(hw);
2793	return;
2794}
2795
2796/*********************************************************************
2797 *
2798 *  Setup networking device structure and register an interface.
2799 *
2800 **********************************************************************/
2801static int
2802em_setup_interface(device_t dev, struct adapter *adapter)
2803{
2804	struct ifnet   *ifp;
2805
2806	INIT_DEBUGOUT("em_setup_interface: begin");
2807
2808	ifp = adapter->ifp = if_alloc(IFT_ETHER);
2809	if (ifp == NULL) {
2810		device_printf(dev, "cannot allocate ifnet structure\n");
2811		return (-1);
2812	}
2813	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
2814	ifp->if_mtu = ETHERMTU;
2815	ifp->if_init = em_init;
2816	ifp->if_softc = adapter;
2817	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
2818	ifp->if_ioctl = em_ioctl;
2819	ifp->if_start = em_start;
2820	IFQ_SET_MAXLEN(&ifp->if_snd, adapter->num_tx_desc - 1);
2821	ifp->if_snd.ifq_drv_maxlen = adapter->num_tx_desc - 1;
2822	IFQ_SET_READY(&ifp->if_snd);
2823
2824	ether_ifattach(ifp, adapter->hw.mac.addr);
2825
2826	ifp->if_capabilities = ifp->if_capenable = 0;
2827
2828#ifdef EM_MULTIQUEUE
2829	/* Multiqueue tx functions */
2830	ifp->if_transmit = em_mq_start;
2831	ifp->if_qflush = em_qflush;
2832#endif
2833
2834	ifp->if_capabilities |= IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM;
2835	ifp->if_capenable |= IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM;
2836
2837	/* Enable TSO by default, can disable with ifconfig */
2838	ifp->if_capabilities |= IFCAP_TSO4;
2839	ifp->if_capenable |= IFCAP_TSO4;
2840
2841	/*
2842	 * Tell the upper layer(s) we
2843	 * support full VLAN capability
2844	 */
2845	ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);
2846	ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU;
2847	ifp->if_capenable |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU;
2848
2849	/*
2850	** Don't turn this on by default: if vlans are
2851	** created on another pseudo device (e.g. lagg),
2852	** vlan events are not passed through, breaking
2853	** operation, but with HW FILTER off it works. If
2854	** you use vlans directly on the em driver you can
2855	** enable this and get full hardware tag filtering.
2856	*/
2857	ifp->if_capabilities |= IFCAP_VLAN_HWFILTER;
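	/*
	 * Example (hedged): with vlans configured directly on em0, the
	 * filter can then be toggled from userland with something like
	 * "ifconfig em0 vlanhwfilter" / "ifconfig em0 -vlanhwfilter".
	 */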
2858
2859#ifdef DEVICE_POLLING
2860	ifp->if_capabilities |= IFCAP_POLLING;
2861#endif
2862
2863	/* Enable only WOL MAGIC by default */
2864	if (adapter->wol) {
2865		ifp->if_capabilities |= IFCAP_WOL;
2866		ifp->if_capenable |= IFCAP_WOL_MAGIC;
2867	}
2868
2869	/*
2870	 * Specify the media types supported by this adapter and register
2871	 * callbacks to update media and link information
2872	 */
2873	ifmedia_init(&adapter->media, IFM_IMASK,
2874	    em_media_change, em_media_status);
2875	if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
2876	    (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
2877		u_char fiber_type = IFM_1000_SX;	/* default type */
2878
2879		ifmedia_add(&adapter->media, IFM_ETHER | fiber_type | IFM_FDX,
2880			    0, NULL);
2881		ifmedia_add(&adapter->media, IFM_ETHER | fiber_type, 0, NULL);
2882	} else {
2883		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T, 0, NULL);
2884		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T | IFM_FDX,
2885			    0, NULL);
2886		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX,
2887			    0, NULL);
2888		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
2889			    0, NULL);
2890		if (adapter->hw.phy.type != e1000_phy_ife) {
2891			ifmedia_add(&adapter->media,
2892				IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
2893			ifmedia_add(&adapter->media,
2894				IFM_ETHER | IFM_1000_T, 0, NULL);
2895		}
2896	}
2897	ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
2898	ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
2899	return (0);
2900}
2901
2902
2903/*
2904 * Manage DMA'able memory.
2905 */
2906static void
2907em_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
2908{
2909	if (error)
2910		return;
2911	*(bus_addr_t *) arg = segs[0].ds_addr;
2912}
2913
2914static int
2915em_dma_malloc(struct adapter *adapter, bus_size_t size,
2916        struct em_dma_alloc *dma, int mapflags)
2917{
2918	int error;
2919
2920	error = bus_dma_tag_create(bus_get_dma_tag(adapter->dev), /* parent */
2921				EM_DBA_ALIGN, 0,	/* alignment, bounds */
2922				BUS_SPACE_MAXADDR,	/* lowaddr */
2923				BUS_SPACE_MAXADDR,	/* highaddr */
2924				NULL, NULL,		/* filter, filterarg */
2925				size,			/* maxsize */
2926				1,			/* nsegments */
2927				size,			/* maxsegsize */
2928				0,			/* flags */
2929				NULL,			/* lockfunc */
2930				NULL,			/* lockarg */
2931				&dma->dma_tag);
2932	if (error) {
2933		device_printf(adapter->dev,
2934		    "%s: bus_dma_tag_create failed: %d\n",
2935		    __func__, error);
2936		goto fail_0;
2937	}
2938
2939	error = bus_dmamem_alloc(dma->dma_tag, (void**) &dma->dma_vaddr,
2940	    BUS_DMA_NOWAIT | BUS_DMA_COHERENT, &dma->dma_map);
2941	if (error) {
2942		device_printf(adapter->dev,
2943		    "%s: bus_dmamem_alloc(%ju) failed: %d\n",
2944		    __func__, (uintmax_t)size, error);
2945		goto fail_1;	/* the tag was created; no memory to free */
2946	}
2947
2948	dma->dma_paddr = 0;
2949	error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
2950	    size, em_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT);
2951	if (error || dma->dma_paddr == 0) {
2952		device_printf(adapter->dev,
2953		    "%s: bus_dmamap_load failed: %d\n", __func__, error);
2954		if (error == 0)	/* don't return success on a zero paddr */
2955			error = ENOMEM;
2956		goto fail_3;
2957	}
2957
2958	return (0);
2959
2960	fail_3:
2961		bus_dmamap_unload(dma->dma_tag, dma->dma_map);
2962		bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
2963	fail_1:
2964		bus_dma_tag_destroy(dma->dma_tag);
2965fail_0:
2966	dma->dma_map = NULL;
2967	dma->dma_tag = NULL;
2968
2969	return (error);
2970}
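/*
 * Typical usage sketch (hedged; this mirrors the calls made from
 * em_allocate_queues() below):
 *
 *	struct em_dma_alloc dma;
 *
 *	if (em_dma_malloc(adapter, size, &dma, BUS_DMA_NOWAIT) == 0) {
 *		... use dma.dma_vaddr and dma.dma_paddr ...
 *		em_dma_free(adapter, &dma);
 *	}
 */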
2971
2972static void
2973em_dma_free(struct adapter *adapter, struct em_dma_alloc *dma)
2974{
2975	if (dma->dma_tag == NULL)
2976		return;
2977	if (dma->dma_map != NULL) {
2978		bus_dmamap_sync(dma->dma_tag, dma->dma_map,
2979		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
2980		bus_dmamap_unload(dma->dma_tag, dma->dma_map);
2981		bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
2982		dma->dma_map = NULL;
2983	}
2984	bus_dma_tag_destroy(dma->dma_tag);
2985	dma->dma_tag = NULL;
2986}
2987
2988
2989/*********************************************************************
2990 *
2991 *  Allocate memory for the transmit and receive rings, and then
2992 *  the descriptors associated with each, called only once at attach.
2993 *
2994 **********************************************************************/
2995static int
2996em_allocate_queues(struct adapter *adapter)
2997{
2998	device_t		dev = adapter->dev;
2999	struct tx_ring		*txr = NULL;
3000	struct rx_ring		*rxr = NULL;
3001	int rsize, tsize, error = E1000_SUCCESS;
3002	int txconf = 0, rxconf = 0;
3003
3004
3005	/* Allocate the TX ring struct memory */
3006	if (!(adapter->tx_rings =
3007	    (struct tx_ring *) malloc(sizeof(struct tx_ring) *
3008	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3009		device_printf(dev, "Unable to allocate TX ring memory\n");
3010		error = ENOMEM;
3011		goto fail;
3012	}
3013
3014	/* Now allocate the RX */
3015	if (!(adapter->rx_rings =
3016	    (struct rx_ring *) malloc(sizeof(struct rx_ring) *
3017	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3018		device_printf(dev, "Unable to allocate RX ring memory\n");
3019		error = ENOMEM;
3020		goto rx_fail;
3021	}
3022
3023	tsize = roundup2(adapter->num_tx_desc *
3024	    sizeof(struct e1000_tx_desc), EM_DBA_ALIGN);
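	/*
	 * Example sizing (hedged; the descriptor count is a typical
	 * default, not a fixed value): 1024 descriptors of 16 bytes
	 * each give tsize = 16384 bytes, which roundup2() leaves
	 * unchanged since it is already a multiple of EM_DBA_ALIGN.
	 */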
3025	/*
3026	 * Now set up the TX queues, txconf is needed to handle the
3027	 * possibility that things fail midcourse and we need to
3028	 * undo memory gracefully
3029	 */
3030	for (int i = 0; i < adapter->num_queues; i++, txconf++) {
3031		/* Set up some basics */
3032		txr = &adapter->tx_rings[i];
3033		txr->adapter = adapter;
3034		txr->me = i;
3035
3036		/* Initialize the TX lock */
3037		snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
3038		    device_get_nameunit(dev), txr->me);
3039		mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF);
3040
3041		if (em_dma_malloc(adapter, tsize,
3042			&txr->txdma, BUS_DMA_NOWAIT)) {
3043			device_printf(dev,
3044			    "Unable to allocate TX Descriptor memory\n");
3045			error = ENOMEM;
3046			goto err_tx_desc;
3047		}
3048		txr->tx_base = (struct e1000_tx_desc *)txr->txdma.dma_vaddr;
3049		bzero((void *)txr->tx_base, tsize);
3050
3051		if (em_allocate_transmit_buffers(txr)) {
3052			device_printf(dev,
3053			    "Critical Failure setting up transmit buffers\n");
3054			error = ENOMEM;
3055			goto err_tx_desc;
3056		}
3057#if __FreeBSD_version >= 800000
3058		/* Allocate a buf ring */
3059		txr->br = buf_ring_alloc(4096, M_DEVBUF,
3060		    M_WAITOK, &txr->tx_mtx);
3061#endif
3062	}
3063
3064	/*
3065	 * Next the RX queues...
3066	 */
3067	rsize = roundup2(adapter->num_rx_desc *
3068	    sizeof(struct e1000_rx_desc), EM_DBA_ALIGN);
3069	for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
3070		rxr = &adapter->rx_rings[i];
3071		rxr->adapter = adapter;
3072		rxr->me = i;
3073
3074		/* Initialize the RX lock */
3075		snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
3076		    device_get_nameunit(dev), rxr->me);
3077		mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF);
3078
3079		if (em_dma_malloc(adapter, rsize,
3080			&rxr->rxdma, BUS_DMA_NOWAIT)) {
3081			device_printf(dev,
3082			    "Unable to allocate RX Descriptor memory\n");
3083			error = ENOMEM;
3084			goto err_rx_desc;
3085		}
3086		rxr->rx_base = (struct e1000_rx_desc *)rxr->rxdma.dma_vaddr;
3087		bzero((void *)rxr->rx_base, rsize);
3088
3089		/* Allocate receive buffers for the ring */
3090		if (em_allocate_receive_buffers(rxr)) {
3091			device_printf(dev,
3092			    "Critical Failure setting up receive buffers\n");
3093			error = ENOMEM;
3094			goto err_rx_desc;
3095		}
3096	}
3097
3098	return (0);
3099
3100err_rx_desc:
3101	for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
3102		em_dma_free(adapter, &rxr->rxdma);
3103err_tx_desc:
3104	for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
3105		em_dma_free(adapter, &txr->txdma);
3106	free(adapter->rx_rings, M_DEVBUF);
3107rx_fail:
3108#if __FreeBSD_version >= 800000
3109	buf_ring_free(txr->br, M_DEVBUF);
3110#endif
3111	free(adapter->tx_rings, M_DEVBUF);
3112fail:
3113	return (error);
3114}
3115
3116
3117/*********************************************************************
3118 *
3119 *  Allocate memory for tx_buffer structures. The tx_buffer stores all
3120 *  the information needed to transmit a packet on the wire. This is
3121 *  called only once at attach, setup is done every reset.
3122 *
3123 **********************************************************************/
3124static int
3125em_allocate_transmit_buffers(struct tx_ring *txr)
3126{
3127	struct adapter *adapter = txr->adapter;
3128	device_t dev = adapter->dev;
3129	struct em_buffer *txbuf;
3130	int error, i;
3131
3132	/*
3133	 * Setup DMA descriptor areas.
3134	 */
3135	if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
3136			       1, 0,			/* alignment, bounds */
3137			       BUS_SPACE_MAXADDR,	/* lowaddr */
3138			       BUS_SPACE_MAXADDR,	/* highaddr */
3139			       NULL, NULL,		/* filter, filterarg */
3140			       EM_TSO_SIZE,		/* maxsize */
3141			       EM_MAX_SCATTER,		/* nsegments */
3142			       PAGE_SIZE,		/* maxsegsize */
3143			       0,			/* flags */
3144			       NULL,			/* lockfunc */
3145			       NULL,			/* lockfuncarg */
3146			       &txr->txtag))) {
3147		device_printf(dev,"Unable to allocate TX DMA tag\n");
3148		goto fail;
3149	}
3150
3151	if (!(txr->tx_buffers =
3152	    (struct em_buffer *) malloc(sizeof(struct em_buffer) *
3153	    adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3154		device_printf(dev, "Unable to allocate tx_buffer memory\n");
3155		error = ENOMEM;
3156		goto fail;
3157	}
3158
3159	/* Create the descriptor buffer dma maps */
3160	txbuf = txr->tx_buffers;
3161	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3162		error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
3163		if (error != 0) {
3164			device_printf(dev, "Unable to create TX DMA map\n");
3165			goto fail;
3166		}
3167	}
3168
3169	return (0);
3170fail:
3171	/* We free everything; this handles the case where we failed partway */
3172	em_free_transmit_structures(adapter);
3173	return (error);
3174}
3175
3176/*********************************************************************
3177 *
3178 *  Initialize a transmit ring.
3179 *
3180 **********************************************************************/
3181static void
3182em_setup_transmit_ring(struct tx_ring *txr)
3183{
3184	struct adapter *adapter = txr->adapter;
3185	struct em_buffer *txbuf;
3186	int i;
3187
3188	/* Clear the old descriptor contents */
3189	EM_TX_LOCK(txr);
3190	bzero((void *)txr->tx_base,
3191	      (sizeof(struct e1000_tx_desc)) * adapter->num_tx_desc);
3192	/* Reset indices */
3193	txr->next_avail_desc = 0;
3194	txr->next_to_clean = 0;
3195
3196	/* Free any existing tx buffers. */
3197	txbuf = txr->tx_buffers;
3198	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3199		if (txbuf->m_head != NULL) {
3200			bus_dmamap_sync(txr->txtag, txbuf->map,
3201			    BUS_DMASYNC_POSTWRITE);
3202			bus_dmamap_unload(txr->txtag, txbuf->map);
3203			m_freem(txbuf->m_head);
3204			txbuf->m_head = NULL;
3205		}
3206		/* clear the watch index */
3207		txbuf->next_eop = -1;
3208	}
3209
3210	/* Set number of descriptors available */
3211	txr->tx_avail = adapter->num_tx_desc;
3212	txr->queue_status = EM_QUEUE_IDLE;
3213
3214	/* Clear checksum offload context. */
3215	txr->last_hw_offload = 0;
3216	txr->last_hw_ipcss = 0;
3217	txr->last_hw_ipcso = 0;
3218	txr->last_hw_tucss = 0;
3219	txr->last_hw_tucso = 0;
3220
3221	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3222	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3223	EM_TX_UNLOCK(txr);
3224}
3225
3226/*********************************************************************
3227 *
3228 *  Initialize all transmit rings.
3229 *
3230 **********************************************************************/
3231static void
3232em_setup_transmit_structures(struct adapter *adapter)
3233{
3234	struct tx_ring *txr = adapter->tx_rings;
3235
3236	for (int i = 0; i < adapter->num_queues; i++, txr++)
3237		em_setup_transmit_ring(txr);
3238
3239	return;
3240}
3241
3242/*********************************************************************
3243 *
3244 *  Enable transmit unit.
3245 *
3246 **********************************************************************/
3247static void
3248em_initialize_transmit_unit(struct adapter *adapter)
3249{
3250	struct tx_ring	*txr = adapter->tx_rings;
3251	struct e1000_hw	*hw = &adapter->hw;
3252	u32	tctl, tarc, tipg = 0;
3253
3254	INIT_DEBUGOUT("em_initialize_transmit_unit: begin");
3255
3256	for (int i = 0; i < adapter->num_queues; i++, txr++) {
3257		u64 bus_addr = txr->txdma.dma_paddr;
3258		/* Base and Len of TX Ring */
3259		E1000_WRITE_REG(hw, E1000_TDLEN(i),
3260		    adapter->num_tx_desc * sizeof(struct e1000_tx_desc));
3261		E1000_WRITE_REG(hw, E1000_TDBAH(i),
3262		    (u32)(bus_addr >> 32));
3263		E1000_WRITE_REG(hw, E1000_TDBAL(i),
3264		    (u32)bus_addr);
3265		/* Init the HEAD/TAIL indices */
3266		E1000_WRITE_REG(hw, E1000_TDT(i), 0);
3267		E1000_WRITE_REG(hw, E1000_TDH(i), 0);
3268
3269		HW_DEBUGOUT2("Base = %x, Length = %x\n",
3270		    E1000_READ_REG(&adapter->hw, E1000_TDBAL(i)),
3271		    E1000_READ_REG(&adapter->hw, E1000_TDLEN(i)));
3272
3273		txr->queue_status = EM_QUEUE_IDLE;
3274	}
3275
3276	/* Set the default values for the Tx Inter Packet Gap timer */
3277	switch (adapter->hw.mac.type) {
3278	case e1000_82542:
3279		tipg = DEFAULT_82542_TIPG_IPGT;
3280		tipg |= DEFAULT_82542_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT;
3281		tipg |= DEFAULT_82542_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT;
3282		break;
3283	case e1000_80003es2lan:
3284		tipg = DEFAULT_82543_TIPG_IPGR1;
3285		tipg |= DEFAULT_80003ES2LAN_TIPG_IPGR2 <<
3286		    E1000_TIPG_IPGR2_SHIFT;
3287		break;
3288	default:
3289		if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
3290		    (adapter->hw.phy.media_type ==
3291		    e1000_media_type_internal_serdes))
3292			tipg = DEFAULT_82543_TIPG_IPGT_FIBER;
3293		else
3294			tipg = DEFAULT_82543_TIPG_IPGT_COPPER;
3295		tipg |= DEFAULT_82543_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT;
3296		tipg |= DEFAULT_82543_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT;
3297	}
3298
3299	E1000_WRITE_REG(&adapter->hw, E1000_TIPG, tipg);
3300	E1000_WRITE_REG(&adapter->hw, E1000_TIDV, adapter->tx_int_delay.value);
3301
3302	if (adapter->hw.mac.type >= e1000_82540)
3303		E1000_WRITE_REG(&adapter->hw, E1000_TADV,
3304		    adapter->tx_abs_int_delay.value);
3305
3306	if ((adapter->hw.mac.type == e1000_82571) ||
3307	    (adapter->hw.mac.type == e1000_82572)) {
3308		tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
3309		tarc |= SPEED_MODE_BIT;
3310		E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3311	} else if (adapter->hw.mac.type == e1000_80003es2lan) {
3312		tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
3313		tarc |= 1;
3314		E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3315		tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(1));
3316		tarc |= 1;
3317		E1000_WRITE_REG(&adapter->hw, E1000_TARC(1), tarc);
3318	}
3319
3320	adapter->txd_cmd = E1000_TXD_CMD_IFCS;
3321	if (adapter->tx_int_delay.value > 0)
3322		adapter->txd_cmd |= E1000_TXD_CMD_IDE;
3323
3324	/* Program the Transmit Control Register */
3325	tctl = E1000_READ_REG(&adapter->hw, E1000_TCTL);
3326	tctl &= ~E1000_TCTL_CT;
3327	tctl |= (E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN |
3328		   (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT));
3329
3330	if (adapter->hw.mac.type >= e1000_82571)
3331		tctl |= E1000_TCTL_MULR;
3332
3333	/* This write will effectively turn on the transmit unit. */
3334	E1000_WRITE_REG(&adapter->hw, E1000_TCTL, tctl);
3335
3336}
3337
3338
3339/*********************************************************************
3340 *
3341 *  Free all transmit rings.
3342 *
3343 **********************************************************************/
3344static void
3345em_free_transmit_structures(struct adapter *adapter)
3346{
3347	struct tx_ring *txr = adapter->tx_rings;
3348
3349	for (int i = 0; i < adapter->num_queues; i++, txr++) {
3350		EM_TX_LOCK(txr);
3351		em_free_transmit_buffers(txr);
3352		em_dma_free(adapter, &txr->txdma);
3353		EM_TX_UNLOCK(txr);
3354		EM_TX_LOCK_DESTROY(txr);
3355	}
3356
3357	free(adapter->tx_rings, M_DEVBUF);
3358}
3359
3360/*********************************************************************
3361 *
3362 *  Free transmit ring related data structures.
3363 *
3364 **********************************************************************/
3365static void
3366em_free_transmit_buffers(struct tx_ring *txr)
3367{
3368	struct adapter		*adapter = txr->adapter;
3369	struct em_buffer	*txbuf;
3370
3371	INIT_DEBUGOUT("free_transmit_ring: begin");
3372
3373	if (txr->tx_buffers == NULL)
3374		return;
3375
3376	for (int i = 0; i < adapter->num_tx_desc; i++) {
3377		txbuf = &txr->tx_buffers[i];
3378		if (txbuf->m_head != NULL) {
3379			bus_dmamap_sync(txr->txtag, txbuf->map,
3380			    BUS_DMASYNC_POSTWRITE);
3381			bus_dmamap_unload(txr->txtag,
3382			    txbuf->map);
3383			m_freem(txbuf->m_head);
3384			txbuf->m_head = NULL;
3385			if (txbuf->map != NULL) {
3386				bus_dmamap_destroy(txr->txtag,
3387				    txbuf->map);
3388				txbuf->map = NULL;
3389			}
3390		} else if (txbuf->map != NULL) {
3391			bus_dmamap_unload(txr->txtag,
3392			    txbuf->map);
3393			bus_dmamap_destroy(txr->txtag,
3394			    txbuf->map);
3395			txbuf->map = NULL;
3396		}
3397	}
3398#if __FreeBSD_version >= 800000
3399	if (txr->br != NULL)
3400		buf_ring_free(txr->br, M_DEVBUF);
3401#endif
3402	if (txr->tx_buffers != NULL) {
3403		free(txr->tx_buffers, M_DEVBUF);
3404		txr->tx_buffers = NULL;
3405	}
3406	if (txr->txtag != NULL) {
3407		bus_dma_tag_destroy(txr->txtag);
3408		txr->txtag = NULL;
3409	}
3410	return;
3411}
3412
3413
3414/*********************************************************************
3415 *  The offload context is protocol specific (TCP/UDP) and thus
3416 *  only needs to be set when the protocol changes. A context
3417 *  change can be a performance detriment, and the feature might
3418 *  be better just disabled. The reason arises in the way
3419 *  in which the controller supports pipelined requests from the
3420 *  Tx data DMA. Up to four requests can be pipelined, and they may
3421 *  belong to the same packet or to multiple packets. However all
3422 *  requests for one packet are issued before a request is issued
3423 *  for a subsequent packet and if a request for the next packet
3424 *  requires a context change, that request will be stalled
3425 *  until the previous request completes. This means setting up
3426 *  a new context effectively disables pipelined Tx data DMA, which
3427 *  in turn greatly slows down performance when sending small
3428 *  frames.
3429 **********************************************************************/
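/*
 * Concrete illustration (hedged): a run of back-to-back TCP segments
 * reuses the context programmed for the first one, so only data
 * descriptors are queued; traffic that alternates TCP and UDP forces
 * a context descriptor per packet and defeats the pipelining described
 * above. The last_hw_* comparisons below implement this reuse test.
 */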
3430static void
3431em_transmit_checksum_setup(struct tx_ring *txr, struct mbuf *mp, int ip_off,
3432    struct ip *ip, u32 *txd_upper, u32 *txd_lower)
3433{
3434	struct adapter			*adapter = txr->adapter;
3435	struct e1000_context_desc	*TXD = NULL;
3436	struct em_buffer		*tx_buffer;
3437	int				cur, hdr_len;
3438	u32				cmd = 0;
3439	u16				offload = 0;
3440	u8				ipcso, ipcss, tucso, tucss;
3441
3442	ipcss = ipcso = tucss = tucso = 0;
3443	hdr_len = ip_off + (ip->ip_hl << 2);
3444	cur = txr->next_avail_desc;
3445
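	/*
	 * Worked offsets (hedged example, untagged IPv4 with a 20-byte
	 * header): ip_off = 14 and hdr_len = 34, so ipcss = 14,
	 * ipcse = 34, ipcso = 14 + offsetof(struct ip, ip_sum) = 24,
	 * and a TCP checksum lands at tucso = 34 +
	 * offsetof(struct tcphdr, th_sum) = 50.
	 */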
3446	/* Setup of IP header checksum. */
3447	if (mp->m_pkthdr.csum_flags & CSUM_IP) {
3448		*txd_upper |= E1000_TXD_POPTS_IXSM << 8;
3449		offload |= CSUM_IP;
3450		ipcss = ip_off;
3451		ipcso = ip_off + offsetof(struct ip, ip_sum);
3452		/*
3453		 * Start offset for header checksum calculation.
3454		 * End offset for header checksum calculation.
3455		 * Offset of place to put the checksum.
3456		 */
3457		TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
3458		TXD->lower_setup.ip_fields.ipcss = ipcss;
3459		TXD->lower_setup.ip_fields.ipcse = htole16(hdr_len);
3460		TXD->lower_setup.ip_fields.ipcso = ipcso;
3461		cmd |= E1000_TXD_CMD_IP;
3462	}
3463
3464	if (mp->m_pkthdr.csum_flags & CSUM_TCP) {
3465 		*txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
3466 		*txd_upper |= E1000_TXD_POPTS_TXSM << 8;
3467 		offload |= CSUM_TCP;
3468 		tucss = hdr_len;
3469 		tucso = hdr_len + offsetof(struct tcphdr, th_sum);
3470 		/*
3471 		 * Setting up a new checksum offload context for every frame
3472 		 * takes a lot of processing time for the hardware. It also
3473 		 * hurts performance a lot for small frames, so avoid it
3474 		 * if the driver can reuse a previously configured checksum
3475 		 * offload context.
3476 		 */
3477 		if (txr->last_hw_offload == offload) {
3478 			if (offload & CSUM_IP) {
3479 				if (txr->last_hw_ipcss == ipcss &&
3480 				    txr->last_hw_ipcso == ipcso &&
3481 				    txr->last_hw_tucss == tucss &&
3482 				    txr->last_hw_tucso == tucso)
3483 					return;
3484 			} else {
3485 				if (txr->last_hw_tucss == tucss &&
3486 				    txr->last_hw_tucso == tucso)
3487 					return;
3488 			}
3489  		}
3490 		txr->last_hw_offload = offload;
3491 		txr->last_hw_tucss = tucss;
3492 		txr->last_hw_tucso = tucso;
3493 		/*
3494 		 * Start offset for payload checksum calculation.
3495 		 * End offset for payload checksum calculation.
3496 		 * Offset of place to put the checksum.
3497 		 */
3498		TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
3499 		TXD->upper_setup.tcp_fields.tucss = tucss;
3500 		TXD->upper_setup.tcp_fields.tucse = htole16(0);
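 		/* tucse of zero means checksum through the end of the packet */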
3501 		TXD->upper_setup.tcp_fields.tucso = tucso;
3502 		cmd |= E1000_TXD_CMD_TCP;
3503 	} else if (mp->m_pkthdr.csum_flags & CSUM_UDP) {
3504 		*txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
3505 		*txd_upper |= E1000_TXD_POPTS_TXSM << 8;
 		offload |= CSUM_UDP;
3506 		tucss = hdr_len;
3507 		tucso = hdr_len + offsetof(struct udphdr, uh_sum);
3508 		/*
3509 		 * Setting up a new checksum offload context for every
3510 		 * frame takes a lot of processing time in hardware. It
3511 		 * also reduces performance a lot for small frames, so
3512 		 * avoid it if the driver can use the previously
3513 		 * configured checksum offload context.
3514 		 */
3515 		if (txr->last_hw_offload == offload) {
3516 			if (offload & CSUM_IP) {
3517 				if (txr->last_hw_ipcss == ipcss &&
3518 				    txr->last_hw_ipcso == ipcso &&
3519 				    txr->last_hw_tucss == tucss &&
3520 				    txr->last_hw_tucso == tucso)
3521 					return;
3522 			} else {
3523 				if (txr->last_hw_tucss == tucss &&
3524 				    txr->last_hw_tucso == tucso)
3525 					return;
3526 			}
3527 		}
3528 		txr->last_hw_offload = offload;
3529 		txr->last_hw_tucss = tucss;
3530 		txr->last_hw_tucso = tucso;
3531 		/*
3532 		 * Start offset for payload checksum calculation.
3533 		 * End offset for payload checksum calculation.
3534 		 * Offset of place to put the checksum.
3535 		 */
3536		TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
3537 		TXD->upper_setup.tcp_fields.tucss = tucss;
3538 		TXD->upper_setup.tcp_fields.tucse = htole16(0);
3539 		TXD->upper_setup.tcp_fields.tucso = tucso;
3540  	}
3541
3542 	if (offload & CSUM_IP) {
3543 		txr->last_hw_ipcss = ipcss;
3544 		txr->last_hw_ipcso = ipcso;
3545  	}
3546
3547	TXD->tcp_seg_setup.data = htole32(0);
3548	TXD->cmd_and_length =
3549	    htole32(adapter->txd_cmd | E1000_TXD_CMD_DEXT | cmd);
3550	tx_buffer = &txr->tx_buffers[cur];
3551	tx_buffer->m_head = NULL;
3552	tx_buffer->next_eop = -1;
3553
3554	if (++cur == adapter->num_tx_desc)
3555		cur = 0;
3556
3557	txr->tx_avail--;
3558	txr->next_avail_desc = cur;
3559}
3560
3561
3562/**********************************************************************
3563 *
3564 *  Setup work for hardware segmentation offload (TSO)
3565 *
3566 **********************************************************************/
3567static void
3568em_tso_setup(struct tx_ring *txr, struct mbuf *mp, int ip_off,
3569    struct ip *ip, struct tcphdr *tp, u32 *txd_upper, u32 *txd_lower)
3570{
3571	struct adapter			*adapter = txr->adapter;
3572	struct e1000_context_desc	*TXD;
3573	struct em_buffer		*tx_buffer;
3574	int cur, hdr_len;
3575
3576	/*
3577	 * In theory we can use the same TSO context if and only if
3578	 * the frame is of the same type (IP/TCP) and has the same
3579	 * MSS. However, checking whether a frame has the same IP/TCP
3580	 * structure is a hard thing, so just ignore that and always
3581	 * re-establish a new TSO context.
3582	 */
3583	hdr_len = ip_off + (ip->ip_hl << 2) + (tp->th_off << 2);
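	/*
	 * Worked example (assuming an untagged IPv4/TCP frame with no
	 * IP or TCP options): ip_off = 14, ip_hl = 5, th_off = 5, so
	 * hdr_len = 14 + 20 + 20 = 54 bytes of headers that the
	 * hardware replicates in front of every segment.
	 */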
3584	*txd_lower = (E1000_TXD_CMD_DEXT |	/* Extended descr type */
3585		      E1000_TXD_DTYP_D |	/* Data descr type */
3586		      E1000_TXD_CMD_TSE);	/* Do TSE on this packet */
3587
3588	/* IP and/or TCP header checksum calculation and insertion. */
3589	*txd_upper = (E1000_TXD_POPTS_IXSM | E1000_TXD_POPTS_TXSM) << 8;
3590
3591	cur = txr->next_avail_desc;
3592	tx_buffer = &txr->tx_buffers[cur];
3593	TXD = (struct e1000_context_desc *) &txr->tx_base[cur];
3594
3595	/*
3596	 * Start offset for header checksum calculation.
3597	 * End offset for header checksum calculation.
3598	 * Offset of place to put the checksum.
3599	 */
3600	TXD->lower_setup.ip_fields.ipcss = ip_off;
3601	TXD->lower_setup.ip_fields.ipcse =
3602	    htole16(ip_off + (ip->ip_hl << 2) - 1);
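	/* ipcse is an inclusive offset, hence the -1 above */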
3603	TXD->lower_setup.ip_fields.ipcso = ip_off + offsetof(struct ip, ip_sum);
3604	/*
3605	 * Start offset for payload checksum calculation.
3606	 * End offset for payload checksum calculation.
3607	 * Offset of place to put the checksum.
3608	 */
3609	TXD->upper_setup.tcp_fields.tucss = ip_off + (ip->ip_hl << 2);
3610	TXD->upper_setup.tcp_fields.tucse = 0;
3611	TXD->upper_setup.tcp_fields.tucso =
3612	    ip_off + (ip->ip_hl << 2) + offsetof(struct tcphdr, th_sum);
3613	/*
3614	 * Payload size per packet w/o any headers.
3615	 * Length of all headers up to payload.
3616	 */
3617	TXD->tcp_seg_setup.fields.mss = htole16(mp->m_pkthdr.tso_segsz);
3618	TXD->tcp_seg_setup.fields.hdr_len = hdr_len;
3619
3620	TXD->cmd_and_length = htole32(adapter->txd_cmd |
3621				E1000_TXD_CMD_DEXT |	/* Extended descr */
3622				E1000_TXD_CMD_TSE |	/* TSE context */
3623				E1000_TXD_CMD_IP |	/* Do IP csum */
3624				E1000_TXD_CMD_TCP |	/* Do TCP checksum */
3625				(mp->m_pkthdr.len - (hdr_len))); /* Total len */
3626
3627	tx_buffer->m_head = NULL;
3628	tx_buffer->next_eop = -1;
3629
3630	if (++cur == adapter->num_tx_desc)
3631		cur = 0;
3632
3633	txr->tx_avail--;
3634	txr->next_avail_desc = cur;
3635	txr->tx_tso = TRUE;
3636}
3637
3638
3639/**********************************************************************
3640 *
3641 *  Examine each tx_buffer in the used queue. If the hardware is done
3642 *  processing the packet then free associated resources. The
3643 *  tx_buffer is put back on the free queue.
3644 *
3645 **********************************************************************/
3646static bool
3647em_txeof(struct tx_ring *txr)
3648{
3649	struct adapter	*adapter = txr->adapter;
3650        int first, last, done, processed;
3651        struct em_buffer *tx_buffer;
3652        struct e1000_tx_desc   *tx_desc, *eop_desc;
3653	struct ifnet   *ifp = adapter->ifp;
3654
3655	EM_TX_LOCK_ASSERT(txr);
3656
3657	/* No work, make sure watchdog is off */
3658        if (txr->tx_avail == adapter->num_tx_desc) {
3659		txr->queue_status = EM_QUEUE_IDLE;
3660                return (FALSE);
3661	}
3662
3663	processed = 0;
3664        first = txr->next_to_clean;
3665        tx_desc = &txr->tx_base[first];
3666        tx_buffer = &txr->tx_buffers[first];
3667	last = tx_buffer->next_eop;
3668        eop_desc = &txr->tx_base[last];
3669
3670	/*
3671	 * What this does is get the index of the
3672	 * first descriptor AFTER the EOP of the
3673	 * first packet, that way we can do the
3674	 * simple comparison on the inner while loop.
3675	 */
3676	if (++last == adapter->num_tx_desc)
3677 		last = 0;
3678	done = last;
3679
3680        bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3681            BUS_DMASYNC_POSTREAD);
3682
3683        while (eop_desc->upper.fields.status & E1000_TXD_STAT_DD) {
3684		/* We clean the range of the packet */
3685		while (first != done) {
3686                	tx_desc->upper.data = 0;
3687                	tx_desc->lower.data = 0;
3688                	tx_desc->buffer_addr = 0;
3689                	++txr->tx_avail;
3690			++processed;
3691
3692			if (tx_buffer->m_head) {
3693				bus_dmamap_sync(txr->txtag,
3694				    tx_buffer->map,
3695				    BUS_DMASYNC_POSTWRITE);
3696				bus_dmamap_unload(txr->txtag,
3697				    tx_buffer->map);
3698                        	m_freem(tx_buffer->m_head);
3699                        	tx_buffer->m_head = NULL;
3700                	}
3701			tx_buffer->next_eop = -1;
3702			txr->watchdog_time = ticks;
3703
3704	                if (++first == adapter->num_tx_desc)
3705				first = 0;
3706
3707	                tx_buffer = &txr->tx_buffers[first];
3708			tx_desc = &txr->tx_base[first];
3709		}
3710		++ifp->if_opackets;
3711		/* See if we can continue to the next packet */
3712		last = tx_buffer->next_eop;
3713		if (last != -1) {
3714        		eop_desc = &txr->tx_base[last];
3715			/* Get new done point */
3716			if (++last == adapter->num_tx_desc) last = 0;
3717			done = last;
3718		} else
3719			break;
3720        }
3721        bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3722            BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3723
3724        txr->next_to_clean = first;
3725
3726	/*
3727	** Watchdog calculation: we know there's work
3728	** outstanding or the first return would have
3729	** been taken, so nothing processed for too
3730	** long indicates a hang. The local timer will
3731	** examine this and do a reset if needed.
3732	*/
3733	if ((!processed) && ((ticks - txr->watchdog_time) > EM_WATCHDOG))
3734		txr->queue_status = EM_QUEUE_HUNG;
3735
3736        /*
3737         * If we have a minimum free, clear IFF_DRV_OACTIVE
3738         * to tell the stack that it is OK to send packets.
3739         */
3740        if (txr->tx_avail > EM_MAX_SCATTER)
3741                ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
3742
3743	/* Disable watchdog if all clean */
3744	if (txr->tx_avail == adapter->num_tx_desc) {
3745		txr->queue_status = EM_QUEUE_IDLE;
3746		return (FALSE);
3747	}
3748
3749	return (TRUE);
3750}
3751
3752
3753/*********************************************************************
3754 *
3755 *  Refresh RX descriptor mbufs from system mbuf buffer pool.
3756 *
3757 **********************************************************************/
3758static void
3759em_refresh_mbufs(struct rx_ring *rxr, int limit)
3760{
3761	struct adapter		*adapter = rxr->adapter;
3762	struct mbuf		*m;
3763	bus_dma_segment_t	segs[1];
3764	struct em_buffer	*rxbuf;
3765	int			i, j, error, nsegs;
3766	bool			cleaned = FALSE;
3767
3768	i = j = rxr->next_to_refresh;
3769	/*
3770	** Get one descriptor beyond
3771	** our work mark to control
3772	** the loop.
3773	*/
3774	if (++j == adapter->num_rx_desc)
3775		j = 0;
3776
3777	while (j != limit) {
3778		rxbuf = &rxr->rx_buffers[i];
3779		if (rxbuf->m_head == NULL) {
3780			m = m_getjcl(M_DONTWAIT, MT_DATA,
3781			    M_PKTHDR, adapter->rx_mbuf_sz);
3782			/*
3783			** If we have a temporary resource shortage
3784			** that causes a failure, just abort refresh
3785			** for now, we will return to this point when
3786			** reinvoked from em_rxeof.
3787			*/
3788			if (m == NULL)
3789				goto update;
3790		} else
3791			m = rxbuf->m_head;
3792
3793		m->m_len = m->m_pkthdr.len = adapter->rx_mbuf_sz;
3794		m->m_flags |= M_PKTHDR;
3795		m->m_data = m->m_ext.ext_buf;
3796
3797		/* Use bus_dma machinery to setup the memory mapping  */
3798		error = bus_dmamap_load_mbuf_sg(rxr->rxtag, rxbuf->map,
3799		    m, segs, &nsegs, BUS_DMA_NOWAIT);
3800		if (error != 0) {
3801			printf("Refresh mbufs: hdr dmamap load"
3802			    " failure - %d\n", error);
3803			m_free(m);
3804			rxbuf->m_head = NULL;
3805			goto update;
3806		}
3807		rxbuf->m_head = m;
3808		bus_dmamap_sync(rxr->rxtag,
3809		    rxbuf->map, BUS_DMASYNC_PREREAD);
3810		rxr->rx_base[i].buffer_addr = htole64(segs[0].ds_addr);
3811		cleaned = TRUE;
3812
3813		i = j; /* Next is precalculated for us */
3814		rxr->next_to_refresh = i;
3815		/* Calculate next controlling index */
3816		if (++j == adapter->num_rx_desc)
3817			j = 0;
3818	}
3819update:
3820	/*
3821	** Update the tail pointer only if, and
3822	** only as far as, we have refreshed.
3823	*/
3824	if (cleaned)
3825		E1000_WRITE_REG(&adapter->hw,
3826		    E1000_RDT(rxr->me), rxr->next_to_refresh);
3827
3828	return;
3829}
3830
3831
3832/*********************************************************************
3833 *
3834 *  Allocate memory for rx_buffer structures. Since we use one
3835 *  rx_buffer per received packet, the maximum number of rx_buffer's
3836 *  that we'll need is equal to the number of receive descriptors
3837 *  that we've allocated.
3838 *
3839 **********************************************************************/
3840static int
3841em_allocate_receive_buffers(struct rx_ring *rxr)
3842{
3843	struct adapter		*adapter = rxr->adapter;
3844	device_t		dev = adapter->dev;
3845	struct em_buffer	*rxbuf;
3846	int			error;
3847
3848	rxr->rx_buffers = malloc(sizeof(struct em_buffer) *
3849	    adapter->num_rx_desc, M_DEVBUF, M_NOWAIT | M_ZERO);
3850	if (rxr->rx_buffers == NULL) {
3851		device_printf(dev, "Unable to allocate rx_buffer memory\n");
3852		return (ENOMEM);
3853	}
3854
3855	error = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */
3856				1, 0,			/* alignment, bounds */
3857				BUS_SPACE_MAXADDR,	/* lowaddr */
3858				BUS_SPACE_MAXADDR,	/* highaddr */
3859				NULL, NULL,		/* filter, filterarg */
3860				MJUM9BYTES,		/* maxsize */
3861				1,			/* nsegments */
3862				MJUM9BYTES,		/* maxsegsize */
3863				0,			/* flags */
3864				NULL,			/* lockfunc */
3865				NULL,			/* lockarg */
3866				&rxr->rxtag);
3867	if (error) {
3868		device_printf(dev, "%s: bus_dma_tag_create failed %d\n",
3869		    __func__, error);
3870		goto fail;
3871	}
3872
3873	rxbuf = rxr->rx_buffers;
3874	for (int i = 0; i < adapter->num_rx_desc; i++, rxbuf++) {
3876		error = bus_dmamap_create(rxr->rxtag, BUS_DMA_NOWAIT,
3877		    &rxbuf->map);
3878		if (error) {
3879			device_printf(dev, "%s: bus_dmamap_create failed: %d\n",
3880			    __func__, error);
3881			goto fail;
3882		}
3883	}
3884
3885	return (0);
3886
3887fail:
3888	em_free_receive_structures(adapter);
3889	return (error);
3890}
3891
3892
3893/*********************************************************************
3894 *
3895 *  Initialize a receive ring and its buffers.
3896 *
3897 **********************************************************************/
3898static int
3899em_setup_receive_ring(struct rx_ring *rxr)
3900{
3901	struct	adapter 	*adapter = rxr->adapter;
3902	struct em_buffer	*rxbuf;
3903	bus_dma_segment_t	seg[1];
3904	int			i, j, nsegs, error = 0;
3905
3906
3907	/* Clear the ring contents */
3908	EM_RX_LOCK(rxr);
3909
3910	/* Invalidate all descriptors */
3911	for (i = 0; i < adapter->num_rx_desc; i++) {
3912		struct e1000_rx_desc* cur;
3913		cur = &rxr->rx_base[i];
3914		cur->status = 0;
3915	}
3916
3917	/* Now replenish the mbufs */
3918	i = j = rxr->next_to_refresh;
3919	if (++j == adapter->num_rx_desc)
3920		j = 0;
3921
3922	while (j != rxr->next_to_check) {
3923		rxbuf = &rxr->rx_buffers[i];
3924		rxbuf->m_head = m_getjcl(M_DONTWAIT, MT_DATA,
3925		    M_PKTHDR, adapter->rx_mbuf_sz);
3926		if (rxbuf->m_head == NULL) {
3927			error = ENOBUFS;
3928			goto fail;
3929		}
3930		rxbuf->m_head->m_len = adapter->rx_mbuf_sz;
3931		rxbuf->m_head->m_flags &= ~M_HASFCS; /* we strip it */
3932		rxbuf->m_head->m_pkthdr.len = adapter->rx_mbuf_sz;
3933
3934		/* Get the memory mapping */
3935		error = bus_dmamap_load_mbuf_sg(rxr->rxtag,
3936		    rxbuf->map, rxbuf->m_head, seg,
3937		    &nsegs, BUS_DMA_NOWAIT);
3938		if (error != 0) {
3939			m_freem(rxbuf->m_head);
3940			rxbuf->m_head = NULL;
3941			goto fail;
3942		}
3943		bus_dmamap_sync(rxr->rxtag,
3944		    rxbuf->map, BUS_DMASYNC_PREREAD);
3945
3946		/* Update descriptor */
3947		rxr->rx_base[i].buffer_addr = htole64(seg[0].ds_addr);
3948		i = j;
3949		if (++j == adapter->num_rx_desc)
3950			j = 0;
3951	}
3952
3953fail:
3954	rxr->next_to_refresh = i;
3955	bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
3956	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3957	EM_RX_UNLOCK(rxr);
3958	return (error);
3959}
3960
3961/*********************************************************************
3962 *
3963 *  Initialize all receive rings.
3964 *
3965 **********************************************************************/
3966static int
3967em_setup_receive_structures(struct adapter *adapter)
3968{
3969	struct rx_ring *rxr = adapter->rx_rings;
3970	int q;
3971
3972	for (q = 0; q < adapter->num_queues; q++, rxr++)
3973		if (em_setup_receive_ring(rxr))
3974			goto fail;
3975
3976	return (0);
3977fail:
3978	/*
3979	 * Free RX buffers allocated so far, we will only handle
3980	 * the rings that completed, the failing case will have
3981	 * cleaned up for itself. 'q' failed, so it's the terminus.
3982	 */
3983	for (int i = 0, n = 0; i < q; ++i) {
3984		rxr = &adapter->rx_rings[i];
3985		n = rxr->next_to_check;
3986		while (n != rxr->next_to_refresh) {
3987			struct em_buffer *rxbuf;
3988			rxbuf = &rxr->rx_buffers[n];
3989			if (rxbuf->m_head != NULL) {
3990				bus_dmamap_sync(rxr->rxtag, rxbuf->map,
3991			  	  BUS_DMASYNC_POSTREAD);
3992				bus_dmamap_unload(rxr->rxtag, rxbuf->map);
3993				m_freem(rxbuf->m_head);
3994				rxbuf->m_head = NULL;
3995			}
3996			if (++n == adapter->num_rx_desc)
3997				n = 0;
3998		}
3999		rxr->next_to_check = 0;
4000		rxr->next_to_refresh = 0;
4001	}
4002
4003	return (ENOBUFS);
4004}
4005
4006/*********************************************************************
4007 *
4008 *  Free all receive rings.
4009 *
4010 **********************************************************************/
4011static void
4012em_free_receive_structures(struct adapter *adapter)
4013{
4014	struct rx_ring *rxr = adapter->rx_rings;
4015
4016	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4017		em_free_receive_buffers(rxr);
4018		/* Free the ring memory as well */
4019		em_dma_free(adapter, &rxr->rxdma);
4020		EM_RX_LOCK_DESTROY(rxr);
4021	}
4022
4023	free(adapter->rx_rings, M_DEVBUF);
4024}
4025
4026
4027/*********************************************************************
4028 *
4029 *  Free receive ring data structures
4030 *
4031 **********************************************************************/
4032static void
4033em_free_receive_buffers(struct rx_ring *rxr)
4034{
4035	struct adapter		*adapter = rxr->adapter;
4036	struct em_buffer	*rxbuf = NULL;
4037
4038	INIT_DEBUGOUT("free_receive_buffers: begin");
4039
4040	if (rxr->rx_buffers != NULL) {
4041		int i = rxr->next_to_check;
4042		while (i != rxr->next_to_refresh) {
4043			rxbuf = &rxr->rx_buffers[i];
4044			if (rxbuf->map != NULL) {
4045				bus_dmamap_sync(rxr->rxtag, rxbuf->map,
4046				    BUS_DMASYNC_POSTREAD);
4047				bus_dmamap_unload(rxr->rxtag, rxbuf->map);
4048				bus_dmamap_destroy(rxr->rxtag, rxbuf->map);
4049			}
4050			if (rxbuf->m_head != NULL) {
4051				m_freem(rxbuf->m_head);
4052				rxbuf->m_head = NULL;
4053			}
4054			if (++i == adapter->num_rx_desc)
4055				i = 0;
4056		}
4057		free(rxr->rx_buffers, M_DEVBUF);
4058		rxr->rx_buffers = NULL;
4059		rxr->next_to_check = 0;
4060		rxr->next_to_refresh = 0;
4061	}
4062
4063	if (rxr->rxtag != NULL) {
4064		bus_dma_tag_destroy(rxr->rxtag);
4065		rxr->rxtag = NULL;
4066	}
4067
4068	return;
4069}
4070
4071
4072/*********************************************************************
4073 *
4074 *  Enable receive unit.
4075 *
4076 **********************************************************************/
4077#define MAX_INTS_PER_SEC	8000
4078#define DEFAULT_ITR	     1000000000/(MAX_INTS_PER_SEC * 256)
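/*
 * Worked example: DEFAULT_ITR = 10^9 / (8000 * 256) ~= 488. The ITR
 * register counts in 256 ns units, so this gives ~125 us between
 * interrupts, i.e. at most about 8000 interrupts per second.
 */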
4079
4080static void
4081em_initialize_receive_unit(struct adapter *adapter)
4082{
4083	struct rx_ring	*rxr = adapter->rx_rings;
4084	struct ifnet	*ifp = adapter->ifp;
4085	struct e1000_hw	*hw = &adapter->hw;
4086	u64	bus_addr;
4087	u32	rctl, rxcsum;
4088
4089	INIT_DEBUGOUT("em_initialize_receive_unit: begin");
4090
4091	/*
4092	 * Make sure receives are disabled while setting
4093	 * up the descriptor ring
4094	 */
4095	rctl = E1000_READ_REG(hw, E1000_RCTL);
4096	E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
4097
4098	E1000_WRITE_REG(&adapter->hw, E1000_RADV,
4099	    adapter->rx_abs_int_delay.value);
4100	/*
4101	 * Set the interrupt throttling rate. Value is calculated
4102	 * as DEFAULT_ITR = 1/(MAX_INTS_PER_SEC * 256ns)
4103	 */
4104	E1000_WRITE_REG(hw, E1000_ITR, DEFAULT_ITR);
4105
4106	/*
4107	** When using MSIX interrupts we need to throttle
4108	** using the EITR register (82574 only)
4109	*/
4110	if (hw->mac.type == e1000_82574)
4111		for (int i = 0; i < 4; i++)
4112			E1000_WRITE_REG(hw, E1000_EITR_82574(i),
4113			    DEFAULT_ITR);
4114
4115	/* Disable accelerated acknowledgment */
4116	if (adapter->hw.mac.type == e1000_82574)
4117		E1000_WRITE_REG(hw, E1000_RFCTL, E1000_RFCTL_ACK_DIS);
4118
4119	if (ifp->if_capenable & IFCAP_RXCSUM) {
4120		rxcsum = E1000_READ_REG(hw, E1000_RXCSUM);
4121		rxcsum |= (E1000_RXCSUM_IPOFL | E1000_RXCSUM_TUOFL);
4122		E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum);
4123	}
4124
4125	/*
4126	** XXX TEMPORARY WORKAROUND: on some systems with 82573
4127	** long latencies are observed, like Lenovo X60. This
4128	** change eliminates the problem, but since having positive
4129	** values in RDTR is a known source of problems on other
4130	** platforms another solution is being sought.
4131	*/
4132	if (hw->mac.type == e1000_82573)
4133		E1000_WRITE_REG(hw, E1000_RDTR, 0x20);
4134
4135	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4136		/* Setup the Base and Length of the Rx Descriptor Ring */
4137		bus_addr = rxr->rxdma.dma_paddr;
4138		E1000_WRITE_REG(hw, E1000_RDLEN(i),
4139		    adapter->num_rx_desc * sizeof(struct e1000_rx_desc));
4140		E1000_WRITE_REG(hw, E1000_RDBAH(i), (u32)(bus_addr >> 32));
4141		E1000_WRITE_REG(hw, E1000_RDBAL(i), (u32)bus_addr);
4142		/* Setup the Head and Tail Descriptor Pointers */
4143		E1000_WRITE_REG(hw, E1000_RDH(i), rxr->next_to_check);
4144		E1000_WRITE_REG(hw, E1000_RDT(i), rxr->next_to_refresh);
4145	}
4146
4147	/* Set early receive threshold on appropriate hw */
4148	if (((adapter->hw.mac.type == e1000_ich9lan) ||
4149	    (adapter->hw.mac.type == e1000_pch2lan) ||
4150	    (adapter->hw.mac.type == e1000_ich10lan)) &&
4151	    (ifp->if_mtu > ETHERMTU)) {
4152		u32 rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(0));
4153		E1000_WRITE_REG(hw, E1000_RXDCTL(0), rxdctl | 3);
4154		E1000_WRITE_REG(hw, E1000_ERT, 0x100 | (1 << 13));
4155	}
4156
4157	if (adapter->hw.mac.type == e1000_pch2lan) {
4158		if (ifp->if_mtu > ETHERMTU)
4159			e1000_lv_jumbo_workaround_ich8lan(hw, TRUE);
4160		else
4161			e1000_lv_jumbo_workaround_ich8lan(hw, FALSE);
4162	}
4163
4164	/* Setup the Receive Control Register */
4165	rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
4166	rctl |= E1000_RCTL_EN | E1000_RCTL_BAM |
4167	    E1000_RCTL_LBM_NO | E1000_RCTL_RDMTS_HALF |
4168	    (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
4169
4170        /* Strip the CRC */
4171        rctl |= E1000_RCTL_SECRC;
4172
4173        /* Make sure VLAN Filters are off */
4174        rctl &= ~E1000_RCTL_VFE;
4175	rctl &= ~E1000_RCTL_SBP;
4176
4177	if (adapter->rx_mbuf_sz == MCLBYTES)
4178		rctl |= E1000_RCTL_SZ_2048;
4179	else if (adapter->rx_mbuf_sz == MJUMPAGESIZE)
4180		rctl |= E1000_RCTL_SZ_4096 | E1000_RCTL_BSEX;
4181	else if (adapter->rx_mbuf_sz > MJUMPAGESIZE)
4182		rctl |= E1000_RCTL_SZ_8192 | E1000_RCTL_BSEX;
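	/*
	 * BSEX extends the RCTL buffer-size encoding: with it set the
	 * size field selects 4096/8192/16384 bytes rather than
	 * 256/512/1024.
	 */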
4183
4184	if (ifp->if_mtu > ETHERMTU)
4185		rctl |= E1000_RCTL_LPE;
4186	else
4187		rctl &= ~E1000_RCTL_LPE;
4188
4189	/* Write out the settings */
4190	E1000_WRITE_REG(hw, E1000_RCTL, rctl);
4191
4192	return;
4193}
4194
4195
4196/*********************************************************************
4197 *
4198 *  This routine executes in interrupt context. It replenishes
4199 *  the mbufs in the descriptor ring and sends data which has
4200 *  been dma'ed into host memory to the upper layer.
4201 *
4202 *  We loop at most count times if count is > 0, or until done if
4203 *  count < 0.
4204 *
4205 *  For polling we also now return the number of cleaned packets
4206 *********************************************************************/
4207static bool
4208em_rxeof(struct rx_ring *rxr, int count, int *done)
4209{
4210	struct adapter		*adapter = rxr->adapter;
4211	struct ifnet		*ifp = adapter->ifp;
4212	struct mbuf		*mp, *sendmp;
4213	u8			status = 0;
4214	u16 			len;
4215	int			i, processed, rxdone = 0;
4216	bool			eop;
4217	struct e1000_rx_desc	*cur;
4218
4219	EM_RX_LOCK(rxr);
4220
4221	for (i = rxr->next_to_check, processed = 0; count != 0;) {
4222
4223		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
4224			break;
4225
4226		bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4227		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
4228
4229		cur = &rxr->rx_base[i];
4230		status = cur->status;
4231		mp = sendmp = NULL;
4232
4233		if ((status & E1000_RXD_STAT_DD) == 0)
4234			break;
4235
4236		len = le16toh(cur->length);
4237		eop = (status & E1000_RXD_STAT_EOP) != 0;
4238
4239		if ((cur->errors & E1000_RXD_ERR_FRAME_ERR_MASK) ||
4240		    (rxr->discard == TRUE)) {
4241			ifp->if_ierrors++;
4242			++rxr->rx_discarded;
4243			if (!eop) /* Catch subsequent segs */
4244				rxr->discard = TRUE;
4245			else
4246				rxr->discard = FALSE;
4247			em_rx_discard(rxr, i);
4248			goto next_desc;
4249		}
4250
4251		/* Assign correct length to the current fragment */
4252		mp = rxr->rx_buffers[i].m_head;
4253		mp->m_len = len;
4254
4255		/* Trigger for refresh */
4256		rxr->rx_buffers[i].m_head = NULL;
4257
4258		/* First segment? */
4259		if (rxr->fmp == NULL) {
4260			mp->m_pkthdr.len = len;
4261			rxr->fmp = rxr->lmp = mp;
4262		} else {
4263			/* Chain mbuf's together */
4264			mp->m_flags &= ~M_PKTHDR;
4265			rxr->lmp->m_next = mp;
4266			rxr->lmp = mp;
4267			rxr->fmp->m_pkthdr.len += len;
4268		}
4269
4270		if (eop) {
4271			--count;
4272			sendmp = rxr->fmp;
4273			sendmp->m_pkthdr.rcvif = ifp;
4274			ifp->if_ipackets++;
4275			em_receive_checksum(cur, sendmp);
4276#ifndef __NO_STRICT_ALIGNMENT
4277			if (adapter->max_frame_size >
4278			    (MCLBYTES - ETHER_ALIGN) &&
4279			    em_fixup_rx(rxr) != 0)
4280				goto skip;
4281#endif
4282			if (status & E1000_RXD_STAT_VP) {
4283				sendmp->m_pkthdr.ether_vtag =
4284				    (le16toh(cur->special) &
4285				    E1000_RXD_SPC_VLAN_MASK);
4286				sendmp->m_flags |= M_VLANTAG;
4287			}
4288#ifdef EM_MULTIQUEUE
4289			sendmp->m_pkthdr.flowid = rxr->msix;
4290			sendmp->m_flags |= M_FLOWID;
4291#endif
4292#ifndef __NO_STRICT_ALIGNMENT
4293skip:
4294#endif
4295			rxr->fmp = rxr->lmp = NULL;
4296		}
4297next_desc:
4298		/* Zero out the receive descriptors status. */
4299		cur->status = 0;
4300		++rxdone;	/* cumulative for POLL */
4301		++processed;
4302
4303		/* Advance our pointers to the next descriptor. */
4304		if (++i == adapter->num_rx_desc)
4305			i = 0;
4306
4307		/* Send to the stack */
4308		if (sendmp != NULL) {
4309			rxr->next_to_check = i;
4310			EM_RX_UNLOCK(rxr);
4311			(*ifp->if_input)(ifp, sendmp);
4312			EM_RX_LOCK(rxr);
4313			i = rxr->next_to_check;
4314		}
4315
4316		/* Refresh mbufs in batches of 8 to amortize the RDT tail write */
4317		if (processed == 8) {
4318			em_refresh_mbufs(rxr, i);
4319			processed = 0;
4320		}
4321	}
4322
4323	/* Catch any remaining refresh work */
4324	if (e1000_rx_unrefreshed(rxr))
4325		em_refresh_mbufs(rxr, i);
4326
4327	rxr->next_to_check = i;
4328	if (done != NULL)
4329		*done = rxdone;
4330	EM_RX_UNLOCK(rxr);
4331
4332	return ((status & E1000_RXD_STAT_DD) ? TRUE : FALSE);
4333}
4334
4335static __inline void
4336em_rx_discard(struct rx_ring *rxr, int i)
4337{
4338	struct em_buffer	*rbuf;
4339
4340	rbuf = &rxr->rx_buffers[i];
4341	/* Free any previous pieces */
4342	if (rxr->fmp != NULL) {
4343		rxr->fmp->m_flags |= M_PKTHDR;
4344		m_freem(rxr->fmp);
4345		rxr->fmp = NULL;
4346		rxr->lmp = NULL;
4347	}
4348	/*
4349	** Free buffer and allow em_refresh_mbufs()
4350	** to clean up and recharge buffer.
4351	*/
4352	if (rbuf->m_head) {
4353		m_free(rbuf->m_head);
4354		rbuf->m_head = NULL;
4355	}
4356	return;
4357}
4358
4359#ifndef __NO_STRICT_ALIGNMENT
4360/*
4361 * When jumbo frames are enabled we should realign entire payload on
4362 * architectures with strict alignment. This is a serious design mistake of 8254x
4363 * as it nullifies DMA operations. 8254x just allows RX buffer size to be
4364 * 2048/4096/8192/16384. What we really want is 2048 - ETHER_ALIGN to align its
4365 * payload. On architectures without strict alignment restrictions 8254x still
4366 * performs unaligned memory accesses, which reduces performance too.
4367 * To avoid copying over an entire frame to align, we allocate a new mbuf and
4368 * copy ethernet header to the new mbuf. The new mbuf is prepended into the
4369 * existing mbuf chain.
4370 *
4371 * Be aware, the best performance of the 8254x is achieved only when jumbo
4372 * frames are not used at all on architectures with strict alignment.
4373 */
4374static int
4375em_fixup_rx(struct rx_ring *rxr)
4376{
4377	struct adapter *adapter = rxr->adapter;
4378	struct mbuf *m, *n;
4379	int error;
4380
4381	error = 0;
4382	m = rxr->fmp;
4383	if (m->m_len <= (MCLBYTES - ETHER_HDR_LEN)) {
4384		bcopy(m->m_data, m->m_data + ETHER_HDR_LEN, m->m_len);
4385		m->m_data += ETHER_HDR_LEN;
4386	} else {
4387		MGETHDR(n, M_DONTWAIT, MT_DATA);
4388		if (n != NULL) {
4389			bcopy(m->m_data, n->m_data, ETHER_HDR_LEN);
4390			m->m_data += ETHER_HDR_LEN;
4391			m->m_len -= ETHER_HDR_LEN;
4392			n->m_len = ETHER_HDR_LEN;
4393			M_MOVE_PKTHDR(n, m);
4394			n->m_next = m;
4395			rxr->fmp = n;
4396		} else {
4397			adapter->dropped_pkts++;
4398			m_freem(rxr->fmp);
4399			rxr->fmp = NULL;
4400			error = ENOMEM;
4401		}
4402	}
4403
4404	return (error);
4405}
4406#endif
4407
4408/*********************************************************************
4409 *
4410 *  Verify that the hardware indicated that the checksum is valid.
4411 *  Inform the stack about the status of checksum so that stack
4412 *  doesn't spend time verifying the checksum.
4413 *
4414 *********************************************************************/
4415static void
4416em_receive_checksum(struct e1000_rx_desc *rx_desc, struct mbuf *mp)
4417{
4418	/* Ignore Checksum bit is set */
4419	/* The Ignore Checksum bit is set */
4420		mp->m_pkthdr.csum_flags = 0;
4421		return;
4422	}
4423
4424	if (rx_desc->status & E1000_RXD_STAT_IPCS) {
4425		/* Did it pass? */
4426		if (!(rx_desc->errors & E1000_RXD_ERR_IPE)) {
4427			/* IP Checksum Good */
4428			mp->m_pkthdr.csum_flags = CSUM_IP_CHECKED;
4429			mp->m_pkthdr.csum_flags |= CSUM_IP_VALID;
4430
4431		} else {
4432			mp->m_pkthdr.csum_flags = 0;
4433		}
4434	}
4435
4436	if (rx_desc->status & E1000_RXD_STAT_TCPCS) {
4437		/* Did it pass? */
4438		if (!(rx_desc->errors & E1000_RXD_ERR_TCPE)) {
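			/*
			 * A csum_data of 0xffff tells the stack that the
			 * pseudo-header checksum has already been verified,
			 * so it need not recompute it.
			 */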
4439			mp->m_pkthdr.csum_flags |=
4440			(CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
4441			mp->m_pkthdr.csum_data = htons(0xffff);
4442		}
4443	}
4444}
4445
4446/*
4447 * This routine is run via a vlan
4448 * config EVENT
4449 */
4450static void
4451em_register_vlan(void *arg, struct ifnet *ifp, u16 vtag)
4452{
4453	struct adapter	*adapter = ifp->if_softc;
4454	u32		index, bit;
4455
4456	if (ifp->if_softc !=  arg)   /* Not our event */
4457		return;
4458
4459	if ((vtag == 0) || (vtag > 4095))       /* Invalid ID */
4460                return;
4461
4462	EM_CORE_LOCK(adapter);
4463	index = (vtag >> 5) & 0x7F;
4464	bit = vtag & 0x1F;
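	/*
	 * Worked example (hypothetical vtag of 1000): index =
	 * (1000 >> 5) & 0x7F = 31 and bit = 1000 & 0x1F = 8, so
	 * bit 8 of shadow_vfta[31] marks this VLAN.
	 */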
4465	adapter->shadow_vfta[index] |= (1 << bit);
4466	++adapter->num_vlans;
4467	/* Re-init to load the changes */
4468	if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
4469		em_init_locked(adapter);
4470	EM_CORE_UNLOCK(adapter);
4471}
4472
4473/*
4474 * This routine is run via a vlan
4475 * unconfig EVENT
4476 */
4477static void
4478em_unregister_vlan(void *arg, struct ifnet *ifp, u16 vtag)
4479{
4480	struct adapter	*adapter = ifp->if_softc;
4481	u32		index, bit;
4482
4483	if (ifp->if_softc !=  arg)
4484		return;
4485
4486	if ((vtag == 0) || (vtag > 4095))       /* Invalid */
4487                return;
4488
4489	EM_CORE_LOCK(adapter);
4490	index = (vtag >> 5) & 0x7F;
4491	bit = vtag & 0x1F;
4492	adapter->shadow_vfta[index] &= ~(1 << bit);
4493	--adapter->num_vlans;
4494	/* Re-init to load the changes */
4495	if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
4496		em_init_locked(adapter);
4497	EM_CORE_UNLOCK(adapter);
4498}
4499
4500static void
4501em_setup_vlan_hw_support(struct adapter *adapter)
4502{
4503	struct e1000_hw *hw = &adapter->hw;
4504	u32             reg;
4505
4506	/*
4507	** We get here thru init_locked, meaning
4508	** a soft reset, which has already cleared
4509	** the VFTA and other state, so if no
4510	** vlans have been registered, do nothing.
4511	*/
4512	if (adapter->num_vlans == 0)
4513                return;
4514
4515	/*
4516	** A soft reset zeroes out the VFTA, so
4517	** we need to repopulate it now.
4518	*/
4519	for (int i = 0; i < EM_VFTA_SIZE; i++)
4520                if (adapter->shadow_vfta[i] != 0)
4521			E1000_WRITE_REG_ARRAY(hw, E1000_VFTA,
4522                            i, adapter->shadow_vfta[i]);
4523
4524	reg = E1000_READ_REG(hw, E1000_CTRL);
4525	reg |= E1000_CTRL_VME;
4526	E1000_WRITE_REG(hw, E1000_CTRL, reg);
4527
4528	/* Enable the Filter Table */
4529	reg = E1000_READ_REG(hw, E1000_RCTL);
4530	reg &= ~E1000_RCTL_CFIEN;
4531	reg |= E1000_RCTL_VFE;
4532	E1000_WRITE_REG(hw, E1000_RCTL, reg);
4533}
4534
4535static void
4536em_enable_intr(struct adapter *adapter)
4537{
4538	struct e1000_hw *hw = &adapter->hw;
4539	u32 ims_mask = IMS_ENABLE_MASK;
4540
4541	if (hw->mac.type == e1000_82574) {
4542		E1000_WRITE_REG(hw, EM_EIAC, EM_MSIX_MASK);
4543		ims_mask |= EM_MSIX_MASK;
4544	}
4545	E1000_WRITE_REG(hw, E1000_IMS, ims_mask);
4546}
4547
4548static void
4549em_disable_intr(struct adapter *adapter)
4550{
4551	struct e1000_hw *hw = &adapter->hw;
4552
4553	if (hw->mac.type == e1000_82574)
4554		E1000_WRITE_REG(hw, EM_EIAC, 0);
4555	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
4556}
4557
4558/*
4559 * Bit of a misnomer, what this really means is
4560 * to enable OS management of the system... aka
4561 * to disable special hardware management features
4562 */
4563static void
4564em_init_manageability(struct adapter *adapter)
4565{
4566	/* A shared code workaround */
4567#define E1000_82542_MANC2H E1000_MANC2H
4568	if (adapter->has_manage) {
4569		int manc2h = E1000_READ_REG(&adapter->hw, E1000_MANC2H);
4570		int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
4571
4572		/* disable hardware interception of ARP */
4573		manc &= ~(E1000_MANC_ARP_EN);
4574
4575                /* enable receiving management packets to the host */
4576		manc |= E1000_MANC_EN_MNG2HOST;
4577#define E1000_MNG2HOST_PORT_623 (1 << 5)
4578#define E1000_MNG2HOST_PORT_664 (1 << 6)
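		/* 623 and 664 are the RMCP and secure RMCP (ASF) management ports */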
4579		manc2h |= E1000_MNG2HOST_PORT_623;
4580		manc2h |= E1000_MNG2HOST_PORT_664;
4581		E1000_WRITE_REG(&adapter->hw, E1000_MANC2H, manc2h);
4582		E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
4583	}
4584}
4585
4586/*
4587 * Give control back to hardware management
4588 * controller if there is one.
4589 */
4590static void
4591em_release_manageability(struct adapter *adapter)
4592{
4593	if (adapter->has_manage) {
4594		int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
4595
4596		/* re-enable hardware interception of ARP */
4597		manc |= E1000_MANC_ARP_EN;
4598		manc &= ~E1000_MANC_EN_MNG2HOST;
4599
4600		E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
4601	}
4602}
4603
4604/*
4605 * em_get_hw_control sets the {CTRL_EXT|FWSM}:DRV_LOAD bit.
4606 * For ASF and Pass Through versions of f/w this means
4607 * that the driver is loaded. For AMT version type f/w
4608 * this means that the network i/f is open.
4609 */
4610static void
4611em_get_hw_control(struct adapter *adapter)
4612{
4613	u32 ctrl_ext, swsm;
4614
4615	if (adapter->hw.mac.type == e1000_82573) {
4616		swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM);
4617		E1000_WRITE_REG(&adapter->hw, E1000_SWSM,
4618		    swsm | E1000_SWSM_DRV_LOAD);
4619		return;
4620	}
4621	/* else */
4622	ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4623	E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
4624	    ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
4625	return;
4626}
4627
4628/*
4629 * em_release_hw_control resets {CTRL_EXT|FWSM}:DRV_LOAD bit.
4630 * For ASF and Pass Through versions of f/w this means that
4631 * the driver is no longer loaded. For AMT versions of the
4632 * f/w this means that the network i/f is closed.
4633 */
4634static void
4635em_release_hw_control(struct adapter *adapter)
4636{
4637	u32 ctrl_ext, swsm;
4638
4639	if (!adapter->has_manage)
4640		return;
4641
4642	if (adapter->hw.mac.type == e1000_82573) {
4643		swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM);
4644		E1000_WRITE_REG(&adapter->hw, E1000_SWSM,
4645		    swsm & ~E1000_SWSM_DRV_LOAD);
4646		return;
4647	}
4648	/* else */
4649	ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4650	E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
4651	    ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
4652	return;
4653}
4654
4655static int
4656em_is_valid_ether_addr(u8 *addr)
4657{
4658	char zero_addr[6] = { 0, 0, 0, 0, 0, 0 };
4659
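	/*
	 * Reject group (multicast/broadcast) addresses, which have the
	 * low bit of the first octet set, and the all-zero address.
	 */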
4660	if ((addr[0] & 1) || (!bcmp(addr, zero_addr, ETHER_ADDR_LEN))) {
4661		return (FALSE);
4662	}
4663
4664	return (TRUE);
4665}
4666
4667/*
4668** Parse the interface capabilities with regard
4669** to both system management and wake-on-lan for
4670** later use.
4671*/
4672static void
4673em_get_wakeup(device_t dev)
4674{
4675	struct adapter	*adapter = device_get_softc(dev);
4676	u16		eeprom_data = 0, device_id, apme_mask;
4677
4678	adapter->has_manage = e1000_enable_mng_pass_thru(&adapter->hw);
4679	apme_mask = EM_EEPROM_APME;
4680
4681	switch (adapter->hw.mac.type) {
4682	case e1000_82573:
4683	case e1000_82583:
4684		adapter->has_amt = TRUE;
4685		/* Falls thru */
4686	case e1000_82571:
4687	case e1000_82572:
4688	case e1000_80003es2lan:
4689		if (adapter->hw.bus.func == 1) {
4690			e1000_read_nvm(&adapter->hw,
4691			    NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data);
4692			break;
4693		} else
4694			e1000_read_nvm(&adapter->hw,
4695			    NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
4696		break;
4697	case e1000_ich8lan:
4698	case e1000_ich9lan:
4699	case e1000_ich10lan:
4700	case e1000_pchlan:
4701	case e1000_pch2lan:
4702		apme_mask = E1000_WUC_APME;
4703		adapter->has_amt = TRUE;
4704		eeprom_data = E1000_READ_REG(&adapter->hw, E1000_WUC);
4705		break;
4706	default:
4707		e1000_read_nvm(&adapter->hw,
4708		    NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
4709		break;
4710	}
4711	if (eeprom_data & apme_mask)
4712		adapter->wol = (E1000_WUFC_MAG | E1000_WUFC_MC);
4713	/*
4714         * We have the eeprom settings, now apply the special cases
4715         * where the eeprom may be wrong or the board won't support
4716         * wake on lan on a particular port
4717	 */
4718	device_id = pci_get_device(dev);
4719        switch (device_id) {
4720	case E1000_DEV_ID_82571EB_FIBER:
4721		/* Wake events only supported on port A for dual fiber
4722		 * regardless of eeprom setting */
4723		if (E1000_READ_REG(&adapter->hw, E1000_STATUS) &
4724		    E1000_STATUS_FUNC_1)
4725			adapter->wol = 0;
4726		break;
4727	case E1000_DEV_ID_82571EB_QUAD_COPPER:
4728	case E1000_DEV_ID_82571EB_QUAD_FIBER:
4729	case E1000_DEV_ID_82571EB_QUAD_COPPER_LP:
4730                /* if quad port adapter, disable WoL on all but port A */
4731		if (global_quad_port_a != 0)
4732			adapter->wol = 0;
4733		/* Reset for multiple quad port adapters */
4734		if (++global_quad_port_a == 4)
4735			global_quad_port_a = 0;
4736                break;
4737	}
4738	return;
4739}
4740
4741
4742/*
4743 * Enable PCI Wake On Lan capability
4744 */
4745static void
4746em_enable_wakeup(device_t dev)
4747{
4748	struct adapter	*adapter = device_get_softc(dev);
4749	struct ifnet	*ifp = adapter->ifp;
4750	u32		pmc, ctrl, ctrl_ext, rctl;
4751	u16     	status;
4752
4753	if ((pci_find_cap(dev, PCIY_PMG, &pmc) != 0))
4754		return;
4755
4756	/* Advertise the wakeup capability */
4757	ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
4758	ctrl |= (E1000_CTRL_SWDPIN2 | E1000_CTRL_SWDPIN3);
4759	E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
4760	E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
4761
4762	if ((adapter->hw.mac.type == e1000_ich8lan) ||
4763	    (adapter->hw.mac.type == e1000_pchlan) ||
4764	    (adapter->hw.mac.type == e1000_ich9lan) ||
4765	    (adapter->hw.mac.type == e1000_ich10lan))
4766		e1000_disable_gig_wol_ich8lan(&adapter->hw);
4767
4768	/* Keep the laser running on Fiber adapters */
4769	if (adapter->hw.phy.media_type == e1000_media_type_fiber ||
4770	    adapter->hw.phy.media_type == e1000_media_type_internal_serdes) {
4771		ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4772		ctrl_ext |= E1000_CTRL_EXT_SDP3_DATA;
4773		E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, ctrl_ext);
4774	}
4775
4776	/*
4777	** Determine type of Wakeup: note that wol
4778	** is set with all bits on by default.
4779	*/
4780	if ((ifp->if_capenable & IFCAP_WOL_MAGIC) == 0)
4781		adapter->wol &= ~E1000_WUFC_MAG;
4782
4783	if ((ifp->if_capenable & IFCAP_WOL_MCAST) == 0)
4784		adapter->wol &= ~E1000_WUFC_MC;
4785	else {
4786		rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
4787		rctl |= E1000_RCTL_MPE;
4788		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, rctl);
4789	}
4790
4791	if ((adapter->hw.mac.type == e1000_pchlan) ||
4792	    (adapter->hw.mac.type == e1000_pch2lan)) {
4793		if (em_enable_phy_wakeup(adapter))
4794			return;
4795	} else {
4796		E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
4797		E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
4798	}
4799
4800	if (adapter->hw.phy.type == e1000_phy_igp_3)
4801		e1000_igp3_phy_powerdown_workaround_ich8lan(&adapter->hw);
4802
4803        /* Request PME */
4804        status = pci_read_config(dev, pmc + PCIR_POWER_STATUS, 2);
4805	status &= ~(PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE);
4806	if (ifp->if_capenable & IFCAP_WOL)
4807		status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE;
4808        pci_write_config(dev, pmc + PCIR_POWER_STATUS, status, 2);
4809
4810	return;
4811}
4812
4813/*
4814** WOL in the newer chipset interfaces (pchlan)
4815** requires things to be copied into the PHY
4816*/
4817static int
4818em_enable_phy_wakeup(struct adapter *adapter)
4819{
4820	struct e1000_hw *hw = &adapter->hw;
4821	u32 mreg, ret = 0;
4822	u16 preg;
4823
4824	/* copy MAC RARs to PHY RARs */
4825	e1000_copy_rx_addrs_to_phy_ich8lan(hw);
4826
4827	/* copy MAC MTA to PHY MTA */
4828	for (int i = 0; i < adapter->hw.mac.mta_reg_count; i++) {
4829		mreg = E1000_READ_REG_ARRAY(hw, E1000_MTA, i);
4830		e1000_write_phy_reg(hw, BM_MTA(i), (u16)(mreg & 0xFFFF));
4831		e1000_write_phy_reg(hw, BM_MTA(i) + 1,
4832		    (u16)((mreg >> 16) & 0xFFFF));
4833	}
4834
4835	/* configure PHY Rx Control register */
4836	e1000_read_phy_reg(&adapter->hw, BM_RCTL, &preg);
4837	mreg = E1000_READ_REG(hw, E1000_RCTL);
4838	if (mreg & E1000_RCTL_UPE)
4839		preg |= BM_RCTL_UPE;
4840	if (mreg & E1000_RCTL_MPE)
4841		preg |= BM_RCTL_MPE;
4842	preg &= ~(BM_RCTL_MO_MASK);
4843	if (mreg & E1000_RCTL_MO_3)
4844		preg |= (((mreg & E1000_RCTL_MO_3) >> E1000_RCTL_MO_SHIFT)
4845				<< BM_RCTL_MO_SHIFT);
4846	if (mreg & E1000_RCTL_BAM)
4847		preg |= BM_RCTL_BAM;
4848	if (mreg & E1000_RCTL_PMCF)
4849		preg |= BM_RCTL_PMCF;
4850	mreg = E1000_READ_REG(hw, E1000_CTRL);
4851	if (mreg & E1000_CTRL_RFCE)
4852		preg |= BM_RCTL_RFCE;
4853	e1000_write_phy_reg(&adapter->hw, BM_RCTL, preg);
4854
4855	/* enable PHY wakeup in MAC register */
4856	E1000_WRITE_REG(hw, E1000_WUC,
4857	    E1000_WUC_PHY_WAKE | E1000_WUC_PME_EN);
4858	E1000_WRITE_REG(hw, E1000_WUFC, adapter->wol);
4859
4860	/* configure and enable PHY wakeup in PHY registers */
4861	e1000_write_phy_reg(&adapter->hw, BM_WUFC, adapter->wol);
4862	e1000_write_phy_reg(&adapter->hw, BM_WUC, E1000_WUC_PME_EN);
4863
4864	/* activate PHY wakeup */
4865	ret = hw->phy.ops.acquire(hw);
4866	if (ret) {
4867		printf("Could not acquire PHY\n");
4868		return ret;
4869	}
4870	e1000_write_phy_reg_mdic(hw, IGP01E1000_PHY_PAGE_SELECT,
4871	                         (BM_WUC_ENABLE_PAGE << IGP_PAGE_SHIFT));
4872	ret = e1000_read_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, &preg);
4873	if (ret) {
4874		printf("Could not read PHY page 769\n");
4875		goto out;
4876	}
4877	preg |= BM_WUC_ENABLE_BIT | BM_WUC_HOST_WU_BIT;
4878	ret = e1000_write_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, preg);
4879	if (ret)
4880		printf("Could not set PHY Host Wakeup bit\n");
4881out:
4882	hw->phy.ops.release(hw);
4883
4884	return ret;
4885}
4886
4887static void
4888em_led_func(void *arg, int onoff)
4889{
4890	struct adapter	*adapter = arg;
4891
4892	EM_CORE_LOCK(adapter);
4893	if (onoff) {
4894		e1000_setup_led(&adapter->hw);
4895		e1000_led_on(&adapter->hw);
4896	} else {
4897		e1000_led_off(&adapter->hw);
4898		e1000_cleanup_led(&adapter->hw);
4899	}
4900	EM_CORE_UNLOCK(adapter);
4901}
4902
4903/*
4904** Disable the L0s and L1 link states
4905*/
4906static void
4907em_disable_aspm(struct adapter *adapter)
4908{
4909	int		base, reg;
4910	u16		link_cap,link_ctrl;
4911	device_t	dev = adapter->dev;
4912
4913	switch (adapter->hw.mac.type) {
4914		case e1000_82573:
4915		case e1000_82574:
4916		case e1000_82583:
4917			break;
4918		default:
4919			return;
4920	}
4921	if (pci_find_cap(dev, PCIY_EXPRESS, &base) != 0)
4922		return;
4923	reg = base + PCIR_EXPRESS_LINK_CAP;
4924	link_cap = pci_read_config(dev, reg, 2);
4925	if ((link_cap & PCIM_LINK_CAP_ASPM) == 0)
4926		return;
4927	reg = base + PCIR_EXPRESS_LINK_CTL;
4928	link_ctrl = pci_read_config(dev, reg, 2);
4929	link_ctrl &= 0xFFFC; /* clear the L0s/L1 enables (bits 0 and 1) */
4930	pci_write_config(dev, reg, link_ctrl, 2);
4931	return;
4932}
4933
4934/**********************************************************************
4935 *
4936 *  Update the board statistics counters.
4937 *
4938 **********************************************************************/
4939static void
4940em_update_stats_counters(struct adapter *adapter)
4941{
4942	struct ifnet   *ifp;
4943
4944	if (adapter->hw.phy.media_type == e1000_media_type_copper ||
4945	   (E1000_READ_REG(&adapter->hw, E1000_STATUS) & E1000_STATUS_LU)) {
4946		adapter->stats.symerrs += E1000_READ_REG(&adapter->hw, E1000_SYMERRS);
4947		adapter->stats.sec += E1000_READ_REG(&adapter->hw, E1000_SEC);
4948	}
4949	adapter->stats.crcerrs += E1000_READ_REG(&adapter->hw, E1000_CRCERRS);
4950	adapter->stats.mpc += E1000_READ_REG(&adapter->hw, E1000_MPC);
4951	adapter->stats.scc += E1000_READ_REG(&adapter->hw, E1000_SCC);
4952	adapter->stats.ecol += E1000_READ_REG(&adapter->hw, E1000_ECOL);
4953
4954	adapter->stats.mcc += E1000_READ_REG(&adapter->hw, E1000_MCC);
4955	adapter->stats.latecol += E1000_READ_REG(&adapter->hw, E1000_LATECOL);
4956	adapter->stats.colc += E1000_READ_REG(&adapter->hw, E1000_COLC);
4957	adapter->stats.dc += E1000_READ_REG(&adapter->hw, E1000_DC);
4958	adapter->stats.rlec += E1000_READ_REG(&adapter->hw, E1000_RLEC);
4959	adapter->stats.xonrxc += E1000_READ_REG(&adapter->hw, E1000_XONRXC);
4960	adapter->stats.xontxc += E1000_READ_REG(&adapter->hw, E1000_XONTXC);
4961	/*
4962	** For watchdog management we need to know if we have been
4963	** paused during the last interval, so capture that here.
4964	*/
4965	adapter->pause_frames = E1000_READ_REG(&adapter->hw, E1000_XOFFRXC);
4966	adapter->stats.xoffrxc += adapter->pause_frames;
4967	adapter->stats.xofftxc += E1000_READ_REG(&adapter->hw, E1000_XOFFTXC);
4968	adapter->stats.fcruc += E1000_READ_REG(&adapter->hw, E1000_FCRUC);
4969	adapter->stats.prc64 += E1000_READ_REG(&adapter->hw, E1000_PRC64);
4970	adapter->stats.prc127 += E1000_READ_REG(&adapter->hw, E1000_PRC127);
4971	adapter->stats.prc255 += E1000_READ_REG(&adapter->hw, E1000_PRC255);
4972	adapter->stats.prc511 += E1000_READ_REG(&adapter->hw, E1000_PRC511);
4973	adapter->stats.prc1023 += E1000_READ_REG(&adapter->hw, E1000_PRC1023);
4974	adapter->stats.prc1522 += E1000_READ_REG(&adapter->hw, E1000_PRC1522);
4975	adapter->stats.gprc += E1000_READ_REG(&adapter->hw, E1000_GPRC);
4976	adapter->stats.bprc += E1000_READ_REG(&adapter->hw, E1000_BPRC);
4977	adapter->stats.mprc += E1000_READ_REG(&adapter->hw, E1000_MPRC);
4978	adapter->stats.gptc += E1000_READ_REG(&adapter->hw, E1000_GPTC);
4979
4980	/* For the 64-bit byte counters the low dword must be read first. */
4981	/* Both registers clear on the read of the high dword */
4982
4983	adapter->stats.gorc += E1000_READ_REG(&adapter->hw, E1000_GORCL) +
4984	    ((u64)E1000_READ_REG(&adapter->hw, E1000_GORCH) << 32);
4985	adapter->stats.gotc += E1000_READ_REG(&adapter->hw, E1000_GOTCL) +
4986	    ((u64)E1000_READ_REG(&adapter->hw, E1000_GOTCH) << 32);
4987
4988	adapter->stats.rnbc += E1000_READ_REG(&adapter->hw, E1000_RNBC);
4989	adapter->stats.ruc += E1000_READ_REG(&adapter->hw, E1000_RUC);
4990	adapter->stats.rfc += E1000_READ_REG(&adapter->hw, E1000_RFC);
4991	adapter->stats.roc += E1000_READ_REG(&adapter->hw, E1000_ROC);
4992	adapter->stats.rjc += E1000_READ_REG(&adapter->hw, E1000_RJC);
4993
4994	adapter->stats.tor += E1000_READ_REG(&adapter->hw, E1000_TORH);
4995	adapter->stats.tot += E1000_READ_REG(&adapter->hw, E1000_TOTH);
4996
4997	adapter->stats.tpr += E1000_READ_REG(&adapter->hw, E1000_TPR);
4998	adapter->stats.tpt += E1000_READ_REG(&adapter->hw, E1000_TPT);
4999	adapter->stats.ptc64 += E1000_READ_REG(&adapter->hw, E1000_PTC64);
5000	adapter->stats.ptc127 += E1000_READ_REG(&adapter->hw, E1000_PTC127);
5001	adapter->stats.ptc255 += E1000_READ_REG(&adapter->hw, E1000_PTC255);
5002	adapter->stats.ptc511 += E1000_READ_REG(&adapter->hw, E1000_PTC511);
5003	adapter->stats.ptc1023 += E1000_READ_REG(&adapter->hw, E1000_PTC1023);
5004	adapter->stats.ptc1522 += E1000_READ_REG(&adapter->hw, E1000_PTC1522);
5005	adapter->stats.mptc += E1000_READ_REG(&adapter->hw, E1000_MPTC);
5006	adapter->stats.bptc += E1000_READ_REG(&adapter->hw, E1000_BPTC);
5007
5008	/* Interrupt Counts */
5009
5010	adapter->stats.iac += E1000_READ_REG(&adapter->hw, E1000_IAC);
5011	adapter->stats.icrxptc += E1000_READ_REG(&adapter->hw, E1000_ICRXPTC);
5012	adapter->stats.icrxatc += E1000_READ_REG(&adapter->hw, E1000_ICRXATC);
5013	adapter->stats.ictxptc += E1000_READ_REG(&adapter->hw, E1000_ICTXPTC);
5014	adapter->stats.ictxatc += E1000_READ_REG(&adapter->hw, E1000_ICTXATC);
5015	adapter->stats.ictxqec += E1000_READ_REG(&adapter->hw, E1000_ICTXQEC);
5016	adapter->stats.ictxqmtc += E1000_READ_REG(&adapter->hw, E1000_ICTXQMTC);
5017	adapter->stats.icrxdmtc += E1000_READ_REG(&adapter->hw, E1000_ICRXDMTC);
5018	adapter->stats.icrxoc += E1000_READ_REG(&adapter->hw, E1000_ICRXOC);
5019
5020	if (adapter->hw.mac.type >= e1000_82543) {
5021		adapter->stats.algnerrc +=
5022		E1000_READ_REG(&adapter->hw, E1000_ALGNERRC);
5023		adapter->stats.rxerrc +=
5024		E1000_READ_REG(&adapter->hw, E1000_RXERRC);
5025		adapter->stats.tncrs +=
5026		E1000_READ_REG(&adapter->hw, E1000_TNCRS);
5027		adapter->stats.cexterr +=
5028		E1000_READ_REG(&adapter->hw, E1000_CEXTERR);
5029		adapter->stats.tsctc +=
5030		E1000_READ_REG(&adapter->hw, E1000_TSCTC);
5031		adapter->stats.tsctfc +=
5032		E1000_READ_REG(&adapter->hw, E1000_TSCTFC);
5033	}
5034	ifp = adapter->ifp;
5035
5036	ifp->if_collisions = adapter->stats.colc;
5037
5038	/* Rx Errors */
5039	ifp->if_ierrors = adapter->dropped_pkts + adapter->stats.rxerrc +
5040	    adapter->stats.crcerrs + adapter->stats.algnerrc +
5041	    adapter->stats.ruc + adapter->stats.roc +
5042	    adapter->stats.mpc + adapter->stats.cexterr;
5043
5044	/* Tx Errors */
5045	ifp->if_oerrors = adapter->stats.ecol +
5046	    adapter->stats.latecol + adapter->watchdog_events;
5047}
5048
5049/* Export a single 32-bit register via a read-only sysctl. */
5050static int
5051em_sysctl_reg_handler(SYSCTL_HANDLER_ARGS)
5052{
5053	struct adapter *adapter;
5054	u_int val;
5055
5056	adapter = oidp->oid_arg1;
5057	val = E1000_READ_REG(&adapter->hw, oidp->oid_arg2);
5058	return (sysctl_handle_int(oidp, &val, 0, req));
5059}
5060
5061/*
5062 * Add sysctl variables, one per statistic, to the system.
5063 */
5064static void
5065em_add_hw_stats(struct adapter *adapter)
5066{
5067	device_t dev = adapter->dev;
5068
5069	struct tx_ring *txr = adapter->tx_rings;
5070	struct rx_ring *rxr = adapter->rx_rings;
5071
5072	struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev);
5073	struct sysctl_oid *tree = device_get_sysctl_tree(dev);
5074	struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree);
5075	struct e1000_hw_stats *stats = &adapter->stats;
5076
5077	struct sysctl_oid *stat_node, *queue_node, *int_node;
5078	struct sysctl_oid_list *stat_list, *queue_list, *int_list;
5079
5080#define QUEUE_NAME_LEN 32
5081	char namebuf[QUEUE_NAME_LEN];
5082
5083	/* Driver Statistics */
5084	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "link_irq",
5085			CTLFLAG_RD, &adapter->link_irq,
5086			"Link MSIX IRQ Handled");
5087	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "mbuf_alloc_fail",
5088			 CTLFLAG_RD, &adapter->mbuf_alloc_failed,
5089			 "Std mbuf failed");
5090	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "cluster_alloc_fail",
5091			 CTLFLAG_RD, &adapter->mbuf_cluster_failed,
5092			 "Std mbuf cluster failed");
5093	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "dropped",
5094			CTLFLAG_RD, &adapter->dropped_pkts,
5095			"Driver dropped packets");
5096	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_dma_fail",
5097			CTLFLAG_RD, &adapter->no_tx_dma_setup,
5098			"Driver tx dma failure in xmit");
5099	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_overruns",
5100			CTLFLAG_RD, &adapter->rx_overruns,
5101			"RX overruns");
5102	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "watchdog_timeouts",
5103			CTLFLAG_RD, &adapter->watchdog_events,
5104			"Watchdog timeouts");
5105
5106	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "device_control",
5107			CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_CTRL,
5108			em_sysctl_reg_handler, "IU",
5109			"Device Control Register");
5110	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rx_control",
5111			CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_RCTL,
5112			em_sysctl_reg_handler, "IU",
5113			"Receiver Control Register");
5114	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_high_water",
5115			CTLFLAG_RD, &adapter->hw.fc.high_water, 0,
5116			"Flow Control High Watermark");
5117	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_low_water",
5118			CTLFLAG_RD, &adapter->hw.fc.low_water, 0,
5119			"Flow Control Low Watermark");
5120
5121	for (int i = 0; i < adapter->num_queues; i++, rxr++, txr++) {
5122		snprintf(namebuf, QUEUE_NAME_LEN, "queue%d", i);
5123		queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
5124					    CTLFLAG_RD, NULL, "Queue Name");
5125		queue_list = SYSCTL_CHILDREN(queue_node);
5126
5127		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_head",
5128				CTLTYPE_UINT | CTLFLAG_RD, adapter,
5129				E1000_TDH(txr->me),
5130				em_sysctl_reg_handler, "IU",
5131 				"Transmit Descriptor Head");
5132		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_tail",
5133				CTLTYPE_UINT | CTLFLAG_RD, adapter,
5134				E1000_TDT(txr->me),
5135				em_sysctl_reg_handler, "IU",
5136 				"Transmit Descriptor Tail");
5137		SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "tx_irq",
5138				CTLFLAG_RD, &txr->tx_irq,
5139				"Queue MSI-X Transmit Interrupts");
5140		SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "no_desc_avail",
5141				CTLFLAG_RD, &txr->no_desc_avail,
5142				"Queue No Descriptor Available");
5143
5144		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_head",
5145				CTLTYPE_UINT | CTLFLAG_RD, adapter,
5146				E1000_RDH(rxr->me),
5147				em_sysctl_reg_handler, "IU",
5148				"Receive Descriptor Head");
5149		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_tail",
5150				CTLTYPE_UINT | CTLFLAG_RD, adapter,
5151				E1000_RDT(rxr->me),
5152				em_sysctl_reg_handler, "IU",
5153				"Receive Descriptor Tail");
5154		SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "rx_irq",
5155				CTLFLAG_RD, &rxr->rx_irq,
5156				"Queue MSI-X Receive Interrupts");
5157	}
5158
5159	/* MAC stats get their own sub node */
5160
5161	stat_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "mac_stats",
5162				    CTLFLAG_RD, NULL, "Statistics");
5163	stat_list = SYSCTL_CHILDREN(stat_node);
5164
5165	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "excess_coll",
5166			CTLFLAG_RD, &stats->ecol,
5167			"Excessive collisions");
5168	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "single_coll",
5169			CTLFLAG_RD, &stats->scc,
5170			"Single collisions");
5171	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "multiple_coll",
5172			CTLFLAG_RD, &stats->mcc,
5173			"Multiple collisions");
5174	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "late_coll",
5175			CTLFLAG_RD, &stats->latecol,
5176			"Late collisions");
5177	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "collision_count",
5178			CTLFLAG_RD, &stats->colc,
5179			"Collision Count");
5180	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "symbol_errors",
5181			CTLFLAG_RD, &adapter->stats.symerrs,
5182			"Symbol Errors");
5183	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "sequence_errors",
5184			CTLFLAG_RD, &adapter->stats.sec,
5185			"Sequence Errors");
5186	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "defer_count",
5187			CTLFLAG_RD, &adapter->stats.dc,
5188			"Defer Count");
5189	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "missed_packets",
5190			CTLFLAG_RD, &adapter->stats.mpc,
5191			"Missed Packets");
5192	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_no_buff",
5193			CTLFLAG_RD, &adapter->stats.rnbc,
5194			"Receive No Buffers");
5195	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_undersize",
5196			CTLFLAG_RD, &adapter->stats.ruc,
5197			"Receive Undersize");
5198	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_fragmented",
5199			CTLFLAG_RD, &adapter->stats.rfc,
5200			"Fragmented Packets Received");
5201	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_oversize",
5202			CTLFLAG_RD, &adapter->stats.roc,
5203			"Oversized Packets Received");
5204	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_jabber",
5205			CTLFLAG_RD, &adapter->stats.rjc,
5206			"Received Jabber");
5207	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_errs",
5208			CTLFLAG_RD, &adapter->stats.rxerrc,
5209			"Receive Errors");
5210	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "crc_errs",
5211			CTLFLAG_RD, &adapter->stats.crcerrs,
5212			"CRC errors");
5213	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "alignment_errs",
5214			CTLFLAG_RD, &adapter->stats.algnerrc,
5215			"Alignment Errors");
5216	/* On 82575 these are collision counts */
5217	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "coll_ext_errs",
5218			CTLFLAG_RD, &adapter->stats.cexterr,
5219			"Collision/Carrier extension errors");
5220	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_recvd",
5221			CTLFLAG_RD, &adapter->stats.xonrxc,
5222			"XON Received");
5223	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_txd",
5224			CTLFLAG_RD, &adapter->stats.xontxc,
5225			"XON Transmitted");
5226	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_recvd",
5227			CTLFLAG_RD, &adapter->stats.xoffrxc,
5228			"XOFF Received");
5229	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_txd",
5230			CTLFLAG_RD, &adapter->stats.xofftxc,
5231			"XOFF Transmitted");
5232
5233	/* Packet Reception Stats */
5234	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_recvd",
5235			CTLFLAG_RD, &adapter->stats.tpr,
5236			"Total Packets Received");
5237	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd",
5238			CTLFLAG_RD, &adapter->stats.gprc,
5239			"Good Packets Received");
5240	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_recvd",
5241			CTLFLAG_RD, &adapter->stats.bprc,
5242			"Broadcast Packets Received");
5243	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd",
5244			CTLFLAG_RD, &adapter->stats.mprc,
5245			"Multicast Packets Received");
5246	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_64",
5247			CTLFLAG_RD, &adapter->stats.prc64,
5248			"64 byte frames received");
5249	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_65_127",
5250			CTLFLAG_RD, &adapter->stats.prc127,
5251			"65-127 byte frames received");
5252	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_128_255",
5253			CTLFLAG_RD, &adapter->stats.prc255,
5254			"128-255 byte frames received");
5255	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_256_511",
5256			CTLFLAG_RD, &adapter->stats.prc511,
5257			"256-511 byte frames received");
5258	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_512_1023",
5259			CTLFLAG_RD, &adapter->stats.prc1023,
5260			"512-1023 byte frames received");
5261	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_1024_1522",
5262			CTLFLAG_RD, &adapter->stats.prc1522,
5263			"1024-1522 byte frames received");
5264	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd",
5265			CTLFLAG_RD, &adapter->stats.gorc,
5266			"Good Octets Received");
5267
5268	/* Packet Transmission Stats */
5269	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_txd",
5270			CTLFLAG_RD, &adapter->stats.gotc,
5271			"Good Octets Transmitted");
5272	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_txd",
5273			CTLFLAG_RD, &adapter->stats.tpt,
5274			"Total Packets Transmitted");
5275	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
5276			CTLFLAG_RD, &adapter->stats.gptc,
5277			"Good Packets Transmitted");
5278	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_txd",
5279			CTLFLAG_RD, &adapter->stats.bptc,
5280			"Broadcast Packets Transmitted");
5281	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_txd",
5282			CTLFLAG_RD, &adapter->stats.mptc,
5283			"Multicast Packets Transmitted");
5284	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_64",
5285			CTLFLAG_RD, &adapter->stats.ptc64,
5286			"64 byte frames transmitted");
5287	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_65_127",
5288			CTLFLAG_RD, &adapter->stats.ptc127,
5289			"65-127 byte frames transmitted");
5290	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_128_255",
5291			CTLFLAG_RD, &adapter->stats.ptc255,
5292			"128-255 byte frames transmitted");
5293	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_256_511",
5294			CTLFLAG_RD, &adapter->stats.ptc511,
5295			"256-511 byte frames transmitted");
5296	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_512_1023",
5297			CTLFLAG_RD, &adapter->stats.ptc1023,
5298			"512-1023 byte frames transmitted");
5299	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_1024_1522",
5300			CTLFLAG_RD, &adapter->stats.ptc1522,
5301			"1024-1522 byte frames transmitted");
5302	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tso_txd",
5303			CTLFLAG_RD, &adapter->stats.tsctc,
5304			"TSO Contexts Transmitted");
5305	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tso_ctx_fail",
5306			CTLFLAG_RD, &adapter->stats.tsctfc,
5307			"TSO Contexts Failed");
5308
5309
5310	/* Interrupt Stats */
5311
5312	int_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "interrupts",
5313				    CTLFLAG_RD, NULL, "Interrupt Statistics");
5314	int_list = SYSCTL_CHILDREN(int_node);
5315
5316	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "asserts",
5317			CTLFLAG_RD, &adapter->stats.iac,
5318			"Interrupt Assertion Count");
5319
5320	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_pkt_timer",
5321			CTLFLAG_RD, &adapter->stats.icrxptc,
5322			"Interrupt Cause Rx Pkt Timer Expire Count");
5323
5324	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_abs_timer",
5325			CTLFLAG_RD, &adapter->stats.icrxatc,
5326			"Interrupt Cause Rx Abs Timer Expire Count");
5327
5328	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_pkt_timer",
5329			CTLFLAG_RD, &adapter->stats.ictxptc,
5330			"Interrupt Cause Tx Pkt Timer Expire Count");
5331
5332	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_abs_timer",
5333			CTLFLAG_RD, &adapter->stats.ictxatc,
5334			"Interrupt Cause Tx Abs Timer Expire Count");
5335
5336	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_queue_empty",
5337			CTLFLAG_RD, &adapter->stats.ictxqec,
5338			"Interrupt Cause Tx Queue Empty Count");
5339
5340	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_queue_min_thresh",
5341			CTLFLAG_RD, &adapter->stats.ictxqmtc,
5342			"Interrupt Cause Tx Queue Min Thresh Count");
5343
5344	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_desc_min_thresh",
5345			CTLFLAG_RD, &adapter->stats.icrxdmtc,
5346			"Interrupt Cause Rx Desc Min Thresh Count");
5347
5348	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_overrun",
5349			CTLFLAG_RD, &adapter->stats.icrxoc,
5350			"Interrupt Cause Receiver Overrun Count");
5351}
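
/*
 * Illustrative note: all of the OIDs registered above hang off the
 * device's sysctl tree, so the whole set can be browsed from userland
 * with something like (unit number assumed):
 *
 *	sysctl dev.em.0.mac_stats
 *	sysctl dev.em.0.interrupts
 */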
5352
5353/**********************************************************************
5354 *
5355 *  This routine provides a way to dump out the adapter EEPROM,
5356 *  often a useful debug/service tool.  Only the first 32 words
5357 *  are dumped; the data that matters lives within that range.
5358 *
5359 **********************************************************************/
5360static int
5361em_sysctl_nvm_info(SYSCTL_HANDLER_ARGS)
5362{
5363	struct adapter *adapter;
5364	int error;
5365	int result;
5366
5367	result = -1;
5368	error = sysctl_handle_int(oidp, &result, 0, req);
5369
5370	if (error || !req->newptr)
5371		return (error);
5372
5373	/*
5374	 * This value will cause a hex dump of the
5375	 * first 32 16-bit words of the EEPROM to
5376	 * the screen.
5377	 */
5378	if (result == 1) {
5379		adapter = (struct adapter *)arg1;
5380		em_print_nvm_info(adapter);
5381	}
5382
5383	return (error);
5384}
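
/*
 * Illustrative example: assuming this handler is attached to an OID
 * named "nvm" under the device tree (done at attach time, outside this
 * excerpt), the dump is triggered by writing 1 to it:
 *
 *	sysctl dev.em.0.nvm=1
 */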
5385
5386static void
5387em_print_nvm_info(struct adapter *adapter)
5388{
5389	u16	eeprom_data;
5390	int	i, j, row = 0;
5391
5392	/* It's a bit crude, but it gets the job done. */
5393	printf("\nInterface EEPROM Dump:\n");
5394	printf("Offset\n0x0000  ");
5395	for (i = 0, j = 0; i < 32; i++, j++) {
5396		if (j == 8) { /* Make the offset block */
5397			j = 0; ++row;
5398			printf("\n0x00%x0  ", row);
5399		}
5400		e1000_read_nvm(&adapter->hw, i, 1, &eeprom_data);
5401		printf("%04x ", eeprom_data);
5402	}
5403	printf("\n");
5404}
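
/*
 * The console output looks roughly like this (word values are, of
 * course, device-specific; 8 words per row, 4 rows for the 32 words):
 *
 *	Interface EEPROM Dump:
 *	Offset
 *	0x0000  xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx
 *	0x0010  xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx
 *	...
 */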
5405
5406static int
5407em_sysctl_int_delay(SYSCTL_HANDLER_ARGS)
5408{
5409	struct em_int_delay_info *info;
5410	struct adapter *adapter;
5411	u32 regval;
5412	int error, usecs, ticks;
5413
5414	info = (struct em_int_delay_info *)arg1;
5415	usecs = info->value;
5416	error = sysctl_handle_int(oidp, &usecs, 0, req);
5417	if (error != 0 || req->newptr == NULL)
5418		return (error);
5419	if (usecs < 0 || usecs > EM_TICKS_TO_USECS(65535))
5420		return (EINVAL);
5421	info->value = usecs;
5422	ticks = EM_USECS_TO_TICKS(usecs);
5423
5424	adapter = info->adapter;
5425
5426	EM_CORE_LOCK(adapter);
5427	regval = E1000_READ_OFFSET(&adapter->hw, info->offset);
5428	regval = (regval & ~0xffff) | (ticks & 0xffff);
5429	/* Handle a few special cases. */
5430	switch (info->offset) {
5431	case E1000_RDTR:
5432		break;
5433	case E1000_TIDV:
5434		if (ticks == 0) {
5435			adapter->txd_cmd &= ~E1000_TXD_CMD_IDE;
5436			/* Don't write 0 into the TIDV register. */
5437			regval++;
5438		} else
5439			adapter->txd_cmd |= E1000_TXD_CMD_IDE;
5440		break;
5441	}
5442	E1000_WRITE_OFFSET(&adapter->hw, info->offset, regval);
5443	EM_CORE_UNLOCK(adapter);
5444	return (0);
5445}
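
/*
 * Illustrative example: writing, say, 100 to one of the delay OIDs
 * converts 100 usecs into device ticks, folds the result into the low
 * 16 bits of the timer register, and leaves the upper bits (such as
 * the flush bit in RDTR) untouched.  Setting TIDV to 0 instead clears
 * E1000_TXD_CMD_IDE in the transmit command word, so transmit
 * interrupt delay is disabled rather than a raw 0 being written.
 */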
5446
5447static void
5448em_add_int_delay_sysctl(struct adapter *adapter, const char *name,
5449	const char *description, struct em_int_delay_info *info,
5450	int offset, int value)
5451{
5452	info->adapter = adapter;
5453	info->offset = offset;
5454	info->value = value;
5455	SYSCTL_ADD_PROC(device_get_sysctl_ctx(adapter->dev),
5456	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
5457	    OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW,
5458	    info, 0, em_sysctl_int_delay, "I", description);
5459}
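
/*
 * Illustrative example (the real attach-time call sites are outside
 * this excerpt): registering the receive delay timer would look
 * roughly like:
 *
 *	em_add_int_delay_sysctl(adapter, "rx_int_delay",
 *	    "receive interrupt delay in usecs", &adapter->rx_int_delay,
 *	    E1000_REGISTER(&adapter->hw, E1000_RDTR),
 *	    em_rx_int_delay_dflt);
 */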
5460
5461static void
5462em_set_sysctl_value(struct adapter *adapter, const char *name,
5463	const char *description, int *limit, int value)
5464{
5465	*limit = value;
5466	SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
5467	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
5468	    OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW, limit, value, description);
5469}
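
/*
 * Illustrative example (the actual call is made at attach time,
 * outside this excerpt): exporting the receive processing limit as a
 * tunable OID:
 *
 *	em_set_sysctl_value(adapter, "rx_processing_limit",
 *	    "max number of rx packets to process",
 *	    &adapter->rx_process_limit, em_rx_process_limit);
 */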
5470
5471static int
5472em_sysctl_debug_info(SYSCTL_HANDLER_ARGS)
5473{
5474	struct adapter *adapter;
5475	int error;
5476	int result;
5477
5478	result = -1;
5479	error = sysctl_handle_int(oidp, &result, 0, req);
5480
5481	if (error || !req->newptr)
5482		return (error);
5483
5484	if (result == 1) {
5485		adapter = (struct adapter *)arg1;
5486		em_print_debug_info(adapter);
5487	}
5488
5489	return (error);
5490}
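
/*
 * Illustrative example: assuming this handler sits behind a "debug"
 * OID under the device tree, a state dump is requested with:
 *
 *	sysctl dev.em.0.debug=1
 */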
5491
5492/*
5493** This routine is meant to be fluid, add whatever is
5494** needed for debugging a problem.  -jfv
5495*/
5496static void
5497em_print_debug_info(struct adapter *adapter)
5498{
5499	device_t dev = adapter->dev;
5500	struct tx_ring *txr = adapter->tx_rings;
5501	struct rx_ring *rxr = adapter->rx_rings;
5502
5503	if (adapter->ifp->if_drv_flags & IFF_DRV_RUNNING)
5504		printf("Interface is RUNNING ");
5505	else
5506		printf("Interface is NOT RUNNING ");
5507	if (adapter->ifp->if_drv_flags & IFF_DRV_OACTIVE)
5508		printf("and ACTIVE\n");
5509	else
5510		printf("and INACTIVE\n");
5511
5512	device_printf(dev, "hw tdh = %d, hw tdt = %d\n",
5513	    E1000_READ_REG(&adapter->hw, E1000_TDH(0)),
5514	    E1000_READ_REG(&adapter->hw, E1000_TDT(0)));
5515	device_printf(dev, "hw rdh = %d, hw rdt = %d\n",
5516	    E1000_READ_REG(&adapter->hw, E1000_RDH(0)),
5517	    E1000_READ_REG(&adapter->hw, E1000_RDT(0)));
5518	device_printf(dev, "Tx Queue Status = %d\n", txr->queue_status);
5519	device_printf(dev, "TX descriptors avail = %d\n",
5520	    txr->tx_avail);
5521	device_printf(dev, "TX descriptor avail failures = %ld\n",
5522	    txr->no_desc_avail);
5523	device_printf(dev, "RX discarded packets = %ld\n",
5524	    rxr->rx_discarded);
5525	device_printf(dev, "RX Next to Check = %d\n", rxr->next_to_check);
5526	device_printf(dev, "RX Next to Refresh = %d\n", rxr->next_to_refresh);
5527}
5528