if_em.c revision 206447
1/******************************************************************************
2
3  Copyright (c) 2001-2010, Intel Corporation
4  All rights reserved.
5
6  Redistribution and use in source and binary forms, with or without
7  modification, are permitted provided that the following conditions are met:
8
9   1. Redistributions of source code must retain the above copyright notice,
10      this list of conditions and the following disclaimer.
11
12   2. Redistributions in binary form must reproduce the above copyright
13      notice, this list of conditions and the following disclaimer in the
14      documentation and/or other materials provided with the distribution.
15
16   3. Neither the name of the Intel Corporation nor the names of its
17      contributors may be used to endorse or promote products derived from
18      this software without specific prior written permission.
19
20  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
24  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30  POSSIBILITY OF SUCH DAMAGE.
31
32******************************************************************************/
33/*$FreeBSD: head/sys/dev/e1000/if_em.c 206447 2010-04-10 07:26:51Z jfv $*/
34
35#ifdef HAVE_KERNEL_OPTION_HEADERS
36#include "opt_device_polling.h"
37#include "opt_inet.h"
38#endif
39
40#include <sys/param.h>
41#include <sys/systm.h>
42#if __FreeBSD_version >= 800000
43#include <sys/buf_ring.h>
44#endif
45#include <sys/bus.h>
46#include <sys/endian.h>
47#include <sys/kernel.h>
48#include <sys/kthread.h>
49#include <sys/malloc.h>
50#include <sys/mbuf.h>
51#include <sys/module.h>
52#include <sys/rman.h>
53#include <sys/socket.h>
54#include <sys/sockio.h>
55#include <sys/sysctl.h>
56#include <sys/taskqueue.h>
57#include <sys/eventhandler.h>
58#include <machine/bus.h>
59#include <machine/resource.h>
60
61#include <net/bpf.h>
62#include <net/ethernet.h>
63#include <net/if.h>
64#include <net/if_arp.h>
65#include <net/if_dl.h>
66#include <net/if_media.h>
67
68#include <net/if_types.h>
69#include <net/if_vlan_var.h>
70
71#include <netinet/in_systm.h>
72#include <netinet/in.h>
73#include <netinet/if_ether.h>
74#include <netinet/ip.h>
75#include <netinet/ip6.h>
76#include <netinet/tcp.h>
77#include <netinet/udp.h>
78
79#include <machine/in_cksum.h>
80#include <dev/led/led.h>
81#include <dev/pci/pcivar.h>
82#include <dev/pci/pcireg.h>
83
84#include "e1000_api.h"
85#include "e1000_82571.h"
86#include "if_em.h"
87
88/*********************************************************************
89 *  Set this to one to display debug statistics
90 *********************************************************************/
91int	em_display_debug_stats = 0;
92
93/*********************************************************************
94 *  Driver version:
95 *********************************************************************/
96char em_driver_version[] = "7.0.4";
97
98
99/*********************************************************************
100 *  PCI Device ID Table
101 *
102 *  Used by probe to select devices to load on
103 *  Last field stores an index into e1000_strings
104 *  Last entry must be all 0s
105 *
106 *  { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
107 *********************************************************************/
108
109static em_vendor_info_t em_vendor_info_array[] =
110{
111	/* Intel(R) PRO/1000 Network Connection */
112	{ 0x8086, E1000_DEV_ID_82571EB_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
113	{ 0x8086, E1000_DEV_ID_82571EB_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
114	{ 0x8086, E1000_DEV_ID_82571EB_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
115	{ 0x8086, E1000_DEV_ID_82571EB_SERDES_DUAL,
116						PCI_ANY_ID, PCI_ANY_ID, 0},
117	{ 0x8086, E1000_DEV_ID_82571EB_SERDES_QUAD,
118						PCI_ANY_ID, PCI_ANY_ID, 0},
119	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER,
120						PCI_ANY_ID, PCI_ANY_ID, 0},
121	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER_LP,
122						PCI_ANY_ID, PCI_ANY_ID, 0},
123	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_FIBER,
124						PCI_ANY_ID, PCI_ANY_ID, 0},
125	{ 0x8086, E1000_DEV_ID_82571PT_QUAD_COPPER,
126						PCI_ANY_ID, PCI_ANY_ID, 0},
127	{ 0x8086, E1000_DEV_ID_82572EI_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
128	{ 0x8086, E1000_DEV_ID_82572EI_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
129	{ 0x8086, E1000_DEV_ID_82572EI_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
130	{ 0x8086, E1000_DEV_ID_82572EI,		PCI_ANY_ID, PCI_ANY_ID, 0},
131
132	{ 0x8086, E1000_DEV_ID_82573E,		PCI_ANY_ID, PCI_ANY_ID, 0},
133	{ 0x8086, E1000_DEV_ID_82573E_IAMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
134	{ 0x8086, E1000_DEV_ID_82573L,		PCI_ANY_ID, PCI_ANY_ID, 0},
135	{ 0x8086, E1000_DEV_ID_82583V,		PCI_ANY_ID, PCI_ANY_ID, 0},
136	{ 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_SPT,
137						PCI_ANY_ID, PCI_ANY_ID, 0},
138	{ 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_SPT,
139						PCI_ANY_ID, PCI_ANY_ID, 0},
140	{ 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_DPT,
141						PCI_ANY_ID, PCI_ANY_ID, 0},
142	{ 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_DPT,
143						PCI_ANY_ID, PCI_ANY_ID, 0},
144	{ 0x8086, E1000_DEV_ID_ICH8_IGP_M_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
145	{ 0x8086, E1000_DEV_ID_ICH8_IGP_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
146	{ 0x8086, E1000_DEV_ID_ICH8_IGP_C,	PCI_ANY_ID, PCI_ANY_ID, 0},
147	{ 0x8086, E1000_DEV_ID_ICH8_IFE,	PCI_ANY_ID, PCI_ANY_ID, 0},
148	{ 0x8086, E1000_DEV_ID_ICH8_IFE_GT,	PCI_ANY_ID, PCI_ANY_ID, 0},
149	{ 0x8086, E1000_DEV_ID_ICH8_IFE_G,	PCI_ANY_ID, PCI_ANY_ID, 0},
150	{ 0x8086, E1000_DEV_ID_ICH8_IGP_M,	PCI_ANY_ID, PCI_ANY_ID, 0},
151	{ 0x8086, E1000_DEV_ID_ICH8_82567V_3,	PCI_ANY_ID, PCI_ANY_ID, 0},
152	{ 0x8086, E1000_DEV_ID_ICH9_IGP_M_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
153	{ 0x8086, E1000_DEV_ID_ICH9_IGP_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
154	{ 0x8086, E1000_DEV_ID_ICH9_IGP_C,	PCI_ANY_ID, PCI_ANY_ID, 0},
155	{ 0x8086, E1000_DEV_ID_ICH9_IGP_M,	PCI_ANY_ID, PCI_ANY_ID, 0},
156	{ 0x8086, E1000_DEV_ID_ICH9_IGP_M_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
157	{ 0x8086, E1000_DEV_ID_ICH9_IFE,	PCI_ANY_ID, PCI_ANY_ID, 0},
158	{ 0x8086, E1000_DEV_ID_ICH9_IFE_GT,	PCI_ANY_ID, PCI_ANY_ID, 0},
159	{ 0x8086, E1000_DEV_ID_ICH9_IFE_G,	PCI_ANY_ID, PCI_ANY_ID, 0},
160	{ 0x8086, E1000_DEV_ID_ICH9_BM,		PCI_ANY_ID, PCI_ANY_ID, 0},
161	{ 0x8086, E1000_DEV_ID_82574L,		PCI_ANY_ID, PCI_ANY_ID, 0},
162	{ 0x8086, E1000_DEV_ID_82574LA,		PCI_ANY_ID, PCI_ANY_ID, 0},
163	{ 0x8086, E1000_DEV_ID_ICH10_R_BM_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
164	{ 0x8086, E1000_DEV_ID_ICH10_R_BM_LF,	PCI_ANY_ID, PCI_ANY_ID, 0},
165	{ 0x8086, E1000_DEV_ID_ICH10_R_BM_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
166	{ 0x8086, E1000_DEV_ID_ICH10_D_BM_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
167	{ 0x8086, E1000_DEV_ID_ICH10_D_BM_LF,	PCI_ANY_ID, PCI_ANY_ID, 0},
168	{ 0x8086, E1000_DEV_ID_PCH_M_HV_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
169	{ 0x8086, E1000_DEV_ID_PCH_M_HV_LC,	PCI_ANY_ID, PCI_ANY_ID, 0},
170	{ 0x8086, E1000_DEV_ID_PCH_D_HV_DM,	PCI_ANY_ID, PCI_ANY_ID, 0},
171	{ 0x8086, E1000_DEV_ID_PCH_D_HV_DC,	PCI_ANY_ID, PCI_ANY_ID, 0},
172	/* required last entry */
173	{ 0, 0, 0, 0, 0}
174};
175
176/*********************************************************************
177 *  Table of branding strings for all supported NICs.
178 *********************************************************************/
179
180static char *em_strings[] = {
181	"Intel(R) PRO/1000 Network Connection"
182};
183
184/*********************************************************************
185 *  Function prototypes
186 *********************************************************************/
187static int	em_probe(device_t);
188static int	em_attach(device_t);
189static int	em_detach(device_t);
190static int	em_shutdown(device_t);
191static int	em_suspend(device_t);
192static int	em_resume(device_t);
193static void	em_start(struct ifnet *);
194static void	em_start_locked(struct ifnet *, struct tx_ring *);
195#ifdef EM_MULTIQUEUE
196static int	em_mq_start(struct ifnet *, struct mbuf *);
197static int	em_mq_start_locked(struct ifnet *,
198		    struct tx_ring *, struct mbuf *);
199static void	em_qflush(struct ifnet *);
200#endif
201static int	em_ioctl(struct ifnet *, u_long, caddr_t);
202static void	em_init(void *);
203static void	em_init_locked(struct adapter *);
204static void	em_stop(void *);
205static void	em_media_status(struct ifnet *, struct ifmediareq *);
206static int	em_media_change(struct ifnet *);
207static void	em_identify_hardware(struct adapter *);
208static int	em_allocate_pci_resources(struct adapter *);
209static int	em_allocate_legacy(struct adapter *);
210static int	em_allocate_msix(struct adapter *);
211static int	em_allocate_queues(struct adapter *);
212static int	em_setup_msix(struct adapter *);
213static void	em_free_pci_resources(struct adapter *);
214static void	em_local_timer(void *);
215static void	em_reset(struct adapter *);
216static void	em_setup_interface(device_t, struct adapter *);
217
218static void	em_setup_transmit_structures(struct adapter *);
219static void	em_initialize_transmit_unit(struct adapter *);
220static int	em_allocate_transmit_buffers(struct tx_ring *);
221static void	em_free_transmit_structures(struct adapter *);
222static void	em_free_transmit_buffers(struct tx_ring *);
223
224static int	em_setup_receive_structures(struct adapter *);
225static int	em_allocate_receive_buffers(struct rx_ring *);
226static void	em_initialize_receive_unit(struct adapter *);
227static void	em_free_receive_structures(struct adapter *);
228static void	em_free_receive_buffers(struct rx_ring *);
229
230static void	em_enable_intr(struct adapter *);
231static void	em_disable_intr(struct adapter *);
232static void	em_update_stats_counters(struct adapter *);
233static bool	em_txeof(struct tx_ring *);
234static int	em_rxeof(struct rx_ring *, int);
235#ifndef __NO_STRICT_ALIGNMENT
236static int	em_fixup_rx(struct rx_ring *);
237#endif
238static void	em_receive_checksum(struct e1000_rx_desc *, struct mbuf *);
239static void	em_transmit_checksum_setup(struct tx_ring *, struct mbuf *,
240		    u32 *, u32 *);
241static bool	em_tso_setup(struct tx_ring *, struct mbuf *, u32 *, u32 *);
242static void	em_set_promisc(struct adapter *);
243static void	em_disable_promisc(struct adapter *);
244static void	em_set_multi(struct adapter *);
245static void	em_print_hw_stats(struct adapter *);
246static void	em_update_link_status(struct adapter *);
247static void	em_refresh_mbufs(struct rx_ring *, int);
248static void	em_register_vlan(void *, struct ifnet *, u16);
249static void	em_unregister_vlan(void *, struct ifnet *, u16);
250static void	em_setup_vlan_hw_support(struct adapter *);
251static int	em_xmit(struct tx_ring *, struct mbuf **);
252static int	em_dma_malloc(struct adapter *, bus_size_t,
253		    struct em_dma_alloc *, int);
254static void	em_dma_free(struct adapter *, struct em_dma_alloc *);
255static void	em_print_debug_info(struct adapter *);
256static void	em_print_nvm_info(struct adapter *);
257static int 	em_is_valid_ether_addr(u8 *);
258static int	em_sysctl_stats(SYSCTL_HANDLER_ARGS);
259static int	em_sysctl_debug_info(SYSCTL_HANDLER_ARGS);
260static int	em_sysctl_int_delay(SYSCTL_HANDLER_ARGS);
261static void	em_add_int_delay_sysctl(struct adapter *, const char *,
262		    const char *, struct em_int_delay_info *, int, int);
263/* Management and WOL Support */
264static void	em_init_manageability(struct adapter *);
265static void	em_release_manageability(struct adapter *);
266static void     em_get_hw_control(struct adapter *);
267static void     em_release_hw_control(struct adapter *);
268static void	em_get_wakeup(device_t);
269static void     em_enable_wakeup(device_t);
270static int	em_enable_phy_wakeup(struct adapter *);
271static void	em_led_func(void *, int);
272
273static int	em_irq_fast(void *);
274
275/* MSIX handlers */
276static void	em_msix_tx(void *);
277static void	em_msix_rx(void *);
278static void	em_msix_link(void *);
279static void	em_handle_tx(void *context, int pending);
280static void	em_handle_rx(void *context, int pending);
281static void	em_handle_link(void *context, int pending);
282
283static void	em_add_rx_process_limit(struct adapter *, const char *,
284		    const char *, int *, int);
285
286#ifdef DEVICE_POLLING
287static poll_handler_t em_poll;
288#endif /* POLLING */
289
290/*********************************************************************
291 *  FreeBSD Device Interface Entry Points
292 *********************************************************************/
293
294static device_method_t em_methods[] = {
295	/* Device interface */
296	DEVMETHOD(device_probe, em_probe),
297	DEVMETHOD(device_attach, em_attach),
298	DEVMETHOD(device_detach, em_detach),
299	DEVMETHOD(device_shutdown, em_shutdown),
300	DEVMETHOD(device_suspend, em_suspend),
301	DEVMETHOD(device_resume, em_resume),
302	{0, 0}
303};
304
305static driver_t em_driver = {
306	"em", em_methods, sizeof(struct adapter),
307};
308
309devclass_t em_devclass;
310DRIVER_MODULE(em, pci, em_driver, em_devclass, 0, 0);
311MODULE_DEPEND(em, pci, 1, 1, 1);
312MODULE_DEPEND(em, ether, 1, 1, 1);
313
314/*********************************************************************
315 *  Tunable default values.
316 *********************************************************************/
317
318#define EM_TICKS_TO_USECS(ticks)	((1024 * (ticks) + 500) / 1000)
319#define EM_USECS_TO_TICKS(usecs)	((1000 * (usecs) + 512) / 1024)
320#define M_TSO_LEN			66
321
322/* Allow common code without TSO */
323#ifndef CSUM_TSO
324#define CSUM_TSO	0
325#endif
326
327static int em_tx_int_delay_dflt = EM_TICKS_TO_USECS(EM_TIDV);
328static int em_rx_int_delay_dflt = EM_TICKS_TO_USECS(EM_RDTR);
329TUNABLE_INT("hw.em.tx_int_delay", &em_tx_int_delay_dflt);
330TUNABLE_INT("hw.em.rx_int_delay", &em_rx_int_delay_dflt);
331
332static int em_tx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_TADV);
333static int em_rx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_RADV);
334TUNABLE_INT("hw.em.tx_abs_int_delay", &em_tx_abs_int_delay_dflt);
335TUNABLE_INT("hw.em.rx_abs_int_delay", &em_rx_abs_int_delay_dflt);
336
337static int em_rxd = EM_DEFAULT_RXD;
338static int em_txd = EM_DEFAULT_TXD;
339TUNABLE_INT("hw.em.rxd", &em_rxd);
340TUNABLE_INT("hw.em.txd", &em_txd);
341
342static int em_smart_pwr_down = FALSE;
343TUNABLE_INT("hw.em.smart_pwr_down", &em_smart_pwr_down);
344
345/* Controls whether promiscuous also shows bad packets */
346static int em_debug_sbp = FALSE;
347TUNABLE_INT("hw.em.sbp", &em_debug_sbp);
348
349/* Local controls for MSI/MSIX */
350static int em_enable_msix = TRUE;
351static int em_msix_queues = 2; /* for 82574, can be 1 or 2 */
352TUNABLE_INT("hw.em.enable_msix", &em_enable_msix);
353TUNABLE_INT("hw.em.msix_queues", &em_msix_queues);
354
355/* How many packets rxeof tries to clean at a time */
356static int em_rx_process_limit = 100;
357TUNABLE_INT("hw.em.rx_process_limit", &em_rx_process_limit);
358
359/* Flow control setting - default to FULL */
360static int em_fc_setting = e1000_fc_full;
361TUNABLE_INT("hw.em.fc_setting", &em_fc_setting);
362
363/*
364** Shadow VFTA table, this is needed because
365** the real vlan filter table gets cleared during
366** a soft reset and the driver needs to be able
367** to repopulate it.
368*/
369static u32 em_shadow_vfta[EM_VFTA_SIZE];
370
371/* Global used in WOL setup with multiport cards */
372static int global_quad_port_a = 0;
373
374/*********************************************************************
375 *  Device identification routine
376 *
377 *  em_probe determines if the driver should be loaded on
378 *  adapter based on PCI vendor/device id of the adapter.
379 *
380 *  return BUS_PROBE_DEFAULT on success, positive on failure
381 *********************************************************************/
382
383static int
384em_probe(device_t dev)
385{
386	char		adapter_name[60];
387	u16		pci_vendor_id = 0;
388	u16		pci_device_id = 0;
389	u16		pci_subvendor_id = 0;
390	u16		pci_subdevice_id = 0;
391	em_vendor_info_t *ent;
392
393	INIT_DEBUGOUT("em_probe: begin");
394
395	pci_vendor_id = pci_get_vendor(dev);
396	if (pci_vendor_id != EM_VENDOR_ID)
397		return (ENXIO);
398
399	pci_device_id = pci_get_device(dev);
400	pci_subvendor_id = pci_get_subvendor(dev);
401	pci_subdevice_id = pci_get_subdevice(dev);
402
403	ent = em_vendor_info_array;
404	while (ent->vendor_id != 0) {
405		if ((pci_vendor_id == ent->vendor_id) &&
406		    (pci_device_id == ent->device_id) &&
407
408		    ((pci_subvendor_id == ent->subvendor_id) ||
409		    (ent->subvendor_id == PCI_ANY_ID)) &&
410
411		    ((pci_subdevice_id == ent->subdevice_id) ||
412		    (ent->subdevice_id == PCI_ANY_ID))) {
413			sprintf(adapter_name, "%s %s",
414				em_strings[ent->index],
415				em_driver_version);
416			device_set_desc_copy(dev, adapter_name);
417			return (BUS_PROBE_DEFAULT);
418		}
419		ent++;
420	}
421
422	return (ENXIO);
423}
424
425/*********************************************************************
426 *  Device initialization routine
427 *
428 *  The attach entry point is called when the driver is being loaded.
429 *  This routine identifies the type of hardware, allocates all resources
430 *  and initializes the hardware.
431 *
432 *  return 0 on success, positive on failure
433 *********************************************************************/
434
435static int
436em_attach(device_t dev)
437{
438	struct adapter	*adapter;
439	int		error = 0;
440
441	INIT_DEBUGOUT("em_attach: begin");
442
443	adapter = device_get_softc(dev);
444	adapter->dev = adapter->osdep.dev = dev;
445	EM_CORE_LOCK_INIT(adapter, device_get_nameunit(dev));
446
447	/* SYSCTL stuff */
448	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
449	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
450	    OID_AUTO, "debug", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
451	    em_sysctl_debug_info, "I", "Debug Information");
452
453	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
454	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
455	    OID_AUTO, "stats", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
456	    em_sysctl_stats, "I", "Statistics");
457
458	callout_init_mtx(&adapter->timer, &adapter->core_mtx, 0);
459
460	/* Determine hardware and mac info */
461	em_identify_hardware(adapter);
462
463	/* Setup PCI resources */
464	if (em_allocate_pci_resources(adapter)) {
465		device_printf(dev, "Allocation of PCI resources failed\n");
466		error = ENXIO;
467		goto err_pci;
468	}
469
470	/*
471	** For ICH8 and family we need to
472	** map the flash memory, and this
473	** must happen after the MAC is
474	** identified
475	*/
476	if ((adapter->hw.mac.type == e1000_ich8lan) ||
477	    (adapter->hw.mac.type == e1000_pchlan) ||
478	    (adapter->hw.mac.type == e1000_ich9lan) ||
479	    (adapter->hw.mac.type == e1000_ich10lan)) {
480		int rid = EM_BAR_TYPE_FLASH;
481		adapter->flash = bus_alloc_resource_any(dev,
482		    SYS_RES_MEMORY, &rid, RF_ACTIVE);
483		if (adapter->flash == NULL) {
484			device_printf(dev, "Mapping of Flash failed\n");
485			error = ENXIO;
486			goto err_pci;
487		}
488		/* This is used in the shared code */
489		adapter->hw.flash_address = (u8 *)adapter->flash;
490		adapter->osdep.flash_bus_space_tag =
491		    rman_get_bustag(adapter->flash);
492		adapter->osdep.flash_bus_space_handle =
493		    rman_get_bushandle(adapter->flash);
494	}
495
496	/* Do Shared Code initialization */
497	if (e1000_setup_init_funcs(&adapter->hw, TRUE)) {
498		device_printf(dev, "Setup of Shared code failed\n");
499		error = ENXIO;
500		goto err_pci;
501	}
502
503	e1000_get_bus_info(&adapter->hw);
504
505	/* Set up some sysctls for the tunable interrupt delays */
506	em_add_int_delay_sysctl(adapter, "rx_int_delay",
507	    "receive interrupt delay in usecs", &adapter->rx_int_delay,
508	    E1000_REGISTER(&adapter->hw, E1000_RDTR), em_rx_int_delay_dflt);
509	em_add_int_delay_sysctl(adapter, "tx_int_delay",
510	    "transmit interrupt delay in usecs", &adapter->tx_int_delay,
511	    E1000_REGISTER(&adapter->hw, E1000_TIDV), em_tx_int_delay_dflt);
512	em_add_int_delay_sysctl(adapter, "rx_abs_int_delay",
513	    "receive interrupt delay limit in usecs",
514	    &adapter->rx_abs_int_delay,
515	    E1000_REGISTER(&adapter->hw, E1000_RADV),
516	    em_rx_abs_int_delay_dflt);
517	em_add_int_delay_sysctl(adapter, "tx_abs_int_delay",
518	    "transmit interrupt delay limit in usecs",
519	    &adapter->tx_abs_int_delay,
520	    E1000_REGISTER(&adapter->hw, E1000_TADV),
521	    em_tx_abs_int_delay_dflt);
522
523	/* Sysctls for limiting the amount of work done in the taskqueue */
524	em_add_rx_process_limit(adapter, "rx_processing_limit",
525	    "max number of rx packets to process", &adapter->rx_process_limit,
526	    em_rx_process_limit);
527
528	/*
529	 * Validate number of transmit and receive descriptors. It
530	 * must not exceed hardware maximum, and must be multiple
531	 * of E1000_DBA_ALIGN.
532	 */
533	if (((em_txd * sizeof(struct e1000_tx_desc)) % EM_DBA_ALIGN) != 0 ||
534	    (em_txd > EM_MAX_TXD) || (em_txd < EM_MIN_TXD)) {
535		device_printf(dev, "Using %d TX descriptors instead of %d!\n",
536		    EM_DEFAULT_TXD, em_txd);
537		adapter->num_tx_desc = EM_DEFAULT_TXD;
538	} else
539		adapter->num_tx_desc = em_txd;
540
541	if (((em_rxd * sizeof(struct e1000_rx_desc)) % EM_DBA_ALIGN) != 0 ||
542	    (em_rxd > EM_MAX_RXD) || (em_rxd < EM_MIN_RXD)) {
543		device_printf(dev, "Using %d RX descriptors instead of %d!\n",
544		    EM_DEFAULT_RXD, em_rxd);
545		adapter->num_rx_desc = EM_DEFAULT_RXD;
546	} else
547		adapter->num_rx_desc = em_rxd;
548
549	adapter->hw.mac.autoneg = DO_AUTO_NEG;
550	adapter->hw.phy.autoneg_wait_to_complete = FALSE;
551	adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
552
553	/* Copper options */
554	if (adapter->hw.phy.media_type == e1000_media_type_copper) {
555		adapter->hw.phy.mdix = AUTO_ALL_MODES;
556		adapter->hw.phy.disable_polarity_correction = FALSE;
557		adapter->hw.phy.ms_type = EM_MASTER_SLAVE;
558	}
559
560	/*
561	 * Set the frame limits assuming
562	 * standard ethernet sized frames.
563	 */
564	adapter->max_frame_size = ETHERMTU + ETHER_HDR_LEN + ETHERNET_FCS_SIZE;
565	adapter->min_frame_size = ETH_ZLEN + ETHERNET_FCS_SIZE;
566
567	/*
568	 * This controls when hardware reports transmit completion
569	 * status.
570	 */
571	adapter->hw.mac.report_tx_early = 1;
572
573	/*
574	** Get queue/ring memory
575	*/
576	if (em_allocate_queues(adapter)) {
577		error = ENOMEM;
578		goto err_pci;
579	}
580
581	/*
582	** Start from a known state, this is
583	** important in reading the nvm and
584	** mac from that.
585	*/
586	e1000_reset_hw(&adapter->hw);
587
588	/* Make sure we have a good EEPROM before we read from it */
589	if (e1000_validate_nvm_checksum(&adapter->hw) < 0) {
590		/*
591		** Some PCI-E parts fail the first check due to
592		** the link being in sleep state, call it again,
593		** if it fails a second time its a real issue.
594		*/
595		if (e1000_validate_nvm_checksum(&adapter->hw) < 0) {
596			device_printf(dev,
597			    "The EEPROM Checksum Is Not Valid\n");
598			error = EIO;
599			goto err_late;
600		}
601	}
602
603	/* Copy the permanent MAC address out of the EEPROM */
604	if (e1000_read_mac_addr(&adapter->hw) < 0) {
605		device_printf(dev, "EEPROM read error while reading MAC"
606		    " address\n");
607		error = EIO;
608		goto err_late;
609	}
610
611	if (!em_is_valid_ether_addr(adapter->hw.mac.addr)) {
612		device_printf(dev, "Invalid MAC address\n");
613		error = EIO;
614		goto err_late;
615	}
616
617	/*
618	**  Do interrupt configuration
619	*/
620	if (adapter->msix > 1) /* Do MSIX */
621		error = em_allocate_msix(adapter);
622	else  /* MSI or Legacy */
623		error = em_allocate_legacy(adapter);
624	if (error)
625		goto err_late;
626
627	/*
628	 * Get Wake-on-Lan and Management info for later use
629	 */
630	em_get_wakeup(dev);
631
632	/* Setup OS specific network interface */
633	em_setup_interface(dev, adapter);
634
635	em_reset(adapter);
636
637	/* Initialize statistics */
638	em_update_stats_counters(adapter);
639
640	adapter->hw.mac.get_link_status = 1;
641	em_update_link_status(adapter);
642
643	/* Indicate SOL/IDER usage */
644	if (e1000_check_reset_block(&adapter->hw))
645		device_printf(dev,
646		    "PHY reset is blocked due to SOL/IDER session.\n");
647
648	/* Register for VLAN events */
649	adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
650	    em_register_vlan, adapter, EVENTHANDLER_PRI_FIRST);
651	adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
652	    em_unregister_vlan, adapter, EVENTHANDLER_PRI_FIRST);
653
654	/* Non-AMT based hardware can now take control from firmware */
655	if (adapter->has_manage && !adapter->has_amt)
656		em_get_hw_control(adapter);
657
658	/* Tell the stack that the interface is not active */
659	adapter->ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
660
661	adapter->led_dev = led_create(em_led_func, adapter,
662	    device_get_nameunit(dev));
663
664	INIT_DEBUGOUT("em_attach: end");
665
666	return (0);
667
668err_late:
669	em_free_transmit_structures(adapter);
670	em_free_receive_structures(adapter);
671	em_release_hw_control(adapter);
672err_pci:
673	em_free_pci_resources(adapter);
674	EM_CORE_LOCK_DESTROY(adapter);
675
676	return (error);
677}
678
679/*********************************************************************
680 *  Device removal routine
681 *
682 *  The detach entry point is called when the driver is being removed.
683 *  This routine stops the adapter and deallocates all the resources
684 *  that were allocated for driver operation.
685 *
686 *  return 0 on success, positive on failure
687 *********************************************************************/
688
689static int
690em_detach(device_t dev)
691{
692	struct adapter	*adapter = device_get_softc(dev);
693	struct ifnet	*ifp = adapter->ifp;
694
695	INIT_DEBUGOUT("em_detach: begin");
696
697	/* Make sure VLANS are not using driver */
698	if (adapter->ifp->if_vlantrunk != NULL) {
699		device_printf(dev,"Vlan in use, detach first\n");
700		return (EBUSY);
701	}
702
703#ifdef DEVICE_POLLING
704	if (ifp->if_capenable & IFCAP_POLLING)
705		ether_poll_deregister(ifp);
706#endif
707
708	EM_CORE_LOCK(adapter);
709	adapter->in_detach = 1;
710	em_stop(adapter);
711	EM_CORE_UNLOCK(adapter);
712	EM_CORE_LOCK_DESTROY(adapter);
713
714	e1000_phy_hw_reset(&adapter->hw);
715
716	em_release_manageability(adapter);
717	em_release_hw_control(adapter);
718
719	/* Unregister VLAN events */
720	if (adapter->vlan_attach != NULL)
721		EVENTHANDLER_DEREGISTER(vlan_config, adapter->vlan_attach);
722	if (adapter->vlan_detach != NULL)
723		EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach);
724
725	ether_ifdetach(adapter->ifp);
726	callout_drain(&adapter->timer);
727
728	em_free_pci_resources(adapter);
729	bus_generic_detach(dev);
730	if_free(ifp);
731
732	em_free_transmit_structures(adapter);
733	em_free_receive_structures(adapter);
734
735	em_release_hw_control(adapter);
736
737	return (0);
738}
739
740/*********************************************************************
741 *
742 *  Shutdown entry point
743 *
744 **********************************************************************/
745
746static int
747em_shutdown(device_t dev)
748{
749	return em_suspend(dev);
750}
751
752/*
753 * Suspend/resume device methods.
754 */
755static int
756em_suspend(device_t dev)
757{
758	struct adapter *adapter = device_get_softc(dev);
759
760	EM_CORE_LOCK(adapter);
761
762        em_release_manageability(adapter);
763	em_release_hw_control(adapter);
764	em_enable_wakeup(dev);
765
766	EM_CORE_UNLOCK(adapter);
767
768	return bus_generic_suspend(dev);
769}
770
771static int
772em_resume(device_t dev)
773{
774	struct adapter *adapter = device_get_softc(dev);
775	struct ifnet *ifp = adapter->ifp;
776
777	if (adapter->led_dev != NULL)
778		led_destroy(adapter->led_dev);
779
780	EM_CORE_LOCK(adapter);
781	em_init_locked(adapter);
782	em_init_manageability(adapter);
783	EM_CORE_UNLOCK(adapter);
784	em_start(ifp);
785
786	return bus_generic_resume(dev);
787}
788
789
790/*********************************************************************
791 *  Transmit entry point
792 *
793 *  em_start is called by the stack to initiate a transmit.
794 *  The driver will remain in this routine as long as there are
795 *  packets to transmit and transmit resources are available.
796 *  In case resources are not available stack is notified and
797 *  the packet is requeued.
798 **********************************************************************/
799
800#ifdef EM_MULTIQUEUE
801static int
802em_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr, struct mbuf *m)
803{
804	struct adapter  *adapter = txr->adapter;
805        struct mbuf     *next;
806        int             err = 0, enq = 0;
807
808	if ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) !=
809	    IFF_DRV_RUNNING || adapter->link_active == 0) {
810		if (m != NULL)
811			err = drbr_enqueue(ifp, txr->br, m);
812		return (err);
813	}
814
815        /* Call cleanup if number of TX descriptors low */
816	if (txr->tx_avail <= EM_TX_CLEANUP_THRESHOLD)
817		em_txeof(txr);
818
819	enq = 0;
820	if (m == NULL) {
821		next = drbr_dequeue(ifp, txr->br);
822	} else if (drbr_needs_enqueue(ifp, txr->br)) {
823		if ((err = drbr_enqueue(ifp, txr->br, m)) != 0)
824			return (err);
825		next = drbr_dequeue(ifp, txr->br);
826	} else
827		next = m;
828
829	/* Process the queue */
830	while (next != NULL) {
831		if ((err = em_xmit(txr, &next)) != 0) {
832                        if (next != NULL)
833                                err = drbr_enqueue(ifp, txr->br, next);
834                        break;
835		}
836		enq++;
837		drbr_stats_update(ifp, next->m_pkthdr.len, next->m_flags);
838		ETHER_BPF_MTAP(ifp, next);
839		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
840                        break;
841		if (txr->tx_avail < EM_MAX_SCATTER) {
842			ifp->if_drv_flags |= IFF_DRV_OACTIVE;
843			break;
844		}
845		next = drbr_dequeue(ifp, txr->br);
846	}
847
848	if (enq > 0) {
849                /* Set the watchdog */
850                txr->watchdog_check = TRUE;
851		txr->watchdog_time = ticks;
852	}
853	return (err);
854}
855
856/*
857** Multiqueue capable stack interface, this is not
858** yet truely multiqueue, but that is coming...
859*/
860static int
861em_mq_start(struct ifnet *ifp, struct mbuf *m)
862{
863	struct adapter	*adapter = ifp->if_softc;
864	struct tx_ring	*txr;
865	int 		i, error = 0;
866
867	/* Which queue to use */
868	if ((m->m_flags & M_FLOWID) != 0)
869                i = m->m_pkthdr.flowid % adapter->num_queues;
870	else
871		i = curcpu % adapter->num_queues;
872
873	txr = &adapter->tx_rings[i];
874
875	if (EM_TX_TRYLOCK(txr)) {
876		error = em_mq_start_locked(ifp, txr, m);
877		EM_TX_UNLOCK(txr);
878	} else
879		error = drbr_enqueue(ifp, txr->br, m);
880
881	return (error);
882}
883
884/*
885** Flush all ring buffers
886*/
887static void
888em_qflush(struct ifnet *ifp)
889{
890	struct adapter  *adapter = ifp->if_softc;
891	struct tx_ring  *txr = adapter->tx_rings;
892	struct mbuf     *m;
893
894	for (int i = 0; i < adapter->num_queues; i++, txr++) {
895		EM_TX_LOCK(txr);
896		while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
897			m_freem(m);
898		EM_TX_UNLOCK(txr);
899	}
900	if_qflush(ifp);
901}
902
903#endif /* EM_MULTIQUEUE */
904
905static void
906em_start_locked(struct ifnet *ifp, struct tx_ring *txr)
907{
908	struct adapter	*adapter = ifp->if_softc;
909	struct mbuf	*m_head;
910
911	EM_TX_LOCK_ASSERT(txr);
912
913	if ((ifp->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
914	    IFF_DRV_RUNNING)
915		return;
916
917	if (!adapter->link_active)
918		return;
919
920        /* Call cleanup if number of TX descriptors low */
921	if (txr->tx_avail <= EM_TX_CLEANUP_THRESHOLD)
922		em_txeof(txr);
923
924	while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
925		if (txr->tx_avail < EM_MAX_SCATTER) {
926			ifp->if_drv_flags |= IFF_DRV_OACTIVE;
927			break;
928		}
929                IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
930		if (m_head == NULL)
931			break;
932		/*
933		 *  Encapsulation can modify our pointer, and or make it
934		 *  NULL on failure.  In that event, we can't requeue.
935		 */
936		if (em_xmit(txr, &m_head)) {
937			if (m_head == NULL)
938				break;
939			ifp->if_drv_flags |= IFF_DRV_OACTIVE;
940			IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
941			break;
942		}
943
944		/* Send a copy of the frame to the BPF listener */
945		ETHER_BPF_MTAP(ifp, m_head);
946
947		/* Set timeout in case hardware has problems transmitting. */
948		txr->watchdog_time = ticks;
949		txr->watchdog_check = TRUE;
950	}
951
952	return;
953}
954
955static void
956em_start(struct ifnet *ifp)
957{
958	struct adapter	*adapter = ifp->if_softc;
959	struct tx_ring	*txr = adapter->tx_rings;
960
961	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
962		EM_TX_LOCK(txr);
963		em_start_locked(ifp, txr);
964		EM_TX_UNLOCK(txr);
965	}
966	return;
967}
968
969/*********************************************************************
970 *  Ioctl entry point
971 *
972 *  em_ioctl is called when the user wants to configure the
973 *  interface.
974 *
975 *  return 0 on success, positive on failure
976 **********************************************************************/
977
978static int
979em_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
980{
981	struct adapter	*adapter = ifp->if_softc;
982	struct ifreq *ifr = (struct ifreq *)data;
983#ifdef INET
984	struct ifaddr *ifa = (struct ifaddr *)data;
985#endif
986	int error = 0;
987
988	if (adapter->in_detach)
989		return (error);
990
991	switch (command) {
992	case SIOCSIFADDR:
993#ifdef INET
994		if (ifa->ifa_addr->sa_family == AF_INET) {
995			/*
996			 * XXX
997			 * Since resetting hardware takes a very long time
998			 * and results in link renegotiation we only
999			 * initialize the hardware only when it is absolutely
1000			 * required.
1001			 */
1002			ifp->if_flags |= IFF_UP;
1003			if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) {
1004				EM_CORE_LOCK(adapter);
1005				em_init_locked(adapter);
1006				EM_CORE_UNLOCK(adapter);
1007			}
1008			arp_ifinit(ifp, ifa);
1009		} else
1010#endif
1011			error = ether_ioctl(ifp, command, data);
1012		break;
1013	case SIOCSIFMTU:
1014	    {
1015		int max_frame_size;
1016
1017		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)");
1018
1019		EM_CORE_LOCK(adapter);
1020		switch (adapter->hw.mac.type) {
1021		case e1000_82571:
1022		case e1000_82572:
1023		case e1000_ich9lan:
1024		case e1000_ich10lan:
1025		case e1000_82574:
1026		case e1000_80003es2lan:	/* 9K Jumbo Frame size */
1027			max_frame_size = 9234;
1028			break;
1029		case e1000_pchlan:
1030			max_frame_size = 4096;
1031			break;
1032			/* Adapters that do not support jumbo frames */
1033		case e1000_82583:
1034		case e1000_ich8lan:
1035			max_frame_size = ETHER_MAX_LEN;
1036			break;
1037		default:
1038			max_frame_size = MAX_JUMBO_FRAME_SIZE;
1039		}
1040		if (ifr->ifr_mtu > max_frame_size - ETHER_HDR_LEN -
1041		    ETHER_CRC_LEN) {
1042			EM_CORE_UNLOCK(adapter);
1043			error = EINVAL;
1044			break;
1045		}
1046
1047		ifp->if_mtu = ifr->ifr_mtu;
1048		adapter->max_frame_size =
1049		    ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
1050		em_init_locked(adapter);
1051		EM_CORE_UNLOCK(adapter);
1052		break;
1053	    }
1054	case SIOCSIFFLAGS:
1055		IOCTL_DEBUGOUT("ioctl rcv'd:\
1056		    SIOCSIFFLAGS (Set Interface Flags)");
1057		EM_CORE_LOCK(adapter);
1058		if (ifp->if_flags & IFF_UP) {
1059			if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) {
1060				if ((ifp->if_flags ^ adapter->if_flags) &
1061				    (IFF_PROMISC | IFF_ALLMULTI)) {
1062					em_disable_promisc(adapter);
1063					em_set_promisc(adapter);
1064				}
1065			} else
1066				em_init_locked(adapter);
1067		} else
1068			if (ifp->if_drv_flags & IFF_DRV_RUNNING)
1069				em_stop(adapter);
1070		adapter->if_flags = ifp->if_flags;
1071		EM_CORE_UNLOCK(adapter);
1072		break;
1073	case SIOCADDMULTI:
1074	case SIOCDELMULTI:
1075		IOCTL_DEBUGOUT("ioctl rcv'd: SIOC(ADD|DEL)MULTI");
1076		if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1077			EM_CORE_LOCK(adapter);
1078			em_disable_intr(adapter);
1079			em_set_multi(adapter);
1080#ifdef DEVICE_POLLING
1081			if (!(ifp->if_capenable & IFCAP_POLLING))
1082#endif
1083				em_enable_intr(adapter);
1084			EM_CORE_UNLOCK(adapter);
1085		}
1086		break;
1087	case SIOCSIFMEDIA:
1088		/* Check SOL/IDER usage */
1089		EM_CORE_LOCK(adapter);
1090		if (e1000_check_reset_block(&adapter->hw)) {
1091			EM_CORE_UNLOCK(adapter);
1092			device_printf(adapter->dev, "Media change is"
1093			    " blocked due to SOL/IDER session.\n");
1094			break;
1095		}
1096		EM_CORE_UNLOCK(adapter);
1097	case SIOCGIFMEDIA:
1098		IOCTL_DEBUGOUT("ioctl rcv'd: \
1099		    SIOCxIFMEDIA (Get/Set Interface Media)");
1100		error = ifmedia_ioctl(ifp, ifr, &adapter->media, command);
1101		break;
1102	case SIOCSIFCAP:
1103	    {
1104		int mask, reinit;
1105
1106		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFCAP (Set Capabilities)");
1107		reinit = 0;
1108		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
1109#ifdef DEVICE_POLLING
1110		if (mask & IFCAP_POLLING) {
1111			if (ifr->ifr_reqcap & IFCAP_POLLING) {
1112				error = ether_poll_register(em_poll, ifp);
1113				if (error)
1114					return (error);
1115				EM_CORE_LOCK(adapter);
1116				em_disable_intr(adapter);
1117				ifp->if_capenable |= IFCAP_POLLING;
1118				EM_CORE_UNLOCK(adapter);
1119			} else {
1120				error = ether_poll_deregister(ifp);
1121				/* Enable interrupt even in error case */
1122				EM_CORE_LOCK(adapter);
1123				em_enable_intr(adapter);
1124				ifp->if_capenable &= ~IFCAP_POLLING;
1125				EM_CORE_UNLOCK(adapter);
1126			}
1127		}
1128#endif
1129		if (mask & IFCAP_HWCSUM) {
1130			ifp->if_capenable ^= IFCAP_HWCSUM;
1131			reinit = 1;
1132		}
1133		if (mask & IFCAP_TSO4) {
1134			ifp->if_capenable ^= IFCAP_TSO4;
1135			reinit = 1;
1136		}
1137		if (mask & IFCAP_VLAN_HWTAGGING) {
1138			ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
1139			reinit = 1;
1140		}
1141		if (mask & IFCAP_VLAN_HWFILTER) {
1142			ifp->if_capenable ^= IFCAP_VLAN_HWFILTER;
1143			reinit = 1;
1144		}
1145		if ((mask & IFCAP_WOL) &&
1146		    (ifp->if_capabilities & IFCAP_WOL) != 0) {
1147			if (mask & IFCAP_WOL_MCAST)
1148				ifp->if_capenable ^= IFCAP_WOL_MCAST;
1149			if (mask & IFCAP_WOL_MAGIC)
1150				ifp->if_capenable ^= IFCAP_WOL_MAGIC;
1151		}
1152		if (reinit && (ifp->if_drv_flags & IFF_DRV_RUNNING))
1153			em_init(adapter);
1154		VLAN_CAPABILITIES(ifp);
1155		break;
1156	    }
1157
1158	default:
1159		error = ether_ioctl(ifp, command, data);
1160		break;
1161	}
1162
1163	return (error);
1164}
1165
1166
1167/*********************************************************************
1168 *  Init entry point
1169 *
1170 *  This routine is used in two ways. It is used by the stack as
1171 *  init entry point in network interface structure. It is also used
1172 *  by the driver as a hw/sw initialization routine to get to a
1173 *  consistent state.
1174 *
1175 *  return 0 on success, positive on failure
1176 **********************************************************************/
1177
1178static void
1179em_init_locked(struct adapter *adapter)
1180{
1181	struct ifnet	*ifp = adapter->ifp;
1182	device_t	dev = adapter->dev;
1183	u32		pba;
1184
1185	INIT_DEBUGOUT("em_init: begin");
1186
1187	EM_CORE_LOCK_ASSERT(adapter);
1188
1189	em_disable_intr(adapter);
1190	callout_stop(&adapter->timer);
1191
1192	/*
1193	 * Packet Buffer Allocation (PBA)
1194	 * Writing PBA sets the receive portion of the buffer
1195	 * the remainder is used for the transmit buffer.
1196	 */
1197	switch (adapter->hw.mac.type) {
1198	/* Total Packet Buffer on these is 48K */
1199	case e1000_82571:
1200	case e1000_82572:
1201	case e1000_80003es2lan:
1202			pba = E1000_PBA_32K; /* 32K for Rx, 16K for Tx */
1203		break;
1204	case e1000_82573: /* 82573: Total Packet Buffer is 32K */
1205			pba = E1000_PBA_12K; /* 12K for Rx, 20K for Tx */
1206		break;
1207	case e1000_82574:
1208	case e1000_82583:
1209			pba = E1000_PBA_20K; /* 20K for Rx, 20K for Tx */
1210		break;
1211	case e1000_ich9lan:
1212	case e1000_ich10lan:
1213	case e1000_pchlan:
1214		pba = E1000_PBA_10K;
1215		break;
1216	case e1000_ich8lan:
1217		pba = E1000_PBA_8K;
1218		break;
1219	default:
1220		if (adapter->max_frame_size > 8192)
1221			pba = E1000_PBA_40K; /* 40K for Rx, 24K for Tx */
1222		else
1223			pba = E1000_PBA_48K; /* 48K for Rx, 16K for Tx */
1224	}
1225
1226	INIT_DEBUGOUT1("em_init: pba=%dK",pba);
1227	E1000_WRITE_REG(&adapter->hw, E1000_PBA, pba);
1228
1229	/* Get the latest mac address, User can use a LAA */
1230        bcopy(IF_LLADDR(adapter->ifp), adapter->hw.mac.addr,
1231              ETHER_ADDR_LEN);
1232
1233	/* Put the address into the Receive Address Array */
1234	e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);
1235
1236	/*
1237	 * With the 82571 adapter, RAR[0] may be overwritten
1238	 * when the other port is reset, we make a duplicate
1239	 * in RAR[14] for that eventuality, this assures
1240	 * the interface continues to function.
1241	 */
1242	if (adapter->hw.mac.type == e1000_82571) {
1243		e1000_set_laa_state_82571(&adapter->hw, TRUE);
1244		e1000_rar_set(&adapter->hw, adapter->hw.mac.addr,
1245		    E1000_RAR_ENTRIES - 1);
1246	}
1247
1248	/* Initialize the hardware */
1249	em_reset(adapter);
1250	em_update_link_status(adapter);
1251
1252	/* Setup VLAN support, basic and offload if available */
1253	E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);
1254
1255	/* Use real VLAN Filter support? */
1256	if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) {
1257		if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
1258			/* Use real VLAN Filter support */
1259			em_setup_vlan_hw_support(adapter);
1260		else {
1261			u32 ctrl;
1262			ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
1263			ctrl |= E1000_CTRL_VME;
1264			E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
1265		}
1266	}
1267
1268	/* Set hardware offload abilities */
1269	ifp->if_hwassist = 0;
1270	if (ifp->if_capenable & IFCAP_TXCSUM)
1271		ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
1272	if (ifp->if_capenable & IFCAP_TSO4)
1273		ifp->if_hwassist |= CSUM_TSO;
1274
1275	/* Configure for OS presence */
1276	em_init_manageability(adapter);
1277
1278	/* Prepare transmit descriptors and buffers */
1279	em_setup_transmit_structures(adapter);
1280	em_initialize_transmit_unit(adapter);
1281
1282	/* Setup Multicast table */
1283	em_set_multi(adapter);
1284
1285	/* Prepare receive descriptors and buffers */
1286	if (em_setup_receive_structures(adapter)) {
1287		device_printf(dev, "Could not setup receive structures\n");
1288		em_stop(adapter);
1289		return;
1290	}
1291	em_initialize_receive_unit(adapter);
1292
1293	/* Don't lose promiscuous settings */
1294	em_set_promisc(adapter);
1295
1296	ifp->if_drv_flags |= IFF_DRV_RUNNING;
1297	ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
1298
1299	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
1300	e1000_clear_hw_cntrs_base_generic(&adapter->hw);
1301
1302	/* MSI/X configuration for 82574 */
1303	if (adapter->hw.mac.type == e1000_82574) {
1304		int tmp;
1305		tmp = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
1306		tmp |= E1000_CTRL_EXT_PBA_CLR;
1307		E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, tmp);
1308		/* Set the IVAR - interrupt vector routing. */
1309		E1000_WRITE_REG(&adapter->hw, E1000_IVAR, adapter->ivars);
1310	}
1311
1312#ifdef DEVICE_POLLING
1313	/*
1314	 * Only enable interrupts if we are not polling, make sure
1315	 * they are off otherwise.
1316	 */
1317	if (ifp->if_capenable & IFCAP_POLLING)
1318		em_disable_intr(adapter);
1319	else
1320#endif /* DEVICE_POLLING */
1321		em_enable_intr(adapter);
1322
1323	/* AMT based hardware can now take control from firmware */
1324	if (adapter->has_manage && adapter->has_amt)
1325		em_get_hw_control(adapter);
1326
1327	/* Don't reset the phy next time init gets called */
1328	adapter->hw.phy.reset_disable = TRUE;
1329}
1330
1331static void
1332em_init(void *arg)
1333{
1334	struct adapter *adapter = arg;
1335
1336	EM_CORE_LOCK(adapter);
1337	em_init_locked(adapter);
1338	EM_CORE_UNLOCK(adapter);
1339}
1340
1341
1342#ifdef DEVICE_POLLING
1343/*********************************************************************
1344 *
1345 *  Legacy polling routine: note this only works with single queue
1346 *
1347 *********************************************************************/
1348static int
1349em_poll(struct ifnet *ifp, enum poll_cmd cmd, int count)
1350{
1351	struct adapter *adapter = ifp->if_softc;
1352	struct tx_ring	*txr = adapter->tx_rings;
1353	struct rx_ring	*rxr = adapter->rx_rings;
1354	u32		reg_icr, rx_done = 0;
1355
1356	EM_CORE_LOCK(adapter);
1357	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
1358		EM_CORE_UNLOCK(adapter);
1359		return (rx_done);
1360	}
1361
1362	if (cmd == POLL_AND_CHECK_STATUS) {
1363		reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1364		if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
1365			callout_stop(&adapter->timer);
1366			adapter->hw.mac.get_link_status = 1;
1367			em_update_link_status(adapter);
1368			callout_reset(&adapter->timer, hz,
1369			    em_local_timer, adapter);
1370		}
1371	}
1372	EM_CORE_UNLOCK(adapter);
1373
1374	rx_done = em_rxeof(rxr, count);
1375
1376	EM_TX_LOCK(txr);
1377	em_txeof(txr);
1378#ifdef EM_MULTIQUEUE
1379	if (!drbr_empty(ifp, txr->br))
1380		em_mq_start_locked(ifp, txr, NULL);
1381#else
1382	if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1383		em_start_locked(ifp, txr);
1384#endif
1385	EM_TX_UNLOCK(txr);
1386
1387	return (rx_done);
1388}
1389#endif /* DEVICE_POLLING */
1390
1391
1392/*********************************************************************
1393 *
1394 *  Fast Legacy/MSI Combined Interrupt Service routine
1395 *
1396 *********************************************************************/
1397static int
1398em_irq_fast(void *arg)
1399{
1400	struct adapter	*adapter = arg;
1401	struct ifnet	*ifp;
1402	u32		reg_icr;
1403
1404	ifp = adapter->ifp;
1405
1406	reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1407
1408	/* Hot eject?  */
1409	if (reg_icr == 0xffffffff)
1410		return FILTER_STRAY;
1411
1412	/* Definitely not our interrupt.  */
1413	if (reg_icr == 0x0)
1414		return FILTER_STRAY;
1415
1416	/*
1417	 * Starting with the 82571 chip, bit 31 should be used to
1418	 * determine whether the interrupt belongs to us.
1419	 */
1420	if (adapter->hw.mac.type >= e1000_82571 &&
1421	    (reg_icr & E1000_ICR_INT_ASSERTED) == 0)
1422		return FILTER_STRAY;
1423
1424	em_disable_intr(adapter);
1425	taskqueue_enqueue(adapter->tq, &adapter->que_task);
1426
1427	/* Link status change */
1428	if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
1429		adapter->hw.mac.get_link_status = 1;
1430		taskqueue_enqueue(taskqueue_fast, &adapter->link_task);
1431	}
1432
1433	if (reg_icr & E1000_ICR_RXO)
1434		adapter->rx_overruns++;
1435	return FILTER_HANDLED;
1436}
1437
1438/* Combined RX/TX handler, used by Legacy and MSI */
1439static void
1440em_handle_que(void *context, int pending)
1441{
1442	struct adapter	*adapter = context;
1443	struct ifnet	*ifp = adapter->ifp;
1444	struct tx_ring	*txr = adapter->tx_rings;
1445	struct rx_ring	*rxr = adapter->rx_rings;
1446	bool		more_rx;
1447
1448
1449	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1450		more_rx = em_rxeof(rxr, adapter->rx_process_limit);
1451		EM_TX_LOCK(txr);
1452		em_txeof(txr);
1453#ifdef EM_MULTIQUEUE
1454		if (!drbr_empty(ifp, txr->br))
1455			em_mq_start_locked(ifp, txr, NULL);
1456#else
1457		if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1458			em_start_locked(ifp, txr);
1459#endif
1460		EM_TX_UNLOCK(txr);
1461		if (more_rx) {
1462			taskqueue_enqueue(adapter->tq, &adapter->que_task);
1463			return;
1464		}
1465	}
1466
1467	em_enable_intr(adapter);
1468	return;
1469}
1470
1471
1472/*********************************************************************
1473 *
1474 *  MSIX Interrupt Service Routines
1475 *
1476 **********************************************************************/
1477static void
1478em_msix_tx(void *arg)
1479{
1480	struct tx_ring *txr = arg;
1481	struct adapter *adapter = txr->adapter;
1482
1483	++txr->tx_irq;
1484	EM_TX_LOCK(txr);
1485	em_txeof(txr);
1486	EM_TX_UNLOCK(txr);
1487	E1000_WRITE_REG(&adapter->hw, E1000_IMS, txr->ims);
1488	return;
1489}
1490
1491/*********************************************************************
1492 *
1493 *  MSIX RX Interrupt Service routine
1494 *
1495 **********************************************************************/
1496
1497static void
1498em_msix_rx(void *arg)
1499{
1500	struct rx_ring	*rxr = arg;
1501	struct adapter	*adapter = rxr->adapter;
1502	bool		more;
1503
1504	EM_RX_LOCK(rxr);
1505	++rxr->rx_irq;
1506	more = em_rxeof(rxr, adapter->rx_process_limit);
1507	EM_RX_UNLOCK(rxr);
1508	if (more)
1509		taskqueue_enqueue(rxr->tq, &rxr->rx_task);
1510	else
1511		/* Reenable this interrupt */
1512		E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxr->ims);
1513	return;
1514}
1515
1516/*********************************************************************
1517 *
1518 *  MSIX Link Fast Interrupt Service routine
1519 *
1520 **********************************************************************/
1521static void
1522em_msix_link(void *arg)
1523{
1524	struct adapter	*adapter = arg;
1525	u32		reg_icr;
1526
1527	++adapter->link_irq;
1528	reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1529
1530	if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
1531		adapter->hw.mac.get_link_status = 1;
1532		taskqueue_enqueue(taskqueue_fast, &adapter->link_task);
1533	} else
1534		E1000_WRITE_REG(&adapter->hw, E1000_IMS,
1535		    EM_MSIX_LINK | E1000_IMS_LSC);
1536	return;
1537}
1538
1539static void
1540em_handle_rx(void *context, int pending)
1541{
1542	struct rx_ring	*rxr = context;
1543	struct adapter	*adapter = rxr->adapter;
1544        bool            more;
1545
1546	EM_RX_LOCK(rxr);
1547	more = em_rxeof(rxr, adapter->rx_process_limit);
1548	EM_RX_UNLOCK(rxr);
1549	if (more)
1550		taskqueue_enqueue(rxr->tq, &rxr->rx_task);
1551	else
1552		/* Reenable this interrupt */
1553		E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxr->ims);
1554}
1555
1556static void
1557em_handle_tx(void *context, int pending)
1558{
1559	struct tx_ring	*txr = context;
1560	struct adapter	*adapter = txr->adapter;
1561	struct ifnet	*ifp = adapter->ifp;
1562
1563	if (!EM_TX_TRYLOCK(txr))
1564		return;
1565
1566	em_txeof(txr);
1567
1568#ifdef EM_MULTIQUEUE
1569	if (!drbr_empty(ifp, txr->br))
1570		em_mq_start_locked(ifp, txr, NULL);
1571#else
1572	if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1573		em_start_locked(ifp, txr);
1574#endif
1575	E1000_WRITE_REG(&adapter->hw, E1000_IMS, txr->ims);
1576	EM_TX_UNLOCK(txr);
1577}
1578
1579static void
1580em_handle_link(void *context, int pending)
1581{
1582	struct adapter	*adapter = context;
1583	struct ifnet *ifp = adapter->ifp;
1584
1585	if (!(ifp->if_drv_flags & IFF_DRV_RUNNING))
1586		return;
1587
1588	EM_CORE_LOCK(adapter);
1589	callout_stop(&adapter->timer);
1590	em_update_link_status(adapter);
1591	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
1592	E1000_WRITE_REG(&adapter->hw, E1000_IMS,
1593	    EM_MSIX_LINK | E1000_IMS_LSC);
1594	EM_CORE_UNLOCK(adapter);
1595}
1596
1597
1598/*********************************************************************
1599 *
1600 *  Media Ioctl callback
1601 *
1602 *  This routine is called whenever the user queries the status of
1603 *  the interface using ifconfig.
1604 *
1605 **********************************************************************/
1606static void
1607em_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
1608{
1609	struct adapter *adapter = ifp->if_softc;
1610	u_char fiber_type = IFM_1000_SX;
1611
1612	INIT_DEBUGOUT("em_media_status: begin");
1613
1614	EM_CORE_LOCK(adapter);
1615	em_update_link_status(adapter);
1616
1617	ifmr->ifm_status = IFM_AVALID;
1618	ifmr->ifm_active = IFM_ETHER;
1619
1620	if (!adapter->link_active) {
1621		EM_CORE_UNLOCK(adapter);
1622		return;
1623	}
1624
1625	ifmr->ifm_status |= IFM_ACTIVE;
1626
1627	if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
1628	    (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
1629		ifmr->ifm_active |= fiber_type | IFM_FDX;
1630	} else {
1631		switch (adapter->link_speed) {
1632		case 10:
1633			ifmr->ifm_active |= IFM_10_T;
1634			break;
1635		case 100:
1636			ifmr->ifm_active |= IFM_100_TX;
1637			break;
1638		case 1000:
1639			ifmr->ifm_active |= IFM_1000_T;
1640			break;
1641		}
1642		if (adapter->link_duplex == FULL_DUPLEX)
1643			ifmr->ifm_active |= IFM_FDX;
1644		else
1645			ifmr->ifm_active |= IFM_HDX;
1646	}
1647	EM_CORE_UNLOCK(adapter);
1648}
1649
1650/*********************************************************************
1651 *
1652 *  Media Ioctl callback
1653 *
1654 *  This routine is called when the user changes speed/duplex using
1655 *  media/mediopt option with ifconfig.
1656 *
1657 **********************************************************************/
1658static int
1659em_media_change(struct ifnet *ifp)
1660{
1661	struct adapter *adapter = ifp->if_softc;
1662	struct ifmedia  *ifm = &adapter->media;
1663
1664	INIT_DEBUGOUT("em_media_change: begin");
1665
1666	if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
1667		return (EINVAL);
1668
1669	EM_CORE_LOCK(adapter);
1670	switch (IFM_SUBTYPE(ifm->ifm_media)) {
1671	case IFM_AUTO:
1672		adapter->hw.mac.autoneg = DO_AUTO_NEG;
1673		adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
1674		break;
1675	case IFM_1000_LX:
1676	case IFM_1000_SX:
1677	case IFM_1000_T:
1678		adapter->hw.mac.autoneg = DO_AUTO_NEG;
1679		adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
1680		break;
1681	case IFM_100_TX:
1682		adapter->hw.mac.autoneg = FALSE;
1683		adapter->hw.phy.autoneg_advertised = 0;
1684		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1685			adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_FULL;
1686		else
1687			adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_HALF;
1688		break;
1689	case IFM_10_T:
1690		adapter->hw.mac.autoneg = FALSE;
1691		adapter->hw.phy.autoneg_advertised = 0;
1692		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1693			adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_FULL;
1694		else
1695			adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_HALF;
1696		break;
1697	default:
1698		device_printf(adapter->dev, "Unsupported media type\n");
1699	}
1700
1701	/* As the speed/duplex settings my have changed we need to
1702	 * reset the PHY.
1703	 */
1704	adapter->hw.phy.reset_disable = FALSE;
1705
1706	em_init_locked(adapter);
1707	EM_CORE_UNLOCK(adapter);
1708
1709	return (0);
1710}
1711
1712/*********************************************************************
1713 *
1714 *  This routine maps the mbufs to tx descriptors.
1715 *
1716 *  return 0 on success, positive on failure
1717 **********************************************************************/
1718
1719static int
1720em_xmit(struct tx_ring *txr, struct mbuf **m_headp)
1721{
1722	struct adapter		*adapter = txr->adapter;
1723	bus_dma_segment_t	segs[EM_MAX_SCATTER];
1724	bus_dmamap_t		map;
1725	struct em_buffer	*tx_buffer, *tx_buffer_mapped;
1726	struct e1000_tx_desc	*ctxd = NULL;
1727	struct mbuf		*m_head;
1728	u32			txd_upper, txd_lower, txd_used, txd_saved;
1729	int			nsegs, i, j, first, last = 0;
1730	int			error, do_tso, tso_desc = 0;
1731
1732	m_head = *m_headp;
1733	txd_upper = txd_lower = txd_used = txd_saved = 0;
1734	do_tso = ((m_head->m_pkthdr.csum_flags & CSUM_TSO) != 0);
1735
1736	/*
1737	 * TSO workaround:
1738	 *  If an mbuf is only header we need
1739	 *     to pull 4 bytes of data into it.
1740	 */
1741	if (do_tso && (m_head->m_len <= M_TSO_LEN)) {
1742		m_head = m_pullup(m_head, M_TSO_LEN + 4);
1743		*m_headp = m_head;
1744		if (m_head == NULL)
1745			return (ENOBUFS);
1746	}
1747
1748	/*
1749	 * Map the packet for DMA
1750	 *
1751	 * Capture the first descriptor index,
1752	 * this descriptor will have the index
1753	 * of the EOP which is the only one that
1754	 * now gets a DONE bit writeback.
1755	 */
1756	first = txr->next_avail_desc;
1757	tx_buffer = &txr->tx_buffers[first];
1758	tx_buffer_mapped = tx_buffer;
1759	map = tx_buffer->map;
1760
1761	error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
1762	    *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
1763
1764	/*
1765	 * There are two types of errors we can (try) to handle:
1766	 * - EFBIG means the mbuf chain was too long and bus_dma ran
1767	 *   out of segments.  Defragment the mbuf chain and try again.
1768	 * - ENOMEM means bus_dma could not obtain enough bounce buffers
1769	 *   at this point in time.  Defer sending and try again later.
1770	 * All other errors, in particular EINVAL, are fatal and prevent the
1771	 * mbuf chain from ever going through.  Drop it and report error.
1772	 */
1773	if (error == EFBIG) {
1774		struct mbuf *m;
1775
1776		m = m_defrag(*m_headp, M_DONTWAIT);
1777		if (m == NULL) {
1778			adapter->mbuf_alloc_failed++;
1779			m_freem(*m_headp);
1780			*m_headp = NULL;
1781			return (ENOBUFS);
1782		}
1783		*m_headp = m;
1784
1785		/* Try it again */
1786		error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
1787		    *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
1788
1789		if (error) {
1790			adapter->no_tx_dma_setup++;
1791			m_freem(*m_headp);
1792			*m_headp = NULL;
1793			return (error);
1794		}
1795	} else if (error == ENOMEM) {
1796		adapter->no_tx_dma_setup++;
1797		return (error);
1798	} else if (error != 0) {
		/* Fatal (e.g. EINVAL): drop the chain as documented above */
		adapter->no_tx_dma_setup++;
		m_freem(*m_headp);
		*m_headp = NULL;
		return (error);
	}
1799
1800	/*
1801	 * TSO Hardware workaround, if this packet is not
1802	 * TSO, and is only a single descriptor long, and
1803	 * it follows a TSO burst, then we need to add a
1804	 * sentinel descriptor to prevent premature writeback.
1805	 */
1806	if ((do_tso == 0) && (txr->tx_tso == TRUE)) {
1807		if (nsegs == 1)
1808			tso_desc = TRUE;
1809		txr->tx_tso = FALSE;
1810	}
1811
1812	if (nsegs > (txr->tx_avail - 2)) {
1813		txr->no_desc_avail++;
1814		bus_dmamap_unload(txr->txtag, map);
1815		return (ENOBUFS);
1816	}
1817	m_head = *m_headp;
1818
1819	/* Do hardware assists */
1820#if __FreeBSD_version >= 700000
1821	if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
1822		error = em_tso_setup(txr, m_head, &txd_upper, &txd_lower);
1823		if (error != TRUE) {
			/* Header parse failed; drop our DMA map load */
			bus_dmamap_unload(txr->txtag, map);
1824			return (ENXIO); /* something foobar */
		}
1825		/* we need to make a final sentinel transmit desc */
1826		tso_desc = TRUE;
1827	} else
1828#endif
1829	if (m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD)
1830		em_transmit_checksum_setup(txr,  m_head,
1831		    &txd_upper, &txd_lower);
1832
1833	i = txr->next_avail_desc;
1834
1835	/* Set up our transmit descriptors */
1836	for (j = 0; j < nsegs; j++) {
1837		bus_size_t seg_len;
1838		bus_addr_t seg_addr;
1839
1840		tx_buffer = &txr->tx_buffers[i];
1841		ctxd = &txr->tx_base[i];
1842		seg_addr = segs[j].ds_addr;
1843		seg_len  = segs[j].ds_len;
1844		/*
1845		** TSO Workaround:
1846		** If this is the last descriptor, we want to
1847		** split it so we have a small final sentinel
1848		*/
1849		if (tso_desc && (j == (nsegs - 1)) && (seg_len > 8)) {
1850			seg_len -= 4;
1851			ctxd->buffer_addr = htole64(seg_addr);
1852			ctxd->lower.data = htole32(
1853			    adapter->txd_cmd | txd_lower | seg_len);
1854			ctxd->upper.data =
1855			    htole32(txd_upper);
1856			if (++i == adapter->num_tx_desc)
1857				i = 0;
1858			/* Now make the sentinel */
1859			++txd_used; /* using an extra txd */
1860			ctxd = &txr->tx_base[i];
1861			tx_buffer = &txr->tx_buffers[i];
1862			ctxd->buffer_addr =
1863			    htole64(seg_addr + seg_len);
1864			ctxd->lower.data = htole32(
1865			    adapter->txd_cmd | txd_lower | 4);
1866			ctxd->upper.data =
1867			    htole32(txd_upper);
1868			last = i;
1869			if (++i == adapter->num_tx_desc)
1870				i = 0;
1871		} else {
1872			ctxd->buffer_addr = htole64(seg_addr);
1873			ctxd->lower.data = htole32(
1874			    adapter->txd_cmd | txd_lower | seg_len);
1875			ctxd->upper.data =
1876			    htole32(txd_upper);
1877			last = i;
1878			if (++i == adapter->num_tx_desc)
1879				i = 0;
1880		}
1881		tx_buffer->m_head = NULL;
1882		tx_buffer->next_eop = -1;
1883	}
1884
1885	txr->next_avail_desc = i;
1886	txr->tx_avail -= nsegs;
1887	if (tso_desc) /* TSO used an extra for sentinel */
1888		txr->tx_avail -= txd_used;
1889
1890	if (m_head->m_flags & M_VLANTAG) {
1891		/* Set the vlan id. */
1892		ctxd->upper.fields.special =
1893		    htole16(m_head->m_pkthdr.ether_vtag);
1894		/* Tell hardware to add tag */
1895		ctxd->lower.data |= htole32(E1000_TXD_CMD_VLE);
1896	}
1897
1898	tx_buffer->m_head = m_head;
1899	tx_buffer_mapped->map = tx_buffer->map;
1900	tx_buffer->map = map;
1901	bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);
1902
1903	/*
1904	 * Last Descriptor of Packet
1905	 * needs End Of Packet (EOP)
1906	 * and Report Status (RS)
1907	 */
1908	ctxd->lower.data |=
1909	    htole32(E1000_TXD_CMD_EOP | E1000_TXD_CMD_RS);
1910	/*
1911	 * Keep track in the first buffer which
1912	 * descriptor will be written back
1913	 */
1914	tx_buffer = &txr->tx_buffers[first];
1915	tx_buffer->next_eop = last;
1916
1917	/*
1918	 * Advance the Transmit Descriptor Tail (TDT), this tells the E1000
1919	 * that this frame is available to transmit.
1920	 */
1921	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
1922	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1923	E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), i);
1924
1925	return (0);
1926}
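/*
 * Editor's sketch (not driver code, compiled out): the descriptor-ring
 * accounting em_xmit() relies on.  Indices wrap modulo num_tx_desc, and
 * the "tx_avail - 2" headroom check above leaves room for the possible
 * TSO sentinel descriptor.  The function name is hypothetical.
 */
#if 0
static int
em_ring_next_example(int i, int num_tx_desc)
{
	/* Equivalent to the inline "if (++i == num_tx_desc) i = 0;" */
	return ((i + 1) % num_tx_desc);
}
#endif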
1927
1928static void
1929em_set_promisc(struct adapter *adapter)
1930{
1931	struct ifnet	*ifp = adapter->ifp;
1932	u32		reg_rctl;
1933
1934	reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
1935
1936	if (ifp->if_flags & IFF_PROMISC) {
1937		reg_rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
1938		/* Turn this on if you want to see bad packets */
1939		if (em_debug_sbp)
1940			reg_rctl |= E1000_RCTL_SBP;
1941		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
1942	} else if (ifp->if_flags & IFF_ALLMULTI) {
1943		reg_rctl |= E1000_RCTL_MPE;
1944		reg_rctl &= ~E1000_RCTL_UPE;
1945		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
1946	}
1947}
1948
1949static void
1950em_disable_promisc(struct adapter *adapter)
1951{
1952	u32	reg_rctl;
1953
1954	reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
1955
1956	reg_rctl &=  (~E1000_RCTL_UPE);
1957	reg_rctl &=  (~E1000_RCTL_MPE);
1958	reg_rctl &=  (~E1000_RCTL_SBP);
1959	E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
1960}
1961
1962
1963/*********************************************************************
1964 *  Multicast Update
1965 *
1966 *  This routine is called whenever multicast address list is updated.
1967 *
1968 **********************************************************************/
1969
1970static void
1971em_set_multi(struct adapter *adapter)
1972{
1973	struct ifnet	*ifp = adapter->ifp;
1974	struct ifmultiaddr *ifma;
1975	u32 reg_rctl = 0;
1976	u8  *mta; /* Multicast array memory */
1977	int mcnt = 0;
1978
1979	IOCTL_DEBUGOUT("em_set_multi: begin");
1980
1981	if (adapter->hw.mac.type == e1000_82542 &&
1982	    adapter->hw.revision_id == E1000_REVISION_2) {
1983		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
1984		if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
1985			e1000_pci_clear_mwi(&adapter->hw);
1986		reg_rctl |= E1000_RCTL_RST;
1987		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
1988		msec_delay(5);
1989	}
1990
1991	/* Allocate temporary memory to setup array */
1992	mta = malloc(sizeof(u8) *
1993	    (ETH_ADDR_LEN * MAX_NUM_MULTICAST_ADDRESSES),
1994	    M_DEVBUF, M_NOWAIT | M_ZERO);
1995	if (mta == NULL)
1996		panic("em_set_multi memory failure\n");
1997
1998#if __FreeBSD_version < 800000
1999	IF_ADDR_LOCK(ifp);
2000#else
2001	if_maddr_rlock(ifp);
2002#endif
2003	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
2004		if (ifma->ifma_addr->sa_family != AF_LINK)
2005			continue;
2006
2007		if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
2008			break;
2009
2010		bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
2011		    &mta[mcnt * ETH_ADDR_LEN], ETH_ADDR_LEN);
2012		mcnt++;
2013	}
2014#if __FreeBSD_version < 800000
2015	IF_ADDR_UNLOCK(ifp);
2016#else
2017	if_maddr_runlock(ifp);
2018#endif
2019	if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) {
2020		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2021		reg_rctl |= E1000_RCTL_MPE;
2022		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2023	} else
2024		e1000_update_mc_addr_list(&adapter->hw, mta, mcnt);
2025
2026	if (adapter->hw.mac.type == e1000_82542 &&
2027	    adapter->hw.revision_id == E1000_REVISION_2) {
2028		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2029		reg_rctl &= ~E1000_RCTL_RST;
2030		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2031		msec_delay(5);
2032		if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
2033			e1000_pci_set_mwi(&adapter->hw);
2034	}
2035	free(mta, M_DEVBUF);
2036}
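/*
 * Editor's sketch (not driver code, compiled out): the flat layout of the
 * temporary multicast array built in em_set_multi().  Entry n occupies
 * bytes [n * ETH_ADDR_LEN, (n + 1) * ETH_ADDR_LEN).  The function name is
 * hypothetical.
 */
#if 0
static u8 *
em_mta_entry_example(u8 *mta, int n)
{
	return (&mta[n * ETH_ADDR_LEN]);
}
#endif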
2037
2038
2039/*********************************************************************
2040 *  Timer routine
2041 *
2042 *  This routine checks for link status and updates statistics.
2043 *
2044 **********************************************************************/
2045
2046static void
2047em_local_timer(void *arg)
2048{
2049	struct adapter	*adapter = arg;
2050	struct ifnet	*ifp = adapter->ifp;
2051	struct tx_ring	*txr = adapter->tx_rings;
2052
2053	EM_CORE_LOCK_ASSERT(adapter);
2054
2055	em_update_link_status(adapter);
2056	em_update_stats_counters(adapter);
2057
2058	/* Reset LAA into RAR[0] on 82571 */
2059	if (e1000_get_laa_state_82571(&adapter->hw) == TRUE)
2060		e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);
2061
2062	if (em_display_debug_stats && (ifp->if_drv_flags & IFF_DRV_RUNNING))
2063		em_print_hw_stats(adapter);
2064
2065	/*
2066	** Check for time since any descriptor was cleaned
2067	*/
2068	for (int i = 0; i < adapter->num_queues; i++, txr++) {
2069		EM_TX_LOCK(txr);
2070		if (txr->watchdog_check == FALSE) {
2071			EM_TX_UNLOCK(txr);
2072			continue;
2073		}
2074		if ((ticks - txr->watchdog_time) > EM_WATCHDOG)
2075			goto hung;
2076		EM_TX_UNLOCK(txr);
2077	}
2078
2079	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
2080	return;
2081hung:
2082	device_printf(adapter->dev, "Watchdog timeout -- resetting\n");
2083	ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2084	adapter->watchdog_events++;
2085	EM_TX_UNLOCK(txr);
2086	em_init_locked(adapter);
2087}
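/*
 * Editor's sketch (not driver code, compiled out): the watchdog arithmetic
 * used above.  The global "ticks" advances hz times per second, so the
 * comparison fires once EM_WATCHDOG ticks pass without em_txeof() having
 * refreshed watchdog_time.  The function name is hypothetical.
 */
#if 0
static int
em_watchdog_expired_example(int now, int watchdog_time)
{
	return ((now - watchdog_time) > EM_WATCHDOG);
}
#endif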
2088
2089
2090static void
2091em_update_link_status(struct adapter *adapter)
2092{
2093	struct e1000_hw *hw = &adapter->hw;
2094	struct ifnet *ifp = adapter->ifp;
2095	device_t dev = adapter->dev;
2096	u32 link_check = 0;
2097
2098	/* Get the cached link value or read phy for real */
2099	switch (hw->phy.media_type) {
2100	case e1000_media_type_copper:
2101		if (hw->mac.get_link_status) {
2102			/* Do the work to read phy */
2103			e1000_check_for_link(hw);
2104			link_check = !hw->mac.get_link_status;
2105			if (link_check) /* ESB2 fix */
2106				e1000_cfg_on_link_up(hw);
2107		} else
2108			link_check = TRUE;
2109		break;
2110	case e1000_media_type_fiber:
2111		e1000_check_for_link(hw);
2112		link_check = (E1000_READ_REG(hw, E1000_STATUS) &
2113		    E1000_STATUS_LU);
2114		break;
2115	case e1000_media_type_internal_serdes:
2116		e1000_check_for_link(hw);
2117		link_check = adapter->hw.mac.serdes_has_link;
2118		break;
2119	default:
2120	case e1000_media_type_unknown:
2121		break;
2122	}
2123
2124	/* Now check for a transition */
2125	if (link_check && (adapter->link_active == 0)) {
2126		e1000_get_speed_and_duplex(hw, &adapter->link_speed,
2127		    &adapter->link_duplex);
2128		/* Check if we must disable SPEED_MODE bit on PCI-E */
2129		if ((adapter->link_speed != SPEED_1000) &&
2130		    ((hw->mac.type == e1000_82571) ||
2131		    (hw->mac.type == e1000_82572))) {
2132			int tarc0;
2133			tarc0 = E1000_READ_REG(hw, E1000_TARC(0));
2134			tarc0 &= ~SPEED_MODE_BIT;
2135			E1000_WRITE_REG(hw, E1000_TARC(0), tarc0);
2136		}
2137		if (bootverbose)
2138			device_printf(dev, "Link is up %d Mbps %s\n",
2139			    adapter->link_speed,
2140			    ((adapter->link_duplex == FULL_DUPLEX) ?
2141			    "Full Duplex" : "Half Duplex"));
2142		adapter->link_active = 1;
2143		adapter->smartspeed = 0;
2144		ifp->if_baudrate = adapter->link_speed * 1000000;
2145		if_link_state_change(ifp, LINK_STATE_UP);
2146	} else if (!link_check && (adapter->link_active == 1)) {
2147		ifp->if_baudrate = adapter->link_speed = 0;
2148		adapter->link_duplex = 0;
2149		if (bootverbose)
2150			device_printf(dev, "Link is Down\n");
2151		adapter->link_active = 0;
2152		/* Link down, disable watchdog */
2153		// JFV change later
2154		//adapter->watchdog_check = FALSE;
2155		if_link_state_change(ifp, LINK_STATE_DOWN);
2156	}
2157}
2158
2159/*********************************************************************
2160 *
2161 *  This routine disables all traffic on the adapter by issuing a
2162 *  global reset on the MAC and deallocates TX/RX buffers.
2163 *
2164 *  This routine should always be called with BOTH the CORE
2165 *  and TX locks.
2166 **********************************************************************/
2167
2168static void
2169em_stop(void *arg)
2170{
2171	struct adapter	*adapter = arg;
2172	struct ifnet	*ifp = adapter->ifp;
2173	struct tx_ring	*txr = adapter->tx_rings;
2174
2175	EM_CORE_LOCK_ASSERT(adapter);
2176
2177	INIT_DEBUGOUT("em_stop: begin");
2178
2179	em_disable_intr(adapter);
2180	callout_stop(&adapter->timer);
2181
2182	/* Tell the stack that the interface is no longer active */
2183	ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
2184
2185	/* Unarm watchdog timer. */
2186	for (int i = 0; i < adapter->num_queues; i++, txr++) {
2187		EM_TX_LOCK(txr);
2188		txr->watchdog_check = FALSE;
2189		EM_TX_UNLOCK(txr);
2190	}
2191
2192	e1000_reset_hw(&adapter->hw);
2193	E1000_WRITE_REG(&adapter->hw, E1000_WUC, 0);
2194
2195	e1000_led_off(&adapter->hw);
2196	e1000_cleanup_led(&adapter->hw);
2197}
2198
2199
2200/*********************************************************************
2201 *
2202 *  Determine hardware revision.
2203 *
2204 **********************************************************************/
2205static void
2206em_identify_hardware(struct adapter *adapter)
2207{
2208	device_t dev = adapter->dev;
2209
2210	/* Make sure our PCI config space has the necessary bits set */
2211	adapter->hw.bus.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2);
2212	if (!((adapter->hw.bus.pci_cmd_word & PCIM_CMD_BUSMASTEREN) &&
2213	    (adapter->hw.bus.pci_cmd_word & PCIM_CMD_MEMEN))) {
2214		device_printf(dev, "Memory Access and/or Bus Master bits "
2215		    "were not set!\n");
2216		adapter->hw.bus.pci_cmd_word |=
2217		(PCIM_CMD_BUSMASTEREN | PCIM_CMD_MEMEN);
2218		pci_write_config(dev, PCIR_COMMAND,
2219		    adapter->hw.bus.pci_cmd_word, 2);
2220	}
2221
2222	/* Save off the information about this board */
2223	adapter->hw.vendor_id = pci_get_vendor(dev);
2224	adapter->hw.device_id = pci_get_device(dev);
2225	adapter->hw.revision_id = pci_read_config(dev, PCIR_REVID, 1);
2226	adapter->hw.subsystem_vendor_id =
2227	    pci_read_config(dev, PCIR_SUBVEND_0, 2);
2228	adapter->hw.subsystem_device_id =
2229	    pci_read_config(dev, PCIR_SUBDEV_0, 2);
2230
2231	/* Do Shared Code Init and Setup */
2232	if (e1000_set_mac_type(&adapter->hw)) {
2233		device_printf(dev, "Setup init failure\n");
2234		return;
2235	}
2236}
2237
2238static int
2239em_allocate_pci_resources(struct adapter *adapter)
2240{
2241	device_t	dev = adapter->dev;
2242	int		rid;
2243
2244	rid = PCIR_BAR(0);
2245	adapter->memory = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
2246	    &rid, RF_ACTIVE);
2247	if (adapter->memory == NULL) {
2248		device_printf(dev, "Unable to allocate bus resource: memory\n");
2249		return (ENXIO);
2250	}
2251	adapter->osdep.mem_bus_space_tag =
2252	    rman_get_bustag(adapter->memory);
2253	adapter->osdep.mem_bus_space_handle =
2254	    rman_get_bushandle(adapter->memory);
2255	adapter->hw.hw_addr = (u8 *)&adapter->osdep.mem_bus_space_handle;
2256
2257	/* Default to a single queue */
2258	adapter->num_queues = 1;
2259
2260	/*
2261	 * Setup MSI/X or MSI if PCI Express
2262	 */
2263	adapter->msix = em_setup_msix(adapter);
2264
2265	adapter->hw.back = &adapter->osdep;
2266
2267	return (0);
2268}
2269
2270/*********************************************************************
2271 *
2272 *  Setup the Legacy or MSI Interrupt handler
2273 *
2274 **********************************************************************/
2275int
2276em_allocate_legacy(struct adapter *adapter)
2277{
2278	device_t dev = adapter->dev;
2279	int error, rid = 0;
2280
2281	/* Manually turn off all interrupts */
2282	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2283
2284	if (adapter->msix == 1) /* using MSI */
2285		rid = 1;
2286	/* We allocate a single interrupt resource */
2287	adapter->res = bus_alloc_resource_any(dev,
2288	    SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2289	if (adapter->res == NULL) {
2290		device_printf(dev, "Unable to allocate bus resource: "
2291		    "interrupt\n");
2292		return (ENXIO);
2293	}
2294
2295	/*
2296	 * Allocate a fast interrupt and the associated
2297	 * deferred processing contexts.
2298	 */
2299	TASK_INIT(&adapter->que_task, 0, em_handle_que, adapter);
2300	TASK_INIT(&adapter->link_task, 0, em_handle_link, adapter);
2301	adapter->tq = taskqueue_create_fast("em_taskq", M_NOWAIT,
2302	    taskqueue_thread_enqueue, &adapter->tq);
2303	taskqueue_start_threads(&adapter->tq, 1, PI_NET, "%s taskq",
2304	    device_get_nameunit(adapter->dev));
2305	if ((error = bus_setup_intr(dev, adapter->res, INTR_TYPE_NET,
2306	    em_irq_fast, NULL, adapter, &adapter->tag)) != 0) {
2307		device_printf(dev, "Failed to register fast interrupt "
2308			    "handler: %d\n", error);
2309		taskqueue_free(adapter->tq);
2310		adapter->tq = NULL;
2311		return (error);
2312	}
2313
2314	return (0);
2315}
2316
2317/*********************************************************************
2318 *
2319 *  Setup the MSIX Interrupt handlers
2320 *   This is not really multiqueue, rather
2321 *   it's just multiple interrupt vectors.
2322 *
2323 **********************************************************************/
2324int
2325em_allocate_msix(struct adapter *adapter)
2326{
2327	device_t	dev = adapter->dev;
2328	struct		tx_ring *txr = adapter->tx_rings;
2329	struct		rx_ring *rxr = adapter->rx_rings;
2330	int		error, rid, vector = 0;
2331
2332
2333	/* Make sure all interrupts are disabled */
2334	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2335
2336	/* First set up ring resources */
2337	for (int i = 0; i < adapter->num_queues; i++, txr++, rxr++) {
2338
2339		/* RX ring */
2340		rid = vector + 1;
2341
2342		rxr->res = bus_alloc_resource_any(dev,
2343		    SYS_RES_IRQ, &rid, RF_ACTIVE);
2344		if (rxr->res == NULL) {
2345			device_printf(dev,
2346			    "Unable to allocate bus resource: "
2347			    "RX MSIX Interrupt %d\n", i);
2348			return (ENXIO);
2349		}
2350		if ((error = bus_setup_intr(dev, rxr->res,
2351		    INTR_TYPE_NET | INTR_MPSAFE, NULL, em_msix_rx,
2352		    rxr, &rxr->tag)) != 0) {
2353			device_printf(dev, "Failed to register RX handler");
2354			return (error);
2355		}
2356		rxr->msix = vector++; /* NOTE increment vector for TX */
2357		TASK_INIT(&rxr->rx_task, 0, em_handle_rx, rxr);
2358		rxr->tq = taskqueue_create_fast("em_rxq", M_NOWAIT,
2359		    taskqueue_thread_enqueue, &rxr->tq);
2360		taskqueue_start_threads(&rxr->tq, 1, PI_NET, "%s rxq",
2361		    device_get_nameunit(adapter->dev));
2362		/*
2363		** Set the bit to enable interrupt
2364		** in E1000_IMS -- bits 20 and 21
2365		** are for RX0 and RX1, note this has
2366		** NOTHING to do with the MSIX vector
2367		*/
2368		rxr->ims = 1 << (20 + i);
2369		adapter->ivars |= (8 | rxr->msix) << (i * 4);
2370
2371		/* TX ring */
2372		rid = vector + 1;
2373		txr->res = bus_alloc_resource_any(dev,
2374		    SYS_RES_IRQ, &rid, RF_ACTIVE);
2375		if (txr->res == NULL) {
2376			device_printf(dev,
2377			    "Unable to allocate bus resource: "
2378			    "TX MSIX Interrupt %d\n", i);
2379			return (ENXIO);
2380		}
2381		if ((error = bus_setup_intr(dev, txr->res,
2382		    INTR_TYPE_NET | INTR_MPSAFE, NULL, em_msix_tx,
2383		    txr, &txr->tag)) != 0) {
2384			device_printf(dev, "Failed to register TX handler");
2385			return (error);
2386		}
2387		txr->msix = vector++; /* Increment vector for next pass */
2388		TASK_INIT(&txr->tx_task, 0, em_handle_tx, txr);
2389		txr->tq = taskqueue_create_fast("em_txq", M_NOWAIT,
2390		    taskqueue_thread_enqueue, &txr->tq);
2391		taskqueue_start_threads(&txr->tq, 1, PI_NET, "%s txq",
2392		    device_get_nameunit(adapter->dev));
2393		/*
2394		** Set the bit to enable interrupt
2395		** in E1000_IMS -- bits 22 and 23
2396		** are for TX0 and TX1, note this has
2397		** NOTHING to do with the MSIX vector
2398		*/
2399		txr->ims = 1 << (22 + i);
2400		adapter->ivars |= (8 | txr->msix) << (8 + (i * 4));
2401	}
2402
2403	/* Link interrupt */
2404	++rid;
2405	adapter->res = bus_alloc_resource_any(dev,
2406	    SYS_RES_IRQ, &rid, RF_ACTIVE);
2407	if (!adapter->res) {
2408		device_printf(dev, "Unable to allocate "
2409		    "bus resource: Link interrupt [%d]\n", rid);
2410		return (ENXIO);
2411	}
2412	/* Set the link handler function */
2413	error = bus_setup_intr(dev, adapter->res,
2414	    INTR_TYPE_NET | INTR_MPSAFE, NULL,
2415	    em_msix_link, adapter, &adapter->tag);
2416	if (error) {
2417		adapter->res = NULL;
2418		device_printf(dev, "Failed to register LINK handler");
2419		return (error);
2420	}
2421	adapter->linkvec = vector;
2422	adapter->ivars |=  (8 | vector) << 16;
2423	adapter->ivars |= 0x80000000;
2424	TASK_INIT(&adapter->link_task, 0, em_handle_link, adapter);
2425	adapter->tq = taskqueue_create_fast("em_link", M_NOWAIT,
2426	    taskqueue_thread_enqueue, &adapter->tq);
2427	taskqueue_start_threads(&adapter->tq, 1, PI_NET, "%s linkq",
2428	    device_get_nameunit(adapter->dev));
2429
2430	return (0);
2431}
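/*
 * Editor's sketch (not driver code, compiled out): the 82574 IVAR image
 * assembled piecewise above, shown for the single-queue case.  Each cause
 * gets a 4-bit field: bit 3 (the "8") marks the entry valid and bits 0-2
 * hold the MSI-X vector; RX queues occupy the low nibbles, TX queues start
 * at bit 8, the link cause sits at bit 16, and bit 31 is set as the 82574
 * requires (see its datasheet for the exact semantics).  The function name
 * is hypothetical.
 */
#if 0
static u32
em_ivar_image_example(int rxvec, int txvec, int linkvec)
{
	u32 ivars = 0;

	ivars |= (8 | rxvec) << 0;	/* RX queue 0 */
	ivars |= (8 | txvec) << 8;	/* TX queue 0 */
	ivars |= (8 | linkvec) << 16;	/* link/other */
	ivars |= 0x80000000;
	return (ivars);
}
#endif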
2432
2433
2434static void
2435em_free_pci_resources(struct adapter *adapter)
2436{
2437	device_t	dev = adapter->dev;
2438	struct tx_ring	*txr;
2439	struct rx_ring	*rxr;
2440	int		rid;
2441
2442
2443	/*
2444	** Release all the queue interrupt resources:
2445	*/
2446	for (int i = 0; i < adapter->num_queues; i++) {
2447		txr = &adapter->tx_rings[i];
2448		rxr = &adapter->rx_rings[i];
2449		rid = txr->msix + 1;
2450		if (txr->tag != NULL) {
2451			bus_teardown_intr(dev, txr->res, txr->tag);
2452			txr->tag = NULL;
2453		}
2454		if (txr->res != NULL)
2455			bus_release_resource(dev, SYS_RES_IRQ,
2456			    rid, txr->res);
2457		rid = rxr->msix + 1;
2458		if (rxr->tag != NULL) {
2459			bus_teardown_intr(dev, rxr->res, rxr->tag);
2460			rxr->tag = NULL;
2461		}
2462		if (rxr->res != NULL)
2463			bus_release_resource(dev, SYS_RES_IRQ,
2464			    rid, rxr->res);
2465	}
2466
2467	if (adapter->linkvec) /* we are doing MSIX */
2468		rid = adapter->linkvec + 1;
2469	else
2470		rid = (adapter->msix != 0) ? 1 : 0;
2471
2472	if (adapter->tag != NULL) {
2473		bus_teardown_intr(dev, adapter->res, adapter->tag);
2474		adapter->tag = NULL;
2475	}
2476
2477	if (adapter->res != NULL)
2478		bus_release_resource(dev, SYS_RES_IRQ, rid, adapter->res);
2479
2480
2481	if (adapter->msix)
2482		pci_release_msi(dev);
2483
2484	if (adapter->msix_mem != NULL)
2485		bus_release_resource(dev, SYS_RES_MEMORY,
2486		    PCIR_BAR(EM_MSIX_BAR), adapter->msix_mem);
2487
2488	if (adapter->memory != NULL)
2489		bus_release_resource(dev, SYS_RES_MEMORY,
2490		    PCIR_BAR(0), adapter->memory);
2491
2492	if (adapter->flash != NULL)
2493		bus_release_resource(dev, SYS_RES_MEMORY,
2494		    EM_FLASH, adapter->flash);
2495}
2496
2497/*
2498 * Setup MSI or MSI/X
2499 */
2500static int
2501em_setup_msix(struct adapter *adapter)
2502{
2503	device_t dev = adapter->dev;
2504	int val = 0;
2505
2506
2507	/* Setup MSI/X for Hartwell */
2508	if ((adapter->hw.mac.type == e1000_82574) &&
2509	    (em_enable_msix == TRUE)) {
2510		/* Map the MSIX BAR */
2511		int rid = PCIR_BAR(EM_MSIX_BAR);
2512		adapter->msix_mem = bus_alloc_resource_any(dev,
2513		    SYS_RES_MEMORY, &rid, RF_ACTIVE);
2514		if (!adapter->msix_mem) {
2515			/* May not be enabled */
2516			device_printf(adapter->dev,
2517			    "Unable to map MSIX table\n");
2518			goto msi;
2519		}
2520		val = pci_msix_count(dev);
2521		if (val != 5) {
2522			bus_release_resource(dev, SYS_RES_MEMORY,
2523			    PCIR_BAR(EM_MSIX_BAR), adapter->msix_mem);
2524			adapter->msix_mem = NULL;
2525			device_printf(adapter->dev,
2526			    "Wrong MSIX vector count, using MSI\n");
2527			goto msi;
2528		}
2529		if (em_msix_queues == 2) {
2530			val = 5;
2531			adapter->num_queues = 2;
2532		} else {
2533			val = 3;
2534			adapter->num_queues = 1;
2535		}
2536		if (pci_alloc_msix(dev, &val) == 0) {
2537			device_printf(adapter->dev,
2538			    "Using MSIX interrupts "
2539			    "with %d vectors\n", val);
2540		}
2541
2542		return (val);
2543	}
2544msi:
2545	val = pci_msi_count(dev);
2546	if (val == 1 && pci_alloc_msi(dev, &val) == 0) {
2547		adapter->msix = 1;
2548		device_printf(adapter->dev, "Using MSI interrupt\n");
2549		return (val);
2550	}
2551	/* Should only happen due to manual intervention */
2552	device_printf(adapter->dev, "Setup MSIX failure\n");
2553	return (0);
2554}
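/*
 * Editor's sketch (not driver code, compiled out): the vector budget behind
 * the "val != 5" test above -- one vector per RX ring, one per TX ring,
 * plus one for link, so two queues need 5 vectors and one queue needs 3.
 * The function name is hypothetical.
 */
#if 0
static int
em_msix_budget_example(int num_queues)
{
	return (2 * num_queues + 1);	/* RX + TX per queue, plus link */
}
#endif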
2555
2556
2557/*********************************************************************
2558 *
2559 *  Initialize the hardware to a configuration
2560 *  as specified by the adapter structure.
2561 *
2562 **********************************************************************/
2563static void
2564em_reset(struct adapter *adapter)
2565{
2566	device_t	dev = adapter->dev;
2567	struct e1000_hw	*hw = &adapter->hw;
2568	u16		rx_buffer_size;
2569
2570	INIT_DEBUGOUT("em_reset: begin");
2571
2572	/* Set up smart power down as default off on newer adapters. */
2573	if (!em_smart_pwr_down && (hw->mac.type == e1000_82571 ||
2574	    hw->mac.type == e1000_82572)) {
2575		u16 phy_tmp = 0;
2576
2577		/* Speed up time to link by disabling smart power down. */
2578		e1000_read_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, &phy_tmp);
2579		phy_tmp &= ~IGP02E1000_PM_SPD;
2580		e1000_write_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, phy_tmp);
2581	}
2582
2583	/*
2584	 * These parameters control the automatic generation (Tx) and
2585	 * response (Rx) to Ethernet PAUSE frames.
2586	 * - High water mark should allow for at least two frames to be
2587	 *   received after sending an XOFF.
2588	 * - Low water mark works best when it is very near the high water mark.
2589	 *   This allows the receiver to restart by sending XON when it has
2590	 *   drained a bit. Here we use an arbitrary value of 1500 which will
2591	 *   restart after one full frame is pulled from the buffer. There
2592	 *   could be several smaller frames in the buffer and if so they will
2593	 *   not trigger the XON until their total number reduces the buffer
2594	 *   by 1500.
2595	 * - The pause time is fairly large at 1000 x 512ns = 512 usec.
2596	 */
2597	rx_buffer_size = ((E1000_READ_REG(hw, E1000_PBA) & 0xffff) << 10 );
2598
2599	hw->fc.high_water = rx_buffer_size -
2600	    roundup2(adapter->max_frame_size, 1024);
2601	hw->fc.low_water = hw->fc.high_water - 1500;
2602
2603	if (hw->mac.type == e1000_80003es2lan)
2604		hw->fc.pause_time = 0xFFFF;
2605	else
2606		hw->fc.pause_time = EM_FC_PAUSE_TIME;
2607
2608	hw->fc.send_xon = TRUE;
2609
2610	/* Set Flow control, use the tunable location if sane */
2611	if ((em_fc_setting >= 0) && (em_fc_setting < 4))
2612		hw->fc.requested_mode = em_fc_setting;
2613	else
2614		hw->fc.requested_mode = e1000_fc_none;
2615
2616	/* Override - workaround for PCHLAN issue */
2617	if (hw->mac.type == e1000_pchlan)
2618		hw->fc.requested_mode = e1000_fc_rx_pause;
2619
2620	/* Issue a global reset */
2621	e1000_reset_hw(hw);
2622	E1000_WRITE_REG(hw, E1000_WUC, 0);
2623
2624	if (e1000_init_hw(hw) < 0) {
2625		device_printf(dev, "Hardware Initialization Failed\n");
2626		return;
2627	}
2628
2629	E1000_WRITE_REG(hw, E1000_VET, ETHERTYPE_VLAN);
2630	e1000_get_phy_info(hw);
2631	e1000_check_for_link(hw);
2632	return;
2633}
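/*
 * Editor's sketch (not driver code, compiled out): the watermark math from
 * em_reset() above.  PBA reports the RX packet-buffer size in KB, so
 * shifting left by 10 converts it to bytes; high water leaves room for one
 * max-sized frame rounded up to 1KB, and low water sits 1500 bytes below
 * it.  The function name is hypothetical.
 */
#if 0
static void
em_fc_watermarks_example(u32 pba, u32 max_frame_size, u32 *high, u32 *low)
{
	u32 rx_buffer_size = (pba & 0xffff) << 10;

	*high = rx_buffer_size - roundup2(max_frame_size, 1024);
	*low = *high - 1500;
}
#endif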
2634
2635/*********************************************************************
2636 *
2637 *  Setup networking device structure and register an interface.
2638 *
2639 **********************************************************************/
2640static void
2641em_setup_interface(device_t dev, struct adapter *adapter)
2642{
2643	struct ifnet   *ifp;
2644
2645	INIT_DEBUGOUT("em_setup_interface: begin");
2646
2647	ifp = adapter->ifp = if_alloc(IFT_ETHER);
2648	if (ifp == NULL)
2649		panic("%s: can not if_alloc()", device_get_nameunit(dev));
2650	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
2651	ifp->if_mtu = ETHERMTU;
2652	ifp->if_init =  em_init;
2653	ifp->if_softc = adapter;
2654	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
2655	ifp->if_ioctl = em_ioctl;
2656	ifp->if_start = em_start;
2657	IFQ_SET_MAXLEN(&ifp->if_snd, adapter->num_tx_desc - 1);
2658	ifp->if_snd.ifq_drv_maxlen = adapter->num_tx_desc - 1;
2659	IFQ_SET_READY(&ifp->if_snd);
2660
2661	ether_ifattach(ifp, adapter->hw.mac.addr);
2662
2663	ifp->if_capabilities = ifp->if_capenable = 0;
2664
2665#ifdef EM_MULTIQUEUE
2666	/* Multiqueue tx functions */
2667	ifp->if_transmit = em_mq_start;
2668	ifp->if_qflush = em_qflush;
2669#endif
2670
2671	ifp->if_capabilities |= IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM;
2672	ifp->if_capenable |= IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM;
2673
2674	/* Enable TSO by default, can disable with ifconfig */
2675	ifp->if_capabilities |= IFCAP_TSO4;
2676	ifp->if_capenable |= IFCAP_TSO4;
2677
2678	/*
2679	 * Tell the upper layer(s) we
2680	 * support full VLAN capability
2681	 */
2682	ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);
2683	ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU;
2684	ifp->if_capenable |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU;
2685
2686	/*
2687	** Don't turn this on by default: if vlans are
2688	** created on another pseudo device (e.g. lagg),
2689	** then vlan events are not passed through, breaking
2690	** operation, but with HW FILTER off it works. If
2691	** using vlans directly on the em driver you can
2692	** enable this and get full hardware tag filtering.
2693	*/
2694	ifp->if_capabilities |= IFCAP_VLAN_HWFILTER;
2695
2696#ifdef DEVICE_POLLING
2697	ifp->if_capabilities |= IFCAP_POLLING;
2698#endif
2699
2700	/* Enable All WOL methods by default */
2701	if (adapter->wol) {
2702		ifp->if_capabilities |= IFCAP_WOL;
2703		ifp->if_capenable |= IFCAP_WOL;
2704	}
2705
2706	/*
2707	 * Specify the media types supported by this adapter and register
2708	 * callbacks to update media and link information
2709	 */
2710	ifmedia_init(&adapter->media, IFM_IMASK,
2711	    em_media_change, em_media_status);
2712	if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
2713	    (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
2714		u_char fiber_type = IFM_1000_SX;	/* default type */
2715
2716		ifmedia_add(&adapter->media, IFM_ETHER | fiber_type | IFM_FDX,
2717			    0, NULL);
2718		ifmedia_add(&adapter->media, IFM_ETHER | fiber_type, 0, NULL);
2719	} else {
2720		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T, 0, NULL);
2721		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T | IFM_FDX,
2722			    0, NULL);
2723		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX,
2724			    0, NULL);
2725		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
2726			    0, NULL);
2727		if (adapter->hw.phy.type != e1000_phy_ife) {
2728			ifmedia_add(&adapter->media,
2729				IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
2730			ifmedia_add(&adapter->media,
2731				IFM_ETHER | IFM_1000_T, 0, NULL);
2732		}
2733	}
2734	ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
2735	ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
2736}
2737
2738
2739/*
2740 * Manage DMA'able memory.
2741 */
2742static void
2743em_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
2744{
2745	if (error)
2746		return;
2747	*(bus_addr_t *) arg = segs[0].ds_addr;
2748}
2749
2750static int
2751em_dma_malloc(struct adapter *adapter, bus_size_t size,
2752        struct em_dma_alloc *dma, int mapflags)
2753{
2754	int error;
2755
2756	error = bus_dma_tag_create(bus_get_dma_tag(adapter->dev), /* parent */
2757				EM_DBA_ALIGN, 0,	/* alignment, bounds */
2758				BUS_SPACE_MAXADDR,	/* lowaddr */
2759				BUS_SPACE_MAXADDR,	/* highaddr */
2760				NULL, NULL,		/* filter, filterarg */
2761				size,			/* maxsize */
2762				1,			/* nsegments */
2763				size,			/* maxsegsize */
2764				0,			/* flags */
2765				NULL,			/* lockfunc */
2766				NULL,			/* lockarg */
2767				&dma->dma_tag);
2768	if (error) {
2769		device_printf(adapter->dev,
2770		    "%s: bus_dma_tag_create failed: %d\n",
2771		    __func__, error);
2772		goto fail_0;
2773	}
2774
2775	error = bus_dmamem_alloc(dma->dma_tag, (void**) &dma->dma_vaddr,
2776	    BUS_DMA_NOWAIT | BUS_DMA_COHERENT, &dma->dma_map);
2777	if (error) {
2778		device_printf(adapter->dev,
2779		    "%s: bus_dmamem_alloc(%ju) failed: %d\n",
2780		    __func__, (uintmax_t)size, error);
2781		goto fail_2;
2782	}
2783
2784	dma->dma_paddr = 0;
2785	error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
2786	    size, em_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT);
2787	if (error || dma->dma_paddr == 0) {
2788		device_printf(adapter->dev,
2789		    "%s: bus_dmamap_load failed: %d\n",
2790		    __func__, error);
2791		goto fail_3;
2792	}
2793
2794	return (0);
2795
2796fail_3:
2797	bus_dmamap_unload(dma->dma_tag, dma->dma_map);
2798fail_2:
2799	bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
2800	bus_dma_tag_destroy(dma->dma_tag);
2801fail_0:
2802	dma->dma_map = NULL;
2803	dma->dma_tag = NULL;
2804
2805	return (error);
2806}
2807
2808static void
2809em_dma_free(struct adapter *adapter, struct em_dma_alloc *dma)
2810{
2811	if (dma->dma_tag == NULL)
2812		return;
2813	if (dma->dma_map != NULL) {
2814		bus_dmamap_sync(dma->dma_tag, dma->dma_map,
2815		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
2816		bus_dmamap_unload(dma->dma_tag, dma->dma_map);
2817		bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
2818		dma->dma_map = NULL;
2819	}
2820	bus_dma_tag_destroy(dma->dma_tag);
2821	dma->dma_tag = NULL;
2822}
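/*
 * Editor's sketch (not driver code, compiled out): the allocate/use/free
 * pattern the ring-setup code below follows with these helpers -- the CPU
 * touches the buffer through dma_vaddr while the device is handed
 * dma_paddr.  The function name is hypothetical, and it assumes the static
 * prototype for em_dma_malloc() earlier in this file.
 */
#if 0
static int
em_dma_usage_example(struct adapter *adapter)
{
	struct em_dma_alloc dma;

	if (em_dma_malloc(adapter, PAGE_SIZE, &dma, BUS_DMA_NOWAIT) != 0)
		return (ENOMEM);
	bzero(dma.dma_vaddr, PAGE_SIZE);	/* CPU view */
	/* ... program dma.dma_paddr into a device register here ... */
	em_dma_free(adapter, &dma);
	return (0);
}
#endif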
2823
2824
2825/*********************************************************************
2826 *
2827 *  Allocate memory for the transmit and receive rings, and then
2828 *  the descriptors associated with each, called only once at attach.
2829 *
2830 **********************************************************************/
2831static int
2832em_allocate_queues(struct adapter *adapter)
2833{
2834	device_t		dev = adapter->dev;
2835	struct tx_ring		*txr = NULL;
2836	struct rx_ring		*rxr = NULL;
2837	int rsize, tsize, error = E1000_SUCCESS;
2838	int txconf = 0, rxconf = 0;
2839
2840
2841	/* Allocate the TX ring struct memory */
2842	if (!(adapter->tx_rings =
2843	    (struct tx_ring *) malloc(sizeof(struct tx_ring) *
2844	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2845		device_printf(dev, "Unable to allocate TX ring memory\n");
2846		error = ENOMEM;
2847		goto fail;
2848	}
2849
2850	/* Now allocate the RX */
2851	if (!(adapter->rx_rings =
2852	    (struct rx_ring *) malloc(sizeof(struct rx_ring) *
2853	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2854		device_printf(dev, "Unable to allocate RX ring memory\n");
2855		error = ENOMEM;
2856		goto rx_fail;
2857	}
2858
2859	tsize = roundup2(adapter->num_tx_desc *
2860	    sizeof(struct e1000_tx_desc), EM_DBA_ALIGN);
2861	/*
2862	 * Now set up the TX queues, txconf is needed to handle the
2863	 * possibility that things fail midcourse and we need to
2864	 * undo memory gracefully
2865	 */
2866	for (int i = 0; i < adapter->num_queues; i++, txconf++) {
2867		/* Set up some basics */
2868		txr = &adapter->tx_rings[i];
2869		txr->adapter = adapter;
2870		txr->me = i;
2871
2872		/* Initialize the TX lock */
2873		snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
2874		    device_get_nameunit(dev), txr->me);
2875		mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF);
2876
2877		if (em_dma_malloc(adapter, tsize,
2878			&txr->txdma, BUS_DMA_NOWAIT)) {
2879			device_printf(dev,
2880			    "Unable to allocate TX Descriptor memory\n");
2881			error = ENOMEM;
2882			goto err_tx_desc;
2883		}
2884		txr->tx_base = (struct e1000_tx_desc *)txr->txdma.dma_vaddr;
2885		bzero((void *)txr->tx_base, tsize);
2886
2887        	if (em_allocate_transmit_buffers(txr)) {
2888			device_printf(dev,
2889			    "Critical Failure setting up transmit buffers\n");
2890			error = ENOMEM;
2891			goto err_tx_desc;
2892        	}
2893#if __FreeBSD_version >= 800000
2894		/* Allocate a buf ring */
2895		txr->br = buf_ring_alloc(4096, M_DEVBUF,
2896		    M_WAITOK, &txr->tx_mtx);
2897#endif
2898	}
2899
2900	/*
2901	 * Next the RX queues...
2902	 */
2903	rsize = roundup2(adapter->num_rx_desc *
2904	    sizeof(struct e1000_rx_desc), EM_DBA_ALIGN);
2905	for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
2906		rxr = &adapter->rx_rings[i];
2907		rxr->adapter = adapter;
2908		rxr->me = i;
2909
2910		/* Initialize the RX lock */
2911		snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
2912		    device_get_nameunit(dev), rxr->me);
2913		mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF);
2914
2915		if (em_dma_malloc(adapter, rsize,
2916			&rxr->rxdma, BUS_DMA_NOWAIT)) {
2917			device_printf(dev,
2918			    "Unable to allocate RxDescriptor memory\n");
2919			error = ENOMEM;
2920			goto err_rx_desc;
2921		}
2922		rxr->rx_base = (struct e1000_rx_desc *)rxr->rxdma.dma_vaddr;
2923		bzero((void *)rxr->rx_base, rsize);
2924
2925        	/* Allocate receive buffers for the ring*/
2926		if (em_allocate_receive_buffers(rxr)) {
2927			device_printf(dev,
2928			    "Critical Failure setting up receive buffers\n");
2929			error = ENOMEM;
2930			goto err_rx_desc;
2931		}
2932	}
2933
2934	return (0);
2935
2936err_rx_desc:
2937	for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
2938		em_dma_free(adapter, &rxr->rxdma);
2939err_tx_desc:
2940	for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
2941		em_dma_free(adapter, &txr->txdma);
2942	free(adapter->rx_rings, M_DEVBUF);
2943rx_fail:
#if __FreeBSD_version >= 800000
2944	buf_ring_free(txr->br, M_DEVBUF);
#endif
2945	free(adapter->tx_rings, M_DEVBUF);
2946fail:
2947	return (error);
2948}
2949
2950
2951/*********************************************************************
2952 *
2953 *  Allocate memory for tx_buffer structures. The tx_buffer stores all
2954 *  the information needed to transmit a packet on the wire. This is
2955 *  called only once at attach, setup is done every reset.
2956 *
2957 **********************************************************************/
2958static int
2959em_allocate_transmit_buffers(struct tx_ring *txr)
2960{
2961	struct adapter *adapter = txr->adapter;
2962	device_t dev = adapter->dev;
2963	struct em_buffer *txbuf;
2964	int error, i;
2965
2966	/*
2967	 * Setup DMA descriptor areas.
2968	 */
2969	if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
2970			       1, 0,			/* alignment, bounds */
2971			       BUS_SPACE_MAXADDR,	/* lowaddr */
2972			       BUS_SPACE_MAXADDR,	/* highaddr */
2973			       NULL, NULL,		/* filter, filterarg */
2974			       EM_TSO_SIZE,		/* maxsize */
2975			       EM_MAX_SCATTER,		/* nsegments */
2976			       PAGE_SIZE,		/* maxsegsize */
2977			       0,			/* flags */
2978			       NULL,			/* lockfunc */
2979			       NULL,			/* lockfuncarg */
2980			       &txr->txtag))) {
2981		device_printf(dev,"Unable to allocate TX DMA tag\n");
2982		goto fail;
2983	}
2984
2985	if (!(txr->tx_buffers =
2986	    (struct em_buffer *) malloc(sizeof(struct em_buffer) *
2987	    adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2988		device_printf(dev, "Unable to allocate tx_buffer memory\n");
2989		error = ENOMEM;
2990		goto fail;
2991	}
2992
2993        /* Create the descriptor buffer dma maps */
2994	txbuf = txr->tx_buffers;
2995	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
2996		error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
2997		if (error != 0) {
2998			device_printf(dev, "Unable to create TX DMA map\n");
2999			goto fail;
3000		}
3001	}
3002
3003	return 0;
3004fail:
3005	/* We free all, it handles case where we are in the middle */
3006	em_free_transmit_structures(adapter);
3007	return (error);
3008}
3009
3010/*********************************************************************
3011 *
3012 *  Initialize a transmit ring.
3013 *
3014 **********************************************************************/
3015static void
3016em_setup_transmit_ring(struct tx_ring *txr)
3017{
3018	struct adapter *adapter = txr->adapter;
3019	struct em_buffer *txbuf;
3020	int i;
3021
3022	/* Clear the old descriptor contents */
3023	EM_TX_LOCK(txr);
3024	bzero((void *)txr->tx_base,
3025	      (sizeof(struct e1000_tx_desc)) * adapter->num_tx_desc);
3026	/* Reset indices */
3027	txr->next_avail_desc = 0;
3028	txr->next_to_clean = 0;
3029
3030	/* Free any existing tx buffers. */
3031	txbuf = txr->tx_buffers;
3032	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3033		if (txbuf->m_head != NULL) {
3034			bus_dmamap_sync(txr->txtag, txbuf->map,
3035			    BUS_DMASYNC_POSTWRITE);
3036			bus_dmamap_unload(txr->txtag, txbuf->map);
3037			m_freem(txbuf->m_head);
3038			txbuf->m_head = NULL;
3039		}
3040		/* clear the watch index */
3041		txbuf->next_eop = -1;
3042	}
3043
3044	/* Set number of descriptors available */
3045	txr->tx_avail = adapter->num_tx_desc;
3046
3047	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3048	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3049	EM_TX_UNLOCK(txr);
3050}
3051
3052/*********************************************************************
3053 *
3054 *  Initialize all transmit rings.
3055 *
3056 **********************************************************************/
3057static void
3058em_setup_transmit_structures(struct adapter *adapter)
3059{
3060	struct tx_ring *txr = adapter->tx_rings;
3061
3062	for (int i = 0; i < adapter->num_queues; i++, txr++)
3063		em_setup_transmit_ring(txr);
3064
3065	return;
3066}
3067
3068/*********************************************************************
3069 *
3070 *  Enable transmit unit.
3071 *
3072 **********************************************************************/
3073static void
3074em_initialize_transmit_unit(struct adapter *adapter)
3075{
3076	struct tx_ring	*txr = adapter->tx_rings;
3077	struct e1000_hw	*hw = &adapter->hw;
3078	u32	tctl, tarc, tipg = 0;
3079
3080	INIT_DEBUGOUT("em_initialize_transmit_unit: begin");
3081
3082	for (int i = 0; i < adapter->num_queues; i++, txr++) {
3083		u64 bus_addr = txr->txdma.dma_paddr;
3084		/* Base and Len of TX Ring */
3085		E1000_WRITE_REG(hw, E1000_TDLEN(i),
3086	    	    adapter->num_tx_desc * sizeof(struct e1000_tx_desc));
3087		E1000_WRITE_REG(hw, E1000_TDBAH(i),
3088	    	    (u32)(bus_addr >> 32));
3089		E1000_WRITE_REG(hw, E1000_TDBAL(i),
3090	    	    (u32)bus_addr);
3091		/* Init the HEAD/TAIL indices */
3092		E1000_WRITE_REG(hw, E1000_TDT(i), 0);
3093		E1000_WRITE_REG(hw, E1000_TDH(i), 0);
3094
3095		HW_DEBUGOUT2("Base = %x, Length = %x\n",
3096		    E1000_READ_REG(&adapter->hw, E1000_TDBAL(i)),
3097		    E1000_READ_REG(&adapter->hw, E1000_TDLEN(i)));
3098
3099		txr->watchdog_check = FALSE;
3100	}
3101
3102	/* Set the default values for the Tx Inter Packet Gap timer */
3103	switch (adapter->hw.mac.type) {
3104	case e1000_82542:
3105		tipg = DEFAULT_82542_TIPG_IPGT;
3106		tipg |= DEFAULT_82542_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT;
3107		tipg |= DEFAULT_82542_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT;
3108		break;
3109	case e1000_80003es2lan:
3110		tipg = DEFAULT_82543_TIPG_IPGR1;
3111		tipg |= DEFAULT_80003ES2LAN_TIPG_IPGR2 <<
3112		    E1000_TIPG_IPGR2_SHIFT;
3113		break;
3114	default:
3115		if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
3116		    (adapter->hw.phy.media_type ==
3117		    e1000_media_type_internal_serdes))
3118			tipg = DEFAULT_82543_TIPG_IPGT_FIBER;
3119		else
3120			tipg = DEFAULT_82543_TIPG_IPGT_COPPER;
3121		tipg |= DEFAULT_82543_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT;
3122		tipg |= DEFAULT_82543_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT;
3123	}
3124
3125	E1000_WRITE_REG(&adapter->hw, E1000_TIPG, tipg);
3126	E1000_WRITE_REG(&adapter->hw, E1000_TIDV, adapter->tx_int_delay.value);
3127
3128	if (adapter->hw.mac.type >= e1000_82540)
3129		E1000_WRITE_REG(&adapter->hw, E1000_TADV,
3130		    adapter->tx_abs_int_delay.value);
3131
3132	if ((adapter->hw.mac.type == e1000_82571) ||
3133	    (adapter->hw.mac.type == e1000_82572)) {
3134		tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
3135		tarc |= SPEED_MODE_BIT;
3136		E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3137	} else if (adapter->hw.mac.type == e1000_80003es2lan) {
3138		tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
3139		tarc |= 1;
3140		E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3141		tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(1));
3142		tarc |= 1;
3143		E1000_WRITE_REG(&adapter->hw, E1000_TARC(1), tarc);
3144	}
3145
3146	adapter->txd_cmd = E1000_TXD_CMD_IFCS;
3147	if (adapter->tx_int_delay.value > 0)
3148		adapter->txd_cmd |= E1000_TXD_CMD_IDE;
3149
3150	/* Program the Transmit Control Register */
3151	tctl = E1000_READ_REG(&adapter->hw, E1000_TCTL);
3152	tctl &= ~E1000_TCTL_CT;
3153	tctl |= (E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN |
3154		   (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT));
3155
3156	if (adapter->hw.mac.type >= e1000_82571)
3157		tctl |= E1000_TCTL_MULR;
3158
3159	/* This write will effectively turn on the transmit unit. */
3160	E1000_WRITE_REG(&adapter->hw, E1000_TCTL, tctl);
3161
3162}
3163
3164
3165/*********************************************************************
3166 *
3167 *  Free all transmit rings.
3168 *
3169 **********************************************************************/
3170static void
3171em_free_transmit_structures(struct adapter *adapter)
3172{
3173	struct tx_ring *txr = adapter->tx_rings;
3174
3175	for (int i = 0; i < adapter->num_queues; i++, txr++) {
3176		EM_TX_LOCK(txr);
3177		em_free_transmit_buffers(txr);
3178		em_dma_free(adapter, &txr->txdma);
3179		EM_TX_UNLOCK(txr);
3180		EM_TX_LOCK_DESTROY(txr);
3181	}
3182
3183	free(adapter->tx_rings, M_DEVBUF);
3184}
3185
3186/*********************************************************************
3187 *
3188 *  Free transmit ring related data structures.
3189 *
3190 **********************************************************************/
3191static void
3192em_free_transmit_buffers(struct tx_ring *txr)
3193{
3194	struct adapter		*adapter = txr->adapter;
3195	struct em_buffer	*txbuf;
3196
3197	INIT_DEBUGOUT("free_transmit_ring: begin");
3198
3199	if (txr->tx_buffers == NULL)
3200		return;
3201
3202	for (int i = 0; i < adapter->num_tx_desc; i++) {
3203		txbuf = &txr->tx_buffers[i];
3204		if (txbuf->m_head != NULL) {
3205			bus_dmamap_sync(txr->txtag, txbuf->map,
3206			    BUS_DMASYNC_POSTWRITE);
3207			bus_dmamap_unload(txr->txtag,
3208			    txbuf->map);
3209			m_freem(txbuf->m_head);
3210			txbuf->m_head = NULL;
3211			if (txbuf->map != NULL) {
3212				bus_dmamap_destroy(txr->txtag,
3213				    txbuf->map);
3214				txbuf->map = NULL;
3215			}
3216		} else if (txbuf->map != NULL) {
3217			bus_dmamap_unload(txr->txtag,
3218			    txbuf->map);
3219			bus_dmamap_destroy(txr->txtag,
3220			    txbuf->map);
3221			txbuf->map = NULL;
3222		}
3223	}
3224#if __FreeBSD_version >= 800000
3225	if (txr->br != NULL)
3226		buf_ring_free(txr->br, M_DEVBUF);
3227#endif
3228	if (txr->tx_buffers != NULL) {
3229		free(txr->tx_buffers, M_DEVBUF);
3230		txr->tx_buffers = NULL;
3231	}
3232	if (txr->txtag != NULL) {
3233		bus_dma_tag_destroy(txr->txtag);
3234		txr->txtag = NULL;
3235	}
3236	return;
3237}
3238
3239
3240/*********************************************************************
3241 *
3242 *  The offload context needs to be set when we transfer the first
3243 *  packet of a particular protocol (TCP/UDP). This routine has been
3244 *  enhanced to deal with inserted VLAN headers, and IPv6 (not complete).
3245 *
3246 *  Added back the old method of keeping the current context type
3247 *  and not setting if unnecessary, as this is reported to be a
3248 *  big performance win.  -jfv
3249 **********************************************************************/
3250static void
3251em_transmit_checksum_setup(struct tx_ring *txr, struct mbuf *mp,
3252    u32 *txd_upper, u32 *txd_lower)
3253{
3254	struct adapter			*adapter = txr->adapter;
3255	struct e1000_context_desc	*TXD = NULL;
3256	struct em_buffer *tx_buffer;
3257	struct ether_vlan_header *eh;
3258	struct ip *ip = NULL;
3259	struct ip6_hdr *ip6;
3260	int cur, ehdrlen;
3261	u32 cmd, hdr_len, ip_hlen;
3262	u16 etype;
3263	u8 ipproto;
3264
3265
3266	cmd = hdr_len = ipproto = 0;
3267	cur = txr->next_avail_desc;
3268
3269	/*
3270	 * Determine where frame payload starts.
3271	 * Jump over vlan headers if already present,
3272	 * helpful for QinQ too.
3273	 */
3274	eh = mtod(mp, struct ether_vlan_header *);
3275	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
3276		etype = ntohs(eh->evl_proto);
3277		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3278	} else {
3279		etype = ntohs(eh->evl_encap_proto);
3280		ehdrlen = ETHER_HDR_LEN;
3281	}
3282
3283	/*
3284	 * We only support TCP/UDP for IPv4 and IPv6 for the moment.
3285	 * TODO: Support SCTP too when it hits the tree.
3286	 */
3287	switch (etype) {
3288	case ETHERTYPE_IP:
3289		ip = (struct ip *)(mp->m_data + ehdrlen);
3290		ip_hlen = ip->ip_hl << 2;
3291
3292		/* Setup of IP header checksum. */
3293		if (mp->m_pkthdr.csum_flags & CSUM_IP) {
3294			/*
3295			 * Start offset for header checksum calculation.
3296			 * End offset for header checksum calculation.
3297			 * Offset of place to put the checksum.
3298			 */
3299			TXD = (struct e1000_context_desc *)
3300			    &txr->tx_base[cur];
3301			TXD->lower_setup.ip_fields.ipcss = ehdrlen;
3302			TXD->lower_setup.ip_fields.ipcse =
3303			    htole16(ehdrlen + ip_hlen);
3304			TXD->lower_setup.ip_fields.ipcso =
3305			    ehdrlen + offsetof(struct ip, ip_sum);
3306			cmd |= E1000_TXD_CMD_IP;
3307			*txd_upper |= E1000_TXD_POPTS_IXSM << 8;
3308		}
3309
3310		if (mp->m_len < ehdrlen + ip_hlen)
3311			return;	/* failure */
3312
3313		hdr_len = ehdrlen + ip_hlen;
3314		ipproto = ip->ip_p;
3315
3316		break;
3317	case ETHERTYPE_IPV6:
3318		ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
3319		ip_hlen = sizeof(struct ip6_hdr); /* XXX: No header stacking. */
3320
3321		if (mp->m_len < ehdrlen + ip_hlen)
3322			return;	/* failure */
3323
3324		/* IPv6 doesn't have a header checksum. */
3325
3326		hdr_len = ehdrlen + ip_hlen;
3327		ipproto = ip6->ip6_nxt;
3328
3329		break;
3330	default:
3331		*txd_upper = 0;
3332		*txd_lower = 0;
3333		return;
3334	}
3335
3336	switch (ipproto) {
3337	case IPPROTO_TCP:
3338		if (mp->m_pkthdr.csum_flags & CSUM_TCP) {
3339			*txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
3340			*txd_upper |= E1000_TXD_POPTS_TXSM << 8;
3341			/* no need for context if already set */
3342			if (txr->last_hw_offload == CSUM_TCP)
3343				return;
3344			txr->last_hw_offload = CSUM_TCP;
3345			/*
3346			 * Start offset for payload checksum calculation.
3347			 * End offset for payload checksum calculation.
3348			 * Offset of place to put the checksum.
3349			 */
3350			TXD = (struct e1000_context_desc *)
3351			    &txr->tx_base[cur];
3352			TXD->upper_setup.tcp_fields.tucss = hdr_len;
3353			TXD->upper_setup.tcp_fields.tucse = htole16(0);
3354			TXD->upper_setup.tcp_fields.tucso =
3355			    hdr_len + offsetof(struct tcphdr, th_sum);
3356			cmd |= E1000_TXD_CMD_TCP;
3357		}
3358		break;
3359	case IPPROTO_UDP:
3360	{
3361		if (mp->m_pkthdr.csum_flags & CSUM_UDP) {
3362			*txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
3363			*txd_upper |= E1000_TXD_POPTS_TXSM << 8;
3364			/* no need for context if already set */
3365			if (txr->last_hw_offload == CSUM_UDP)
3366				return;
3367			txr->last_hw_offload = CSUM_UDP;
3368			/*
3369			 * Start offset for header checksum calculation.
3370			 * End offset for header checksum calculation.
3371			 * Offset of place to put the checksum.
3372			 */
3373			TXD = (struct e1000_context_desc *)
3374			    &txr->tx_base[cur];
3375			TXD->upper_setup.tcp_fields.tucss = hdr_len;
3376			TXD->upper_setup.tcp_fields.tucse = htole16(0);
3377			TXD->upper_setup.tcp_fields.tucso =
3378			    hdr_len + offsetof(struct udphdr, uh_sum);
3379		}
3380		/* Fall Thru */
3381	}
3382	default:
3383		break;
3384	}
3385
3386	TXD->tcp_seg_setup.data = htole32(0);
3387	TXD->cmd_and_length =
3388	    htole32(adapter->txd_cmd | E1000_TXD_CMD_DEXT | cmd);
3389	tx_buffer = &txr->tx_buffers[cur];
3390	tx_buffer->m_head = NULL;
3391	tx_buffer->next_eop = -1;
3392
3393	if (++cur == adapter->num_tx_desc)
3394		cur = 0;
3395
3396	txr->tx_avail--;
3397	txr->next_avail_desc = cur;
3398}
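/*
 * Editor's sketch (not driver code, compiled out): the context-descriptor
 * offsets programmed above, worked for a plain untagged IPv4/TCP frame
 * with no IP options.  ipcss/tucss say where checksumming starts, ipcse
 * bounds the IP header, and ipcso/tucso say where to store the result.
 * The function name is hypothetical.
 */
#if 0
static void
em_csum_offsets_example(void)
{
	int ehdrlen = ETHER_HDR_LEN;		/* 14 bytes */
	int ip_hlen = sizeof(struct ip);	/* 20 bytes, no options */
	int ipcso = ehdrlen + offsetof(struct ip, ip_sum);	/* 24 */
	int tucso = ehdrlen + ip_hlen +
	    offsetof(struct tcphdr, th_sum);			/* 50 */

	(void)ipcso;
	(void)tucso;
}
#endif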
3399
3400
3401/**********************************************************************
3402 *
3403 *  Setup work for hardware segmentation offload (TSO)
3404 *
3405 **********************************************************************/
3406static bool
3407em_tso_setup(struct tx_ring *txr, struct mbuf *mp, u32 *txd_upper,
3408   u32 *txd_lower)
3409{
3410	struct adapter			*adapter = txr->adapter;
3411	struct e1000_context_desc	*TXD;
3412	struct em_buffer		*tx_buffer;
3413	struct ether_vlan_header	*eh;
3414	struct ip			*ip;
3415	struct ip6_hdr			*ip6;
3416	struct tcphdr			*th;
3417	int cur, ehdrlen, hdr_len, ip_hlen, isip6;
3418	u16 etype;
3419
3420	/*
3421	 * This function could/should be extended to support IP/IPv6
3422	 * fragmentation as well.  But as they say, one step at a time.
3423	 */
3424
3425	/*
3426	 * Determine where frame payload starts.
3427	 * Jump over vlan headers if already present,
3428	 * helpful for QinQ too.
3429	 */
3430	eh = mtod(mp, struct ether_vlan_header *);
3431	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
3432		etype = ntohs(eh->evl_proto);
3433		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3434	} else {
3435		etype = ntohs(eh->evl_encap_proto);
3436		ehdrlen = ETHER_HDR_LEN;
3437	}
3438
3439	/* Ensure we have at least the IP+TCP header in the first mbuf. */
3440	if (mp->m_len < ehdrlen + sizeof(struct ip) + sizeof(struct tcphdr))
3441		return FALSE;	/* -1 */
3442
3443	/*
3444	 * We only support TCP for IPv4 and IPv6 (notyet) for the moment.
3445	 * TODO: Support SCTP too when it hits the tree.
3446	 */
3447	switch (etype) {
3448	case ETHERTYPE_IP:
3449		isip6 = 0;
3450		ip = (struct ip *)(mp->m_data + ehdrlen);
3451		if (ip->ip_p != IPPROTO_TCP)
3452			return FALSE;	/* 0 */
3453		ip->ip_len = 0;
3454		ip->ip_sum = 0;
3455		ip_hlen = ip->ip_hl << 2;
3456		if (mp->m_len < ehdrlen + ip_hlen + sizeof(struct tcphdr))
3457			return FALSE;	/* -1 */
3458		th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
3459#if 1
3460		th->th_sum = in_pseudo(ip->ip_src.s_addr,
3461		    ip->ip_dst.s_addr, htons(IPPROTO_TCP));
3462#else
3463		th->th_sum = mp->m_pkthdr.csum_data;
3464#endif
3465		break;
3466	case ETHERTYPE_IPV6:
3467		isip6 = 1;
3468		return FALSE;			/* Not supported yet. */
3469		ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
3470		if (ip6->ip6_nxt != IPPROTO_TCP)
3471			return FALSE;	/* 0 */
3472		ip6->ip6_plen = 0;
3473		ip_hlen = sizeof(struct ip6_hdr); /* XXX: no header stacking. */
3474		if (mp->m_len < ehdrlen + ip_hlen + sizeof(struct tcphdr))
3475			return FALSE;	/* -1 */
3476		th = (struct tcphdr *)((caddr_t)ip6 + ip_hlen);
3477#if 0
3478		th->th_sum = in6_pseudo(ip6->ip6_src, ip6->ip6_dst,
3479		    htons(IPPROTO_TCP));	/* XXX: function notyet. */
3480#else
3481		th->th_sum = mp->m_pkthdr.csum_data;
3482#endif
3483		break;
3484	default:
3485		return FALSE;
3486	}
3487	hdr_len = ehdrlen + ip_hlen + (th->th_off << 2);
3488
3489	*txd_lower = (E1000_TXD_CMD_DEXT |	/* Extended descr type */
3490		      E1000_TXD_DTYP_D |	/* Data descr type */
3491		      E1000_TXD_CMD_TSE);	/* Do TSE on this packet */
3492
3493	/* IP and/or TCP header checksum calculation and insertion. */
3494	*txd_upper = ((isip6 ? 0 : E1000_TXD_POPTS_IXSM) |
3495		      E1000_TXD_POPTS_TXSM) << 8;
3496
3497	cur = txr->next_avail_desc;
3498	tx_buffer = &txr->tx_buffers[cur];
3499	TXD = (struct e1000_context_desc *) &txr->tx_base[cur];
3500
3501	/* IPv6 doesn't have a header checksum. */
3502	if (!isip6) {
3503		/*
3504		 * Start offset for header checksum calculation.
3505		 * End offset for header checksum calculation.
3506		 * Offset of place to put the checksum.
3507		 */
3508		TXD->lower_setup.ip_fields.ipcss = ehdrlen;
3509		TXD->lower_setup.ip_fields.ipcse =
3510		    htole16(ehdrlen + ip_hlen - 1);
3511		TXD->lower_setup.ip_fields.ipcso =
3512		    ehdrlen + offsetof(struct ip, ip_sum);
3513	}
3514	/*
3515	 * Start offset for payload checksum calculation.
3516	 * End offset for payload checksum calculation.
3517	 * Offset of place to put the checksum.
3518	 */
3519	TXD->upper_setup.tcp_fields.tucss =
3520	    ehdrlen + ip_hlen;
3521	TXD->upper_setup.tcp_fields.tucse = 0;
3522	TXD->upper_setup.tcp_fields.tucso =
3523	    ehdrlen + ip_hlen + offsetof(struct tcphdr, th_sum);
3524	/*
3525	 * Payload size per packet w/o any headers.
3526	 * Length of all headers up to payload.
3527	 */
3528	TXD->tcp_seg_setup.fields.mss = htole16(mp->m_pkthdr.tso_segsz);
3529	TXD->tcp_seg_setup.fields.hdr_len = hdr_len;
3530
3531	TXD->cmd_and_length = htole32(adapter->txd_cmd |
3532				E1000_TXD_CMD_DEXT |	/* Extended descr */
3533				E1000_TXD_CMD_TSE |	/* TSE context */
3534				(isip6 ? 0 : E1000_TXD_CMD_IP) |
3535				E1000_TXD_CMD_TCP |	/* Do TCP checksum */
3536				(mp->m_pkthdr.len - (hdr_len))); /* Total len */
3537
3538	tx_buffer->m_head = NULL;
3539	tx_buffer->next_eop = -1;
3540
3541	if (++cur == adapter->num_tx_desc)
3542		cur = 0;
3543
3544	txr->tx_avail--;
3545	txr->next_avail_desc = cur;
3546	txr->tx_tso = TRUE;
3547
3548	return TRUE;
3549}
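
/*
 * Worked example of the offsets computed above (a reference sketch, not
 * driver code): for an untagged IPv4/TCP frame with no IP options,
 * ehdrlen = ETHER_HDR_LEN = 14 and ip_hlen = 20, so the context
 * descriptor fields come out as:
 *
 *	ipcss = 14					IP header start
 *	ipcse = 14 + 20 - 1 = 33			last byte of IP header
 *	ipcso = 14 + offsetof(struct ip, ip_sum) = 24
 *	tucss = 14 + 20 = 34				TCP header start
 *	tucso = 34 + offsetof(struct tcphdr, th_sum) = 50
 *	tucse = 0					checksum to end of packet
 *
 * and hdr_len = 34 + (th_off << 2), i.e. 54 for a 20-byte TCP header.
 */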
3550
3551
3552/**********************************************************************
3553 *
3554 *  Examine each tx_buffer in the used queue. If the hardware is done
3555 *  processing the packet then free associated resources. The
3556 *  tx_buffer is put back on the free queue.
3557 *
3558 **********************************************************************/
3559static bool
3560em_txeof(struct tx_ring *txr)
3561{
3562	struct adapter	*adapter = txr->adapter;
3563	int first, last, done, num_avail;
3564	struct em_buffer *tx_buffer;
3565	struct e1000_tx_desc	*tx_desc, *eop_desc;
3566	struct ifnet	*ifp = adapter->ifp;
3567
3568	EM_TX_LOCK_ASSERT(txr);
3569
3570	if (txr->tx_avail == adapter->num_tx_desc)
3571		return (FALSE);
3572
3573	num_avail = txr->tx_avail;
3574	first = txr->next_to_clean;
3575	tx_desc = &txr->tx_base[first];
3576	tx_buffer = &txr->tx_buffers[first];
3577	last = tx_buffer->next_eop;
3578	eop_desc = &txr->tx_base[last];
3579
3580	/*
3581	 * Get the index of the first descriptor
3582	 * AFTER the EOP of the first packet, so
3583	 * that we can do the simple comparison
3584	 * in the inner while loop below.
3585	 */
3586	if (++last == adapter->num_tx_desc)
3587		last = 0;
3588	done = last;
3589
3590	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3591	    BUS_DMASYNC_POSTREAD);
3592
3593	while (eop_desc->upper.fields.status & E1000_TXD_STAT_DD) {
3594		/* We clean the range of the packet */
3595		while (first != done) {
3596			tx_desc->upper.data = 0;
3597			tx_desc->lower.data = 0;
3598			tx_desc->buffer_addr = 0;
3599			++num_avail;
3600
3601			if (tx_buffer->m_head) {
3602				ifp->if_opackets++;
3603				bus_dmamap_sync(txr->txtag,
3604				    tx_buffer->map,
3605				    BUS_DMASYNC_POSTWRITE);
3606				bus_dmamap_unload(txr->txtag,
3607				    tx_buffer->map);
3608
3609				m_freem(tx_buffer->m_head);
3610				tx_buffer->m_head = NULL;
3611			}
3612			tx_buffer->next_eop = -1;
3613			txr->watchdog_time = ticks;
3614
3615			if (++first == adapter->num_tx_desc)
3616				first = 0;
3617
3618			tx_buffer = &txr->tx_buffers[first];
3619			tx_desc = &txr->tx_base[first];
3620		}
3621		/* See if we can continue to the next packet */
3622		last = tx_buffer->next_eop;
3623		if (last != -1) {
3624			eop_desc = &txr->tx_base[last];
3625			/* Get new done point */
3626			if (++last == adapter->num_tx_desc) last = 0;
3627			done = last;
3628		} else
3629			break;
3630	}
3631	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3632	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3633
3634	txr->next_to_clean = first;
3635
3636	/*
3637	 * If we have enough room, clear IFF_DRV_OACTIVE to
3638	 * tell the stack that it is OK to send packets.
3639	 * If there are no pending descriptors, clear the watchdog.
3640	 */
3641	if (num_avail > EM_TX_CLEANUP_THRESHOLD) {
3642		ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
3643		if (num_avail == adapter->num_tx_desc) {
3644			txr->watchdog_check = FALSE;
3645			txr->tx_avail = num_avail;
3646			return (FALSE);
3647		}
3648	}
3649
3650	txr->tx_avail = num_avail;
3651	return (TRUE);
3652}
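
/*
 * Worked example of the index arithmetic above (a sketch): suppose
 * next_to_clean = 10 and the first packet's EOP descriptor is 12.
 * Then done = 13, and the inner loop cleans descriptors 10, 11 and 12
 * before testing the next packet's EOP.  With num_tx_desc = 256 and
 * an EOP of 255, done wraps around to 0.
 */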
3653
3654
3655/*********************************************************************
3656 *
3657 *  Refresh RX descriptor mbufs from system mbuf buffer pool.
3658 *
3659 **********************************************************************/
3660static void
3661em_refresh_mbufs(struct rx_ring *rxr, int limit)
3662{
3663	struct adapter		*adapter = rxr->adapter;
3664	struct mbuf		*m;
3665	bus_dma_segment_t	segs[1];
3666	bus_dmamap_t		map;
3667	struct em_buffer	*rxbuf;
3668	int			i, error, nsegs, cleaned;
3669
3670	i = rxr->next_to_refresh;
3671	cleaned = -1;
3672	while (i != limit) {
3673		m = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR);
3674		if (m == NULL)
3675			goto update;
3676		m->m_len = m->m_pkthdr.len = MCLBYTES;
3677
3678		if (adapter->max_frame_size <= (MCLBYTES - ETHER_ALIGN))
3679			m_adj(m, ETHER_ALIGN);
3680
3681		/*
3682		 * Using memory from the mbuf cluster pool, invoke the
3683		 * bus_dma machinery to arrange the memory mapping.
3684		 */
3685		error = bus_dmamap_load_mbuf_sg(rxr->rxtag, rxr->rx_sparemap,
3686		    m, segs, &nsegs, BUS_DMA_NOWAIT);
3687		if (error != 0) {
3688			m_free(m);
3689			goto update;
3690		}
3691
3692		/* If nsegs is wrong then the stack is corrupt. */
3693		KASSERT(nsegs == 1, ("Too many segments returned!"));
3694
3695		rxbuf = &rxr->rx_buffers[i];
3696		if (rxbuf->m_head != NULL)
3697			bus_dmamap_unload(rxr->rxtag, rxbuf->map);
3698
3699		map = rxbuf->map;
3700		rxbuf->map = rxr->rx_sparemap;
3701		rxr->rx_sparemap = map;
3702		bus_dmamap_sync(rxr->rxtag,
3703		    rxbuf->map, BUS_DMASYNC_PREREAD);
3704		rxbuf->m_head = m;
3705		rxr->rx_base[i].buffer_addr = htole64(segs[0].ds_addr);
3706
3707		cleaned = i;
3708		/* Calculate next index */
3709		if (++i == adapter->num_rx_desc)
3710			i = 0;
3711		/* This is the work marker for refresh */
3712		rxr->next_to_refresh = i;
3713	}
3714update:
3715	bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
3716	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3717	if (cleaned != -1) /* Update tail index */
3718		E1000_WRITE_REG(&adapter->hw,
3719		    E1000_RDT(rxr->me), cleaned);
3720
3721	return;
3722}
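
/*
 * Note on the tail update above (per the 8254x convention): the NIC
 * consumes descriptors from RDH up to, but not including, RDT, so
 * writing RDT = 'cleaned' (the last slot refreshed) deliberately keeps
 * one refreshed descriptor back as the ring full/empty gap -- the same
 * convention as the initial RDH = 0, RDT = num_rx_desc - 1 setup in
 * em_initialize_receive_unit().
 */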
3723
3724
3725/*********************************************************************
3726 *
3727 *  Allocate memory for rx_buffer structures. Since we use one
3728 *  rx_buffer per received packet, the maximum number of rx_buffer's
3729 *  that we'll need is equal to the number of receive descriptors
3730 *  that we've allocated.
3731 *
3732 **********************************************************************/
3733static int
3734em_allocate_receive_buffers(struct rx_ring *rxr)
3735{
3736	struct adapter		*adapter = rxr->adapter;
3737	device_t		dev = adapter->dev;
3738	struct em_buffer	*rxbuf;
3739	int			error;
3740
3741	rxr->rx_buffers = malloc(sizeof(struct em_buffer) *
3742	    adapter->num_rx_desc, M_DEVBUF, M_NOWAIT | M_ZERO);
3743	if (rxr->rx_buffers == NULL) {
3744		device_printf(dev, "Unable to allocate rx_buffer memory\n");
3745		return (ENOMEM);
3746	}
3747
3748	error = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */
3749				1, 0,			/* alignment, bounds */
3750				BUS_SPACE_MAXADDR,	/* lowaddr */
3751				BUS_SPACE_MAXADDR,	/* highaddr */
3752				NULL, NULL,		/* filter, filterarg */
3753				MCLBYTES,		/* maxsize */
3754				1,			/* nsegments */
3755				MCLBYTES,		/* maxsegsize */
3756				0,			/* flags */
3757				NULL,			/* lockfunc */
3758				NULL,			/* lockarg */
3759				&rxr->rxtag);
3760	if (error) {
3761		device_printf(dev, "%s: bus_dma_tag_create failed %d\n",
3762		    __func__, error);
3763		goto fail;
3764	}
3765
3766	/* Create the spare map (used by getbuf) */
3767	error = bus_dmamap_create(rxr->rxtag, BUS_DMA_NOWAIT,
3768	     &rxr->rx_sparemap);
3769	if (error) {
3770		device_printf(dev, "%s: bus_dmamap_create failed: %d\n",
3771		    __func__, error);
3772		goto fail;
3773	}
3774
3775	/* Create a DMA map for each receive buffer */
3776	for (int i = 0; i < adapter->num_rx_desc; i++) {
3777		rxbuf = &rxr->rx_buffers[i];
3778		error = bus_dmamap_create(rxr->rxtag, BUS_DMA_NOWAIT,
3779		    &rxbuf->map);
3780		if (error) {
3781			device_printf(dev, "%s: bus_dmamap_create failed: %d\n",
3782			    __func__, error);
3783			goto fail;
3784		}
3785	}
3786
3787	return (0);
3788
3789fail:
3790	em_free_receive_structures(adapter);
3791	return (error);
3792}
3793
3794
3795/*********************************************************************
3796 *
3797 *  Initialize a receive ring and its buffers.
3798 *
3799 **********************************************************************/
3800static int
3801em_setup_receive_ring(struct rx_ring *rxr)
3802{
3803	struct	adapter 	*adapter = rxr->adapter;
3804	struct em_buffer	*rxbuf;
3805	bus_dma_segment_t	seg[1];
3806	int			rsize, nsegs, error;
3807
3808
3809	/* Clear the ring contents */
3810	EM_RX_LOCK(rxr);
3811	rsize = roundup2(adapter->num_rx_desc *
3812	    sizeof(struct e1000_rx_desc), EM_DBA_ALIGN);
3813	bzero((void *)rxr->rx_base, rsize);
3814
3815	/*
3816	** Free current RX buffer structs and their mbufs
3817	*/
3818	for (int i = 0; i < adapter->num_rx_desc; i++) {
3819		rxbuf = &rxr->rx_buffers[i];
3820		if (rxbuf->m_head != NULL) {
3821			bus_dmamap_sync(rxr->rxtag, rxbuf->map,
3822			    BUS_DMASYNC_POSTREAD);
3823			bus_dmamap_unload(rxr->rxtag, rxbuf->map);
3824			m_freem(rxbuf->m_head);
3825		}
3826	}
3827
3828	/* Now replenish the mbufs */
3829	for (int j = 0; j != adapter->num_rx_desc; ++j) {
3830
3831		rxbuf = &rxr->rx_buffers[j];
3832		rxbuf->m_head = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR);
3833		if (rxbuf->m_head == NULL)
3834			panic("RX ring hdr initialization failed!\n");
3835		rxbuf->m_head->m_len = MCLBYTES;
3836		rxbuf->m_head->m_flags &= ~M_HASFCS; /* we strip it */
3837		rxbuf->m_head->m_pkthdr.len = MCLBYTES;
3838
3839		/* Get the memory mapping */
3840		error = bus_dmamap_load_mbuf_sg(rxr->rxtag,
3841		    rxbuf->map, rxbuf->m_head, seg,
3842		    &nsegs, BUS_DMA_NOWAIT);
3843		if (error != 0)
3844			panic("RX ring dma initialization failed!\n");
3845		bus_dmamap_sync(rxr->rxtag,
3846		    rxbuf->map, BUS_DMASYNC_PREREAD);
3847
3848		/* Update descriptor */
3849		rxr->rx_base[j].buffer_addr = htole64(seg[0].ds_addr);
3850	}
3851
3852
3853	/* Setup our descriptor indices */
3854	rxr->next_to_check = 0;
3855	rxr->next_to_refresh = 0;
3856
3857	bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
3858	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3859
3860	EM_RX_UNLOCK(rxr);
3861	return (0);
3862}
3863
3864/*********************************************************************
3865 *
3866 *  Initialize all receive rings.
3867 *
3868 **********************************************************************/
3869static int
3870em_setup_receive_structures(struct adapter *adapter)
3871{
3872	struct rx_ring *rxr = adapter->rx_rings;
3873	int j;
3874
3875	for (j = 0; j < adapter->num_queues; j++, rxr++)
3876		if (em_setup_receive_ring(rxr))
3877			goto fail;
3878
3879	return (0);
3880fail:
3881	/*
3882	 * Free the RX buffers allocated so far; we only handle
3883	 * the rings that completed, since the failing ring will
3884	 * have cleaned up for itself.  Ring 'j' failed, so it is the terminus.
3885	 */
3886	for (int i = 0; i < j; ++i) {
3887		rxr = &adapter->rx_rings[i];
3888		for (int n = 0; n < adapter->num_rx_desc; n++) {
3889			struct em_buffer *rxbuf;
3890			rxbuf = &rxr->rx_buffers[n];
3891			if (rxbuf->m_head != NULL) {
3892				bus_dmamap_sync(rxr->rxtag, rxbuf->map,
3893			  	  BUS_DMASYNC_POSTREAD);
3894				bus_dmamap_unload(rxr->rxtag, rxbuf->map);
3895				m_freem(rxbuf->m_head);
3896				rxbuf->m_head = NULL;
3897			}
3898		}
3899	}
3900
3901	return (ENOBUFS);
3902}
3903
3904/*********************************************************************
3905 *
3906 *  Free all receive rings.
3907 *
3908 **********************************************************************/
3909static void
3910em_free_receive_structures(struct adapter *adapter)
3911{
3912	struct rx_ring *rxr = adapter->rx_rings;
3913
3914	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
3915		em_free_receive_buffers(rxr);
3916		/* Free the ring memory as well */
3917		em_dma_free(adapter, &rxr->rxdma);
3918		EM_RX_LOCK_DESTROY(rxr);
3919	}
3920
3921	free(adapter->rx_rings, M_DEVBUF);
3922}
3923
3924
3925/*********************************************************************
3926 *
3927 *  Free receive ring data structures
3928 *
3929 **********************************************************************/
3930static void
3931em_free_receive_buffers(struct rx_ring *rxr)
3932{
3933	struct adapter		*adapter = rxr->adapter;
3934	struct em_buffer	*rxbuf = NULL;
3935
3936	INIT_DEBUGOUT("free_receive_buffers: begin");
3937
3938	if (rxr->rx_sparemap) {
3939		bus_dmamap_destroy(rxr->rxtag, rxr->rx_sparemap);
3940		rxr->rx_sparemap = NULL;
3941	}
3942
3943	if (rxr->rx_buffers != NULL) {
3944		for (int i = 0; i < adapter->num_rx_desc; i++) {
3945			rxbuf = &rxr->rx_buffers[i];
3946			if (rxbuf->map != NULL) {
3947				bus_dmamap_sync(rxr->rxtag, rxbuf->map,
3948				    BUS_DMASYNC_POSTREAD);
3949				bus_dmamap_unload(rxr->rxtag, rxbuf->map);
3950				bus_dmamap_destroy(rxr->rxtag, rxbuf->map);
3951			}
3952			if (rxbuf->m_head != NULL) {
3953				m_freem(rxbuf->m_head);
3954				rxbuf->m_head = NULL;
3955			}
3956		}
3957		free(rxr->rx_buffers, M_DEVBUF);
3958		rxr->rx_buffers = NULL;
3959	}
3960
3961	if (rxr->rxtag != NULL) {
3962		bus_dma_tag_destroy(rxr->rxtag);
3963		rxr->rxtag = NULL;
3964	}
3965
3966	return;
3967}
3968
3969
3970/*********************************************************************
3971 *
3972 *  Enable receive unit.
3973 *
3974 **********************************************************************/
3975#define MAX_INTS_PER_SEC	8000
3976	#define DEFAULT_ITR	(1000000000/(MAX_INTS_PER_SEC * 256))
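
/*
 * Worked arithmetic: the ITR register counts in 256 ns units, so with
 * MAX_INTS_PER_SEC = 8000 this evaluates to 1000000000 / (8000 * 256)
 * = 488, i.e. at most one interrupt every 488 * 256 ns (roughly 125 us).
 */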
3977
3978static void
3979em_initialize_receive_unit(struct adapter *adapter)
3980{
3981	struct rx_ring	*rxr = adapter->rx_rings;
3982	struct ifnet	*ifp = adapter->ifp;
3983	struct e1000_hw	*hw = &adapter->hw;
3984	u64	bus_addr;
3985	u32	rctl, rxcsum;
3986
3987	INIT_DEBUGOUT("em_initialize_receive_units: begin");
3988
3989	/*
3990	 * Make sure receives are disabled while setting
3991	 * up the descriptor ring
3992	 */
3993	rctl = E1000_READ_REG(hw, E1000_RCTL);
3994	E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
3995
3996	E1000_WRITE_REG(&adapter->hw, E1000_RADV,
3997	    adapter->rx_abs_int_delay.value);
3998	/*
3999	 * Set the interrupt throttling rate. Value is calculated
4000	 * as DEFAULT_ITR = 1/(MAX_INTS_PER_SEC * 256ns)
4001	 */
4002	E1000_WRITE_REG(hw, E1000_ITR, DEFAULT_ITR);
4003
4004	/*
4005	** When using MSIX interrupts we need to throttle
4006	** using the EITR register (82574 only)
4007	*/
4008	if (hw->mac.type == e1000_82574)
4009		for (int i = 0; i < 4; i++)
4010			E1000_WRITE_REG(hw, E1000_EITR_82574(i),
4011			    DEFAULT_ITR);
4012
4013	/* Disable accelerated acknowledge */
4014	if (adapter->hw.mac.type == e1000_82574)
4015		E1000_WRITE_REG(hw, E1000_RFCTL, E1000_RFCTL_ACK_DIS);
4016
4017	if (ifp->if_capenable & IFCAP_RXCSUM) {
4018		rxcsum = E1000_READ_REG(hw, E1000_RXCSUM);
4019		rxcsum |= (E1000_RXCSUM_IPOFL | E1000_RXCSUM_TUOFL);
4020		E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum);
4021	}
4022
4023	/*
4024	** XXX TEMPORARY WORKAROUND: on some systems with 82573
4025	** XXX TEMPORARY WORKAROUND: on some systems with the 82573
4026	** (e.g. the Lenovo X60) long latencies are observed. This
4027	** values in RDTR is a known source of problems on other
4028	** platforms another solution is being sought.
4029	*/
4030	if (hw->mac.type == e1000_82573)
4031		E1000_WRITE_REG(hw, E1000_RDTR, 0x20);
4032
4033	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4034		/* Setup the Base and Length of the Rx Descriptor Ring */
4035		bus_addr = rxr->rxdma.dma_paddr;
4036		E1000_WRITE_REG(hw, E1000_RDLEN(i),
4037		    adapter->num_rx_desc * sizeof(struct e1000_rx_desc));
4038		E1000_WRITE_REG(hw, E1000_RDBAH(i), (u32)(bus_addr >> 32));
4039		E1000_WRITE_REG(hw, E1000_RDBAL(i), (u32)bus_addr);
4040		/* Setup the Head and Tail Descriptor Pointers */
4041		E1000_WRITE_REG(hw, E1000_RDH(i), 0);
4042		E1000_WRITE_REG(hw, E1000_RDT(i), adapter->num_rx_desc - 1);
4043	}
4044
4045	/* Setup the Receive Control Register */
4046	rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
4047	rctl |= E1000_RCTL_EN | E1000_RCTL_BAM |
4048	    E1000_RCTL_LBM_NO | E1000_RCTL_RDMTS_HALF |
4049	    (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
4050
4051        /* Strip the CRC */
4052        rctl |= E1000_RCTL_SECRC;
4053
4054        /* Make sure VLAN Filters are off */
4055        rctl &= ~E1000_RCTL_VFE;
4056	rctl &= ~E1000_RCTL_SBP;
4057	rctl |= E1000_RCTL_SZ_2048;
4058	if (ifp->if_mtu > ETHERMTU)
4059		rctl |= E1000_RCTL_LPE;
4060	else
4061		rctl &= ~E1000_RCTL_LPE;
4062
4063	/* Write out the settings */
4064	E1000_WRITE_REG(hw, E1000_RCTL, rctl);
4065
4066	return;
4067}
4068
4069
4070/*********************************************************************
4071 *
4072 *  This routine executes in interrupt context. It replenishes
4073 *  the mbufs in the descriptor and sends data which has been
4074 *  dma'ed into host memory to upper layer.
4075 *
4076 *  We loop at most count times if count is > 0, or until done if
4077 *  count < 0.
4078 *
4079 *  For polling we also now return the number of cleaned packets
4080 *********************************************************************/
4081static int
4082em_rxeof(struct rx_ring *rxr, int count)
4083{
4084	struct adapter		*adapter = rxr->adapter;
4085	struct ifnet		*ifp = adapter->ifp;
4086	struct mbuf		*mp, *sendmp;
4087	u8			status = 0;
4088	u16 			len;
4089	int			i, processed, rxdone = 0;
4090	bool			eop;
4091	struct e1000_rx_desc	*cur;
4092
4093	EM_RX_LOCK_ASSERT(rxr);
4094
4095	for (i = rxr->next_to_check, processed = 0; count != 0;) {
4096
4097		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
4098			break;
4099
4100		bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4101		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
4102
4103		cur = &rxr->rx_base[i];
4104		status = cur->status;
4105		mp = sendmp = NULL;
4106
4107		if ((status & E1000_RXD_STAT_DD) == 0)
4108			break;
4109
4110		len = le16toh(cur->length);
4111		eop = (status & E1000_RXD_STAT_EOP) != 0;
4112		count--;
4113
4114		if ((cur->errors & E1000_RXD_ERR_FRAME_ERR_MASK) == 0) {
4115
4116			/* Assign correct length to the current fragment */
4117			mp = rxr->rx_buffers[i].m_head;
4118			mp->m_len = len;
4119
4120			if (rxr->fmp == NULL) {
4121				mp->m_pkthdr.len = len;
4122				rxr->fmp = mp; /* Store the first mbuf */
4123				rxr->lmp = mp;
4124			} else {
4125				/* Chain mbuf's together */
4126				mp->m_flags &= ~M_PKTHDR;
4127				rxr->lmp->m_next = mp;
4128				rxr->lmp = rxr->lmp->m_next;
4129				rxr->fmp->m_pkthdr.len += len;
4130			}
4131
4132			if (eop) {
4133				rxr->fmp->m_pkthdr.rcvif = ifp;
4134				ifp->if_ipackets++;
4135				em_receive_checksum(cur, rxr->fmp);
4136#ifndef __NO_STRICT_ALIGNMENT
4137				if (adapter->max_frame_size >
4138				    (MCLBYTES - ETHER_ALIGN) &&
4139				    em_fixup_rx(rxr) != 0)
4140					goto skip;
4141#endif
4142				if (status & E1000_RXD_STAT_VP) {
4143					rxr->fmp->m_pkthdr.ether_vtag =
4144					    (le16toh(cur->special) &
4145					    E1000_RXD_SPC_VLAN_MASK);
4146					rxr->fmp->m_flags |= M_VLANTAG;
4147				}
4148#ifdef EM_MULTIQUEUE
4149				rxr->fmp->m_pkthdr.flowid = curcpu;
4150				rxr->fmp->m_flags |= M_FLOWID;
4151#endif
4152#ifndef __NO_STRICT_ALIGNMENT
4153skip:
4154#endif
4155				sendmp = rxr->fmp;
4156				rxr->fmp = NULL;
4157				rxr->lmp = NULL;
4158			}
4159		} else {
4160			ifp->if_ierrors++;
4161			/* Reuse loaded DMA map and just update mbuf chain */
4162			mp = rxr->rx_buffers[i].m_head;
4163			mp->m_len = mp->m_pkthdr.len = MCLBYTES;
4164			mp->m_data = mp->m_ext.ext_buf;
4165			mp->m_next = NULL;
4166			if (adapter->max_frame_size <=
4167			    (MCLBYTES - ETHER_ALIGN))
4168				m_adj(mp, ETHER_ALIGN);
4169			if (rxr->fmp != NULL) {
4170				m_freem(rxr->fmp);
4171				rxr->fmp = NULL;
4172				rxr->lmp = NULL;
4173			}
4174			sendmp = NULL;
4175		}
4176
4177		/* Zero out the receive descriptors status. */
4178		cur->status = 0;
4179		++rxdone;	/* cumulative for POLL */
4180		++processed;
4181
4182		/* Advance our pointers to the next descriptor. */
4183		if (++i == adapter->num_rx_desc)
4184			i = 0;
4185
4186		/* Send to the stack */
4187		if (sendmp != NULL)
4188			(*ifp->if_input)(ifp, sendmp);
4189
4190		/* Only refresh mbufs every 8 descriptors */
4191		if (processed == 8) {
4192			em_refresh_mbufs(rxr, i);
4193			processed = 0;
4194		}
4195	}
4196
4197	/* Catch any remaining refresh work */
4198	if (processed != 0) {
4199		em_refresh_mbufs(rxr, i);
4200		processed = 0;
4201	}
4202
4203	rxr->next_to_check = i;
4204
4205#ifdef DEVICE_POLLING
4206	return (rxdone);
4207#else
4208	return ((status & E1000_RXD_STAT_DD) ? TRUE : FALSE);
4209#endif
4210}
4211
4212#ifndef __NO_STRICT_ALIGNMENT
4213/*
4214 * When jumbo frames are enabled we should realign the entire payload on
4215 * architectures with strict alignment.  This is a serious design mistake
4216 * of the 8254x, as it nullifies the benefit of DMA.  The 8254x only allows
4217 * the RX buffer size to be 2048/4096/8192/16384; what we really want is
4218 * 2048 - ETHER_ALIGN, to align its payload.  On architectures without
4219 * strict alignment restrictions the 8254x still performs unaligned memory
4220 * accesses, which reduce performance as well.  To avoid copying an entire
4221 * frame to realign it, we allocate a new mbuf, copy the ethernet header
4222 * into it, and prepend the new mbuf to the existing mbuf chain.
4223 *
4224 * Be aware that the best performance of the 8254x is achieved only when
4225 * jumbo frames are not used at all on architectures with strict alignment.
4226 */
4227static int
4228em_fixup_rx(struct rx_ring *rxr)
4229{
4230	struct adapter *adapter = rxr->adapter;
4231	struct mbuf *m, *n;
4232	int error;
4233
4234	error = 0;
4235	m = rxr->fmp;
4236	if (m->m_len <= (MCLBYTES - ETHER_HDR_LEN)) {
4237		bcopy(m->m_data, m->m_data + ETHER_HDR_LEN, m->m_len);
4238		m->m_data += ETHER_HDR_LEN;
4239	} else {
4240		MGETHDR(n, M_DONTWAIT, MT_DATA);
4241		if (n != NULL) {
4242			bcopy(m->m_data, n->m_data, ETHER_HDR_LEN);
4243			m->m_data += ETHER_HDR_LEN;
4244			m->m_len -= ETHER_HDR_LEN;
4245			n->m_len = ETHER_HDR_LEN;
4246			M_MOVE_PKTHDR(n, m);
4247			n->m_next = m;
4248			rxr->fmp = n;
4249		} else {
4250			adapter->dropped_pkts++;
4251			m_freem(rxr->fmp);
4252			rxr->fmp = NULL;
4253			error = ENOMEM;
4254		}
4255	}
4256
4257	return (error);
4258}
4259#endif
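
/*
 * Worked example (sketch): mbuf clusters are at least 4-byte aligned,
 * so with a 14-byte ethernet header the IP header would start at offset
 * 14, misaligned for 32-bit loads.  After the bcopy() above shifts the
 * frame up by ETHER_HDR_LEN, the IP header starts at offset 28, which
 * is 4-byte aligned, so strict-alignment architectures can read it
 * directly.
 */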
4260
4261/*********************************************************************
4262 *
4263 *  Verify that the hardware indicated that the checksum is valid.
4264 *  Inform the stack about the status of checksum so that stack
4265 *  doesn't spend time verifying the checksum.
4266 *
4267 *********************************************************************/
4268static void
4269em_receive_checksum(struct e1000_rx_desc *rx_desc, struct mbuf *mp)
4270{
4271	/* Ignore Checksum bit is set */
4272	if (rx_desc->status & E1000_RXD_STAT_IXSM) {
4273		mp->m_pkthdr.csum_flags = 0;
4274		return;
4275	}
4276
4277	if (rx_desc->status & E1000_RXD_STAT_IPCS) {
4278		/* Did it pass? */
4279		if (!(rx_desc->errors & E1000_RXD_ERR_IPE)) {
4280			/* IP Checksum Good */
4281			mp->m_pkthdr.csum_flags = CSUM_IP_CHECKED;
4282			mp->m_pkthdr.csum_flags |= CSUM_IP_VALID;
4283
4284		} else {
4285			mp->m_pkthdr.csum_flags = 0;
4286		}
4287	}
4288
4289	if (rx_desc->status & E1000_RXD_STAT_TCPCS) {
4290		/* Did it pass? */
4291		if (!(rx_desc->errors & E1000_RXD_ERR_TCPE)) {
4292			mp->m_pkthdr.csum_flags |=
4293			(CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
4294			mp->m_pkthdr.csum_data = htons(0xffff);
4295		}
4296	}
4297}
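
/*
 * Sketch of how an upper layer consumes the flags set above.  This is
 * illustration only, never compiled, and em_checksum_flags_example() is
 * a hypothetical name, not part of the driver or the stack:
 */
#if 0
static void
em_checksum_flags_example(struct mbuf *mp)
{
	if ((mp->m_pkthdr.csum_flags &
	    (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) ==
	    (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) {
		/*
		 * Hardware verified the TCP/UDP checksum and csum_data
		 * holds 0xffff, so in_cksum() can be skipped for this
		 * packet.
		 */
	}
}
#endif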
4298
4299/*
4300 * This routine is run via a vlan
4301 * config EVENT.
4302 */
4303static void
4304em_register_vlan(void *arg, struct ifnet *ifp, u16 vtag)
4305{
4306	struct adapter	*adapter = ifp->if_softc;
4307	u32		index, bit;
4308
4309	if (ifp->if_softc !=  arg)   /* Not our event */
4310		return;
4311
4312	if ((vtag == 0) || (vtag > 4095))       /* Invalid ID */
4313		return;
4314
4315	index = (vtag >> 5) & 0x7F;
4316	bit = vtag & 0x1F;
4317	em_shadow_vfta[index] |= (1 << bit);
4318	++adapter->num_vlans;
4319	/* Re-init to load the changes */
4320	em_init(adapter);
4321}
4322
4323/*
4324 * This routine is run via a vlan
4325 * unconfig EVENT.
4326 */
4327static void
4328em_unregister_vlan(void *arg, struct ifnet *ifp, u16 vtag)
4329{
4330	struct adapter	*adapter = ifp->if_softc;
4331	u32		index, bit;
4332
4333	if (ifp->if_softc !=  arg)
4334		return;
4335
4336	if ((vtag == 0) || (vtag > 4095))       /* Invalid */
4337		return;
4338
4339	index = (vtag >> 5) & 0x7F;
4340	bit = vtag & 0x1F;
4341	em_shadow_vfta[index] &= ~(1 << bit);
4342	--adapter->num_vlans;
4343	/* Re-init to load the changes */
4344	em_init(adapter);
4345}
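
/*
 * Worked example of the VFTA indexing used above: the 4096 possible
 * VLAN IDs are spread over 128 32-bit words, so for vtag = 100,
 * index = (100 >> 5) & 0x7F = 3 and bit = 100 & 0x1F = 4; that is,
 * bit 4 of shadow word 3 tracks VLAN 100.
 */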
4346
4347static void
4348em_setup_vlan_hw_support(struct adapter *adapter)
4349{
4350	struct e1000_hw *hw = &adapter->hw;
4351	u32             reg;
4352
4353	/*
4354	** We get here through init_locked, meaning
4355	** a soft reset; this has already cleared
4356	** the VFTA and other state, so if no vlans
4357	** have been registered, do nothing.
4358	*/
4359	if (adapter->num_vlans == 0)
4360		return;
4361
4362	/*
4363	** A soft reset zero's out the VFTA, so
4364	** we need to repopulate it now.
4365	*/
4366	for (int i = 0; i < EM_VFTA_SIZE; i++)
4367		if (em_shadow_vfta[i] != 0)
4368			E1000_WRITE_REG_ARRAY(hw, E1000_VFTA,
4369			    i, em_shadow_vfta[i]);
4370
4371	reg = E1000_READ_REG(hw, E1000_CTRL);
4372	reg |= E1000_CTRL_VME;
4373	E1000_WRITE_REG(hw, E1000_CTRL, reg);
4374
4375	/* Enable the Filter Table */
4376	reg = E1000_READ_REG(hw, E1000_RCTL);
4377	reg &= ~E1000_RCTL_CFIEN;
4378	reg |= E1000_RCTL_VFE;
4379	E1000_WRITE_REG(hw, E1000_RCTL, reg);
4380
4381	/* Update the frame size */
4382	E1000_WRITE_REG(&adapter->hw, E1000_RLPML,
4383	    adapter->max_frame_size + VLAN_TAG_SIZE);
4384}
4385
4386static void
4387em_enable_intr(struct adapter *adapter)
4388{
4389	struct e1000_hw *hw = &adapter->hw;
4390	u32 ims_mask = IMS_ENABLE_MASK;
4391
4392	if (hw->mac.type == e1000_82574) {
4393		E1000_WRITE_REG(hw, EM_EIAC, EM_MSIX_MASK);
4394		ims_mask |= EM_MSIX_MASK;
4395	}
4396	E1000_WRITE_REG(hw, E1000_IMS, ims_mask);
4397}
4398
4399static void
4400em_disable_intr(struct adapter *adapter)
4401{
4402	struct e1000_hw *hw = &adapter->hw;
4403
4404	if (hw->mac.type == e1000_82574)
4405		E1000_WRITE_REG(hw, EM_EIAC, 0);
4406	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
4407}
4408
4409/*
4410 * A bit of a misnomer: what this really means is
4411 * to enable OS management of the system, i.e. to
4412 * disable the special hardware management features.
4413 */
4414static void
4415em_init_manageability(struct adapter *adapter)
4416{
4417	/* A shared code workaround */
4418#define E1000_82542_MANC2H E1000_MANC2H
4419	if (adapter->has_manage) {
4420		int manc2h = E1000_READ_REG(&adapter->hw, E1000_MANC2H);
4421		int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
4422
4423		/* disable hardware interception of ARP */
4424		manc &= ~(E1000_MANC_ARP_EN);
4425
4426		/* enable receiving management packets to the host */
4427		manc |= E1000_MANC_EN_MNG2HOST;
4428#define E1000_MNG2HOST_PORT_623 (1 << 5)
4429#define E1000_MNG2HOST_PORT_664 (1 << 6)
4430		manc2h |= E1000_MNG2HOST_PORT_623;
4431		manc2h |= E1000_MNG2HOST_PORT_664;
4432		E1000_WRITE_REG(&adapter->hw, E1000_MANC2H, manc2h);
4433		E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
4434	}
4435}
4436
4437/*
4438 * Give control back to hardware management
4439 * controller if there is one.
4440 */
4441static void
4442em_release_manageability(struct adapter *adapter)
4443{
4444	if (adapter->has_manage) {
4445		int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
4446
4447		/* re-enable hardware interception of ARP */
4448		manc |= E1000_MANC_ARP_EN;
4449		manc &= ~E1000_MANC_EN_MNG2HOST;
4450
4451		E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
4452	}
4453}
4454
4455/*
4456 * em_get_hw_control sets the {CTRL_EXT|FWSM}:DRV_LOAD bit.
4457 * For ASF and Pass Through versions of f/w this means
4458 * that the driver is loaded. For AMT version type f/w
4459 * this means that the network i/f is open.
4460 */
4461static void
4462em_get_hw_control(struct adapter *adapter)
4463{
4464	u32 ctrl_ext, swsm;
4465
4466	if (adapter->hw.mac.type == e1000_82573) {
4467		swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM);
4468		E1000_WRITE_REG(&adapter->hw, E1000_SWSM,
4469		    swsm | E1000_SWSM_DRV_LOAD);
4470		return;
4471	}
4472	/* else */
4473	ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4474	E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
4475	    ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
4476	return;
4477}
4478
4479/*
4480 * em_release_hw_control resets {CTRL_EXT|FWSM}:DRV_LOAD bit.
4481 * For ASF and Pass Through versions of f/w this means that
4482 * the driver is no longer loaded. For AMT versions of the
4483 * f/w this means that the network i/f is closed.
4484 */
4485static void
4486em_release_hw_control(struct adapter *adapter)
4487{
4488	u32 ctrl_ext, swsm;
4489
4490	if (!adapter->has_manage)
4491		return;
4492
4493	if (adapter->hw.mac.type == e1000_82573) {
4494		swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM);
4495		E1000_WRITE_REG(&adapter->hw, E1000_SWSM,
4496		    swsm & ~E1000_SWSM_DRV_LOAD);
4497		return;
4498	}
4499	/* else */
4500	ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4501	E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
4502	    ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
4503	return;
4504}
4505
4506static int
4507em_is_valid_ether_addr(u8 *addr)
4508{
4509	char zero_addr[6] = { 0, 0, 0, 0, 0, 0 };
4510
4511	if ((addr[0] & 1) || (!bcmp(addr, zero_addr, ETHER_ADDR_LEN))) {
4512		return (FALSE);
4513	}
4514
4515	return (TRUE);
4516}
4517
4518/*
4519** Parse the interface capabilities with regard
4520** to both system management and wake-on-lan for
4521** later use.
4522*/
4523static void
4524em_get_wakeup(device_t dev)
4525{
4526	struct adapter	*adapter = device_get_softc(dev);
4527	u16		eeprom_data = 0, device_id, apme_mask;
4528
4529	adapter->has_manage = e1000_enable_mng_pass_thru(&adapter->hw);
4530	apme_mask = EM_EEPROM_APME;
4531
4532	switch (adapter->hw.mac.type) {
4533	case e1000_82573:
4534	case e1000_82583:
4535		adapter->has_amt = TRUE;
4536		/* Falls thru */
4537	case e1000_82571:
4538	case e1000_82572:
4539	case e1000_80003es2lan:
4540		if (adapter->hw.bus.func == 1) {
4541			e1000_read_nvm(&adapter->hw,
4542			    NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data);
4543			break;
4544		} else
4545			e1000_read_nvm(&adapter->hw,
4546			    NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
4547		break;
4548	case e1000_ich8lan:
4549	case e1000_ich9lan:
4550	case e1000_ich10lan:
4551	case e1000_pchlan:
4552		apme_mask = E1000_WUC_APME;
4553		adapter->has_amt = TRUE;
4554		eeprom_data = E1000_READ_REG(&adapter->hw, E1000_WUC);
4555		break;
4556	default:
4557		e1000_read_nvm(&adapter->hw,
4558		    NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
4559		break;
4560	}
4561	if (eeprom_data & apme_mask)
4562		adapter->wol = (E1000_WUFC_MAG | E1000_WUFC_MC);
4563	/*
4564	 * We have the eeprom settings; now apply the special cases
4565	 * where the eeprom may be wrong or the board won't support
4566	 * wake on lan on a particular port.
4567	 */
4568	device_id = pci_get_device(dev);
4569	switch (device_id) {
4570	case E1000_DEV_ID_82571EB_FIBER:
4571		/* Wake events only supported on port A for dual fiber
4572		 * regardless of eeprom setting */
4573		if (E1000_READ_REG(&adapter->hw, E1000_STATUS) &
4574		    E1000_STATUS_FUNC_1)
4575			adapter->wol = 0;
4576		break;
4577	case E1000_DEV_ID_82571EB_QUAD_COPPER:
4578	case E1000_DEV_ID_82571EB_QUAD_FIBER:
4579	case E1000_DEV_ID_82571EB_QUAD_COPPER_LP:
4580		/* if quad port adapter, disable WoL on all but port A */
4581		if (global_quad_port_a != 0)
4582			adapter->wol = 0;
4583		/* Reset for multiple quad port adapters */
4584		if (++global_quad_port_a == 4)
4585			global_quad_port_a = 0;
4586		break;
4587	}
4588	return;
4589}
4590
4591
4592/*
4593 * Enable PCI Wake On Lan capability
4594 */
4595static void
4596em_enable_wakeup(device_t dev)
4597{
4598	struct adapter	*adapter = device_get_softc(dev);
4599	struct ifnet	*ifp = adapter->ifp;
4600	u32		pmc, ctrl, ctrl_ext, rctl;
4601	u16     	status;
4602
4603	if ((pci_find_extcap(dev, PCIY_PMG, &pmc) != 0))
4604		return;
4605
4606	/* Advertise the wakeup capability */
4607	ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
4608	ctrl |= (E1000_CTRL_SWDPIN2 | E1000_CTRL_SWDPIN3);
4609	E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
4610	E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
4611
4612	if ((adapter->hw.mac.type == e1000_ich8lan) ||
4613	    (adapter->hw.mac.type == e1000_pchlan) ||
4614	    (adapter->hw.mac.type == e1000_ich9lan) ||
4615	    (adapter->hw.mac.type == e1000_ich10lan)) {
4616		e1000_disable_gig_wol_ich8lan(&adapter->hw);
4617		e1000_hv_phy_powerdown_workaround_ich8lan(&adapter->hw);
4618	}
4619
4620	/* Keep the laser running on Fiber adapters */
4621	if (adapter->hw.phy.media_type == e1000_media_type_fiber ||
4622	    adapter->hw.phy.media_type == e1000_media_type_internal_serdes) {
4623		ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4624		ctrl_ext |= E1000_CTRL_EXT_SDP3_DATA;
4625		E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, ctrl_ext);
4626	}
4627
4628	/*
4629	** Determine type of Wakeup: note that wol
4630	** is set with all bits on by default.
4631	*/
4632	if ((ifp->if_capenable & IFCAP_WOL_MAGIC) == 0)
4633		adapter->wol &= ~E1000_WUFC_MAG;
4634
4635	if ((ifp->if_capenable & IFCAP_WOL_MCAST) == 0)
4636		adapter->wol &= ~E1000_WUFC_MC;
4637	else {
4638		rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
4639		rctl |= E1000_RCTL_MPE;
4640		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, rctl);
4641	}
4642
4643	if (adapter->hw.mac.type == e1000_pchlan) {
4644		if (em_enable_phy_wakeup(adapter))
4645			return;
4646	} else {
4647		E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
4648		E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
4649	}
4650
4651	if (adapter->hw.phy.type == e1000_phy_igp_3)
4652		e1000_igp3_phy_powerdown_workaround_ich8lan(&adapter->hw);
4653
4654	/* Request PME */
4655	status = pci_read_config(dev, pmc + PCIR_POWER_STATUS, 2);
4656	status &= ~(PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE);
4657	if (ifp->if_capenable & IFCAP_WOL)
4658		status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE;
4659	pci_write_config(dev, pmc + PCIR_POWER_STATUS, status, 2);
4660
4661	return;
4662}
4663
4664/*
4665** WOL in the newer chipset interfaces (pchlan)
4666** requires things to be copied into the PHY.
4667*/
4668static int
4669em_enable_phy_wakeup(struct adapter *adapter)
4670{
4671	struct e1000_hw *hw = &adapter->hw;
4672	u32 mreg, ret = 0;
4673	u16 preg;
4674
4675	/* copy MAC RARs to PHY RARs */
4676	for (int i = 0; i < adapter->hw.mac.rar_entry_count; i++) {
4677		mreg = E1000_READ_REG(hw, E1000_RAL(i));
4678		e1000_write_phy_reg(hw, BM_RAR_L(i), (u16)(mreg & 0xFFFF));
4679		e1000_write_phy_reg(hw, BM_RAR_M(i),
4680		    (u16)((mreg >> 16) & 0xFFFF));
4681		mreg = E1000_READ_REG(hw, E1000_RAH(i));
4682		e1000_write_phy_reg(hw, BM_RAR_H(i), (u16)(mreg & 0xFFFF));
4683		e1000_write_phy_reg(hw, BM_RAR_CTRL(i),
4684		    (u16)((mreg >> 16) & 0xFFFF));
4685	}
4686
4687	/* copy MAC MTA to PHY MTA */
4688	for (int i = 0; i < adapter->hw.mac.mta_reg_count; i++) {
4689		mreg = E1000_READ_REG_ARRAY(hw, E1000_MTA, i);
4690		e1000_write_phy_reg(hw, BM_MTA(i), (u16)(mreg & 0xFFFF));
4691		e1000_write_phy_reg(hw, BM_MTA(i) + 1,
4692		    (u16)((mreg >> 16) & 0xFFFF));
4693	}
4694
4695	/* configure PHY Rx Control register */
4696	e1000_read_phy_reg(&adapter->hw, BM_RCTL, &preg);
4697	mreg = E1000_READ_REG(hw, E1000_RCTL);
4698	if (mreg & E1000_RCTL_UPE)
4699		preg |= BM_RCTL_UPE;
4700	if (mreg & E1000_RCTL_MPE)
4701		preg |= BM_RCTL_MPE;
4702	preg &= ~(BM_RCTL_MO_MASK);
4703	if (mreg & E1000_RCTL_MO_3)
4704		preg |= (((mreg & E1000_RCTL_MO_3) >> E1000_RCTL_MO_SHIFT)
4705				<< BM_RCTL_MO_SHIFT);
4706	if (mreg & E1000_RCTL_BAM)
4707		preg |= BM_RCTL_BAM;
4708	if (mreg & E1000_RCTL_PMCF)
4709		preg |= BM_RCTL_PMCF;
4710	mreg = E1000_READ_REG(hw, E1000_CTRL);
4711	if (mreg & E1000_CTRL_RFCE)
4712		preg |= BM_RCTL_RFCE;
4713	e1000_write_phy_reg(&adapter->hw, BM_RCTL, preg);
4714
4715	/* enable PHY wakeup in MAC register */
4716	E1000_WRITE_REG(hw, E1000_WUC,
4717	    E1000_WUC_PHY_WAKE | E1000_WUC_PME_EN);
4718	E1000_WRITE_REG(hw, E1000_WUFC, adapter->wol);
4719
4720	/* configure and enable PHY wakeup in PHY registers */
4721	e1000_write_phy_reg(&adapter->hw, BM_WUFC, adapter->wol);
4722	e1000_write_phy_reg(&adapter->hw, BM_WUC, E1000_WUC_PME_EN);
4723
4724	/* activate PHY wakeup */
4725	ret = hw->phy.ops.acquire(hw);
4726	if (ret) {
4727		printf("Could not acquire PHY\n");
4728		return ret;
4729	}
4730	e1000_write_phy_reg_mdic(hw, IGP01E1000_PHY_PAGE_SELECT,
4731	                         (BM_WUC_ENABLE_PAGE << IGP_PAGE_SHIFT));
4732	ret = e1000_read_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, &preg);
4733	if (ret) {
4734		printf("Could not read PHY page 769\n");
4735		goto out;
4736	}
4737	preg |= BM_WUC_ENABLE_BIT | BM_WUC_HOST_WU_BIT;
4738	ret = e1000_write_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, preg);
4739	if (ret)
4740		printf("Could not set PHY Host Wakeup bit\n");
4741out:
4742	hw->phy.ops.release(hw);
4743
4744	return ret;
4745}
4746
4747static void
4748em_led_func(void *arg, int onoff)
4749{
4750	struct adapter	*adapter = arg;
4751
4752	EM_CORE_LOCK(adapter);
4753	if (onoff) {
4754		e1000_setup_led(&adapter->hw);
4755		e1000_led_on(&adapter->hw);
4756	} else {
4757		e1000_led_off(&adapter->hw);
4758		e1000_cleanup_led(&adapter->hw);
4759	}
4760	EM_CORE_UNLOCK(adapter);
4761}
4762
4763/**********************************************************************
4764 *
4765 *  Update the board statistics counters.
4766 *
4767 **********************************************************************/
4768static void
4769em_update_stats_counters(struct adapter *adapter)
4770{
4771	struct ifnet   *ifp;
4772
4773	if (adapter->hw.phy.media_type == e1000_media_type_copper ||
4774	   (E1000_READ_REG(&adapter->hw, E1000_STATUS) & E1000_STATUS_LU)) {
4775		adapter->stats.symerrs += E1000_READ_REG(&adapter->hw, E1000_SYMERRS);
4776		adapter->stats.sec += E1000_READ_REG(&adapter->hw, E1000_SEC);
4777	}
4778	adapter->stats.crcerrs += E1000_READ_REG(&adapter->hw, E1000_CRCERRS);
4779	adapter->stats.mpc += E1000_READ_REG(&adapter->hw, E1000_MPC);
4780	adapter->stats.scc += E1000_READ_REG(&adapter->hw, E1000_SCC);
4781	adapter->stats.ecol += E1000_READ_REG(&adapter->hw, E1000_ECOL);
4782
4783	adapter->stats.mcc += E1000_READ_REG(&adapter->hw, E1000_MCC);
4784	adapter->stats.latecol += E1000_READ_REG(&adapter->hw, E1000_LATECOL);
4785	adapter->stats.colc += E1000_READ_REG(&adapter->hw, E1000_COLC);
4786	adapter->stats.dc += E1000_READ_REG(&adapter->hw, E1000_DC);
4787	adapter->stats.rlec += E1000_READ_REG(&adapter->hw, E1000_RLEC);
4788	adapter->stats.xonrxc += E1000_READ_REG(&adapter->hw, E1000_XONRXC);
4789	adapter->stats.xontxc += E1000_READ_REG(&adapter->hw, E1000_XONTXC);
4790	adapter->stats.xoffrxc += E1000_READ_REG(&adapter->hw, E1000_XOFFRXC);
4791	adapter->stats.xofftxc += E1000_READ_REG(&adapter->hw, E1000_XOFFTXC);
4792	adapter->stats.fcruc += E1000_READ_REG(&adapter->hw, E1000_FCRUC);
4793	adapter->stats.prc64 += E1000_READ_REG(&adapter->hw, E1000_PRC64);
4794	adapter->stats.prc127 += E1000_READ_REG(&adapter->hw, E1000_PRC127);
4795	adapter->stats.prc255 += E1000_READ_REG(&adapter->hw, E1000_PRC255);
4796	adapter->stats.prc511 += E1000_READ_REG(&adapter->hw, E1000_PRC511);
4797	adapter->stats.prc1023 += E1000_READ_REG(&adapter->hw, E1000_PRC1023);
4798	adapter->stats.prc1522 += E1000_READ_REG(&adapter->hw, E1000_PRC1522);
4799	adapter->stats.gprc += E1000_READ_REG(&adapter->hw, E1000_GPRC);
4800	adapter->stats.bprc += E1000_READ_REG(&adapter->hw, E1000_BPRC);
4801	adapter->stats.mprc += E1000_READ_REG(&adapter->hw, E1000_MPRC);
4802	adapter->stats.gptc += E1000_READ_REG(&adapter->hw, E1000_GPTC);
4803
4804	/* For the 64-bit byte counters the low dword must be read first; */
4805	/* both halves clear on the read of the high dword. */
4806
4807	adapter->stats.gorc += E1000_READ_REG(&adapter->hw, E1000_GORCL) + ((u64)E1000_READ_REG(&adapter->hw, E1000_GORCH) << 32);
4808	adapter->stats.gotc += E1000_READ_REG(&adapter->hw, E1000_GOTCL) + ((u64)E1000_READ_REG(&adapter->hw, E1000_GOTCH) << 32);
4809
4810	adapter->stats.rnbc += E1000_READ_REG(&adapter->hw, E1000_RNBC);
4811	adapter->stats.ruc += E1000_READ_REG(&adapter->hw, E1000_RUC);
4812	adapter->stats.rfc += E1000_READ_REG(&adapter->hw, E1000_RFC);
4813	adapter->stats.roc += E1000_READ_REG(&adapter->hw, E1000_ROC);
4814	adapter->stats.rjc += E1000_READ_REG(&adapter->hw, E1000_RJC);
4815
4816	adapter->stats.tor += E1000_READ_REG(&adapter->hw, E1000_TORL) + ((u64)E1000_READ_REG(&adapter->hw, E1000_TORH) << 32);
4817	adapter->stats.tot += E1000_READ_REG(&adapter->hw, E1000_TOTL) + ((u64)E1000_READ_REG(&adapter->hw, E1000_TOTH) << 32);
4818
4819	adapter->stats.tpr += E1000_READ_REG(&adapter->hw, E1000_TPR);
4820	adapter->stats.tpt += E1000_READ_REG(&adapter->hw, E1000_TPT);
4821	adapter->stats.ptc64 += E1000_READ_REG(&adapter->hw, E1000_PTC64);
4822	adapter->stats.ptc127 += E1000_READ_REG(&adapter->hw, E1000_PTC127);
4823	adapter->stats.ptc255 += E1000_READ_REG(&adapter->hw, E1000_PTC255);
4824	adapter->stats.ptc511 += E1000_READ_REG(&adapter->hw, E1000_PTC511);
4825	adapter->stats.ptc1023 += E1000_READ_REG(&adapter->hw, E1000_PTC1023);
4826	adapter->stats.ptc1522 += E1000_READ_REG(&adapter->hw, E1000_PTC1522);
4827	adapter->stats.mptc += E1000_READ_REG(&adapter->hw, E1000_MPTC);
4828	adapter->stats.bptc += E1000_READ_REG(&adapter->hw, E1000_BPTC);
4829
4830	if (adapter->hw.mac.type >= e1000_82543) {
4831		adapter->stats.algnerrc +=
4832		E1000_READ_REG(&adapter->hw, E1000_ALGNERRC);
4833		adapter->stats.rxerrc +=
4834		E1000_READ_REG(&adapter->hw, E1000_RXERRC);
4835		adapter->stats.tncrs +=
4836		E1000_READ_REG(&adapter->hw, E1000_TNCRS);
4837		adapter->stats.cexterr +=
4838		E1000_READ_REG(&adapter->hw, E1000_CEXTERR);
4839		adapter->stats.tsctc +=
4840		E1000_READ_REG(&adapter->hw, E1000_TSCTC);
4841		adapter->stats.tsctfc +=
4842		E1000_READ_REG(&adapter->hw, E1000_TSCTFC);
4843	}
4844	ifp = adapter->ifp;
4845
4846	ifp->if_collisions = adapter->stats.colc;
4847
4848	/* Rx Errors */
4849	ifp->if_ierrors = adapter->dropped_pkts + adapter->stats.rxerrc +
4850	    adapter->stats.crcerrs + adapter->stats.algnerrc +
4851	    adapter->stats.ruc + adapter->stats.roc +
4852	    adapter->stats.mpc + adapter->stats.cexterr;
4853
4854	/* Tx Errors */
4855	ifp->if_oerrors = adapter->stats.ecol +
4856	    adapter->stats.latecol + adapter->watchdog_events;
4857}
4858
4859
4860/**********************************************************************
4861 *
4862 *  This routine is called only when em_display_debug_stats is enabled.
4863 *  This routine provides a way to take a look at important statistics
4864 *  maintained by the driver and hardware.
4865 *
4866 **********************************************************************/
4867static void
4868em_print_debug_info(struct adapter *adapter)
4869{
4870	device_t dev = adapter->dev;
4871	u8 *hw_addr = adapter->hw.hw_addr;
4872	struct rx_ring *rxr = adapter->rx_rings;
4873	struct tx_ring *txr = adapter->tx_rings;
4874
4875	device_printf(dev, "Adapter hardware address = %p \n", hw_addr);
4876	device_printf(dev, "CTRL = 0x%x RCTL = 0x%x \n",
4877	    E1000_READ_REG(&adapter->hw, E1000_CTRL),
4878	    E1000_READ_REG(&adapter->hw, E1000_RCTL));
4879	device_printf(dev, "Packet buffer = Tx=%dk Rx=%dk \n",
4880	    ((E1000_READ_REG(&adapter->hw, E1000_PBA) & 0xffff0000) >> 16),
4881	    (E1000_READ_REG(&adapter->hw, E1000_PBA) & 0xffff));
4882	device_printf(dev, "Flow control watermarks high = %d low = %d\n",
4883	    adapter->hw.fc.high_water,
4884	    adapter->hw.fc.low_water);
4885	device_printf(dev, "tx_int_delay = %d, tx_abs_int_delay = %d\n",
4886	    E1000_READ_REG(&adapter->hw, E1000_TIDV),
4887	    E1000_READ_REG(&adapter->hw, E1000_TADV));
4888	device_printf(dev, "rx_int_delay = %d, rx_abs_int_delay = %d\n",
4889	    E1000_READ_REG(&adapter->hw, E1000_RDTR),
4890	    E1000_READ_REG(&adapter->hw, E1000_RADV));
4891
4892	for (int i = 0; i < adapter->num_queues; i++, txr++) {
4893		device_printf(dev, "Queue(%d) tdh = %d, tdt = %d\n", i,
4894		    E1000_READ_REG(&adapter->hw, E1000_TDH(i)),
4895		    E1000_READ_REG(&adapter->hw, E1000_TDT(i)));
4896		device_printf(dev, "TX(%d) no descriptors avail event = %ld\n",
4897		    txr->me, txr->no_desc_avail);
4898		device_printf(dev, "TX(%d) MSIX IRQ Handled = %ld\n",
4899		    txr->me, txr->tx_irq);
4900		device_printf(dev, "Num Tx descriptors avail = %d\n",
4901		    txr->tx_avail);
4902		device_printf(dev, "Tx Descriptors not avail1 = %ld\n",
4903		    txr->no_desc_avail);
4904	}
4905	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4906		device_printf(dev, "RX(%d) MSIX IRQ Handled = %ld\n",
4907		    rxr->me, rxr->rx_irq);
4908		device_printf(dev, "hw rdh = %d, hw rdt = %d\n",
4909		    E1000_READ_REG(&adapter->hw, E1000_RDH(i)),
4910		    E1000_READ_REG(&adapter->hw, E1000_RDT(i)));
4911	}
4912	device_printf(dev, "Std mbuf failed = %ld\n",
4913	    adapter->mbuf_alloc_failed);
4914	device_printf(dev, "Std mbuf cluster failed = %ld\n",
4915	    adapter->mbuf_cluster_failed);
4916	device_printf(dev, "Driver dropped packets = %ld\n",
4917	    adapter->dropped_pkts);
4918}
4919
4920static void
4921em_print_hw_stats(struct adapter *adapter)
4922{
4923	device_t dev = adapter->dev;
4924
4925	device_printf(dev, "Excessive collisions = %lld\n",
4926	    (long long)adapter->stats.ecol);
4927#if	(DEBUG_HW > 0)  /* Don't output these errors normally */
4928	device_printf(dev, "Symbol errors = %lld\n",
4929	    (long long)adapter->stats.symerrs);
4930#endif
4931	device_printf(dev, "Sequence errors = %lld\n",
4932	    (long long)adapter->stats.sec);
4933	device_printf(dev, "Defer count = %lld\n",
4934	    (long long)adapter->stats.dc);
4935	device_printf(dev, "Missed Packets = %lld\n",
4936	    (long long)adapter->stats.mpc);
4937	device_printf(dev, "Receive No Buffers = %lld\n",
4938	    (long long)adapter->stats.rnbc);
4939	/* RLEC is inaccurate on some hardware, so we calculate our own. */
4940	device_printf(dev, "Receive Length Errors = %lld\n",
4941	    ((long long)adapter->stats.roc + (long long)adapter->stats.ruc));
4942	device_printf(dev, "Receive errors = %lld\n",
4943	    (long long)adapter->stats.rxerrc);
4944	device_printf(dev, "Crc errors = %lld\n",
4945	    (long long)adapter->stats.crcerrs);
4946	device_printf(dev, "Alignment errors = %lld\n",
4947	    (long long)adapter->stats.algnerrc);
4948	device_printf(dev, "Collision/Carrier extension errors = %lld\n",
4949	    (long long)adapter->stats.cexterr);
4950	device_printf(dev, "watchdog timeouts = %ld\n",
4951	    adapter->watchdog_events);
4952	device_printf(dev, "XON Rcvd = %lld\n",
4953	    (long long)adapter->stats.xonrxc);
4954	device_printf(dev, "XON Xmtd = %lld\n",
4955	    (long long)adapter->stats.xontxc);
4956	device_printf(dev, "XOFF Rcvd = %lld\n",
4957	    (long long)adapter->stats.xoffrxc);
4958	device_printf(dev, "XOFF Xmtd = %lld\n",
4959	    (long long)adapter->stats.xofftxc);
4960	device_printf(dev, "Good Packets Rcvd = %lld\n",
4961	    (long long)adapter->stats.gprc);
4962	device_printf(dev, "Good Packets Xmtd = %lld\n",
4963	    (long long)adapter->stats.gptc);
4964	device_printf(dev, "TSO Contexts Xmtd = %lld\n",
4965	    (long long)adapter->stats.tsctc);
4966	device_printf(dev, "TSO Contexts Failed = %lld\n",
4967	    (long long)adapter->stats.tsctfc);
4968}
4969
4970/**********************************************************************
4971 *
4972 *  This routine provides a way to dump out the adapter eeprom,
4973 *  often a useful debug/service tool. This only dumps the first
4974 *  32 words; the data that matters is within that extent.
4975 *
4976 **********************************************************************/
4977static void
4978em_print_nvm_info(struct adapter *adapter)
4979{
4980	u16	eeprom_data;
4981	int	i, j, row = 0;
4982
4983	/* It's a bit crude, but it gets the job done */
4984	printf("\nInterface EEPROM Dump:\n");
4985	printf("Offset\n0x0000  ");
4986	for (i = 0, j = 0; i < 32; i++, j++) {
4987		if (j == 8) { /* Make the offset block */
4988			j = 0; ++row;
4989			printf("\n0x00%x0  ", row);
4990		}
4991		e1000_read_nvm(&adapter->hw, i, 1, &eeprom_data);
4992		printf("%04x ", eeprom_data);
4993	}
4994	printf("\n");
4995}
4996
4997static int
4998em_sysctl_debug_info(SYSCTL_HANDLER_ARGS)
4999{
5000	struct adapter *adapter;
5001	int error;
5002	int result;
5003
5004	result = -1;
5005	error = sysctl_handle_int(oidp, &result, 0, req);
5006
5007	if (error || !req->newptr)
5008		return (error);
5009
5010	if (result == 1) {
5011		adapter = (struct adapter *)arg1;
5012		em_print_debug_info(adapter);
5013	}
5014	/*
5015	 * This value will cause a hex dump of the
5016	 * first 32 16-bit words of the EEPROM to
5017	 * the screen.
5018	 */
5019	if (result == 2) {
5020		adapter = (struct adapter *)arg1;
5021		em_print_nvm_info(adapter);
5022	}
5023
5024	return (error);
5025}
5026
5027
5028static int
5029em_sysctl_stats(SYSCTL_HANDLER_ARGS)
5030{
5031	struct adapter *adapter;
5032	int error;
5033	int result;
5034
5035	result = -1;
5036	error = sysctl_handle_int(oidp, &result, 0, req);
5037
5038	if (error || !req->newptr)
5039		return (error);
5040
5041	if (result == 1) {
5042		adapter = (struct adapter *)arg1;
5043		em_print_hw_stats(adapter);
5044	}
5045
5046	return (error);
5047}
5048
5049static int
5050em_sysctl_int_delay(SYSCTL_HANDLER_ARGS)
5051{
5052	struct em_int_delay_info *info;
5053	struct adapter *adapter;
5054	u32 regval;
5055	int error, usecs, ticks;
5056
5057	info = (struct em_int_delay_info *)arg1;
5058	usecs = info->value;
5059	error = sysctl_handle_int(oidp, &usecs, 0, req);
5060	if (error != 0 || req->newptr == NULL)
5061		return (error);
5062	if (usecs < 0 || usecs > EM_TICKS_TO_USECS(65535))
5063		return (EINVAL);
5064	info->value = usecs;
5065	ticks = EM_USECS_TO_TICKS(usecs);
5066
5067	adapter = info->adapter;
5068
5069	EM_CORE_LOCK(adapter);
5070	regval = E1000_READ_OFFSET(&adapter->hw, info->offset);
5071	regval = (regval & ~0xffff) | (ticks & 0xffff);
5072	/* Handle a few special cases. */
5073	switch (info->offset) {
5074	case E1000_RDTR:
5075		break;
5076	case E1000_TIDV:
5077		if (ticks == 0) {
5078			adapter->txd_cmd &= ~E1000_TXD_CMD_IDE;
5079			/* Don't write 0 into the TIDV register. */
5080			regval++;
5081		} else
5082			adapter->txd_cmd |= E1000_TXD_CMD_IDE;
5083		break;
5084	}
5085	E1000_WRITE_OFFSET(&adapter->hw, info->offset, regval);
5086	EM_CORE_UNLOCK(adapter);
5087	return (0);
5088}
5089
5090static void
5091em_add_int_delay_sysctl(struct adapter *adapter, const char *name,
5092	const char *description, struct em_int_delay_info *info,
5093	int offset, int value)
5094{
5095	info->adapter = adapter;
5096	info->offset = offset;
5097	info->value = value;
5098	SYSCTL_ADD_PROC(device_get_sysctl_ctx(adapter->dev),
5099	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
5100	    OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW,
5101	    info, 0, em_sysctl_int_delay, "I", description);
5102}
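
/*
 * For reference (a sketch; the sysctl names are supplied by the attach
 * code elsewhere in this file): an int-delay sysctl registered here is
 * attached to the device's sysctl tree and can be tuned at runtime,
 * e.g.
 *
 *	sysctl dev.em.0.rx_int_delay=32
 *
 * which funnels through em_sysctl_int_delay() and rewrites the low
 * 16 bits of the corresponding delay register.
 */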
5103
5104static void
5105em_add_rx_process_limit(struct adapter *adapter, const char *name,
5106	const char *description, int *limit, int value)
5107{
5108	*limit = value;
5109	SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
5110	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
5111	    OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW, limit, value, description);
5112}
5113
5114
5115