if_em.c revision 223676
1/******************************************************************************
2
3  Copyright (c) 2001-2011, Intel Corporation
4  All rights reserved.
5
6  Redistribution and use in source and binary forms, with or without
7  modification, are permitted provided that the following conditions are met:
8
9   1. Redistributions of source code must retain the above copyright notice,
10      this list of conditions and the following disclaimer.
11
12   2. Redistributions in binary form must reproduce the above copyright
13      notice, this list of conditions and the following disclaimer in the
14      documentation and/or other materials provided with the distribution.
15
16   3. Neither the name of the Intel Corporation nor the names of its
17      contributors may be used to endorse or promote products derived from
18      this software without specific prior written permission.
19
20  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
24  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30  POSSIBILITY OF SUCH DAMAGE.
31
32******************************************************************************/
33/*$FreeBSD: head/sys/dev/e1000/if_em.c 223676 2011-06-29 16:20:52Z jhb $*/
34
35#ifdef HAVE_KERNEL_OPTION_HEADERS
36#include "opt_device_polling.h"
37#include "opt_inet.h"
38#endif
39
40#include <sys/param.h>
41#include <sys/systm.h>
42#if __FreeBSD_version >= 800000
43#include <sys/buf_ring.h>
44#endif
45#include <sys/bus.h>
46#include <sys/endian.h>
47#include <sys/kernel.h>
48#include <sys/kthread.h>
49#include <sys/malloc.h>
50#include <sys/mbuf.h>
51#include <sys/module.h>
52#include <sys/rman.h>
53#include <sys/socket.h>
54#include <sys/sockio.h>
55#include <sys/sysctl.h>
56#include <sys/taskqueue.h>
57#include <sys/eventhandler.h>
58#include <machine/bus.h>
59#include <machine/resource.h>
60
61#include <net/bpf.h>
62#include <net/ethernet.h>
63#include <net/if.h>
64#include <net/if_arp.h>
65#include <net/if_dl.h>
66#include <net/if_media.h>
67
68#include <net/if_types.h>
69#include <net/if_vlan_var.h>
70
71#include <netinet/in_systm.h>
72#include <netinet/in.h>
73#include <netinet/if_ether.h>
74#include <netinet/ip.h>
75#include <netinet/ip6.h>
76#include <netinet/tcp.h>
77#include <netinet/udp.h>
78
79#include <machine/in_cksum.h>
80#include <dev/led/led.h>
81#include <dev/pci/pcivar.h>
82#include <dev/pci/pcireg.h>
83
84#include "e1000_api.h"
85#include "e1000_82571.h"
86#include "if_em.h"
87
88/*********************************************************************
89 *  Set this to one to display debug statistics
90 *********************************************************************/
91int	em_display_debug_stats = 0;
92
93/*********************************************************************
94 *  Driver version:
95 *********************************************************************/
96char em_driver_version[] = "7.2.3";
97
98/*********************************************************************
99 *  PCI Device ID Table
100 *
101 *  Used by probe to select devices to load on
102 *  Last field stores an index into e1000_strings
103 *  Last entry must be all 0s
104 *
105 *  { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
106 *********************************************************************/
107
108static em_vendor_info_t em_vendor_info_array[] =
109{
110	/* Intel(R) PRO/1000 Network Connection */
111	{ 0x8086, E1000_DEV_ID_82571EB_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
112	{ 0x8086, E1000_DEV_ID_82571EB_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
113	{ 0x8086, E1000_DEV_ID_82571EB_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
114	{ 0x8086, E1000_DEV_ID_82571EB_SERDES_DUAL,
115						PCI_ANY_ID, PCI_ANY_ID, 0},
116	{ 0x8086, E1000_DEV_ID_82571EB_SERDES_QUAD,
117						PCI_ANY_ID, PCI_ANY_ID, 0},
118	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER,
119						PCI_ANY_ID, PCI_ANY_ID, 0},
120	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER_LP,
121						PCI_ANY_ID, PCI_ANY_ID, 0},
122	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_FIBER,
123						PCI_ANY_ID, PCI_ANY_ID, 0},
124	{ 0x8086, E1000_DEV_ID_82571PT_QUAD_COPPER,
125						PCI_ANY_ID, PCI_ANY_ID, 0},
126	{ 0x8086, E1000_DEV_ID_82572EI_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
127	{ 0x8086, E1000_DEV_ID_82572EI_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
128	{ 0x8086, E1000_DEV_ID_82572EI_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
129	{ 0x8086, E1000_DEV_ID_82572EI,		PCI_ANY_ID, PCI_ANY_ID, 0},
130
131	{ 0x8086, E1000_DEV_ID_82573E,		PCI_ANY_ID, PCI_ANY_ID, 0},
132	{ 0x8086, E1000_DEV_ID_82573E_IAMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
133	{ 0x8086, E1000_DEV_ID_82573L,		PCI_ANY_ID, PCI_ANY_ID, 0},
134	{ 0x8086, E1000_DEV_ID_82583V,		PCI_ANY_ID, PCI_ANY_ID, 0},
135	{ 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_SPT,
136						PCI_ANY_ID, PCI_ANY_ID, 0},
137	{ 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_SPT,
138						PCI_ANY_ID, PCI_ANY_ID, 0},
139	{ 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_DPT,
140						PCI_ANY_ID, PCI_ANY_ID, 0},
141	{ 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_DPT,
142						PCI_ANY_ID, PCI_ANY_ID, 0},
143	{ 0x8086, E1000_DEV_ID_ICH8_IGP_M_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
144	{ 0x8086, E1000_DEV_ID_ICH8_IGP_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
145	{ 0x8086, E1000_DEV_ID_ICH8_IGP_C,	PCI_ANY_ID, PCI_ANY_ID, 0},
146	{ 0x8086, E1000_DEV_ID_ICH8_IFE,	PCI_ANY_ID, PCI_ANY_ID, 0},
147	{ 0x8086, E1000_DEV_ID_ICH8_IFE_GT,	PCI_ANY_ID, PCI_ANY_ID, 0},
148	{ 0x8086, E1000_DEV_ID_ICH8_IFE_G,	PCI_ANY_ID, PCI_ANY_ID, 0},
149	{ 0x8086, E1000_DEV_ID_ICH8_IGP_M,	PCI_ANY_ID, PCI_ANY_ID, 0},
150	{ 0x8086, E1000_DEV_ID_ICH8_82567V_3,	PCI_ANY_ID, PCI_ANY_ID, 0},
151	{ 0x8086, E1000_DEV_ID_ICH9_IGP_M_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
152	{ 0x8086, E1000_DEV_ID_ICH9_IGP_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
153	{ 0x8086, E1000_DEV_ID_ICH9_IGP_C,	PCI_ANY_ID, PCI_ANY_ID, 0},
154	{ 0x8086, E1000_DEV_ID_ICH9_IGP_M,	PCI_ANY_ID, PCI_ANY_ID, 0},
155	{ 0x8086, E1000_DEV_ID_ICH9_IGP_M_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
156	{ 0x8086, E1000_DEV_ID_ICH9_IFE,	PCI_ANY_ID, PCI_ANY_ID, 0},
157	{ 0x8086, E1000_DEV_ID_ICH9_IFE_GT,	PCI_ANY_ID, PCI_ANY_ID, 0},
158	{ 0x8086, E1000_DEV_ID_ICH9_IFE_G,	PCI_ANY_ID, PCI_ANY_ID, 0},
159	{ 0x8086, E1000_DEV_ID_ICH9_BM,		PCI_ANY_ID, PCI_ANY_ID, 0},
160	{ 0x8086, E1000_DEV_ID_82574L,		PCI_ANY_ID, PCI_ANY_ID, 0},
161	{ 0x8086, E1000_DEV_ID_82574LA,		PCI_ANY_ID, PCI_ANY_ID, 0},
162	{ 0x8086, E1000_DEV_ID_ICH10_R_BM_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
163	{ 0x8086, E1000_DEV_ID_ICH10_R_BM_LF,	PCI_ANY_ID, PCI_ANY_ID, 0},
164	{ 0x8086, E1000_DEV_ID_ICH10_R_BM_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
165	{ 0x8086, E1000_DEV_ID_ICH10_D_BM_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
166	{ 0x8086, E1000_DEV_ID_ICH10_D_BM_LF,	PCI_ANY_ID, PCI_ANY_ID, 0},
167	{ 0x8086, E1000_DEV_ID_ICH10_D_BM_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
168	{ 0x8086, E1000_DEV_ID_PCH_M_HV_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
169	{ 0x8086, E1000_DEV_ID_PCH_M_HV_LC,	PCI_ANY_ID, PCI_ANY_ID, 0},
170	{ 0x8086, E1000_DEV_ID_PCH_D_HV_DM,	PCI_ANY_ID, PCI_ANY_ID, 0},
171	{ 0x8086, E1000_DEV_ID_PCH_D_HV_DC,	PCI_ANY_ID, PCI_ANY_ID, 0},
172	{ 0x8086, E1000_DEV_ID_PCH2_LV_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
173	{ 0x8086, E1000_DEV_ID_PCH2_LV_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
174	/* required last entry */
175	{ 0, 0, 0, 0, 0}
176};
177
178/*********************************************************************
179 *  Table of branding strings for all supported NICs.
180 *********************************************************************/
181
182static char *em_strings[] = {
183	"Intel(R) PRO/1000 Network Connection"
184};
185
186/*********************************************************************
187 *  Function prototypes
188 *********************************************************************/
189static int	em_probe(device_t);
190static int	em_attach(device_t);
191static int	em_detach(device_t);
192static int	em_shutdown(device_t);
193static int	em_suspend(device_t);
194static int	em_resume(device_t);
195static void	em_start(struct ifnet *);
196static void	em_start_locked(struct ifnet *, struct tx_ring *);
197#ifdef EM_MULTIQUEUE
198static int	em_mq_start(struct ifnet *, struct mbuf *);
199static int	em_mq_start_locked(struct ifnet *,
200		    struct tx_ring *, struct mbuf *);
201static void	em_qflush(struct ifnet *);
202#endif
203static int	em_ioctl(struct ifnet *, u_long, caddr_t);
204static void	em_init(void *);
205static void	em_init_locked(struct adapter *);
206static void	em_stop(void *);
207static void	em_media_status(struct ifnet *, struct ifmediareq *);
208static int	em_media_change(struct ifnet *);
209static void	em_identify_hardware(struct adapter *);
210static int	em_allocate_pci_resources(struct adapter *);
211static int	em_allocate_legacy(struct adapter *);
212static int	em_allocate_msix(struct adapter *);
213static int	em_allocate_queues(struct adapter *);
214static int	em_setup_msix(struct adapter *);
215static void	em_free_pci_resources(struct adapter *);
216static void	em_local_timer(void *);
217static void	em_reset(struct adapter *);
218static int	em_setup_interface(device_t, struct adapter *);
219
220static void	em_setup_transmit_structures(struct adapter *);
221static void	em_initialize_transmit_unit(struct adapter *);
222static int	em_allocate_transmit_buffers(struct tx_ring *);
223static void	em_free_transmit_structures(struct adapter *);
224static void	em_free_transmit_buffers(struct tx_ring *);
225
226static int	em_setup_receive_structures(struct adapter *);
227static int	em_allocate_receive_buffers(struct rx_ring *);
228static void	em_initialize_receive_unit(struct adapter *);
229static void	em_free_receive_structures(struct adapter *);
230static void	em_free_receive_buffers(struct rx_ring *);
231
232static void	em_enable_intr(struct adapter *);
233static void	em_disable_intr(struct adapter *);
234static void	em_update_stats_counters(struct adapter *);
235static void	em_add_hw_stats(struct adapter *adapter);
236static bool	em_txeof(struct tx_ring *);
237static bool	em_rxeof(struct rx_ring *, int, int *);
238#ifndef __NO_STRICT_ALIGNMENT
239static int	em_fixup_rx(struct rx_ring *);
240#endif
241static void	em_receive_checksum(struct e1000_rx_desc *, struct mbuf *);
242static void	em_transmit_checksum_setup(struct tx_ring *, struct mbuf *, int,
243		    struct ip *, u32 *, u32 *);
244static void	em_tso_setup(struct tx_ring *, struct mbuf *, int, struct ip *,
245		    struct tcphdr *, u32 *, u32 *);
246static void	em_set_promisc(struct adapter *);
247static void	em_disable_promisc(struct adapter *);
248static void	em_set_multi(struct adapter *);
249static void	em_update_link_status(struct adapter *);
250static void	em_refresh_mbufs(struct rx_ring *, int);
251static void	em_register_vlan(void *, struct ifnet *, u16);
252static void	em_unregister_vlan(void *, struct ifnet *, u16);
253static void	em_setup_vlan_hw_support(struct adapter *);
254static int	em_xmit(struct tx_ring *, struct mbuf **);
255static int	em_dma_malloc(struct adapter *, bus_size_t,
256		    struct em_dma_alloc *, int);
257static void	em_dma_free(struct adapter *, struct em_dma_alloc *);
258static int	em_sysctl_nvm_info(SYSCTL_HANDLER_ARGS);
259static void	em_print_nvm_info(struct adapter *);
260static int	em_sysctl_debug_info(SYSCTL_HANDLER_ARGS);
261static void	em_print_debug_info(struct adapter *);
262static int 	em_is_valid_ether_addr(u8 *);
263static int	em_sysctl_int_delay(SYSCTL_HANDLER_ARGS);
264static void	em_add_int_delay_sysctl(struct adapter *, const char *,
265		    const char *, struct em_int_delay_info *, int, int);
266/* Management and WOL Support */
267static void	em_init_manageability(struct adapter *);
268static void	em_release_manageability(struct adapter *);
269static void     em_get_hw_control(struct adapter *);
270static void     em_release_hw_control(struct adapter *);
271static void	em_get_wakeup(device_t);
272static void     em_enable_wakeup(device_t);
273static int	em_enable_phy_wakeup(struct adapter *);
274static void	em_led_func(void *, int);
275static void	em_disable_aspm(struct adapter *);
276
277static int	em_irq_fast(void *);
278
279/* MSIX handlers */
280static void	em_msix_tx(void *);
281static void	em_msix_rx(void *);
282static void	em_msix_link(void *);
283static void	em_handle_tx(void *context, int pending);
284static void	em_handle_rx(void *context, int pending);
285static void	em_handle_link(void *context, int pending);
286
287static void	em_set_sysctl_value(struct adapter *, const char *,
288		    const char *, int *, int);
289
290static __inline void em_rx_discard(struct rx_ring *, int);
291
292#ifdef DEVICE_POLLING
293static poll_handler_t em_poll;
294#endif /* POLLING */
295
296/*********************************************************************
297 *  FreeBSD Device Interface Entry Points
298 *********************************************************************/
299
300static device_method_t em_methods[] = {
301	/* Device interface */
302	DEVMETHOD(device_probe, em_probe),
303	DEVMETHOD(device_attach, em_attach),
304	DEVMETHOD(device_detach, em_detach),
305	DEVMETHOD(device_shutdown, em_shutdown),
306	DEVMETHOD(device_suspend, em_suspend),
307	DEVMETHOD(device_resume, em_resume),
308	{0, 0}
309};
310
311static driver_t em_driver = {
312	"em", em_methods, sizeof(struct adapter),
313};
314
315devclass_t em_devclass;
316DRIVER_MODULE(em, pci, em_driver, em_devclass, 0, 0);
317MODULE_DEPEND(em, pci, 1, 1, 1);
318MODULE_DEPEND(em, ether, 1, 1, 1);
319
320/*********************************************************************
321 *  Tunable default values.
322 *********************************************************************/
323
324#define EM_TICKS_TO_USECS(ticks)	((1024 * (ticks) + 500) / 1000)
325#define EM_USECS_TO_TICKS(usecs)	((1000 * (usecs) + 512) / 1024)
326#define M_TSO_LEN			66
327
328/* Allow common code without TSO */
329#ifndef CSUM_TSO
330#define CSUM_TSO	0
331#endif
332
333SYSCTL_NODE(_hw, OID_AUTO, em, CTLFLAG_RD, 0, "EM driver parameters");
334
335static int em_tx_int_delay_dflt = EM_TICKS_TO_USECS(EM_TIDV);
336static int em_rx_int_delay_dflt = EM_TICKS_TO_USECS(EM_RDTR);
337TUNABLE_INT("hw.em.tx_int_delay", &em_tx_int_delay_dflt);
338TUNABLE_INT("hw.em.rx_int_delay", &em_rx_int_delay_dflt);
339SYSCTL_INT(_hw_em, OID_AUTO, tx_int_delay, CTLFLAG_RDTUN, &em_tx_int_delay_dflt,
340    0, "Default transmit interrupt delay in usecs");
341SYSCTL_INT(_hw_em, OID_AUTO, rx_int_delay, CTLFLAG_RDTUN, &em_rx_int_delay_dflt,
342    0, "Default receive interrupt delay in usecs");
343
344static int em_tx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_TADV);
345static int em_rx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_RADV);
346TUNABLE_INT("hw.em.tx_abs_int_delay", &em_tx_abs_int_delay_dflt);
347TUNABLE_INT("hw.em.rx_abs_int_delay", &em_rx_abs_int_delay_dflt);
348SYSCTL_INT(_hw_em, OID_AUTO, tx_abs_int_delay, CTLFLAG_RDTUN,
349    &em_tx_abs_int_delay_dflt, 0,
350    "Default transmit interrupt delay limit in usecs");
351SYSCTL_INT(_hw_em, OID_AUTO, rx_abs_int_delay, CTLFLAG_RDTUN,
352    &em_rx_abs_int_delay_dflt, 0,
353    "Default receive interrupt delay limit in usecs");
354
355static int em_rxd = EM_DEFAULT_RXD;
356static int em_txd = EM_DEFAULT_TXD;
357TUNABLE_INT("hw.em.rxd", &em_rxd);
358TUNABLE_INT("hw.em.txd", &em_txd);
359SYSCTL_INT(_hw_em, OID_AUTO, rxd, CTLFLAG_RDTUN, &em_rxd, 0,
360    "Number of receive descriptors per queue");
361SYSCTL_INT(_hw_em, OID_AUTO, txd, CTLFLAG_RDTUN, &em_txd, 0,
362    "Number of transmit descriptors per queue");
363
364static int em_smart_pwr_down = FALSE;
365TUNABLE_INT("hw.em.smart_pwr_down", &em_smart_pwr_down);
366SYSCTL_INT(_hw_em, OID_AUTO, smart_pwr_down, CTLFLAG_RDTUN, &em_smart_pwr_down,
367    0, "Set to true to leave smart power down enabled on newer adapters");
368
369/* Controls whether promiscuous also shows bad packets */
370static int em_debug_sbp = FALSE;
371TUNABLE_INT("hw.em.sbp", &em_debug_sbp);
372SYSCTL_INT(_hw_em, OID_AUTO, sbp, CTLFLAG_RDTUN, &em_debug_sbp, 0,
373    "Show bad packets in promiscuous mode");
374
375static int em_enable_msix = TRUE;
376TUNABLE_INT("hw.em.enable_msix", &em_enable_msix);
377SYSCTL_INT(_hw_em, OID_AUTO, enable_msix, CTLFLAG_RDTUN, &em_enable_msix, 0,
378    "Enable MSI-X interrupts");
379
380/* How many packets rxeof tries to clean at a time */
381static int em_rx_process_limit = 100;
382TUNABLE_INT("hw.em.rx_process_limit", &em_rx_process_limit);
383SYSCTL_INT(_hw_em, OID_AUTO, rx_process_limit, CTLFLAG_RDTUN,
384    &em_rx_process_limit, 0,
385    "Maximum number of received packets to process at a time, -1 means unlimited");
386
387/* Flow control setting - default to FULL */
388static int em_fc_setting = e1000_fc_full;
389TUNABLE_INT("hw.em.fc_setting", &em_fc_setting);
390SYSCTL_INT(_hw_em, OID_AUTO, fc_setting, CTLFLAG_RDTUN, &em_fc_setting, 0,
391    "Flow control");
392
393/* Energy efficient ethernet - default to OFF */
394static int eee_setting = 0;
395TUNABLE_INT("hw.em.eee_setting", &eee_setting);
396SYSCTL_INT(_hw_em, OID_AUTO, eee_setting, CTLFLAG_RDTUN, &eee_setting, 0,
397    "Enable Energy Efficient Ethernet");
398
399/* Global used in WOL setup with multiport cards */
400static int global_quad_port_a = 0;
401
402/*********************************************************************
403 *  Device identification routine
404 *
405 *  em_probe determines if the driver should be loaded on
406 *  adapter based on PCI vendor/device id of the adapter.
407 *
408 *  return BUS_PROBE_DEFAULT on success, positive on failure
409 *********************************************************************/
410
411static int
412em_probe(device_t dev)
413{
414	char		adapter_name[60];
415	u16		pci_vendor_id = 0;
416	u16		pci_device_id = 0;
417	u16		pci_subvendor_id = 0;
418	u16		pci_subdevice_id = 0;
419	em_vendor_info_t *ent;
420
421	INIT_DEBUGOUT("em_probe: begin");
422
423	pci_vendor_id = pci_get_vendor(dev);
424	if (pci_vendor_id != EM_VENDOR_ID)
425		return (ENXIO);
426
427	pci_device_id = pci_get_device(dev);
428	pci_subvendor_id = pci_get_subvendor(dev);
429	pci_subdevice_id = pci_get_subdevice(dev);
430
431	ent = em_vendor_info_array;
432	while (ent->vendor_id != 0) {
433		if ((pci_vendor_id == ent->vendor_id) &&
434		    (pci_device_id == ent->device_id) &&
435
436		    ((pci_subvendor_id == ent->subvendor_id) ||
437		    (ent->subvendor_id == PCI_ANY_ID)) &&
438
439		    ((pci_subdevice_id == ent->subdevice_id) ||
440		    (ent->subdevice_id == PCI_ANY_ID))) {
441			sprintf(adapter_name, "%s %s",
442				em_strings[ent->index],
443				em_driver_version);
444			device_set_desc_copy(dev, adapter_name);
445			return (BUS_PROBE_DEFAULT);
446		}
447		ent++;
448	}
449
450	return (ENXIO);
451}
452
453/*********************************************************************
454 *  Device initialization routine
455 *
456 *  The attach entry point is called when the driver is being loaded.
457 *  This routine identifies the type of hardware, allocates all resources
458 *  and initializes the hardware.
459 *
460 *  return 0 on success, positive on failure
461 *********************************************************************/
462
463static int
464em_attach(device_t dev)
465{
466	struct adapter	*adapter;
467	struct e1000_hw	*hw;
468	int		error = 0;
469
470	INIT_DEBUGOUT("em_attach: begin");
471
472	adapter = device_get_softc(dev);
473	adapter->dev = adapter->osdep.dev = dev;
474	hw = &adapter->hw;
475	EM_CORE_LOCK_INIT(adapter, device_get_nameunit(dev));
476
477	/* SYSCTL stuff */
478	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
479	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
480	    OID_AUTO, "nvm", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
481	    em_sysctl_nvm_info, "I", "NVM Information");
482
483	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
484	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
485	    OID_AUTO, "debug", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
486	    em_sysctl_debug_info, "I", "Debug Information");
487
488	callout_init_mtx(&adapter->timer, &adapter->core_mtx, 0);
489
490	/* Determine hardware and mac info */
491	em_identify_hardware(adapter);
492
493	/* Setup PCI resources */
494	if (em_allocate_pci_resources(adapter)) {
495		device_printf(dev, "Allocation of PCI resources failed\n");
496		error = ENXIO;
497		goto err_pci;
498	}
499
500	/*
501	** For ICH8 and family we need to
502	** map the flash memory, and this
503	** must happen after the MAC is
504	** identified
505	*/
506	if ((hw->mac.type == e1000_ich8lan) ||
507	    (hw->mac.type == e1000_ich9lan) ||
508	    (hw->mac.type == e1000_ich10lan) ||
509	    (hw->mac.type == e1000_pchlan) ||
510	    (hw->mac.type == e1000_pch2lan)) {
511		int rid = EM_BAR_TYPE_FLASH;
512		adapter->flash = bus_alloc_resource_any(dev,
513		    SYS_RES_MEMORY, &rid, RF_ACTIVE);
514		if (adapter->flash == NULL) {
515			device_printf(dev, "Mapping of Flash failed\n");
516			error = ENXIO;
517			goto err_pci;
518		}
519		/* This is used in the shared code */
520		hw->flash_address = (u8 *)adapter->flash;
521		adapter->osdep.flash_bus_space_tag =
522		    rman_get_bustag(adapter->flash);
523		adapter->osdep.flash_bus_space_handle =
524		    rman_get_bushandle(adapter->flash);
525	}
526
527	/* Do Shared Code initialization */
528	if (e1000_setup_init_funcs(hw, TRUE)) {
529		device_printf(dev, "Setup of Shared code failed\n");
530		error = ENXIO;
531		goto err_pci;
532	}
533
534	e1000_get_bus_info(hw);
535
536	/* Set up some sysctls for the tunable interrupt delays */
537	em_add_int_delay_sysctl(adapter, "rx_int_delay",
538	    "receive interrupt delay in usecs", &adapter->rx_int_delay,
539	    E1000_REGISTER(hw, E1000_RDTR), em_rx_int_delay_dflt);
540	em_add_int_delay_sysctl(adapter, "tx_int_delay",
541	    "transmit interrupt delay in usecs", &adapter->tx_int_delay,
542	    E1000_REGISTER(hw, E1000_TIDV), em_tx_int_delay_dflt);
543	em_add_int_delay_sysctl(adapter, "rx_abs_int_delay",
544	    "receive interrupt delay limit in usecs",
545	    &adapter->rx_abs_int_delay,
546	    E1000_REGISTER(hw, E1000_RADV),
547	    em_rx_abs_int_delay_dflt);
548	em_add_int_delay_sysctl(adapter, "tx_abs_int_delay",
549	    "transmit interrupt delay limit in usecs",
550	    &adapter->tx_abs_int_delay,
551	    E1000_REGISTER(hw, E1000_TADV),
552	    em_tx_abs_int_delay_dflt);
553
554	/* Sysctl for limiting the amount of work done in the taskqueue */
555	em_set_sysctl_value(adapter, "rx_processing_limit",
556	    "max number of rx packets to process", &adapter->rx_process_limit,
557	    em_rx_process_limit);
558
559	/* Sysctl for setting the interface flow control */
560	em_set_sysctl_value(adapter, "flow_control",
561	    "configure flow control",
562	    &adapter->fc_setting, em_fc_setting);
563
564	/*
565	 * Validate number of transmit and receive descriptors. It
566	 * must not exceed hardware maximum, and must be multiple
567	 * of E1000_DBA_ALIGN.
568	 */
569	if (((em_txd * sizeof(struct e1000_tx_desc)) % EM_DBA_ALIGN) != 0 ||
570	    (em_txd > EM_MAX_TXD) || (em_txd < EM_MIN_TXD)) {
571		device_printf(dev, "Using %d TX descriptors instead of %d!\n",
572		    EM_DEFAULT_TXD, em_txd);
573		adapter->num_tx_desc = EM_DEFAULT_TXD;
574	} else
575		adapter->num_tx_desc = em_txd;
576
577	if (((em_rxd * sizeof(struct e1000_rx_desc)) % EM_DBA_ALIGN) != 0 ||
578	    (em_rxd > EM_MAX_RXD) || (em_rxd < EM_MIN_RXD)) {
579		device_printf(dev, "Using %d RX descriptors instead of %d!\n",
580		    EM_DEFAULT_RXD, em_rxd);
581		adapter->num_rx_desc = EM_DEFAULT_RXD;
582	} else
583		adapter->num_rx_desc = em_rxd;
584
585	hw->mac.autoneg = DO_AUTO_NEG;
586	hw->phy.autoneg_wait_to_complete = FALSE;
587	hw->phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
588
589	/* Copper options */
590	if (hw->phy.media_type == e1000_media_type_copper) {
591		hw->phy.mdix = AUTO_ALL_MODES;
592		hw->phy.disable_polarity_correction = FALSE;
593		hw->phy.ms_type = EM_MASTER_SLAVE;
594	}
595
596	/*
597	 * Set the frame limits assuming
598	 * standard ethernet sized frames.
599	 */
600	adapter->max_frame_size = ETHERMTU + ETHER_HDR_LEN + ETHERNET_FCS_SIZE;
601	adapter->min_frame_size = ETH_ZLEN + ETHERNET_FCS_SIZE;
602
603	/*
604	 * This controls when hardware reports transmit completion
605	 * status.
606	 */
607	hw->mac.report_tx_early = 1;
608
609	/*
610	** Get queue/ring memory
611	*/
612	if (em_allocate_queues(adapter)) {
613		error = ENOMEM;
614		goto err_pci;
615	}
616
617	/* Allocate multicast array memory. */
618	adapter->mta = malloc(sizeof(u8) * ETH_ADDR_LEN *
619	    MAX_NUM_MULTICAST_ADDRESSES, M_DEVBUF, M_NOWAIT);
620	if (adapter->mta == NULL) {
621		device_printf(dev, "Can not allocate multicast setup array\n");
622		error = ENOMEM;
623		goto err_late;
624	}
625
626	/* Check SOL/IDER usage */
627	if (e1000_check_reset_block(hw))
628		device_printf(dev, "PHY reset is blocked"
629		    " due to SOL/IDER session.\n");
630
631	/* Sysctl for setting Energy Efficient Ethernet */
632	em_set_sysctl_value(adapter, "eee_control",
633	    "enable Energy Efficient Ethernet",
634	    &hw->dev_spec.ich8lan.eee_disable, eee_setting);
635
636	/*
637	** Start from a known state, this is
638	** important in reading the nvm and
639	** mac from that.
640	*/
641	e1000_reset_hw(hw);
642
643
644	/* Make sure we have a good EEPROM before we read from it */
645	if (e1000_validate_nvm_checksum(hw) < 0) {
646		/*
647		** Some PCI-E parts fail the first check due to
648		** the link being in sleep state, call it again,
649		** if it fails a second time its a real issue.
650		*/
651		if (e1000_validate_nvm_checksum(hw) < 0) {
652			device_printf(dev,
653			    "The EEPROM Checksum Is Not Valid\n");
654			error = EIO;
655			goto err_late;
656		}
657	}
658
659	/* Copy the permanent MAC address out of the EEPROM */
660	if (e1000_read_mac_addr(hw) < 0) {
661		device_printf(dev, "EEPROM read error while reading MAC"
662		    " address\n");
663		error = EIO;
664		goto err_late;
665	}
666
667	if (!em_is_valid_ether_addr(hw->mac.addr)) {
668		device_printf(dev, "Invalid MAC address\n");
669		error = EIO;
670		goto err_late;
671	}
672
673	/*
674	**  Do interrupt configuration
675	*/
676	if (adapter->msix > 1) /* Do MSIX */
677		error = em_allocate_msix(adapter);
678	else  /* MSI or Legacy */
679		error = em_allocate_legacy(adapter);
680	if (error)
681		goto err_late;
682
683	/*
684	 * Get Wake-on-Lan and Management info for later use
685	 */
686	em_get_wakeup(dev);
687
688	/* Setup OS specific network interface */
689	if (em_setup_interface(dev, adapter) != 0)
690		goto err_late;
691
692	em_reset(adapter);
693
694	/* Initialize statistics */
695	em_update_stats_counters(adapter);
696
697	hw->mac.get_link_status = 1;
698	em_update_link_status(adapter);
699
700	/* Register for VLAN events */
701	adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
702	    em_register_vlan, adapter, EVENTHANDLER_PRI_FIRST);
703	adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
704	    em_unregister_vlan, adapter, EVENTHANDLER_PRI_FIRST);
705
706	em_add_hw_stats(adapter);
707
708	/* Non-AMT based hardware can now take control from firmware */
709	if (adapter->has_manage && !adapter->has_amt)
710		em_get_hw_control(adapter);
711
712	/* Tell the stack that the interface is not active */
713	adapter->ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
714
715	adapter->led_dev = led_create(em_led_func, adapter,
716	    device_get_nameunit(dev));
717
718	INIT_DEBUGOUT("em_attach: end");
719
720	return (0);
721
722err_late:
723	em_free_transmit_structures(adapter);
724	em_free_receive_structures(adapter);
725	em_release_hw_control(adapter);
726	if (adapter->ifp != NULL)
727		if_free(adapter->ifp);
728err_pci:
729	em_free_pci_resources(adapter);
730	free(adapter->mta, M_DEVBUF);
731	EM_CORE_LOCK_DESTROY(adapter);
732
733	return (error);
734}
735
736/*********************************************************************
737 *  Device removal routine
738 *
739 *  The detach entry point is called when the driver is being removed.
740 *  This routine stops the adapter and deallocates all the resources
741 *  that were allocated for driver operation.
742 *
743 *  return 0 on success, positive on failure
744 *********************************************************************/
745
746static int
747em_detach(device_t dev)
748{
749	struct adapter	*adapter = device_get_softc(dev);
750	struct ifnet	*ifp = adapter->ifp;
751
752	INIT_DEBUGOUT("em_detach: begin");
753
754	/* Make sure VLANS are not using driver */
755	if (adapter->ifp->if_vlantrunk != NULL) {
756		device_printf(dev,"Vlan in use, detach first\n");
757		return (EBUSY);
758	}
759
760#ifdef DEVICE_POLLING
761	if (ifp->if_capenable & IFCAP_POLLING)
762		ether_poll_deregister(ifp);
763#endif
764
765	if (adapter->led_dev != NULL)
766		led_destroy(adapter->led_dev);
767
768	EM_CORE_LOCK(adapter);
769	adapter->in_detach = 1;
770	em_stop(adapter);
771	EM_CORE_UNLOCK(adapter);
772	EM_CORE_LOCK_DESTROY(adapter);
773
774	e1000_phy_hw_reset(&adapter->hw);
775
776	em_release_manageability(adapter);
777	em_release_hw_control(adapter);
778
779	/* Unregister VLAN events */
780	if (adapter->vlan_attach != NULL)
781		EVENTHANDLER_DEREGISTER(vlan_config, adapter->vlan_attach);
782	if (adapter->vlan_detach != NULL)
783		EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach);
784
785	ether_ifdetach(adapter->ifp);
786	callout_drain(&adapter->timer);
787
788	em_free_pci_resources(adapter);
789	bus_generic_detach(dev);
790	if_free(ifp);
791
792	em_free_transmit_structures(adapter);
793	em_free_receive_structures(adapter);
794
795	em_release_hw_control(adapter);
796	free(adapter->mta, M_DEVBUF);
797
798	return (0);
799}
800
801/*********************************************************************
802 *
803 *  Shutdown entry point
804 *
805 **********************************************************************/
806
807static int
808em_shutdown(device_t dev)
809{
810	return em_suspend(dev);
811}
812
813/*
814 * Suspend/resume device methods.
815 */
816static int
817em_suspend(device_t dev)
818{
819	struct adapter *adapter = device_get_softc(dev);
820
821	EM_CORE_LOCK(adapter);
822
823        em_release_manageability(adapter);
824	em_release_hw_control(adapter);
825	em_enable_wakeup(dev);
826
827	EM_CORE_UNLOCK(adapter);
828
829	return bus_generic_suspend(dev);
830}
831
832static int
833em_resume(device_t dev)
834{
835	struct adapter *adapter = device_get_softc(dev);
836	struct ifnet *ifp = adapter->ifp;
837
838	EM_CORE_LOCK(adapter);
839	em_init_locked(adapter);
840	em_init_manageability(adapter);
841	EM_CORE_UNLOCK(adapter);
842	em_start(ifp);
843
844	return bus_generic_resume(dev);
845}
846
847
848/*********************************************************************
849 *  Transmit entry point
850 *
851 *  em_start is called by the stack to initiate a transmit.
852 *  The driver will remain in this routine as long as there are
853 *  packets to transmit and transmit resources are available.
854 *  In case resources are not available stack is notified and
855 *  the packet is requeued.
856 **********************************************************************/
857
858#ifdef EM_MULTIQUEUE
859static int
860em_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr, struct mbuf *m)
861{
862	struct adapter  *adapter = txr->adapter;
863        struct mbuf     *next;
864        int             err = 0, enq = 0;
865
866	if ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) !=
867	    IFF_DRV_RUNNING || adapter->link_active == 0) {
868		if (m != NULL)
869			err = drbr_enqueue(ifp, txr->br, m);
870		return (err);
871	}
872
873        /* Call cleanup if number of TX descriptors low */
874	if (txr->tx_avail <= EM_TX_CLEANUP_THRESHOLD)
875		em_txeof(txr);
876
877	enq = 0;
878	if (m == NULL) {
879		next = drbr_dequeue(ifp, txr->br);
880	} else if (drbr_needs_enqueue(ifp, txr->br)) {
881		if ((err = drbr_enqueue(ifp, txr->br, m)) != 0)
882			return (err);
883		next = drbr_dequeue(ifp, txr->br);
884	} else
885		next = m;
886
887	/* Process the queue */
888	while (next != NULL) {
889		if ((err = em_xmit(txr, &next)) != 0) {
890                        if (next != NULL)
891                                err = drbr_enqueue(ifp, txr->br, next);
892                        break;
893		}
894		enq++;
895		drbr_stats_update(ifp, next->m_pkthdr.len, next->m_flags);
896		ETHER_BPF_MTAP(ifp, next);
897		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
898                        break;
899		if (txr->tx_avail < EM_MAX_SCATTER) {
900			ifp->if_drv_flags |= IFF_DRV_OACTIVE;
901			break;
902		}
903		next = drbr_dequeue(ifp, txr->br);
904	}
905
906	if (enq > 0) {
907                /* Set the watchdog */
908                txr->queue_status = EM_QUEUE_WORKING;
909		txr->watchdog_time = ticks;
910	}
911	return (err);
912}
913
914/*
915** Multiqueue capable stack interface
916*/
917static int
918em_mq_start(struct ifnet *ifp, struct mbuf *m)
919{
920	struct adapter	*adapter = ifp->if_softc;
921	struct tx_ring	*txr = adapter->tx_rings;
922	int 		error;
923
924	if (EM_TX_TRYLOCK(txr)) {
925		error = em_mq_start_locked(ifp, txr, m);
926		EM_TX_UNLOCK(txr);
927	} else
928		error = drbr_enqueue(ifp, txr->br, m);
929
930	return (error);
931}
932
933/*
934** Flush all ring buffers
935*/
936static void
937em_qflush(struct ifnet *ifp)
938{
939	struct adapter  *adapter = ifp->if_softc;
940	struct tx_ring  *txr = adapter->tx_rings;
941	struct mbuf     *m;
942
943	for (int i = 0; i < adapter->num_queues; i++, txr++) {
944		EM_TX_LOCK(txr);
945		while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
946			m_freem(m);
947		EM_TX_UNLOCK(txr);
948	}
949	if_qflush(ifp);
950}
951
952#endif /* EM_MULTIQUEUE */
953
954static void
955em_start_locked(struct ifnet *ifp, struct tx_ring *txr)
956{
957	struct adapter	*adapter = ifp->if_softc;
958	struct mbuf	*m_head;
959
960	EM_TX_LOCK_ASSERT(txr);
961
962	if ((ifp->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
963	    IFF_DRV_RUNNING)
964		return;
965
966	if (!adapter->link_active)
967		return;
968
969	while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
970        	/* Call cleanup if number of TX descriptors low */
971		if (txr->tx_avail <= EM_TX_CLEANUP_THRESHOLD)
972			em_txeof(txr);
973		if (txr->tx_avail < EM_MAX_SCATTER) {
974			ifp->if_drv_flags |= IFF_DRV_OACTIVE;
975			break;
976		}
977                IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
978		if (m_head == NULL)
979			break;
980		/*
981		 *  Encapsulation can modify our pointer, and or make it
982		 *  NULL on failure.  In that event, we can't requeue.
983		 */
984		if (em_xmit(txr, &m_head)) {
985			if (m_head == NULL)
986				break;
987			ifp->if_drv_flags |= IFF_DRV_OACTIVE;
988			IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
989			break;
990		}
991
992		/* Send a copy of the frame to the BPF listener */
993		ETHER_BPF_MTAP(ifp, m_head);
994
995		/* Set timeout in case hardware has problems transmitting. */
996		txr->watchdog_time = ticks;
997                txr->queue_status = EM_QUEUE_WORKING;
998	}
999
1000	return;
1001}
1002
1003static void
1004em_start(struct ifnet *ifp)
1005{
1006	struct adapter	*adapter = ifp->if_softc;
1007	struct tx_ring	*txr = adapter->tx_rings;
1008
1009	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1010		EM_TX_LOCK(txr);
1011		em_start_locked(ifp, txr);
1012		EM_TX_UNLOCK(txr);
1013	}
1014	return;
1015}
1016
1017/*********************************************************************
1018 *  Ioctl entry point
1019 *
1020 *  em_ioctl is called when the user wants to configure the
1021 *  interface.
1022 *
1023 *  return 0 on success, positive on failure
1024 **********************************************************************/
1025
1026static int
1027em_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
1028{
1029	struct adapter	*adapter = ifp->if_softc;
1030	struct ifreq *ifr = (struct ifreq *)data;
1031#ifdef INET
1032	struct ifaddr *ifa = (struct ifaddr *)data;
1033#endif
1034	int error = 0;
1035
1036	if (adapter->in_detach)
1037		return (error);
1038
1039	switch (command) {
1040	case SIOCSIFADDR:
1041#ifdef INET
1042		if (ifa->ifa_addr->sa_family == AF_INET) {
1043			/*
1044			 * XXX
1045			 * Since resetting hardware takes a very long time
1046			 * and results in link renegotiation we only
1047			 * initialize the hardware only when it is absolutely
1048			 * required.
1049			 */
1050			ifp->if_flags |= IFF_UP;
1051			if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) {
1052				EM_CORE_LOCK(adapter);
1053				em_init_locked(adapter);
1054				EM_CORE_UNLOCK(adapter);
1055			}
1056			arp_ifinit(ifp, ifa);
1057		} else
1058#endif
1059			error = ether_ioctl(ifp, command, data);
1060		break;
1061	case SIOCSIFMTU:
1062	    {
1063		int max_frame_size;
1064
1065		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)");
1066
1067		EM_CORE_LOCK(adapter);
1068		switch (adapter->hw.mac.type) {
1069		case e1000_82571:
1070		case e1000_82572:
1071		case e1000_ich9lan:
1072		case e1000_ich10lan:
1073		case e1000_pch2lan:
1074		case e1000_82574:
1075		case e1000_80003es2lan:	/* 9K Jumbo Frame size */
1076			max_frame_size = 9234;
1077			break;
1078		case e1000_pchlan:
1079			max_frame_size = 4096;
1080			break;
1081			/* Adapters that do not support jumbo frames */
1082		case e1000_82583:
1083		case e1000_ich8lan:
1084			max_frame_size = ETHER_MAX_LEN;
1085			break;
1086		default:
1087			max_frame_size = MAX_JUMBO_FRAME_SIZE;
1088		}
1089		if (ifr->ifr_mtu > max_frame_size - ETHER_HDR_LEN -
1090		    ETHER_CRC_LEN) {
1091			EM_CORE_UNLOCK(adapter);
1092			error = EINVAL;
1093			break;
1094		}
1095
1096		ifp->if_mtu = ifr->ifr_mtu;
1097		adapter->max_frame_size =
1098		    ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
1099		em_init_locked(adapter);
1100		EM_CORE_UNLOCK(adapter);
1101		break;
1102	    }
1103	case SIOCSIFFLAGS:
1104		IOCTL_DEBUGOUT("ioctl rcv'd:\
1105		    SIOCSIFFLAGS (Set Interface Flags)");
1106		EM_CORE_LOCK(adapter);
1107		if (ifp->if_flags & IFF_UP) {
1108			if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) {
1109				if ((ifp->if_flags ^ adapter->if_flags) &
1110				    (IFF_PROMISC | IFF_ALLMULTI)) {
1111					em_disable_promisc(adapter);
1112					em_set_promisc(adapter);
1113				}
1114			} else
1115				em_init_locked(adapter);
1116		} else
1117			if (ifp->if_drv_flags & IFF_DRV_RUNNING)
1118				em_stop(adapter);
1119		adapter->if_flags = ifp->if_flags;
1120		EM_CORE_UNLOCK(adapter);
1121		break;
1122	case SIOCADDMULTI:
1123	case SIOCDELMULTI:
1124		IOCTL_DEBUGOUT("ioctl rcv'd: SIOC(ADD|DEL)MULTI");
1125		if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1126			EM_CORE_LOCK(adapter);
1127			em_disable_intr(adapter);
1128			em_set_multi(adapter);
1129#ifdef DEVICE_POLLING
1130			if (!(ifp->if_capenable & IFCAP_POLLING))
1131#endif
1132				em_enable_intr(adapter);
1133			EM_CORE_UNLOCK(adapter);
1134		}
1135		break;
1136	case SIOCSIFMEDIA:
1137		/*
1138		** As the speed/duplex settings are being
1139		** changed, we need to reset the PHY.
1140		*/
1141		adapter->hw.phy.reset_disable = FALSE;
1142		/* Check SOL/IDER usage */
1143		EM_CORE_LOCK(adapter);
1144		if (e1000_check_reset_block(&adapter->hw)) {
1145			EM_CORE_UNLOCK(adapter);
1146			device_printf(adapter->dev, "Media change is"
1147			    " blocked due to SOL/IDER session.\n");
1148			break;
1149		}
1150		EM_CORE_UNLOCK(adapter);
1151		/* falls thru */
1152	case SIOCGIFMEDIA:
1153		IOCTL_DEBUGOUT("ioctl rcv'd: \
1154		    SIOCxIFMEDIA (Get/Set Interface Media)");
1155		error = ifmedia_ioctl(ifp, ifr, &adapter->media, command);
1156		break;
1157	case SIOCSIFCAP:
1158	    {
1159		int mask, reinit;
1160
1161		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFCAP (Set Capabilities)");
1162		reinit = 0;
1163		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
1164#ifdef DEVICE_POLLING
1165		if (mask & IFCAP_POLLING) {
1166			if (ifr->ifr_reqcap & IFCAP_POLLING) {
1167				error = ether_poll_register(em_poll, ifp);
1168				if (error)
1169					return (error);
1170				EM_CORE_LOCK(adapter);
1171				em_disable_intr(adapter);
1172				ifp->if_capenable |= IFCAP_POLLING;
1173				EM_CORE_UNLOCK(adapter);
1174			} else {
1175				error = ether_poll_deregister(ifp);
1176				/* Enable interrupt even in error case */
1177				EM_CORE_LOCK(adapter);
1178				em_enable_intr(adapter);
1179				ifp->if_capenable &= ~IFCAP_POLLING;
1180				EM_CORE_UNLOCK(adapter);
1181			}
1182		}
1183#endif
1184		if (mask & IFCAP_HWCSUM) {
1185			ifp->if_capenable ^= IFCAP_HWCSUM;
1186			reinit = 1;
1187		}
1188		if (mask & IFCAP_TSO4) {
1189			ifp->if_capenable ^= IFCAP_TSO4;
1190			reinit = 1;
1191		}
1192		if (mask & IFCAP_VLAN_HWTAGGING) {
1193			ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
1194			reinit = 1;
1195		}
1196		if (mask & IFCAP_VLAN_HWFILTER) {
1197			ifp->if_capenable ^= IFCAP_VLAN_HWFILTER;
1198			reinit = 1;
1199		}
1200		if ((mask & IFCAP_WOL) &&
1201		    (ifp->if_capabilities & IFCAP_WOL) != 0) {
1202			if (mask & IFCAP_WOL_MCAST)
1203				ifp->if_capenable ^= IFCAP_WOL_MCAST;
1204			if (mask & IFCAP_WOL_MAGIC)
1205				ifp->if_capenable ^= IFCAP_WOL_MAGIC;
1206		}
1207		if (reinit && (ifp->if_drv_flags & IFF_DRV_RUNNING))
1208			em_init(adapter);
1209		VLAN_CAPABILITIES(ifp);
1210		break;
1211	    }
1212
1213	default:
1214		error = ether_ioctl(ifp, command, data);
1215		break;
1216	}
1217
1218	return (error);
1219}
1220
1221
1222/*********************************************************************
1223 *  Init entry point
1224 *
1225 *  This routine is used in two ways. It is used by the stack as
1226 *  init entry point in network interface structure. It is also used
1227 *  by the driver as a hw/sw initialization routine to get to a
1228 *  consistent state.
1229 *
1230 *  return 0 on success, positive on failure
1231 **********************************************************************/
1232
1233static void
1234em_init_locked(struct adapter *adapter)
1235{
1236	struct ifnet	*ifp = adapter->ifp;
1237	device_t	dev = adapter->dev;
1238	u32		pba;
1239
1240	INIT_DEBUGOUT("em_init: begin");
1241
1242	EM_CORE_LOCK_ASSERT(adapter);
1243
1244	em_disable_intr(adapter);
1245	callout_stop(&adapter->timer);
1246
1247	/*
1248	 * Packet Buffer Allocation (PBA)
1249	 * Writing PBA sets the receive portion of the buffer
1250	 * the remainder is used for the transmit buffer.
1251	 */
1252	switch (adapter->hw.mac.type) {
1253	/* Total Packet Buffer on these is 48K */
1254	case e1000_82571:
1255	case e1000_82572:
1256	case e1000_80003es2lan:
1257			pba = E1000_PBA_32K; /* 32K for Rx, 16K for Tx */
1258		break;
1259	case e1000_82573: /* 82573: Total Packet Buffer is 32K */
1260			pba = E1000_PBA_12K; /* 12K for Rx, 20K for Tx */
1261		break;
1262	case e1000_82574:
1263	case e1000_82583:
1264			pba = E1000_PBA_20K; /* 20K for Rx, 20K for Tx */
1265		break;
1266	case e1000_ich8lan:
1267		pba = E1000_PBA_8K;
1268		break;
1269	case e1000_ich9lan:
1270	case e1000_ich10lan:
1271		pba = E1000_PBA_10K;
1272		break;
1273	case e1000_pchlan:
1274	case e1000_pch2lan:
1275		pba = E1000_PBA_26K;
1276		break;
1277	default:
1278		if (adapter->max_frame_size > 8192)
1279			pba = E1000_PBA_40K; /* 40K for Rx, 24K for Tx */
1280		else
1281			pba = E1000_PBA_48K; /* 48K for Rx, 16K for Tx */
1282	}
1283
1284	INIT_DEBUGOUT1("em_init: pba=%dK",pba);
1285	E1000_WRITE_REG(&adapter->hw, E1000_PBA, pba);
1286
1287	/* Get the latest mac address, User can use a LAA */
1288        bcopy(IF_LLADDR(adapter->ifp), adapter->hw.mac.addr,
1289              ETHER_ADDR_LEN);
1290
1291	/* Put the address into the Receive Address Array */
1292	e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);
1293
1294	/*
1295	 * With the 82571 adapter, RAR[0] may be overwritten
1296	 * when the other port is reset, we make a duplicate
1297	 * in RAR[14] for that eventuality, this assures
1298	 * the interface continues to function.
1299	 */
1300	if (adapter->hw.mac.type == e1000_82571) {
1301		e1000_set_laa_state_82571(&adapter->hw, TRUE);
1302		e1000_rar_set(&adapter->hw, adapter->hw.mac.addr,
1303		    E1000_RAR_ENTRIES - 1);
1304	}
1305
1306	/* Initialize the hardware */
1307	em_reset(adapter);
1308	em_update_link_status(adapter);
1309
1310	/* Setup VLAN support, basic and offload if available */
1311	E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);
1312
1313	/* Set hardware offload abilities */
1314	ifp->if_hwassist = 0;
1315	if (ifp->if_capenable & IFCAP_TXCSUM)
1316		ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
1317	if (ifp->if_capenable & IFCAP_TSO4)
1318		ifp->if_hwassist |= CSUM_TSO;
1319
1320	/* Configure for OS presence */
1321	em_init_manageability(adapter);
1322
1323	/* Prepare transmit descriptors and buffers */
1324	em_setup_transmit_structures(adapter);
1325	em_initialize_transmit_unit(adapter);
1326
1327	/* Setup Multicast table */
1328	em_set_multi(adapter);
1329
1330	/*
1331	** Figure out the desired mbuf
1332	** pool for doing jumbos
1333	*/
1334	if (adapter->max_frame_size <= 2048)
1335		adapter->rx_mbuf_sz = MCLBYTES;
1336	else if (adapter->max_frame_size <= 4096)
1337		adapter->rx_mbuf_sz = MJUMPAGESIZE;
1338	else
1339		adapter->rx_mbuf_sz = MJUM9BYTES;
1340
1341	/* Prepare receive descriptors and buffers */
1342	if (em_setup_receive_structures(adapter)) {
1343		device_printf(dev, "Could not setup receive structures\n");
1344		em_stop(adapter);
1345		return;
1346	}
1347	em_initialize_receive_unit(adapter);
1348
1349	/* Use real VLAN Filter support? */
1350	if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) {
1351		if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
1352			/* Use real VLAN Filter support */
1353			em_setup_vlan_hw_support(adapter);
1354		else {
1355			u32 ctrl;
1356			ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
1357			ctrl |= E1000_CTRL_VME;
1358			E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
1359		}
1360	}
1361
1362	/* Don't lose promiscuous settings */
1363	em_set_promisc(adapter);
1364
1365	ifp->if_drv_flags |= IFF_DRV_RUNNING;
1366	ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
1367
1368	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
1369	e1000_clear_hw_cntrs_base_generic(&adapter->hw);
1370
1371	/* MSI/X configuration for 82574 */
1372	if (adapter->hw.mac.type == e1000_82574) {
1373		int tmp;
1374		tmp = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
1375		tmp |= E1000_CTRL_EXT_PBA_CLR;
1376		E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, tmp);
1377		/* Set the IVAR - interrupt vector routing. */
1378		E1000_WRITE_REG(&adapter->hw, E1000_IVAR, adapter->ivars);
1379	}
1380
1381#ifdef DEVICE_POLLING
1382	/*
1383	 * Only enable interrupts if we are not polling, make sure
1384	 * they are off otherwise.
1385	 */
1386	if (ifp->if_capenable & IFCAP_POLLING)
1387		em_disable_intr(adapter);
1388	else
1389#endif /* DEVICE_POLLING */
1390		em_enable_intr(adapter);
1391
1392	/* AMT based hardware can now take control from firmware */
1393	if (adapter->has_manage && adapter->has_amt)
1394		em_get_hw_control(adapter);
1395
1396	/* Don't reset the phy next time init gets called */
1397	adapter->hw.phy.reset_disable = TRUE;
1398}
1399
1400static void
1401em_init(void *arg)
1402{
1403	struct adapter *adapter = arg;
1404
1405	EM_CORE_LOCK(adapter);
1406	em_init_locked(adapter);
1407	EM_CORE_UNLOCK(adapter);
1408}
1409
1410
1411#ifdef DEVICE_POLLING
1412/*********************************************************************
1413 *
1414 *  Legacy polling routine: note this only works with single queue
1415 *
1416 *********************************************************************/
1417static int
1418em_poll(struct ifnet *ifp, enum poll_cmd cmd, int count)
1419{
1420	struct adapter *adapter = ifp->if_softc;
1421	struct tx_ring	*txr = adapter->tx_rings;
1422	struct rx_ring	*rxr = adapter->rx_rings;
1423	u32		reg_icr;
1424	int		rx_done;
1425
1426	EM_CORE_LOCK(adapter);
1427	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
1428		EM_CORE_UNLOCK(adapter);
1429		return (0);
1430	}
1431
1432	if (cmd == POLL_AND_CHECK_STATUS) {
1433		reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1434		if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
1435			callout_stop(&adapter->timer);
1436			adapter->hw.mac.get_link_status = 1;
1437			em_update_link_status(adapter);
1438			callout_reset(&adapter->timer, hz,
1439			    em_local_timer, adapter);
1440		}
1441	}
1442	EM_CORE_UNLOCK(adapter);
1443
1444	em_rxeof(rxr, count, &rx_done);
1445
1446	EM_TX_LOCK(txr);
1447	em_txeof(txr);
1448#ifdef EM_MULTIQUEUE
1449	if (!drbr_empty(ifp, txr->br))
1450		em_mq_start_locked(ifp, txr, NULL);
1451#else
1452	em_start_locked(ifp, txr);
1453#endif
1454	EM_TX_UNLOCK(txr);
1455
1456	return (rx_done);
1457}
1458#endif /* DEVICE_POLLING */
1459
1460
1461/*********************************************************************
1462 *
1463 *  Fast Legacy/MSI Combined Interrupt Service routine
1464 *
1465 *********************************************************************/
1466static int
1467em_irq_fast(void *arg)
1468{
1469	struct adapter	*adapter = arg;
1470	struct ifnet	*ifp;
1471	u32		reg_icr;
1472
1473	ifp = adapter->ifp;
1474
1475	reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1476
1477	/* Hot eject?  */
1478	if (reg_icr == 0xffffffff)
1479		return FILTER_STRAY;
1480
1481	/* Definitely not our interrupt.  */
1482	if (reg_icr == 0x0)
1483		return FILTER_STRAY;
1484
1485	/*
1486	 * Starting with the 82571 chip, bit 31 should be used to
1487	 * determine whether the interrupt belongs to us.
1488	 */
1489	if (adapter->hw.mac.type >= e1000_82571 &&
1490	    (reg_icr & E1000_ICR_INT_ASSERTED) == 0)
1491		return FILTER_STRAY;
1492
1493	em_disable_intr(adapter);
1494	taskqueue_enqueue(adapter->tq, &adapter->que_task);
1495
1496	/* Link status change */
1497	if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
1498		adapter->hw.mac.get_link_status = 1;
1499		taskqueue_enqueue(taskqueue_fast, &adapter->link_task);
1500	}
1501
1502	if (reg_icr & E1000_ICR_RXO)
1503		adapter->rx_overruns++;
1504	return FILTER_HANDLED;
1505}
1506
1507/* Combined RX/TX handler, used by Legacy and MSI */
1508static void
1509em_handle_que(void *context, int pending)
1510{
1511	struct adapter	*adapter = context;
1512	struct ifnet	*ifp = adapter->ifp;
1513	struct tx_ring	*txr = adapter->tx_rings;
1514	struct rx_ring	*rxr = adapter->rx_rings;
1515
1516
1517	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1518		bool more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
1519		EM_TX_LOCK(txr);
1520		em_txeof(txr);
1521#ifdef EM_MULTIQUEUE
1522		if (!drbr_empty(ifp, txr->br))
1523			em_mq_start_locked(ifp, txr, NULL);
1524#else
1525		em_start_locked(ifp, txr);
1526#endif
1527		EM_TX_UNLOCK(txr);
1528		if (more || (ifp->if_drv_flags & IFF_DRV_OACTIVE)) {
1529			taskqueue_enqueue(adapter->tq, &adapter->que_task);
1530			return;
1531		}
1532	}
1533
1534	em_enable_intr(adapter);
1535	return;
1536}
1537
1538
1539/*********************************************************************
1540 *
1541 *  MSIX Interrupt Service Routines
1542 *
1543 **********************************************************************/
1544static void
1545em_msix_tx(void *arg)
1546{
1547	struct tx_ring *txr = arg;
1548	struct adapter *adapter = txr->adapter;
1549	bool		more;
1550
1551	++txr->tx_irq;
1552	EM_TX_LOCK(txr);
1553	more = em_txeof(txr);
1554	EM_TX_UNLOCK(txr);
1555	if (more)
1556		taskqueue_enqueue(txr->tq, &txr->tx_task);
1557	else
1558		/* Reenable this interrupt */
1559		E1000_WRITE_REG(&adapter->hw, E1000_IMS, txr->ims);
1560	return;
1561}
1562
1563/*********************************************************************
1564 *
1565 *  MSIX RX Interrupt Service routine
1566 *
1567 **********************************************************************/
1568
1569static void
1570em_msix_rx(void *arg)
1571{
1572	struct rx_ring	*rxr = arg;
1573	struct adapter	*adapter = rxr->adapter;
1574	bool		more;
1575
1576	++rxr->rx_irq;
1577	more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
1578	if (more)
1579		taskqueue_enqueue(rxr->tq, &rxr->rx_task);
1580	else
1581		/* Reenable this interrupt */
1582		E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxr->ims);
1583	return;
1584}
1585
1586/*********************************************************************
1587 *
1588 *  MSIX Link Fast Interrupt Service routine
1589 *
1590 **********************************************************************/
1591static void
1592em_msix_link(void *arg)
1593{
1594	struct adapter	*adapter = arg;
1595	u32		reg_icr;
1596
1597	++adapter->link_irq;
1598	reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1599
1600	if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
1601		adapter->hw.mac.get_link_status = 1;
1602		em_handle_link(adapter, 0);
1603	} else
1604		E1000_WRITE_REG(&adapter->hw, E1000_IMS,
1605		    EM_MSIX_LINK | E1000_IMS_LSC);
1606	return;
1607}
1608
1609static void
1610em_handle_rx(void *context, int pending)
1611{
1612	struct rx_ring	*rxr = context;
1613	struct adapter	*adapter = rxr->adapter;
1614        bool            more;
1615
1616	more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
1617	if (more)
1618		taskqueue_enqueue(rxr->tq, &rxr->rx_task);
1619	else
1620		/* Reenable this interrupt */
1621		E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxr->ims);
1622}
1623
1624static void
1625em_handle_tx(void *context, int pending)
1626{
1627	struct tx_ring	*txr = context;
1628	struct adapter	*adapter = txr->adapter;
1629	struct ifnet	*ifp = adapter->ifp;
1630
1631	EM_TX_LOCK(txr);
1632	em_txeof(txr);
1633#ifdef EM_MULTIQUEUE
1634	if (!drbr_empty(ifp, txr->br))
1635		em_mq_start_locked(ifp, txr, NULL);
1636#else
1637	em_start_locked(ifp, txr);
1638#endif
1639	E1000_WRITE_REG(&adapter->hw, E1000_IMS, txr->ims);
1640	EM_TX_UNLOCK(txr);
1641}
1642
1643static void
1644em_handle_link(void *context, int pending)
1645{
1646	struct adapter	*adapter = context;
1647	struct ifnet *ifp = adapter->ifp;
1648
1649	if (!(ifp->if_drv_flags & IFF_DRV_RUNNING))
1650		return;
1651
1652	EM_CORE_LOCK(adapter);
1653	callout_stop(&adapter->timer);
1654	em_update_link_status(adapter);
1655	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
1656	E1000_WRITE_REG(&adapter->hw, E1000_IMS,
1657	    EM_MSIX_LINK | E1000_IMS_LSC);
1658	EM_CORE_UNLOCK(adapter);
1659}
1660
1661
1662/*********************************************************************
1663 *
1664 *  Media Ioctl callback
1665 *
1666 *  This routine is called whenever the user queries the status of
1667 *  the interface using ifconfig.
1668 *
1669 **********************************************************************/
1670static void
1671em_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
1672{
1673	struct adapter *adapter = ifp->if_softc;
1674	u_char fiber_type = IFM_1000_SX;
1675
1676	INIT_DEBUGOUT("em_media_status: begin");
1677
1678	EM_CORE_LOCK(adapter);
1679	em_update_link_status(adapter);
1680
1681	ifmr->ifm_status = IFM_AVALID;
1682	ifmr->ifm_active = IFM_ETHER;
1683
1684	if (!adapter->link_active) {
1685		EM_CORE_UNLOCK(adapter);
1686		return;
1687	}
1688
1689	ifmr->ifm_status |= IFM_ACTIVE;
1690
1691	if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
1692	    (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
1693		ifmr->ifm_active |= fiber_type | IFM_FDX;
1694	} else {
1695		switch (adapter->link_speed) {
1696		case 10:
1697			ifmr->ifm_active |= IFM_10_T;
1698			break;
1699		case 100:
1700			ifmr->ifm_active |= IFM_100_TX;
1701			break;
1702		case 1000:
1703			ifmr->ifm_active |= IFM_1000_T;
1704			break;
1705		}
1706		if (adapter->link_duplex == FULL_DUPLEX)
1707			ifmr->ifm_active |= IFM_FDX;
1708		else
1709			ifmr->ifm_active |= IFM_HDX;
1710	}
1711	EM_CORE_UNLOCK(adapter);
1712}
1713
1714/*********************************************************************
1715 *
1716 *  Media Ioctl callback
1717 *
1718 *  This routine is called when the user changes speed/duplex using
1719 *  media/mediopt option with ifconfig.
1720 *
1721 **********************************************************************/
1722static int
1723em_media_change(struct ifnet *ifp)
1724{
1725	struct adapter *adapter = ifp->if_softc;
1726	struct ifmedia  *ifm = &adapter->media;
1727
1728	INIT_DEBUGOUT("em_media_change: begin");
1729
1730	if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
1731		return (EINVAL);
1732
1733	EM_CORE_LOCK(adapter);
1734	switch (IFM_SUBTYPE(ifm->ifm_media)) {
1735	case IFM_AUTO:
1736		adapter->hw.mac.autoneg = DO_AUTO_NEG;
1737		adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
1738		break;
1739	case IFM_1000_LX:
1740	case IFM_1000_SX:
1741	case IFM_1000_T:
1742		adapter->hw.mac.autoneg = DO_AUTO_NEG;
1743		adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
1744		break;
1745	case IFM_100_TX:
1746		adapter->hw.mac.autoneg = FALSE;
1747		adapter->hw.phy.autoneg_advertised = 0;
1748		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1749			adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_FULL;
1750		else
1751			adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_HALF;
1752		break;
1753	case IFM_10_T:
1754		adapter->hw.mac.autoneg = FALSE;
1755		adapter->hw.phy.autoneg_advertised = 0;
1756		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1757			adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_FULL;
1758		else
1759			adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_HALF;
1760		break;
1761	default:
1762		device_printf(adapter->dev, "Unsupported media type\n");
1763	}
1764
1765	em_init_locked(adapter);
1766	EM_CORE_UNLOCK(adapter);
1767
1768	return (0);
1769}
1770
1771/*********************************************************************
1772 *
1773 *  This routine maps the mbufs to tx descriptors.
1774 *
1775 *  return 0 on success, positive on failure
1776 **********************************************************************/
1777
1778static int
1779em_xmit(struct tx_ring *txr, struct mbuf **m_headp)
1780{
1781	struct adapter		*adapter = txr->adapter;
1782	bus_dma_segment_t	segs[EM_MAX_SCATTER];
1783	bus_dmamap_t		map;
1784	struct em_buffer	*tx_buffer, *tx_buffer_mapped;
1785	struct e1000_tx_desc	*ctxd = NULL;
1786	struct mbuf		*m_head;
1787	struct ether_header	*eh;
1788	struct ip		*ip = NULL;
1789	struct tcphdr		*tp = NULL;
1790	u32			txd_upper, txd_lower, txd_used, txd_saved;
1791	int			ip_off, poff;
1792	int			nsegs, i, j, first, last = 0;
1793	int			error, do_tso, tso_desc = 0, remap = 1;
1794
1795retry:
1796	m_head = *m_headp;
1797	txd_upper = txd_lower = txd_used = txd_saved = 0;
1798	do_tso = ((m_head->m_pkthdr.csum_flags & CSUM_TSO) != 0);
1799	ip_off = poff = 0;
1800
1801	/*
1802	 * Intel recommends entire IP/TCP header length reside in a single
1803	 * buffer. If multiple descriptors are used to describe the IP and
1804	 * TCP header, each descriptor should describe one or more
1805	 * complete headers; descriptors referencing only parts of headers
1806	 * are not supported. If all layer headers are not coalesced into
1807	 * a single buffer, each buffer should not cross a 4KB boundary,
1808	 * or be larger than the maximum read request size.
1809	 * Controller also requires modifing IP/TCP header to make TSO work
1810	 * so we firstly get a writable mbuf chain then coalesce ethernet/
1811	 * IP/TCP header into a single buffer to meet the requirement of
1812	 * controller. This also simplifies IP/TCP/UDP checksum offloading
1813	 * which also has similiar restrictions.
1814	 */
1815	if (do_tso || m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD) {
1816		if (do_tso || (m_head->m_next != NULL &&
1817		    m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD)) {
1818			if (M_WRITABLE(*m_headp) == 0) {
1819				m_head = m_dup(*m_headp, M_DONTWAIT);
1820				m_freem(*m_headp);
1821				if (m_head == NULL) {
1822					*m_headp = NULL;
1823					return (ENOBUFS);
1824				}
1825				*m_headp = m_head;
1826			}
1827		}
1828		/*
1829		 * XXX
1830		 * Assume IPv4, we don't have TSO/checksum offload support
1831		 * for IPv6 yet.
1832		 */
1833		ip_off = sizeof(struct ether_header);
1834		m_head = m_pullup(m_head, ip_off);
1835		if (m_head == NULL) {
1836			*m_headp = NULL;
1837			return (ENOBUFS);
1838		}
1839		eh = mtod(m_head, struct ether_header *);
1840		if (eh->ether_type == htons(ETHERTYPE_VLAN)) {
1841			ip_off = sizeof(struct ether_vlan_header);
1842			m_head = m_pullup(m_head, ip_off);
1843			if (m_head == NULL) {
1844				*m_headp = NULL;
1845				return (ENOBUFS);
1846			}
1847		}
1848		m_head = m_pullup(m_head, ip_off + sizeof(struct ip));
1849		if (m_head == NULL) {
1850			*m_headp = NULL;
1851			return (ENOBUFS);
1852		}
1853		ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1854		poff = ip_off + (ip->ip_hl << 2);
1855		if (do_tso) {
1856			m_head = m_pullup(m_head, poff + sizeof(struct tcphdr));
1857			if (m_head == NULL) {
1858				*m_headp = NULL;
1859				return (ENOBUFS);
1860			}
1861			tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1862			/*
1863			 * TSO workaround:
1864			 *   pull 4 more bytes of data into it.
1865			 */
1866			m_head = m_pullup(m_head, poff + (tp->th_off << 2) + 4);
1867			if (m_head == NULL) {
1868				*m_headp = NULL;
1869				return (ENOBUFS);
1870			}
1871			ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1872			ip->ip_len = 0;
1873			ip->ip_sum = 0;
1874			/*
1875			 * The pseudo TCP checksum does not include TCP payload
1876			 * length so driver should recompute the checksum here
1877			 * what hardware expect to see. This is adherence of
1878			 * Microsoft's Large Send specification.
1879			 */
1880			tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1881			tp->th_sum = in_pseudo(ip->ip_src.s_addr,
1882			    ip->ip_dst.s_addr, htons(IPPROTO_TCP));
1883		} else if (m_head->m_pkthdr.csum_flags & CSUM_TCP) {
1884			m_head = m_pullup(m_head, poff + sizeof(struct tcphdr));
1885			if (m_head == NULL) {
1886				*m_headp = NULL;
1887				return (ENOBUFS);
1888			}
1889			tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1890			m_head = m_pullup(m_head, poff + (tp->th_off << 2));
1891			if (m_head == NULL) {
1892				*m_headp = NULL;
1893				return (ENOBUFS);
1894			}
1895			ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1896			tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1897		} else if (m_head->m_pkthdr.csum_flags & CSUM_UDP) {
1898			m_head = m_pullup(m_head, poff + sizeof(struct udphdr));
1899			if (m_head == NULL) {
1900				*m_headp = NULL;
1901				return (ENOBUFS);
1902			}
1903			ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1904		}
1905		*m_headp = m_head;
1906	}
1907
1908	/*
1909	 * Map the packet for DMA
1910	 *
1911	 * Capture the first descriptor index,
1912	 * this descriptor will have the index
1913	 * of the EOP which is the only one that
1914	 * now gets a DONE bit writeback.
1915	 */
1916	first = txr->next_avail_desc;
1917	tx_buffer = &txr->tx_buffers[first];
1918	tx_buffer_mapped = tx_buffer;
1919	map = tx_buffer->map;
1920
1921	error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
1922	    *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
1923
1924	/*
1925	 * There are two types of errors we can (try) to handle:
1926	 * - EFBIG means the mbuf chain was too long and bus_dma ran
1927	 *   out of segments.  Defragment the mbuf chain and try again.
1928	 * - ENOMEM means bus_dma could not obtain enough bounce buffers
1929	 *   at this point in time.  Defer sending and try again later.
1930	 * All other errors, in particular EINVAL, are fatal and prevent the
1931	 * mbuf chain from ever going through.  Drop it and report error.
1932	 */
1933	if (error == EFBIG && remap) {
1934		struct mbuf *m;
1935
1936		m = m_defrag(*m_headp, M_DONTWAIT);
1937		if (m == NULL) {
1938			adapter->mbuf_alloc_failed++;
1939			m_freem(*m_headp);
1940			*m_headp = NULL;
1941			return (ENOBUFS);
1942		}
1943		*m_headp = m;
1944
1945		/* Try it again, but only once */
1946		remap = 0;
1947		goto retry;
1948	} else if (error == ENOMEM) {
1949		adapter->no_tx_dma_setup++;
1950		return (error);
1951	} else if (error != 0) {
1952		adapter->no_tx_dma_setup++;
1953		m_freem(*m_headp);
1954		*m_headp = NULL;
1955		return (error);
1956	}
1957
1958	/*
1959	 * TSO Hardware workaround, if this packet is not
1960	 * TSO, and is only a single descriptor long, and
1961	 * it follows a TSO burst, then we need to add a
1962	 * sentinel descriptor to prevent premature writeback.
1963	 */
1964	if ((do_tso == 0) && (txr->tx_tso == TRUE)) {
1965		if (nsegs == 1)
1966			tso_desc = TRUE;
1967		txr->tx_tso = FALSE;
1968	}
1969
1970        if (nsegs > (txr->tx_avail - 2)) {
1971                txr->no_desc_avail++;
1972		bus_dmamap_unload(txr->txtag, map);
1973		return (ENOBUFS);
1974        }
1975	m_head = *m_headp;
1976
1977	/* Do hardware assists */
1978	if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
1979		em_tso_setup(txr, m_head, ip_off, ip, tp,
1980		    &txd_upper, &txd_lower);
1981		/* we need to make a final sentinel transmit desc */
1982		tso_desc = TRUE;
1983	} else if (m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD)
1984		em_transmit_checksum_setup(txr, m_head,
1985		    ip_off, ip, &txd_upper, &txd_lower);
1986
1987	i = txr->next_avail_desc;
1988
1989	/* Set up our transmit descriptors */
1990	for (j = 0; j < nsegs; j++) {
1991		bus_size_t seg_len;
1992		bus_addr_t seg_addr;
1993
1994		tx_buffer = &txr->tx_buffers[i];
1995		ctxd = &txr->tx_base[i];
1996		seg_addr = segs[j].ds_addr;
1997		seg_len  = segs[j].ds_len;
1998		/*
1999		** TSO Workaround:
2000		** If this is the last descriptor, we want to
2001		** split it so we have a small final sentinel
2002		*/
2003		if (tso_desc && (j == (nsegs -1)) && (seg_len > 8)) {
2004			seg_len -= 4;
2005			ctxd->buffer_addr = htole64(seg_addr);
2006			ctxd->lower.data = htole32(
2007			adapter->txd_cmd | txd_lower | seg_len);
2008			ctxd->upper.data =
2009			    htole32(txd_upper);
2010			if (++i == adapter->num_tx_desc)
2011				i = 0;
2012			/* Now make the sentinel */
2013			++txd_used; /* using an extra txd */
2014			ctxd = &txr->tx_base[i];
2015			tx_buffer = &txr->tx_buffers[i];
2016			ctxd->buffer_addr =
2017			    htole64(seg_addr + seg_len);
2018			ctxd->lower.data = htole32(
2019			adapter->txd_cmd | txd_lower | 4);
2020			ctxd->upper.data =
2021			    htole32(txd_upper);
2022			last = i;
2023			if (++i == adapter->num_tx_desc)
2024				i = 0;
2025		} else {
2026			ctxd->buffer_addr = htole64(seg_addr);
2027			ctxd->lower.data = htole32(
2028			adapter->txd_cmd | txd_lower | seg_len);
2029			ctxd->upper.data =
2030			    htole32(txd_upper);
2031			last = i;
2032			if (++i == adapter->num_tx_desc)
2033				i = 0;
2034		}
2035		tx_buffer->m_head = NULL;
2036		tx_buffer->next_eop = -1;
2037	}
2038
2039	txr->next_avail_desc = i;
2040	txr->tx_avail -= nsegs;
2041	if (tso_desc) /* TSO used an extra for sentinel */
2042		txr->tx_avail -= txd_used;
2043
2044	if (m_head->m_flags & M_VLANTAG) {
2045		/* Set the vlan id. */
2046		ctxd->upper.fields.special =
2047		    htole16(m_head->m_pkthdr.ether_vtag);
2048                /* Tell hardware to add tag */
2049                ctxd->lower.data |= htole32(E1000_TXD_CMD_VLE);
2050        }
2051
2052        tx_buffer->m_head = m_head;
2053	tx_buffer_mapped->map = tx_buffer->map;
2054	tx_buffer->map = map;
2055        bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);
2056
2057        /*
2058         * Last Descriptor of Packet
2059	 * needs End Of Packet (EOP)
2060	 * and Report Status (RS)
2061         */
2062        ctxd->lower.data |=
2063	    htole32(E1000_TXD_CMD_EOP | E1000_TXD_CMD_RS);
2064	/*
2065	 * Keep track in the first buffer which
2066	 * descriptor will be written back
2067	 */
2068	tx_buffer = &txr->tx_buffers[first];
2069	tx_buffer->next_eop = last;
2070	/* Update the watchdog time early and often */
2071	txr->watchdog_time = ticks;
2072
2073	/*
2074	 * Advance the Transmit Descriptor Tail (TDT), this tells the E1000
2075	 * that this frame is available to transmit.
2076	 */
2077	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
2078	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
2079	E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), i);
2080
2081	return (0);
2082}
2083
2084static void
2085em_set_promisc(struct adapter *adapter)
2086{
2087	struct ifnet	*ifp = adapter->ifp;
2088	u32		reg_rctl;
2089
2090	reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2091
2092	if (ifp->if_flags & IFF_PROMISC) {
2093		reg_rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
2094		/* Turn this on if you want to see bad packets */
2095		if (em_debug_sbp)
2096			reg_rctl |= E1000_RCTL_SBP;
2097		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2098	} else if (ifp->if_flags & IFF_ALLMULTI) {
2099		reg_rctl |= E1000_RCTL_MPE;
2100		reg_rctl &= ~E1000_RCTL_UPE;
2101		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2102	}
2103}
2104
2105static void
2106em_disable_promisc(struct adapter *adapter)
2107{
2108	u32	reg_rctl;
2109
2110	reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2111
2112	reg_rctl &=  (~E1000_RCTL_UPE);
2113	reg_rctl &=  (~E1000_RCTL_MPE);
2114	reg_rctl &=  (~E1000_RCTL_SBP);
2115	E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2116}
2117
2118
2119/*********************************************************************
2120 *  Multicast Update
2121 *
2122 *  This routine is called whenever multicast address list is updated.
2123 *
2124 **********************************************************************/
2125
2126static void
2127em_set_multi(struct adapter *adapter)
2128{
2129	struct ifnet	*ifp = adapter->ifp;
2130	struct ifmultiaddr *ifma;
2131	u32 reg_rctl = 0;
2132	u8  *mta; /* Multicast array memory */
2133	int mcnt = 0;
2134
2135	IOCTL_DEBUGOUT("em_set_multi: begin");
2136
2137	mta = adapter->mta;
2138	bzero(mta, sizeof(u8) * ETH_ADDR_LEN * MAX_NUM_MULTICAST_ADDRESSES);
2139
2140	if (adapter->hw.mac.type == e1000_82542 &&
2141	    adapter->hw.revision_id == E1000_REVISION_2) {
2142		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2143		if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
2144			e1000_pci_clear_mwi(&adapter->hw);
2145		reg_rctl |= E1000_RCTL_RST;
2146		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2147		msec_delay(5);
2148	}
2149
2150#if __FreeBSD_version < 800000
2151	IF_ADDR_LOCK(ifp);
2152#else
2153	if_maddr_rlock(ifp);
2154#endif
2155	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
2156		if (ifma->ifma_addr->sa_family != AF_LINK)
2157			continue;
2158
2159		if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
2160			break;
2161
2162		bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
2163		    &mta[mcnt * ETH_ADDR_LEN], ETH_ADDR_LEN);
2164		mcnt++;
2165	}
2166#if __FreeBSD_version < 800000
2167	IF_ADDR_UNLOCK(ifp);
2168#else
2169	if_maddr_runlock(ifp);
2170#endif
2171	if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) {
2172		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2173		reg_rctl |= E1000_RCTL_MPE;
2174		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2175	} else
2176		e1000_update_mc_addr_list(&adapter->hw, mta, mcnt);
2177
2178	if (adapter->hw.mac.type == e1000_82542 &&
2179	    adapter->hw.revision_id == E1000_REVISION_2) {
2180		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2181		reg_rctl &= ~E1000_RCTL_RST;
2182		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2183		msec_delay(5);
2184		if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
2185			e1000_pci_set_mwi(&adapter->hw);
2186	}
2187}
2188
2189
2190/*********************************************************************
2191 *  Timer routine
2192 *
2193 *  This routine checks for link status and updates statistics.
2194 *
2195 **********************************************************************/
2196
2197static void
2198em_local_timer(void *arg)
2199{
2200	struct adapter	*adapter = arg;
2201	struct ifnet	*ifp = adapter->ifp;
2202	struct tx_ring	*txr = adapter->tx_rings;
2203	struct rx_ring	*rxr = adapter->rx_rings;
2204	u32		trigger;
2205
2206	EM_CORE_LOCK_ASSERT(adapter);
2207
2208	em_update_link_status(adapter);
2209	em_update_stats_counters(adapter);
2210
2211	/* Reset LAA into RAR[0] on 82571 */
2212	if ((adapter->hw.mac.type == e1000_82571) &&
2213	    e1000_get_laa_state_82571(&adapter->hw))
2214		e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);
2215
2216	/* Mask to use in the irq trigger */
2217	if (adapter->msix_mem)
2218		trigger = rxr->ims; /* RX for 82574 */
2219	else
2220		trigger = E1000_ICS_RXDMT0;
2221
2222	/*
2223	** Don't do TX watchdog check if we've been paused
2224	*/
2225	if (adapter->pause_frames) {
2226		adapter->pause_frames = 0;
2227		goto out;
2228	}
2229	/*
2230	** Check on the state of the TX queue(s), this
2231	** can be done without the lock because its RO
2232	** and the HUNG state will be static if set.
2233	*/
2234	for (int i = 0; i < adapter->num_queues; i++, txr++)
2235		if (txr->queue_status == EM_QUEUE_HUNG)
2236			goto hung;
2237out:
2238	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
2239#ifndef DEVICE_POLLING
2240	/* Trigger an RX interrupt to guarantee mbuf refresh */
2241	E1000_WRITE_REG(&adapter->hw, E1000_ICS, trigger);
2242#endif
2243	return;
2244hung:
2245	/* Looks like we're hung */
2246	device_printf(adapter->dev, "Watchdog timeout -- resetting\n");
2247	device_printf(adapter->dev,
2248	    "Queue(%d) tdh = %d, hw tdt = %d\n", txr->me,
2249	    E1000_READ_REG(&adapter->hw, E1000_TDH(txr->me)),
2250	    E1000_READ_REG(&adapter->hw, E1000_TDT(txr->me)));
2251	device_printf(adapter->dev,"TX(%d) desc avail = %d,"
2252	    "Next TX to Clean = %d\n",
2253	    txr->me, txr->tx_avail, txr->next_to_clean);
2254	ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2255	adapter->watchdog_events++;
2256	em_init_locked(adapter);
2257}
2258
2259
2260static void
2261em_update_link_status(struct adapter *adapter)
2262{
2263	struct e1000_hw *hw = &adapter->hw;
2264	struct ifnet *ifp = adapter->ifp;
2265	device_t dev = adapter->dev;
2266	struct tx_ring *txr = adapter->tx_rings;
2267	u32 link_check = 0;
2268
2269	/* Get the cached link value or read phy for real */
2270	switch (hw->phy.media_type) {
2271	case e1000_media_type_copper:
2272		if (hw->mac.get_link_status) {
2273			/* Do the work to read phy */
2274			e1000_check_for_link(hw);
2275			link_check = !hw->mac.get_link_status;
2276			if (link_check) /* ESB2 fix */
2277				e1000_cfg_on_link_up(hw);
2278		} else
2279			link_check = TRUE;
2280		break;
2281	case e1000_media_type_fiber:
2282		e1000_check_for_link(hw);
2283		link_check = (E1000_READ_REG(hw, E1000_STATUS) &
2284                                 E1000_STATUS_LU);
2285		break;
2286	case e1000_media_type_internal_serdes:
2287		e1000_check_for_link(hw);
2288		link_check = adapter->hw.mac.serdes_has_link;
2289		break;
2290	default:
2291	case e1000_media_type_unknown:
2292		break;
2293	}
2294
2295	/* Now check for a transition */
2296	if (link_check && (adapter->link_active == 0)) {
2297		e1000_get_speed_and_duplex(hw, &adapter->link_speed,
2298		    &adapter->link_duplex);
2299		/* Check if we must disable SPEED_MODE bit on PCI-E */
2300		if ((adapter->link_speed != SPEED_1000) &&
2301		    ((hw->mac.type == e1000_82571) ||
2302		    (hw->mac.type == e1000_82572))) {
2303			int tarc0;
2304			tarc0 = E1000_READ_REG(hw, E1000_TARC(0));
2305			tarc0 &= ~SPEED_MODE_BIT;
2306			E1000_WRITE_REG(hw, E1000_TARC(0), tarc0);
2307		}
2308		if (bootverbose)
2309			device_printf(dev, "Link is up %d Mbps %s\n",
2310			    adapter->link_speed,
2311			    ((adapter->link_duplex == FULL_DUPLEX) ?
2312			    "Full Duplex" : "Half Duplex"));
2313		adapter->link_active = 1;
2314		adapter->smartspeed = 0;
2315		ifp->if_baudrate = adapter->link_speed * 1000000;
2316		if_link_state_change(ifp, LINK_STATE_UP);
2317	} else if (!link_check && (adapter->link_active == 1)) {
2318		ifp->if_baudrate = adapter->link_speed = 0;
2319		adapter->link_duplex = 0;
2320		if (bootverbose)
2321			device_printf(dev, "Link is Down\n");
2322		adapter->link_active = 0;
2323		/* Link down, disable watchdog */
2324		for (int i = 0; i < adapter->num_queues; i++, txr++)
2325			txr->queue_status = EM_QUEUE_IDLE;
2326		if_link_state_change(ifp, LINK_STATE_DOWN);
2327	}
2328}
2329
2330/*********************************************************************
2331 *
2332 *  This routine disables all traffic on the adapter by issuing a
2333 *  global reset on the MAC and deallocates TX/RX buffers.
2334 *
2335 *  This routine should always be called with BOTH the CORE
2336 *  and TX locks.
2337 **********************************************************************/
2338
2339static void
2340em_stop(void *arg)
2341{
2342	struct adapter	*adapter = arg;
2343	struct ifnet	*ifp = adapter->ifp;
2344	struct tx_ring	*txr = adapter->tx_rings;
2345
2346	EM_CORE_LOCK_ASSERT(adapter);
2347
2348	INIT_DEBUGOUT("em_stop: begin");
2349
2350	em_disable_intr(adapter);
2351	callout_stop(&adapter->timer);
2352
2353	/* Tell the stack that the interface is no longer active */
2354	ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
2355
2356        /* Unarm watchdog timer. */
2357	for (int i = 0; i < adapter->num_queues; i++, txr++) {
2358		EM_TX_LOCK(txr);
2359		txr->queue_status = EM_QUEUE_IDLE;
2360		EM_TX_UNLOCK(txr);
2361	}
2362
2363	e1000_reset_hw(&adapter->hw);
2364	E1000_WRITE_REG(&adapter->hw, E1000_WUC, 0);
2365
2366	e1000_led_off(&adapter->hw);
2367	e1000_cleanup_led(&adapter->hw);
2368}
2369
2370
2371/*********************************************************************
2372 *
2373 *  Determine hardware revision.
2374 *
2375 **********************************************************************/
2376static void
2377em_identify_hardware(struct adapter *adapter)
2378{
2379	device_t dev = adapter->dev;
2380
2381	/* Make sure our PCI config space has the necessary stuff set */
2382	adapter->hw.bus.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2);
2383	if (!((adapter->hw.bus.pci_cmd_word & PCIM_CMD_BUSMASTEREN) &&
2384	    (adapter->hw.bus.pci_cmd_word & PCIM_CMD_MEMEN))) {
2385		device_printf(dev, "Memory Access and/or Bus Master bits "
2386		    "were not set!\n");
2387		adapter->hw.bus.pci_cmd_word |=
2388		(PCIM_CMD_BUSMASTEREN | PCIM_CMD_MEMEN);
2389		pci_write_config(dev, PCIR_COMMAND,
2390		    adapter->hw.bus.pci_cmd_word, 2);
2391	}
2392
2393	/* Save off the information about this board */
2394	adapter->hw.vendor_id = pci_get_vendor(dev);
2395	adapter->hw.device_id = pci_get_device(dev);
2396	adapter->hw.revision_id = pci_read_config(dev, PCIR_REVID, 1);
2397	adapter->hw.subsystem_vendor_id =
2398	    pci_read_config(dev, PCIR_SUBVEND_0, 2);
2399	adapter->hw.subsystem_device_id =
2400	    pci_read_config(dev, PCIR_SUBDEV_0, 2);
2401
2402	/* Do Shared Code Init and Setup */
2403	if (e1000_set_mac_type(&adapter->hw)) {
2404		device_printf(dev, "Setup init failure\n");
2405		return;
2406	}
2407}
2408
2409static int
2410em_allocate_pci_resources(struct adapter *adapter)
2411{
2412	device_t	dev = adapter->dev;
2413	int		rid;
2414
2415	rid = PCIR_BAR(0);
2416	adapter->memory = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
2417	    &rid, RF_ACTIVE);
2418	if (adapter->memory == NULL) {
2419		device_printf(dev, "Unable to allocate bus resource: memory\n");
2420		return (ENXIO);
2421	}
2422	adapter->osdep.mem_bus_space_tag =
2423	    rman_get_bustag(adapter->memory);
2424	adapter->osdep.mem_bus_space_handle =
2425	    rman_get_bushandle(adapter->memory);
2426	adapter->hw.hw_addr = (u8 *)&adapter->osdep.mem_bus_space_handle;
2427
2428	/* Default to a single queue */
2429	adapter->num_queues = 1;
2430
2431	/*
2432	 * Setup MSI/X or MSI if PCI Express
2433	 */
2434	adapter->msix = em_setup_msix(adapter);
2435
2436	adapter->hw.back = &adapter->osdep;
2437
2438	return (0);
2439}
2440
2441/*********************************************************************
2442 *
2443 *  Setup the Legacy or MSI Interrupt handler
2444 *
2445 **********************************************************************/
2446int
2447em_allocate_legacy(struct adapter *adapter)
2448{
2449	device_t dev = adapter->dev;
2450	int error, rid = 0;
2451
2452	/* Manually turn off all interrupts */
2453	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2454
2455	if (adapter->msix == 1) /* using MSI */
2456		rid = 1;
2457	/* We allocate a single interrupt resource */
2458	adapter->res = bus_alloc_resource_any(dev,
2459	    SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2460	if (adapter->res == NULL) {
2461		device_printf(dev, "Unable to allocate bus resource: "
2462		    "interrupt\n");
2463		return (ENXIO);
2464	}
2465
2466	/*
2467	 * Allocate a fast interrupt and the associated
2468	 * deferred processing contexts.
2469	 */
2470	TASK_INIT(&adapter->que_task, 0, em_handle_que, adapter);
2471	TASK_INIT(&adapter->link_task, 0, em_handle_link, adapter);
2472	adapter->tq = taskqueue_create_fast("em_taskq", M_NOWAIT,
2473	    taskqueue_thread_enqueue, &adapter->tq);
2474	taskqueue_start_threads(&adapter->tq, 1, PI_NET, "%s taskq",
2475	    device_get_nameunit(adapter->dev));
2476	if ((error = bus_setup_intr(dev, adapter->res, INTR_TYPE_NET,
2477	    em_irq_fast, NULL, adapter, &adapter->tag)) != 0) {
2478		device_printf(dev, "Failed to register fast interrupt "
2479			    "handler: %d\n", error);
2480		taskqueue_free(adapter->tq);
2481		adapter->tq = NULL;
2482		return (error);
2483	}
2484
2485	return (0);
2486}
2487
2488/*********************************************************************
2489 *
2490 *  Setup the MSIX Interrupt handlers
2491 *   This is not really Multiqueue, rather
2492 *   its just multiple interrupt vectors.
2493 *
2494 **********************************************************************/
2495int
2496em_allocate_msix(struct adapter *adapter)
2497{
2498	device_t	dev = adapter->dev;
2499	struct		tx_ring *txr = adapter->tx_rings;
2500	struct		rx_ring *rxr = adapter->rx_rings;
2501	int		error, rid, vector = 0;
2502
2503
2504	/* Make sure all interrupts are disabled */
2505	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2506
2507	/* First set up ring resources */
2508	for (int i = 0; i < adapter->num_queues; i++, txr++, rxr++) {
2509
2510		/* RX ring */
2511		rid = vector + 1;
2512
2513		rxr->res = bus_alloc_resource_any(dev,
2514		    SYS_RES_IRQ, &rid, RF_ACTIVE);
2515		if (rxr->res == NULL) {
2516			device_printf(dev,
2517			    "Unable to allocate bus resource: "
2518			    "RX MSIX Interrupt %d\n", i);
2519			return (ENXIO);
2520		}
2521		if ((error = bus_setup_intr(dev, rxr->res,
2522		    INTR_TYPE_NET | INTR_MPSAFE, NULL, em_msix_rx,
2523		    rxr, &rxr->tag)) != 0) {
2524			device_printf(dev, "Failed to register RX handler");
2525			return (error);
2526		}
2527#if __FreeBSD_version >= 800504
2528		bus_describe_intr(dev, rxr->res, rxr->tag, "rx %d", i);
2529#endif
2530		rxr->msix = vector++; /* NOTE increment vector for TX */
2531		TASK_INIT(&rxr->rx_task, 0, em_handle_rx, rxr);
2532		rxr->tq = taskqueue_create_fast("em_rxq", M_NOWAIT,
2533		    taskqueue_thread_enqueue, &rxr->tq);
2534		taskqueue_start_threads(&rxr->tq, 1, PI_NET, "%s rxq",
2535		    device_get_nameunit(adapter->dev));
2536		/*
2537		** Set the bit to enable interrupt
2538		** in E1000_IMS -- bits 20 and 21
2539		** are for RX0 and RX1, note this has
2540		** NOTHING to do with the MSIX vector
2541		*/
2542		rxr->ims = 1 << (20 + i);
2543		adapter->ivars |= (8 | rxr->msix) << (i * 4);
2544
2545		/* TX ring */
2546		rid = vector + 1;
2547		txr->res = bus_alloc_resource_any(dev,
2548		    SYS_RES_IRQ, &rid, RF_ACTIVE);
2549		if (txr->res == NULL) {
2550			device_printf(dev,
2551			    "Unable to allocate bus resource: "
2552			    "TX MSIX Interrupt %d\n", i);
2553			return (ENXIO);
2554		}
2555		if ((error = bus_setup_intr(dev, txr->res,
2556		    INTR_TYPE_NET | INTR_MPSAFE, NULL, em_msix_tx,
2557		    txr, &txr->tag)) != 0) {
2558			device_printf(dev, "Failed to register TX handler");
2559			return (error);
2560		}
2561#if __FreeBSD_version >= 800504
2562		bus_describe_intr(dev, txr->res, txr->tag, "tx %d", i);
2563#endif
2564		txr->msix = vector++; /* Increment vector for next pass */
2565		TASK_INIT(&txr->tx_task, 0, em_handle_tx, txr);
2566		txr->tq = taskqueue_create_fast("em_txq", M_NOWAIT,
2567		    taskqueue_thread_enqueue, &txr->tq);
2568		taskqueue_start_threads(&txr->tq, 1, PI_NET, "%s txq",
2569		    device_get_nameunit(adapter->dev));
2570		/*
2571		** Set the bit to enable interrupt
2572		** in E1000_IMS -- bits 22 and 23
2573		** are for TX0 and TX1, note this has
2574		** NOTHING to do with the MSIX vector
2575		*/
2576		txr->ims = 1 << (22 + i);
2577		adapter->ivars |= (8 | txr->msix) << (8 + (i * 4));
2578	}
2579
2580	/* Link interrupt */
2581	++rid;
2582	adapter->res = bus_alloc_resource_any(dev,
2583	    SYS_RES_IRQ, &rid, RF_ACTIVE);
2584	if (!adapter->res) {
2585		device_printf(dev,"Unable to allocate "
2586		    "bus resource: Link interrupt [%d]\n", rid);
2587		return (ENXIO);
2588        }
2589	/* Set the link handler function */
2590	error = bus_setup_intr(dev, adapter->res,
2591	    INTR_TYPE_NET | INTR_MPSAFE, NULL,
2592	    em_msix_link, adapter, &adapter->tag);
2593	if (error) {
2594		adapter->res = NULL;
2595		device_printf(dev, "Failed to register LINK handler");
2596		return (error);
2597	}
2598#if __FreeBSD_version >= 800504
2599		bus_describe_intr(dev, adapter->res, adapter->tag, "link");
2600#endif
2601	adapter->linkvec = vector;
2602	adapter->ivars |=  (8 | vector) << 16;
2603	adapter->ivars |= 0x80000000;
2604
2605	return (0);
2606}
2607
2608
2609static void
2610em_free_pci_resources(struct adapter *adapter)
2611{
2612	device_t	dev = adapter->dev;
2613	struct tx_ring	*txr;
2614	struct rx_ring	*rxr;
2615	int		rid;
2616
2617
2618	/*
2619	** Release all the queue interrupt resources:
2620	*/
2621	for (int i = 0; i < adapter->num_queues; i++) {
2622		txr = &adapter->tx_rings[i];
2623		rxr = &adapter->rx_rings[i];
2624		/* an early abort? */
2625		if ((txr == NULL) || (rxr == NULL))
2626			break;
2627		rid = txr->msix +1;
2628		if (txr->tag != NULL) {
2629			bus_teardown_intr(dev, txr->res, txr->tag);
2630			txr->tag = NULL;
2631		}
2632		if (txr->res != NULL)
2633			bus_release_resource(dev, SYS_RES_IRQ,
2634			    rid, txr->res);
2635		rid = rxr->msix +1;
2636		if (rxr->tag != NULL) {
2637			bus_teardown_intr(dev, rxr->res, rxr->tag);
2638			rxr->tag = NULL;
2639		}
2640		if (rxr->res != NULL)
2641			bus_release_resource(dev, SYS_RES_IRQ,
2642			    rid, rxr->res);
2643	}
2644
2645        if (adapter->linkvec) /* we are doing MSIX */
2646                rid = adapter->linkvec + 1;
2647        else
2648                (adapter->msix != 0) ? (rid = 1):(rid = 0);
2649
2650	if (adapter->tag != NULL) {
2651		bus_teardown_intr(dev, adapter->res, adapter->tag);
2652		adapter->tag = NULL;
2653	}
2654
2655	if (adapter->res != NULL)
2656		bus_release_resource(dev, SYS_RES_IRQ, rid, adapter->res);
2657
2658
2659	if (adapter->msix)
2660		pci_release_msi(dev);
2661
2662	if (adapter->msix_mem != NULL)
2663		bus_release_resource(dev, SYS_RES_MEMORY,
2664		    PCIR_BAR(EM_MSIX_BAR), adapter->msix_mem);
2665
2666	if (adapter->memory != NULL)
2667		bus_release_resource(dev, SYS_RES_MEMORY,
2668		    PCIR_BAR(0), adapter->memory);
2669
2670	if (adapter->flash != NULL)
2671		bus_release_resource(dev, SYS_RES_MEMORY,
2672		    EM_FLASH, adapter->flash);
2673}
2674
2675/*
2676 * Setup MSI or MSI/X
2677 */
2678static int
2679em_setup_msix(struct adapter *adapter)
2680{
2681	device_t dev = adapter->dev;
2682	int val = 0;
2683
2684
2685	/*
2686	** Setup MSI/X for Hartwell: tests have shown
2687	** use of two queues to be unstable, and to
2688	** provide no great gain anyway, so we simply
2689	** seperate the interrupts and use a single queue.
2690	*/
2691	if ((adapter->hw.mac.type == e1000_82574) &&
2692	    (em_enable_msix == TRUE)) {
2693		/* Map the MSIX BAR */
2694		int rid = PCIR_BAR(EM_MSIX_BAR);
2695		adapter->msix_mem = bus_alloc_resource_any(dev,
2696		    SYS_RES_MEMORY, &rid, RF_ACTIVE);
2697       		if (!adapter->msix_mem) {
2698			/* May not be enabled */
2699               		device_printf(adapter->dev,
2700			    "Unable to map MSIX table \n");
2701			goto msi;
2702       		}
2703		val = pci_msix_count(dev);
2704		if (val < 3) {
2705			bus_release_resource(dev, SYS_RES_MEMORY,
2706			    PCIR_BAR(EM_MSIX_BAR), adapter->msix_mem);
2707			adapter->msix_mem = NULL;
2708               		device_printf(adapter->dev,
2709			    "MSIX: insufficient vectors, using MSI\n");
2710			goto msi;
2711		}
2712		val = 3;
2713		adapter->num_queues = 1;
2714		if (pci_alloc_msix(dev, &val) == 0) {
2715			device_printf(adapter->dev,
2716			    "Using MSIX interrupts "
2717			    "with %d vectors\n", val);
2718		}
2719
2720		return (val);
2721	}
2722msi:
2723       	val = pci_msi_count(dev);
2724       	if (val == 1 && pci_alloc_msi(dev, &val) == 0) {
2725               	adapter->msix = 1;
2726               	device_printf(adapter->dev,"Using an MSI interrupt\n");
2727		return (val);
2728	}
2729	/* Should only happen due to manual configuration */
2730	device_printf(adapter->dev,"No MSI/MSIX using a Legacy IRQ\n");
2731	return (0);
2732}
2733
2734
2735/*********************************************************************
2736 *
2737 *  Initialize the hardware to a configuration
2738 *  as specified by the adapter structure.
2739 *
2740 **********************************************************************/
2741static void
2742em_reset(struct adapter *adapter)
2743{
2744	device_t	dev = adapter->dev;
2745	struct ifnet	*ifp = adapter->ifp;
2746	struct e1000_hw	*hw = &adapter->hw;
2747	u16		rx_buffer_size;
2748
2749	INIT_DEBUGOUT("em_reset: begin");
2750
2751	/* Set up smart power down as default off on newer adapters. */
2752	if (!em_smart_pwr_down && (hw->mac.type == e1000_82571 ||
2753	    hw->mac.type == e1000_82572)) {
2754		u16 phy_tmp = 0;
2755
2756		/* Speed up time to link by disabling smart power down. */
2757		e1000_read_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, &phy_tmp);
2758		phy_tmp &= ~IGP02E1000_PM_SPD;
2759		e1000_write_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, phy_tmp);
2760	}
2761
2762	/*
2763	 * These parameters control the automatic generation (Tx) and
2764	 * response (Rx) to Ethernet PAUSE frames.
2765	 * - High water mark should allow for at least two frames to be
2766	 *   received after sending an XOFF.
2767	 * - Low water mark works best when it is very near the high water mark.
2768	 *   This allows the receiver to restart by sending XON when it has
2769	 *   drained a bit. Here we use an arbitary value of 1500 which will
2770	 *   restart after one full frame is pulled from the buffer. There
2771	 *   could be several smaller frames in the buffer and if so they will
2772	 *   not trigger the XON until their total number reduces the buffer
2773	 *   by 1500.
2774	 * - The pause time is fairly large at 1000 x 512ns = 512 usec.
2775	 */
2776	rx_buffer_size = ((E1000_READ_REG(hw, E1000_PBA) & 0xffff) << 10 );
2777
2778	hw->fc.high_water = rx_buffer_size -
2779	    roundup2(adapter->max_frame_size, 1024);
2780	hw->fc.low_water = hw->fc.high_water - 1500;
2781
2782	if (hw->mac.type == e1000_80003es2lan)
2783		hw->fc.pause_time = 0xFFFF;
2784	else
2785		hw->fc.pause_time = EM_FC_PAUSE_TIME;
2786
2787	hw->fc.send_xon = TRUE;
2788
2789        /* Set Flow control, use the tunable location if sane */
2790	hw->fc.requested_mode = adapter->fc_setting;
2791
2792	/* Workaround: no TX flow ctrl for PCH */
2793	if (hw->mac.type == e1000_pchlan)
2794                hw->fc.requested_mode = e1000_fc_rx_pause;
2795
2796	/* Override - settings for PCH2LAN, ya its magic :) */
2797	if (hw->mac.type == e1000_pch2lan) {
2798		hw->fc.high_water = 0x5C20;
2799		hw->fc.low_water = 0x5048;
2800		hw->fc.pause_time = 0x0650;
2801		hw->fc.refresh_time = 0x0400;
2802		/* Jumbos need adjusted PBA */
2803		if (ifp->if_mtu > ETHERMTU)
2804			E1000_WRITE_REG(hw, E1000_PBA, 12);
2805		else
2806			E1000_WRITE_REG(hw, E1000_PBA, 26);
2807	}
2808
2809	/* Issue a global reset */
2810	e1000_reset_hw(hw);
2811	E1000_WRITE_REG(hw, E1000_WUC, 0);
2812	em_disable_aspm(adapter);
2813
2814	if (e1000_init_hw(hw) < 0) {
2815		device_printf(dev, "Hardware Initialization Failed\n");
2816		return;
2817	}
2818
2819	E1000_WRITE_REG(hw, E1000_VET, ETHERTYPE_VLAN);
2820	e1000_get_phy_info(hw);
2821	e1000_check_for_link(hw);
2822	return;
2823}
2824
2825/*********************************************************************
2826 *
2827 *  Setup networking device structure and register an interface.
2828 *
2829 **********************************************************************/
2830static int
2831em_setup_interface(device_t dev, struct adapter *adapter)
2832{
2833	struct ifnet   *ifp;
2834
2835	INIT_DEBUGOUT("em_setup_interface: begin");
2836
2837	ifp = adapter->ifp = if_alloc(IFT_ETHER);
2838	if (ifp == NULL) {
2839		device_printf(dev, "can not allocate ifnet structure\n");
2840		return (-1);
2841	}
2842	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
2843	ifp->if_mtu = ETHERMTU;
2844	ifp->if_init =  em_init;
2845	ifp->if_softc = adapter;
2846	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
2847	ifp->if_ioctl = em_ioctl;
2848	ifp->if_start = em_start;
2849	IFQ_SET_MAXLEN(&ifp->if_snd, adapter->num_tx_desc - 1);
2850	ifp->if_snd.ifq_drv_maxlen = adapter->num_tx_desc - 1;
2851	IFQ_SET_READY(&ifp->if_snd);
2852
2853	ether_ifattach(ifp, adapter->hw.mac.addr);
2854
2855	ifp->if_capabilities = ifp->if_capenable = 0;
2856
2857#ifdef EM_MULTIQUEUE
2858	/* Multiqueue tx functions */
2859	ifp->if_transmit = em_mq_start;
2860	ifp->if_qflush = em_qflush;
2861#endif
2862
2863	ifp->if_capabilities |= IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM;
2864	ifp->if_capenable |= IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM;
2865
2866	/* Enable TSO by default, can disable with ifconfig */
2867	ifp->if_capabilities |= IFCAP_TSO4;
2868	ifp->if_capenable |= IFCAP_TSO4;
2869
2870	/*
2871	 * Tell the upper layer(s) we
2872	 * support full VLAN capability
2873	 */
2874	ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);
2875	ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU;
2876	ifp->if_capenable |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU;
2877
2878	/*
2879	** Dont turn this on by default, if vlans are
2880	** created on another pseudo device (eg. lagg)
2881	** then vlan events are not passed thru, breaking
2882	** operation, but with HW FILTER off it works. If
2883	** using vlans directly on the em driver you can
2884	** enable this and get full hardware tag filtering.
2885	*/
2886	ifp->if_capabilities |= IFCAP_VLAN_HWFILTER;
2887
2888#ifdef DEVICE_POLLING
2889	ifp->if_capabilities |= IFCAP_POLLING;
2890#endif
2891
2892	/* Enable only WOL MAGIC by default */
2893	if (adapter->wol) {
2894		ifp->if_capabilities |= IFCAP_WOL;
2895		ifp->if_capenable |= IFCAP_WOL_MAGIC;
2896	}
2897
2898	/*
2899	 * Specify the media types supported by this adapter and register
2900	 * callbacks to update media and link information
2901	 */
2902	ifmedia_init(&adapter->media, IFM_IMASK,
2903	    em_media_change, em_media_status);
2904	if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
2905	    (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
2906		u_char fiber_type = IFM_1000_SX;	/* default type */
2907
2908		ifmedia_add(&adapter->media, IFM_ETHER | fiber_type | IFM_FDX,
2909			    0, NULL);
2910		ifmedia_add(&adapter->media, IFM_ETHER | fiber_type, 0, NULL);
2911	} else {
2912		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T, 0, NULL);
2913		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T | IFM_FDX,
2914			    0, NULL);
2915		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX,
2916			    0, NULL);
2917		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
2918			    0, NULL);
2919		if (adapter->hw.phy.type != e1000_phy_ife) {
2920			ifmedia_add(&adapter->media,
2921				IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
2922			ifmedia_add(&adapter->media,
2923				IFM_ETHER | IFM_1000_T, 0, NULL);
2924		}
2925	}
2926	ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
2927	ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
2928	return (0);
2929}
2930
2931
2932/*
2933 * Manage DMA'able memory.
2934 */
2935static void
2936em_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
2937{
2938	if (error)
2939		return;
2940	*(bus_addr_t *) arg = segs[0].ds_addr;
2941}
2942
2943static int
2944em_dma_malloc(struct adapter *adapter, bus_size_t size,
2945        struct em_dma_alloc *dma, int mapflags)
2946{
2947	int error;
2948
2949	error = bus_dma_tag_create(bus_get_dma_tag(adapter->dev), /* parent */
2950				EM_DBA_ALIGN, 0,	/* alignment, bounds */
2951				BUS_SPACE_MAXADDR,	/* lowaddr */
2952				BUS_SPACE_MAXADDR,	/* highaddr */
2953				NULL, NULL,		/* filter, filterarg */
2954				size,			/* maxsize */
2955				1,			/* nsegments */
2956				size,			/* maxsegsize */
2957				0,			/* flags */
2958				NULL,			/* lockfunc */
2959				NULL,			/* lockarg */
2960				&dma->dma_tag);
2961	if (error) {
2962		device_printf(adapter->dev,
2963		    "%s: bus_dma_tag_create failed: %d\n",
2964		    __func__, error);
2965		goto fail_0;
2966	}
2967
2968	error = bus_dmamem_alloc(dma->dma_tag, (void**) &dma->dma_vaddr,
2969	    BUS_DMA_NOWAIT | BUS_DMA_COHERENT, &dma->dma_map);
2970	if (error) {
2971		device_printf(adapter->dev,
2972		    "%s: bus_dmamem_alloc(%ju) failed: %d\n",
2973		    __func__, (uintmax_t)size, error);
2974		goto fail_2;
2975	}
2976
2977	dma->dma_paddr = 0;
2978	error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
2979	    size, em_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT);
2980	if (error || dma->dma_paddr == 0) {
2981		device_printf(adapter->dev,
2982		    "%s: bus_dmamap_load failed: %d\n",
2983		    __func__, error);
2984		goto fail_3;
2985	}
2986
2987	return (0);
2988
2989fail_3:
2990	bus_dmamap_unload(dma->dma_tag, dma->dma_map);
2991fail_2:
2992	bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
2993	bus_dma_tag_destroy(dma->dma_tag);
2994fail_0:
2995	dma->dma_map = NULL;
2996	dma->dma_tag = NULL;
2997
2998	return (error);
2999}
3000
3001static void
3002em_dma_free(struct adapter *adapter, struct em_dma_alloc *dma)
3003{
3004	if (dma->dma_tag == NULL)
3005		return;
3006	if (dma->dma_map != NULL) {
3007		bus_dmamap_sync(dma->dma_tag, dma->dma_map,
3008		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
3009		bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3010		bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
3011		dma->dma_map = NULL;
3012	}
3013	bus_dma_tag_destroy(dma->dma_tag);
3014	dma->dma_tag = NULL;
3015}
3016
3017
3018/*********************************************************************
3019 *
3020 *  Allocate memory for the transmit and receive rings, and then
3021 *  the descriptors associated with each, called only once at attach.
3022 *
3023 **********************************************************************/
3024static int
3025em_allocate_queues(struct adapter *adapter)
3026{
3027	device_t		dev = adapter->dev;
3028	struct tx_ring		*txr = NULL;
3029	struct rx_ring		*rxr = NULL;
3030	int rsize, tsize, error = E1000_SUCCESS;
3031	int txconf = 0, rxconf = 0;
3032
3033
3034	/* Allocate the TX ring struct memory */
3035	if (!(adapter->tx_rings =
3036	    (struct tx_ring *) malloc(sizeof(struct tx_ring) *
3037	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3038		device_printf(dev, "Unable to allocate TX ring memory\n");
3039		error = ENOMEM;
3040		goto fail;
3041	}
3042
3043	/* Now allocate the RX */
3044	if (!(adapter->rx_rings =
3045	    (struct rx_ring *) malloc(sizeof(struct rx_ring) *
3046	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3047		device_printf(dev, "Unable to allocate RX ring memory\n");
3048		error = ENOMEM;
3049		goto rx_fail;
3050	}
3051
3052	tsize = roundup2(adapter->num_tx_desc *
3053	    sizeof(struct e1000_tx_desc), EM_DBA_ALIGN);
3054	/*
3055	 * Now set up the TX queues, txconf is needed to handle the
3056	 * possibility that things fail midcourse and we need to
3057	 * undo memory gracefully
3058	 */
3059	for (int i = 0; i < adapter->num_queues; i++, txconf++) {
3060		/* Set up some basics */
3061		txr = &adapter->tx_rings[i];
3062		txr->adapter = adapter;
3063		txr->me = i;
3064
3065		/* Initialize the TX lock */
3066		snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
3067		    device_get_nameunit(dev), txr->me);
3068		mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF);
3069
3070		if (em_dma_malloc(adapter, tsize,
3071			&txr->txdma, BUS_DMA_NOWAIT)) {
3072			device_printf(dev,
3073			    "Unable to allocate TX Descriptor memory\n");
3074			error = ENOMEM;
3075			goto err_tx_desc;
3076		}
3077		txr->tx_base = (struct e1000_tx_desc *)txr->txdma.dma_vaddr;
3078		bzero((void *)txr->tx_base, tsize);
3079
3080        	if (em_allocate_transmit_buffers(txr)) {
3081			device_printf(dev,
3082			    "Critical Failure setting up transmit buffers\n");
3083			error = ENOMEM;
3084			goto err_tx_desc;
3085        	}
3086#if __FreeBSD_version >= 800000
3087		/* Allocate a buf ring */
3088		txr->br = buf_ring_alloc(4096, M_DEVBUF,
3089		    M_WAITOK, &txr->tx_mtx);
3090#endif
3091	}
3092
3093	/*
3094	 * Next the RX queues...
3095	 */
3096	rsize = roundup2(adapter->num_rx_desc *
3097	    sizeof(struct e1000_rx_desc), EM_DBA_ALIGN);
3098	for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
3099		rxr = &adapter->rx_rings[i];
3100		rxr->adapter = adapter;
3101		rxr->me = i;
3102
3103		/* Initialize the RX lock */
3104		snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
3105		    device_get_nameunit(dev), txr->me);
3106		mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF);
3107
3108		if (em_dma_malloc(adapter, rsize,
3109			&rxr->rxdma, BUS_DMA_NOWAIT)) {
3110			device_printf(dev,
3111			    "Unable to allocate RxDescriptor memory\n");
3112			error = ENOMEM;
3113			goto err_rx_desc;
3114		}
3115		rxr->rx_base = (struct e1000_rx_desc *)rxr->rxdma.dma_vaddr;
3116		bzero((void *)rxr->rx_base, rsize);
3117
3118        	/* Allocate receive buffers for the ring*/
3119		if (em_allocate_receive_buffers(rxr)) {
3120			device_printf(dev,
3121			    "Critical Failure setting up receive buffers\n");
3122			error = ENOMEM;
3123			goto err_rx_desc;
3124		}
3125	}
3126
3127	return (0);
3128
3129err_rx_desc:
3130	for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
3131		em_dma_free(adapter, &rxr->rxdma);
3132err_tx_desc:
3133	for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
3134		em_dma_free(adapter, &txr->txdma);
3135	free(adapter->rx_rings, M_DEVBUF);
3136rx_fail:
3137#if __FreeBSD_version >= 800000
3138	buf_ring_free(txr->br, M_DEVBUF);
3139#endif
3140	free(adapter->tx_rings, M_DEVBUF);
3141fail:
3142	return (error);
3143}
3144
3145
3146/*********************************************************************
3147 *
3148 *  Allocate memory for tx_buffer structures. The tx_buffer stores all
3149 *  the information needed to transmit a packet on the wire. This is
3150 *  called only once at attach, setup is done every reset.
3151 *
3152 **********************************************************************/
3153static int
3154em_allocate_transmit_buffers(struct tx_ring *txr)
3155{
3156	struct adapter *adapter = txr->adapter;
3157	device_t dev = adapter->dev;
3158	struct em_buffer *txbuf;
3159	int error, i;
3160
3161	/*
3162	 * Setup DMA descriptor areas.
3163	 */
3164	if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
3165			       1, 0,			/* alignment, bounds */
3166			       BUS_SPACE_MAXADDR,	/* lowaddr */
3167			       BUS_SPACE_MAXADDR,	/* highaddr */
3168			       NULL, NULL,		/* filter, filterarg */
3169			       EM_TSO_SIZE,		/* maxsize */
3170			       EM_MAX_SCATTER,		/* nsegments */
3171			       PAGE_SIZE,		/* maxsegsize */
3172			       0,			/* flags */
3173			       NULL,			/* lockfunc */
3174			       NULL,			/* lockfuncarg */
3175			       &txr->txtag))) {
3176		device_printf(dev,"Unable to allocate TX DMA tag\n");
3177		goto fail;
3178	}
3179
3180	if (!(txr->tx_buffers =
3181	    (struct em_buffer *) malloc(sizeof(struct em_buffer) *
3182	    adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3183		device_printf(dev, "Unable to allocate tx_buffer memory\n");
3184		error = ENOMEM;
3185		goto fail;
3186	}
3187
3188        /* Create the descriptor buffer dma maps */
3189	txbuf = txr->tx_buffers;
3190	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3191		error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
3192		if (error != 0) {
3193			device_printf(dev, "Unable to create TX DMA map\n");
3194			goto fail;
3195		}
3196	}
3197
3198	return 0;
3199fail:
3200	/* We free all, it handles case where we are in the middle */
3201	em_free_transmit_structures(adapter);
3202	return (error);
3203}
3204
3205/*********************************************************************
3206 *
3207 *  Initialize a transmit ring.
3208 *
3209 **********************************************************************/
3210static void
3211em_setup_transmit_ring(struct tx_ring *txr)
3212{
3213	struct adapter *adapter = txr->adapter;
3214	struct em_buffer *txbuf;
3215	int i;
3216
3217	/* Clear the old descriptor contents */
3218	EM_TX_LOCK(txr);
3219	bzero((void *)txr->tx_base,
3220	      (sizeof(struct e1000_tx_desc)) * adapter->num_tx_desc);
3221	/* Reset indices */
3222	txr->next_avail_desc = 0;
3223	txr->next_to_clean = 0;
3224
3225	/* Free any existing tx buffers. */
3226        txbuf = txr->tx_buffers;
3227	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3228		if (txbuf->m_head != NULL) {
3229			bus_dmamap_sync(txr->txtag, txbuf->map,
3230			    BUS_DMASYNC_POSTWRITE);
3231			bus_dmamap_unload(txr->txtag, txbuf->map);
3232			m_freem(txbuf->m_head);
3233			txbuf->m_head = NULL;
3234		}
3235		/* clear the watch index */
3236		txbuf->next_eop = -1;
3237        }
3238
3239	/* Set number of descriptors available */
3240	txr->tx_avail = adapter->num_tx_desc;
3241	txr->queue_status = EM_QUEUE_IDLE;
3242
3243	/* Clear checksum offload context. */
3244	txr->last_hw_offload = 0;
3245	txr->last_hw_ipcss = 0;
3246	txr->last_hw_ipcso = 0;
3247	txr->last_hw_tucss = 0;
3248	txr->last_hw_tucso = 0;
3249
3250	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3251	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3252	EM_TX_UNLOCK(txr);
3253}
3254
3255/*********************************************************************
3256 *
3257 *  Initialize all transmit rings.
3258 *
3259 **********************************************************************/
3260static void
3261em_setup_transmit_structures(struct adapter *adapter)
3262{
3263	struct tx_ring *txr = adapter->tx_rings;
3264
3265	for (int i = 0; i < adapter->num_queues; i++, txr++)
3266		em_setup_transmit_ring(txr);
3267
3268	return;
3269}
3270
3271/*********************************************************************
3272 *
3273 *  Enable transmit unit.
3274 *
3275 **********************************************************************/
3276static void
3277em_initialize_transmit_unit(struct adapter *adapter)
3278{
3279	struct tx_ring	*txr = adapter->tx_rings;
3280	struct e1000_hw	*hw = &adapter->hw;
3281	u32	tctl, tarc, tipg = 0;
3282
3283	 INIT_DEBUGOUT("em_initialize_transmit_unit: begin");
3284
3285	for (int i = 0; i < adapter->num_queues; i++, txr++) {
3286		u64 bus_addr = txr->txdma.dma_paddr;
3287		/* Base and Len of TX Ring */
3288		E1000_WRITE_REG(hw, E1000_TDLEN(i),
3289	    	    adapter->num_tx_desc * sizeof(struct e1000_tx_desc));
3290		E1000_WRITE_REG(hw, E1000_TDBAH(i),
3291	    	    (u32)(bus_addr >> 32));
3292		E1000_WRITE_REG(hw, E1000_TDBAL(i),
3293	    	    (u32)bus_addr);
3294		/* Init the HEAD/TAIL indices */
3295		E1000_WRITE_REG(hw, E1000_TDT(i), 0);
3296		E1000_WRITE_REG(hw, E1000_TDH(i), 0);
3297
3298		HW_DEBUGOUT2("Base = %x, Length = %x\n",
3299		    E1000_READ_REG(&adapter->hw, E1000_TDBAL(i)),
3300		    E1000_READ_REG(&adapter->hw, E1000_TDLEN(i)));
3301
3302		txr->queue_status = EM_QUEUE_IDLE;
3303	}
3304
3305	/* Set the default values for the Tx Inter Packet Gap timer */
3306	switch (adapter->hw.mac.type) {
3307	case e1000_82542:
3308		tipg = DEFAULT_82542_TIPG_IPGT;
3309		tipg |= DEFAULT_82542_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT;
3310		tipg |= DEFAULT_82542_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT;
3311		break;
3312	case e1000_80003es2lan:
3313		tipg = DEFAULT_82543_TIPG_IPGR1;
3314		tipg |= DEFAULT_80003ES2LAN_TIPG_IPGR2 <<
3315		    E1000_TIPG_IPGR2_SHIFT;
3316		break;
3317	default:
3318		if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
3319		    (adapter->hw.phy.media_type ==
3320		    e1000_media_type_internal_serdes))
3321			tipg = DEFAULT_82543_TIPG_IPGT_FIBER;
3322		else
3323			tipg = DEFAULT_82543_TIPG_IPGT_COPPER;
3324		tipg |= DEFAULT_82543_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT;
3325		tipg |= DEFAULT_82543_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT;
3326	}
3327
3328	E1000_WRITE_REG(&adapter->hw, E1000_TIPG, tipg);
3329	E1000_WRITE_REG(&adapter->hw, E1000_TIDV, adapter->tx_int_delay.value);
3330
3331	if(adapter->hw.mac.type >= e1000_82540)
3332		E1000_WRITE_REG(&adapter->hw, E1000_TADV,
3333		    adapter->tx_abs_int_delay.value);
3334
3335	if ((adapter->hw.mac.type == e1000_82571) ||
3336	    (adapter->hw.mac.type == e1000_82572)) {
3337		tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
3338		tarc |= SPEED_MODE_BIT;
3339		E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3340	} else if (adapter->hw.mac.type == e1000_80003es2lan) {
3341		tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
3342		tarc |= 1;
3343		E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3344		tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(1));
3345		tarc |= 1;
3346		E1000_WRITE_REG(&adapter->hw, E1000_TARC(1), tarc);
3347	}
3348
3349	adapter->txd_cmd = E1000_TXD_CMD_IFCS;
3350	if (adapter->tx_int_delay.value > 0)
3351		adapter->txd_cmd |= E1000_TXD_CMD_IDE;
3352
3353	/* Program the Transmit Control Register */
3354	tctl = E1000_READ_REG(&adapter->hw, E1000_TCTL);
3355	tctl &= ~E1000_TCTL_CT;
3356	tctl |= (E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN |
3357		   (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT));
3358
3359	if (adapter->hw.mac.type >= e1000_82571)
3360		tctl |= E1000_TCTL_MULR;
3361
3362	/* This write will effectively turn on the transmit unit. */
3363	E1000_WRITE_REG(&adapter->hw, E1000_TCTL, tctl);
3364
3365}
3366
3367
3368/*********************************************************************
3369 *
3370 *  Free all transmit rings.
3371 *
3372 **********************************************************************/
3373static void
3374em_free_transmit_structures(struct adapter *adapter)
3375{
3376	struct tx_ring *txr = adapter->tx_rings;
3377
3378	for (int i = 0; i < adapter->num_queues; i++, txr++) {
3379		EM_TX_LOCK(txr);
3380		em_free_transmit_buffers(txr);
3381		em_dma_free(adapter, &txr->txdma);
3382		EM_TX_UNLOCK(txr);
3383		EM_TX_LOCK_DESTROY(txr);
3384	}
3385
3386	free(adapter->tx_rings, M_DEVBUF);
3387}
3388
3389/*********************************************************************
3390 *
3391 *  Free transmit ring related data structures.
3392 *
3393 **********************************************************************/
3394static void
3395em_free_transmit_buffers(struct tx_ring *txr)
3396{
3397	struct adapter		*adapter = txr->adapter;
3398	struct em_buffer	*txbuf;
3399
3400	INIT_DEBUGOUT("free_transmit_ring: begin");
3401
3402	if (txr->tx_buffers == NULL)
3403		return;
3404
3405	for (int i = 0; i < adapter->num_tx_desc; i++) {
3406		txbuf = &txr->tx_buffers[i];
3407		if (txbuf->m_head != NULL) {
3408			bus_dmamap_sync(txr->txtag, txbuf->map,
3409			    BUS_DMASYNC_POSTWRITE);
3410			bus_dmamap_unload(txr->txtag,
3411			    txbuf->map);
3412			m_freem(txbuf->m_head);
3413			txbuf->m_head = NULL;
3414			if (txbuf->map != NULL) {
3415				bus_dmamap_destroy(txr->txtag,
3416				    txbuf->map);
3417				txbuf->map = NULL;
3418			}
3419		} else if (txbuf->map != NULL) {
3420			bus_dmamap_unload(txr->txtag,
3421			    txbuf->map);
3422			bus_dmamap_destroy(txr->txtag,
3423			    txbuf->map);
3424			txbuf->map = NULL;
3425		}
3426	}
3427#if __FreeBSD_version >= 800000
3428	if (txr->br != NULL)
3429		buf_ring_free(txr->br, M_DEVBUF);
3430#endif
3431	if (txr->tx_buffers != NULL) {
3432		free(txr->tx_buffers, M_DEVBUF);
3433		txr->tx_buffers = NULL;
3434	}
3435	if (txr->txtag != NULL) {
3436		bus_dma_tag_destroy(txr->txtag);
3437		txr->txtag = NULL;
3438	}
3439	return;
3440}
3441
3442
3443/*********************************************************************
3444 *  The offload context is protocol specific (TCP/UDP) and thus
3445 *  only needs to be set when the protocol changes. The occasion
3446 *  of a context change can be a performance detriment, and
3447 *  might be better just disabled. The reason arises in the way
3448 *  in which the controller supports pipelined requests from the
3449 *  Tx data DMA. Up to four requests can be pipelined, and they may
3450 *  belong to the same packet or to multiple packets. However all
3451 *  requests for one packet are issued before a request is issued
3452 *  for a subsequent packet and if a request for the next packet
3453 *  requires a context change, that request will be stalled
3454 *  until the previous request completes. This means setting up
3455 *  a new context effectively disables pipelined Tx data DMA which
3456 *  in turn greatly slow down performance to send small sized
3457 *  frames.
3458 **********************************************************************/
3459static void
3460em_transmit_checksum_setup(struct tx_ring *txr, struct mbuf *mp, int ip_off,
3461    struct ip *ip, u32 *txd_upper, u32 *txd_lower)
3462{
3463	struct adapter			*adapter = txr->adapter;
3464	struct e1000_context_desc	*TXD = NULL;
3465	struct em_buffer		*tx_buffer;
3466	int				cur, hdr_len;
3467	u32				cmd = 0;
3468	u16				offload = 0;
3469	u8				ipcso, ipcss, tucso, tucss;
3470
3471	ipcss = ipcso = tucss = tucso = 0;
3472	hdr_len = ip_off + (ip->ip_hl << 2);
3473	cur = txr->next_avail_desc;
3474
3475	/* Setup of IP header checksum. */
3476	if (mp->m_pkthdr.csum_flags & CSUM_IP) {
3477		*txd_upper |= E1000_TXD_POPTS_IXSM << 8;
3478		offload |= CSUM_IP;
3479		ipcss = ip_off;
3480		ipcso = ip_off + offsetof(struct ip, ip_sum);
3481		/*
3482		 * Start offset for header checksum calculation.
3483		 * End offset for header checksum calculation.
3484		 * Offset of place to put the checksum.
3485		 */
3486		TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
3487		TXD->lower_setup.ip_fields.ipcss = ipcss;
3488		TXD->lower_setup.ip_fields.ipcse = htole16(hdr_len);
3489		TXD->lower_setup.ip_fields.ipcso = ipcso;
3490		cmd |= E1000_TXD_CMD_IP;
3491	}
3492
3493	if (mp->m_pkthdr.csum_flags & CSUM_TCP) {
3494 		*txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
3495 		*txd_upper |= E1000_TXD_POPTS_TXSM << 8;
3496 		offload |= CSUM_TCP;
3497 		tucss = hdr_len;
3498 		tucso = hdr_len + offsetof(struct tcphdr, th_sum);
3499 		/*
3500 		 * Setting up new checksum offload context for every frames
3501 		 * takes a lot of processing time for hardware. This also
3502 		 * reduces performance a lot for small sized frames so avoid
3503 		 * it if driver can use previously configured checksum
3504 		 * offload context.
3505 		 */
3506 		if (txr->last_hw_offload == offload) {
3507 			if (offload & CSUM_IP) {
3508 				if (txr->last_hw_ipcss == ipcss &&
3509 				    txr->last_hw_ipcso == ipcso &&
3510 				    txr->last_hw_tucss == tucss &&
3511 				    txr->last_hw_tucso == tucso)
3512 					return;
3513 			} else {
3514 				if (txr->last_hw_tucss == tucss &&
3515 				    txr->last_hw_tucso == tucso)
3516 					return;
3517 			}
3518  		}
3519 		txr->last_hw_offload = offload;
3520 		txr->last_hw_tucss = tucss;
3521 		txr->last_hw_tucso = tucso;
3522 		/*
3523 		 * Start offset for payload checksum calculation.
3524 		 * End offset for payload checksum calculation.
3525 		 * Offset of place to put the checksum.
3526 		 */
3527		TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
3528 		TXD->upper_setup.tcp_fields.tucss = hdr_len;
3529 		TXD->upper_setup.tcp_fields.tucse = htole16(0);
3530 		TXD->upper_setup.tcp_fields.tucso = tucso;
3531 		cmd |= E1000_TXD_CMD_TCP;
3532 	} else if (mp->m_pkthdr.csum_flags & CSUM_UDP) {
3533 		*txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
3534 		*txd_upper |= E1000_TXD_POPTS_TXSM << 8;
3535 		tucss = hdr_len;
3536 		tucso = hdr_len + offsetof(struct udphdr, uh_sum);
3537 		/*
3538 		 * Setting up new checksum offload context for every frames
3539 		 * takes a lot of processing time for hardware. This also
3540 		 * reduces performance a lot for small sized frames so avoid
3541 		 * it if driver can use previously configured checksum
3542 		 * offload context.
3543 		 */
3544 		if (txr->last_hw_offload == offload) {
3545 			if (offload & CSUM_IP) {
3546 				if (txr->last_hw_ipcss == ipcss &&
3547 				    txr->last_hw_ipcso == ipcso &&
3548 				    txr->last_hw_tucss == tucss &&
3549 				    txr->last_hw_tucso == tucso)
3550 					return;
3551 			} else {
3552 				if (txr->last_hw_tucss == tucss &&
3553 				    txr->last_hw_tucso == tucso)
3554 					return;
3555 			}
3556 		}
3557 		txr->last_hw_offload = offload;
3558 		txr->last_hw_tucss = tucss;
3559 		txr->last_hw_tucso = tucso;
3560 		/*
3561 		 * Start offset for header checksum calculation.
3562 		 * End offset for header checksum calculation.
3563 		 * Offset of place to put the checksum.
3564 		 */
3565		TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
3566 		TXD->upper_setup.tcp_fields.tucss = tucss;
3567 		TXD->upper_setup.tcp_fields.tucse = htole16(0);
3568 		TXD->upper_setup.tcp_fields.tucso = tucso;
3569  	}
3570
3571 	if (offload & CSUM_IP) {
3572 		txr->last_hw_ipcss = ipcss;
3573 		txr->last_hw_ipcso = ipcso;
3574  	}
3575
3576	TXD->tcp_seg_setup.data = htole32(0);
3577	TXD->cmd_and_length =
3578	    htole32(adapter->txd_cmd | E1000_TXD_CMD_DEXT | cmd);
3579	tx_buffer = &txr->tx_buffers[cur];
3580	tx_buffer->m_head = NULL;
3581	tx_buffer->next_eop = -1;
3582
3583	if (++cur == adapter->num_tx_desc)
3584		cur = 0;
3585
3586	txr->tx_avail--;
3587	txr->next_avail_desc = cur;
3588}
3589
3590
3591/**********************************************************************
3592 *
3593 *  Setup work for hardware segmentation offload (TSO)
3594 *
3595 **********************************************************************/
3596static void
3597em_tso_setup(struct tx_ring *txr, struct mbuf *mp, int ip_off,
3598    struct ip *ip, struct tcphdr *tp, u32 *txd_upper, u32 *txd_lower)
3599{
3600	struct adapter			*adapter = txr->adapter;
3601	struct e1000_context_desc	*TXD;
3602	struct em_buffer		*tx_buffer;
3603	int cur, hdr_len;
3604
3605	/*
3606	 * In theory we can use the same TSO context if and only if
3607	 * frame is the same type(IP/TCP) and the same MSS. However
3608	 * checking whether a frame has the same IP/TCP structure is
3609	 * hard thing so just ignore that and always restablish a
3610	 * new TSO context.
3611	 */
3612	hdr_len = ip_off + (ip->ip_hl << 2) + (tp->th_off << 2);
3613	*txd_lower = (E1000_TXD_CMD_DEXT |	/* Extended descr type */
3614		      E1000_TXD_DTYP_D |	/* Data descr type */
3615		      E1000_TXD_CMD_TSE);	/* Do TSE on this packet */
3616
3617	/* IP and/or TCP header checksum calculation and insertion. */
3618	*txd_upper = (E1000_TXD_POPTS_IXSM | E1000_TXD_POPTS_TXSM) << 8;
3619
3620	cur = txr->next_avail_desc;
3621	tx_buffer = &txr->tx_buffers[cur];
3622	TXD = (struct e1000_context_desc *) &txr->tx_base[cur];
3623
3624	/*
3625	 * Start offset for header checksum calculation.
3626	 * End offset for header checksum calculation.
3627	 * Offset of place put the checksum.
3628	 */
3629	TXD->lower_setup.ip_fields.ipcss = ip_off;
3630	TXD->lower_setup.ip_fields.ipcse =
3631	    htole16(ip_off + (ip->ip_hl << 2) - 1);
3632	TXD->lower_setup.ip_fields.ipcso = ip_off + offsetof(struct ip, ip_sum);
3633	/*
3634	 * Start offset for payload checksum calculation.
3635	 * End offset for payload checksum calculation.
3636	 * Offset of place to put the checksum.
3637	 */
3638	TXD->upper_setup.tcp_fields.tucss = ip_off + (ip->ip_hl << 2);
3639	TXD->upper_setup.tcp_fields.tucse = 0;
3640	TXD->upper_setup.tcp_fields.tucso =
3641	    ip_off + (ip->ip_hl << 2) + offsetof(struct tcphdr, th_sum);
3642	/*
3643	 * Payload size per packet w/o any headers.
3644	 * Length of all headers up to payload.
3645	 */
3646	TXD->tcp_seg_setup.fields.mss = htole16(mp->m_pkthdr.tso_segsz);
3647	TXD->tcp_seg_setup.fields.hdr_len = hdr_len;
3648
3649	TXD->cmd_and_length = htole32(adapter->txd_cmd |
3650				E1000_TXD_CMD_DEXT |	/* Extended descr */
3651				E1000_TXD_CMD_TSE |	/* TSE context */
3652				E1000_TXD_CMD_IP |	/* Do IP csum */
3653				E1000_TXD_CMD_TCP |	/* Do TCP checksum */
3654				(mp->m_pkthdr.len - (hdr_len))); /* Total len */
3655
3656	tx_buffer->m_head = NULL;
3657	tx_buffer->next_eop = -1;
3658
3659	if (++cur == adapter->num_tx_desc)
3660		cur = 0;
3661
3662	txr->tx_avail--;
3663	txr->next_avail_desc = cur;
3664	txr->tx_tso = TRUE;
3665}
3666
3667
3668/**********************************************************************
3669 *
3670 *  Examine each tx_buffer in the used queue. If the hardware is done
3671 *  processing the packet then free associated resources. The
3672 *  tx_buffer is put back on the free queue.
3673 *
3674 **********************************************************************/
3675static bool
3676em_txeof(struct tx_ring *txr)
3677{
3678	struct adapter	*adapter = txr->adapter;
3679        int first, last, done, processed;
3680        struct em_buffer *tx_buffer;
3681        struct e1000_tx_desc   *tx_desc, *eop_desc;
3682	struct ifnet   *ifp = adapter->ifp;
3683
3684	EM_TX_LOCK_ASSERT(txr);
3685
3686	/* No work, make sure watchdog is off */
3687        if (txr->tx_avail == adapter->num_tx_desc) {
3688		txr->queue_status = EM_QUEUE_IDLE;
3689                return (FALSE);
3690	}
3691
3692	processed = 0;
3693        first = txr->next_to_clean;
3694        tx_desc = &txr->tx_base[first];
3695        tx_buffer = &txr->tx_buffers[first];
3696	last = tx_buffer->next_eop;
3697        eop_desc = &txr->tx_base[last];
3698
3699	/*
3700	 * What this does is get the index of the
3701	 * first descriptor AFTER the EOP of the
3702	 * first packet, that way we can do the
3703	 * simple comparison on the inner while loop.
3704	 */
3705	if (++last == adapter->num_tx_desc)
3706 		last = 0;
3707	done = last;
3708
3709        bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3710            BUS_DMASYNC_POSTREAD);
3711
3712        while (eop_desc->upper.fields.status & E1000_TXD_STAT_DD) {
3713		/* We clean the range of the packet */
3714		while (first != done) {
3715                	tx_desc->upper.data = 0;
3716                	tx_desc->lower.data = 0;
3717                	tx_desc->buffer_addr = 0;
3718                	++txr->tx_avail;
3719			++processed;
3720
3721			if (tx_buffer->m_head) {
3722				bus_dmamap_sync(txr->txtag,
3723				    tx_buffer->map,
3724				    BUS_DMASYNC_POSTWRITE);
3725				bus_dmamap_unload(txr->txtag,
3726				    tx_buffer->map);
3727                        	m_freem(tx_buffer->m_head);
3728                        	tx_buffer->m_head = NULL;
3729                	}
3730			tx_buffer->next_eop = -1;
3731			txr->watchdog_time = ticks;
3732
3733	                if (++first == adapter->num_tx_desc)
3734				first = 0;
3735
3736	                tx_buffer = &txr->tx_buffers[first];
3737			tx_desc = &txr->tx_base[first];
3738		}
3739		++ifp->if_opackets;
3740		/* See if we can continue to the next packet */
3741		last = tx_buffer->next_eop;
3742		if (last != -1) {
3743        		eop_desc = &txr->tx_base[last];
3744			/* Get new done point */
3745			if (++last == adapter->num_tx_desc) last = 0;
3746			done = last;
3747		} else
3748			break;
3749        }
3750        bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3751            BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3752
3753        txr->next_to_clean = first;
3754
3755	/*
3756	** Watchdog calculation, we know there's
3757	** work outstanding or the first return
3758	** would have been taken, so none processed
3759	** for too long indicates a hang. local timer
3760	** will examine this and do a reset if needed.
3761	*/
3762	if ((!processed) && ((ticks - txr->watchdog_time) > EM_WATCHDOG))
3763		txr->queue_status = EM_QUEUE_HUNG;
3764
3765        /*
3766         * If we have a minimum free, clear IFF_DRV_OACTIVE
3767         * to tell the stack that it is OK to send packets.
3768         */
3769        if (txr->tx_avail > EM_MAX_SCATTER)
3770                ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
3771
3772	/* Disable watchdog if all clean */
3773	if (txr->tx_avail == adapter->num_tx_desc) {
3774		txr->queue_status = EM_QUEUE_IDLE;
3775		return (FALSE);
3776	}
3777
3778	return (TRUE);
3779}
3780
3781
3782/*********************************************************************
3783 *
3784 *  Refresh RX descriptor mbufs from system mbuf buffer pool.
3785 *
3786 **********************************************************************/
3787static void
3788em_refresh_mbufs(struct rx_ring *rxr, int limit)
3789{
3790	struct adapter		*adapter = rxr->adapter;
3791	struct mbuf		*m;
3792	bus_dma_segment_t	segs[1];
3793	struct em_buffer	*rxbuf;
3794	int			i, j, error, nsegs;
3795	bool			cleaned = FALSE;
3796
3797	i = j = rxr->next_to_refresh;
3798	/*
3799	** Get one descriptor beyond
3800	** our work mark to control
3801	** the loop.
3802	*/
3803	if (++j == adapter->num_rx_desc)
3804		j = 0;
3805
3806	while (j != limit) {
3807		rxbuf = &rxr->rx_buffers[i];
3808		if (rxbuf->m_head == NULL) {
3809			m = m_getjcl(M_DONTWAIT, MT_DATA,
3810			    M_PKTHDR, adapter->rx_mbuf_sz);
3811			/*
3812			** If we have a temporary resource shortage
3813			** that causes a failure, just abort refresh
3814			** for now, we will return to this point when
3815			** reinvoked from em_rxeof.
3816			*/
3817			if (m == NULL)
3818				goto update;
3819		} else
3820			m = rxbuf->m_head;
3821
3822		m->m_len = m->m_pkthdr.len = adapter->rx_mbuf_sz;
3823		m->m_flags |= M_PKTHDR;
3824		m->m_data = m->m_ext.ext_buf;
3825
3826		/* Use bus_dma machinery to setup the memory mapping  */
3827		error = bus_dmamap_load_mbuf_sg(rxr->rxtag, rxbuf->map,
3828		    m, segs, &nsegs, BUS_DMA_NOWAIT);
3829		if (error != 0) {
3830			printf("Refresh mbufs: hdr dmamap load"
3831			    " failure - %d\n", error);
3832			m_free(m);
3833			rxbuf->m_head = NULL;
3834			goto update;
3835		}
3836		rxbuf->m_head = m;
3837		bus_dmamap_sync(rxr->rxtag,
3838		    rxbuf->map, BUS_DMASYNC_PREREAD);
3839		rxr->rx_base[i].buffer_addr = htole64(segs[0].ds_addr);
3840		cleaned = TRUE;
3841
3842		i = j; /* Next is precalulated for us */
3843		rxr->next_to_refresh = i;
3844		/* Calculate next controlling index */
3845		if (++j == adapter->num_rx_desc)
3846			j = 0;
3847	}
3848update:
3849	/*
3850	** Update the tail pointer only if,
3851	** and as far as we have refreshed.
3852	*/
3853	if (cleaned)
3854		E1000_WRITE_REG(&adapter->hw,
3855		    E1000_RDT(rxr->me), rxr->next_to_refresh);
3856
3857	return;
3858}
3859
3860
3861/*********************************************************************
3862 *
3863 *  Allocate memory for rx_buffer structures. Since we use one
3864 *  rx_buffer per received packet, the maximum number of rx_buffer's
3865 *  that we'll need is equal to the number of receive descriptors
3866 *  that we've allocated.
3867 *
3868 **********************************************************************/
3869static int
3870em_allocate_receive_buffers(struct rx_ring *rxr)
3871{
3872	struct adapter		*adapter = rxr->adapter;
3873	device_t		dev = adapter->dev;
3874	struct em_buffer	*rxbuf;
3875	int			error;
3876
3877	rxr->rx_buffers = malloc(sizeof(struct em_buffer) *
3878	    adapter->num_rx_desc, M_DEVBUF, M_NOWAIT | M_ZERO);
3879	if (rxr->rx_buffers == NULL) {
3880		device_printf(dev, "Unable to allocate rx_buffer memory\n");
3881		return (ENOMEM);
3882	}
3883
3884	error = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */
3885				1, 0,			/* alignment, bounds */
3886				BUS_SPACE_MAXADDR,	/* lowaddr */
3887				BUS_SPACE_MAXADDR,	/* highaddr */
3888				NULL, NULL,		/* filter, filterarg */
3889				MJUM9BYTES,		/* maxsize */
3890				1,			/* nsegments */
3891				MJUM9BYTES,		/* maxsegsize */
3892				0,			/* flags */
3893				NULL,			/* lockfunc */
3894				NULL,			/* lockarg */
3895				&rxr->rxtag);
3896	if (error) {
3897		device_printf(dev, "%s: bus_dma_tag_create failed %d\n",
3898		    __func__, error);
3899		goto fail;
3900	}
3901
3902	rxbuf = rxr->rx_buffers;
3903	for (int i = 0; i < adapter->num_rx_desc; i++, rxbuf++) {
3904		rxbuf = &rxr->rx_buffers[i];
3905		error = bus_dmamap_create(rxr->rxtag, BUS_DMA_NOWAIT,
3906		    &rxbuf->map);
3907		if (error) {
3908			device_printf(dev, "%s: bus_dmamap_create failed: %d\n",
3909			    __func__, error);
3910			goto fail;
3911		}
3912	}
3913
3914	return (0);
3915
3916fail:
3917	em_free_receive_structures(adapter);
3918	return (error);
3919}
3920
3921
3922/*********************************************************************
3923 *
3924 *  Initialize a receive ring and its buffers.
3925 *
3926 **********************************************************************/
3927static int
3928em_setup_receive_ring(struct rx_ring *rxr)
3929{
3930	struct	adapter 	*adapter = rxr->adapter;
3931	struct em_buffer	*rxbuf;
3932	bus_dma_segment_t	seg[1];
3933	int			i, j, nsegs, error = 0;
3934
3935
3936	/* Clear the ring contents */
3937	EM_RX_LOCK(rxr);
3938
3939	/* Invalidate all descriptors */
3940	for (i = 0; i < adapter->num_rx_desc; i++) {
3941		struct e1000_rx_desc* cur;
3942		cur = &rxr->rx_base[i];
3943		cur->status = 0;
3944	}
3945
3946	/* Now replenish the mbufs */
3947	i = j = rxr->next_to_refresh;
3948	if (++j == adapter->num_rx_desc)
3949		j = 0;
3950
3951	while (j != rxr->next_to_check) {
3952		rxbuf = &rxr->rx_buffers[i];
3953		rxbuf->m_head = m_getjcl(M_DONTWAIT, MT_DATA,
3954		    M_PKTHDR, adapter->rx_mbuf_sz);
3955		if (rxbuf->m_head == NULL) {
3956			error = ENOBUFS;
3957			goto fail;
3958		}
3959		rxbuf->m_head->m_len = adapter->rx_mbuf_sz;
3960		rxbuf->m_head->m_flags &= ~M_HASFCS; /* we strip it */
3961		rxbuf->m_head->m_pkthdr.len = adapter->rx_mbuf_sz;
3962
3963		/* Get the memory mapping */
3964		error = bus_dmamap_load_mbuf_sg(rxr->rxtag,
3965		    rxbuf->map, rxbuf->m_head, seg,
3966		    &nsegs, BUS_DMA_NOWAIT);
3967		if (error != 0) {
3968			m_freem(rxbuf->m_head);
3969			rxbuf->m_head = NULL;
3970			goto fail;
3971		}
3972		bus_dmamap_sync(rxr->rxtag,
3973		    rxbuf->map, BUS_DMASYNC_PREREAD);
3974
3975		/* Update descriptor */
3976		rxr->rx_base[i].buffer_addr = htole64(seg[0].ds_addr);
3977		i = j;
3978		if (++j == adapter->num_rx_desc)
3979			j = 0;
3980	}
3981
3982fail:
3983	rxr->next_to_refresh = i;
3984	bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
3985	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3986	EM_RX_UNLOCK(rxr);
3987	return (error);
3988}
3989
3990/*********************************************************************
3991 *
3992 *  Initialize all receive rings.
3993 *
3994 **********************************************************************/
3995static int
3996em_setup_receive_structures(struct adapter *adapter)
3997{
3998	struct rx_ring *rxr = adapter->rx_rings;
3999	int q;
4000
4001	for (q = 0; q < adapter->num_queues; q++, rxr++)
4002		if (em_setup_receive_ring(rxr))
4003			goto fail;
4004
4005	return (0);
4006fail:
4007	/*
4008	 * Free RX buffers allocated so far, we will only handle
4009	 * the rings that completed, the failing case will have
4010	 * cleaned up for itself. 'q' failed, so its the terminus.
4011	 */
4012	for (int i = 0, n = 0; i < q; ++i) {
4013		rxr = &adapter->rx_rings[i];
4014		n = rxr->next_to_check;
4015		while(n != rxr->next_to_refresh) {
4016			struct em_buffer *rxbuf;
4017			rxbuf = &rxr->rx_buffers[n];
4018			if (rxbuf->m_head != NULL) {
4019				bus_dmamap_sync(rxr->rxtag, rxbuf->map,
4020			  	  BUS_DMASYNC_POSTREAD);
4021				bus_dmamap_unload(rxr->rxtag, rxbuf->map);
4022				m_freem(rxbuf->m_head);
4023				rxbuf->m_head = NULL;
4024			}
4025			if (++n == adapter->num_rx_desc)
4026				n = 0;
4027		}
4028		rxr->next_to_check = 0;
4029		rxr->next_to_refresh = 0;
4030	}
4031
4032	return (ENOBUFS);
4033}
4034
4035/*********************************************************************
4036 *
4037 *  Free all receive rings.
4038 *
4039 **********************************************************************/
4040static void
4041em_free_receive_structures(struct adapter *adapter)
4042{
4043	struct rx_ring *rxr = adapter->rx_rings;
4044
4045	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4046		em_free_receive_buffers(rxr);
4047		/* Free the ring memory as well */
4048		em_dma_free(adapter, &rxr->rxdma);
4049		EM_RX_LOCK_DESTROY(rxr);
4050	}
4051
4052	free(adapter->rx_rings, M_DEVBUF);
4053}
4054
4055
4056/*********************************************************************
4057 *
4058 *  Free receive ring data structures
4059 *
4060 **********************************************************************/
4061static void
4062em_free_receive_buffers(struct rx_ring *rxr)
4063{
4064	struct adapter		*adapter = rxr->adapter;
4065	struct em_buffer	*rxbuf = NULL;
4066
4067	INIT_DEBUGOUT("free_receive_buffers: begin");
4068
4069	if (rxr->rx_buffers != NULL) {
4070		int i = rxr->next_to_check;
4071		while(i != rxr->next_to_refresh) {
4072			rxbuf = &rxr->rx_buffers[i];
4073			if (rxbuf->map != NULL) {
4074				bus_dmamap_sync(rxr->rxtag, rxbuf->map,
4075				    BUS_DMASYNC_POSTREAD);
4076				bus_dmamap_unload(rxr->rxtag, rxbuf->map);
4077				bus_dmamap_destroy(rxr->rxtag, rxbuf->map);
4078			}
4079			if (rxbuf->m_head != NULL) {
4080				m_freem(rxbuf->m_head);
4081				rxbuf->m_head = NULL;
4082			}
4083			if (++i == adapter->num_rx_desc)
4084				i = 0;
4085		}
4086		free(rxr->rx_buffers, M_DEVBUF);
4087		rxr->rx_buffers = NULL;
4088		rxr->next_to_check = 0;
4089		rxr->next_to_refresh = 0;
4090	}
4091
4092	if (rxr->rxtag != NULL) {
4093		bus_dma_tag_destroy(rxr->rxtag);
4094		rxr->rxtag = NULL;
4095	}
4096
4097	return;
4098}
4099
4100
4101/*********************************************************************
4102 *
4103 *  Enable receive unit.
4104 *
4105 **********************************************************************/
4106#define MAX_INTS_PER_SEC	8000
4107#define DEFAULT_ITR	     1000000000/(MAX_INTS_PER_SEC * 256)
4108
4109static void
4110em_initialize_receive_unit(struct adapter *adapter)
4111{
4112	struct rx_ring	*rxr = adapter->rx_rings;
4113	struct ifnet	*ifp = adapter->ifp;
4114	struct e1000_hw	*hw = &adapter->hw;
4115	u64	bus_addr;
4116	u32	rctl, rxcsum;
4117
4118	INIT_DEBUGOUT("em_initialize_receive_units: begin");
4119
4120	/*
4121	 * Make sure receives are disabled while setting
4122	 * up the descriptor ring
4123	 */
4124	rctl = E1000_READ_REG(hw, E1000_RCTL);
4125	E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
4126
4127	E1000_WRITE_REG(&adapter->hw, E1000_RADV,
4128	    adapter->rx_abs_int_delay.value);
4129	/*
4130	 * Set the interrupt throttling rate. Value is calculated
4131	 * as DEFAULT_ITR = 1/(MAX_INTS_PER_SEC * 256ns)
4132	 */
4133	E1000_WRITE_REG(hw, E1000_ITR, DEFAULT_ITR);
4134
4135	/*
4136	** When using MSIX interrupts we need to throttle
4137	** using the EITR register (82574 only)
4138	*/
4139	if (hw->mac.type == e1000_82574)
4140		for (int i = 0; i < 4; i++)
4141			E1000_WRITE_REG(hw, E1000_EITR_82574(i),
4142			    DEFAULT_ITR);
4143
4144	/* Disable accelerated ackknowledge */
4145	if (adapter->hw.mac.type == e1000_82574)
4146		E1000_WRITE_REG(hw, E1000_RFCTL, E1000_RFCTL_ACK_DIS);
4147
4148	if (ifp->if_capenable & IFCAP_RXCSUM) {
4149		rxcsum = E1000_READ_REG(hw, E1000_RXCSUM);
4150		rxcsum |= (E1000_RXCSUM_IPOFL | E1000_RXCSUM_TUOFL);
4151		E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum);
4152	}
4153
4154	/*
4155	** XXX TEMPORARY WORKAROUND: on some systems with 82573
4156	** long latencies are observed, like Lenovo X60. This
4157	** change eliminates the problem, but since having positive
4158	** values in RDTR is a known source of problems on other
4159	** platforms another solution is being sought.
4160	*/
4161	if (hw->mac.type == e1000_82573)
4162		E1000_WRITE_REG(hw, E1000_RDTR, 0x20);
4163
4164	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4165		/* Setup the Base and Length of the Rx Descriptor Ring */
4166		bus_addr = rxr->rxdma.dma_paddr;
4167		E1000_WRITE_REG(hw, E1000_RDLEN(i),
4168		    adapter->num_rx_desc * sizeof(struct e1000_rx_desc));
4169		E1000_WRITE_REG(hw, E1000_RDBAH(i), (u32)(bus_addr >> 32));
4170		E1000_WRITE_REG(hw, E1000_RDBAL(i), (u32)bus_addr);
4171		/* Setup the Head and Tail Descriptor Pointers */
4172		E1000_WRITE_REG(hw, E1000_RDH(i), rxr->next_to_check);
4173		E1000_WRITE_REG(hw, E1000_RDT(i), rxr->next_to_refresh);
4174	}
4175
4176	/* Set early receive threshold on appropriate hw */
4177	if (((adapter->hw.mac.type == e1000_ich9lan) ||
4178	    (adapter->hw.mac.type == e1000_pch2lan) ||
4179	    (adapter->hw.mac.type == e1000_ich10lan)) &&
4180	    (ifp->if_mtu > ETHERMTU)) {
4181		u32 rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(0));
4182		E1000_WRITE_REG(hw, E1000_RXDCTL(0), rxdctl | 3);
4183		E1000_WRITE_REG(hw, E1000_ERT, 0x100 | (1 << 13));
4184	}
4185
4186	if (adapter->hw.mac.type == e1000_pch2lan) {
4187		if (ifp->if_mtu > ETHERMTU)
4188			e1000_lv_jumbo_workaround_ich8lan(hw, TRUE);
4189		else
4190			e1000_lv_jumbo_workaround_ich8lan(hw, FALSE);
4191	}
4192
4193	/* Setup the Receive Control Register */
4194	rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
4195	rctl |= E1000_RCTL_EN | E1000_RCTL_BAM |
4196	    E1000_RCTL_LBM_NO | E1000_RCTL_RDMTS_HALF |
4197	    (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
4198
4199        /* Strip the CRC */
4200        rctl |= E1000_RCTL_SECRC;
4201
4202        /* Make sure VLAN Filters are off */
4203        rctl &= ~E1000_RCTL_VFE;
4204	rctl &= ~E1000_RCTL_SBP;
4205
4206	if (adapter->rx_mbuf_sz == MCLBYTES)
4207		rctl |= E1000_RCTL_SZ_2048;
4208	else if (adapter->rx_mbuf_sz == MJUMPAGESIZE)
4209		rctl |= E1000_RCTL_SZ_4096 | E1000_RCTL_BSEX;
4210	else if (adapter->rx_mbuf_sz > MJUMPAGESIZE)
4211		rctl |= E1000_RCTL_SZ_8192 | E1000_RCTL_BSEX;
4212
4213	if (ifp->if_mtu > ETHERMTU)
4214		rctl |= E1000_RCTL_LPE;
4215	else
4216		rctl &= ~E1000_RCTL_LPE;
4217
4218	/* Write out the settings */
4219	E1000_WRITE_REG(hw, E1000_RCTL, rctl);
4220
4221	return;
4222}
4223
4224
4225/*********************************************************************
4226 *
4227 *  This routine executes in interrupt context. It replenishes
4228 *  the mbufs in the descriptor and sends data which has been
4229 *  dma'ed into host memory to upper layer.
4230 *
4231 *  We loop at most count times if count is > 0, or until done if
4232 *  count < 0.
4233 *
4234 *  For polling we also now return the number of cleaned packets
4235 *********************************************************************/
4236static bool
4237em_rxeof(struct rx_ring *rxr, int count, int *done)
4238{
4239	struct adapter		*adapter = rxr->adapter;
4240	struct ifnet		*ifp = adapter->ifp;
4241	struct mbuf		*mp, *sendmp;
4242	u8			status = 0;
4243	u16 			len;
4244	int			i, processed, rxdone = 0;
4245	bool			eop;
4246	struct e1000_rx_desc	*cur;
4247
4248	EM_RX_LOCK(rxr);
4249
4250	for (i = rxr->next_to_check, processed = 0; count != 0;) {
4251
4252		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
4253			break;
4254
4255		bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4256		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
4257
4258		cur = &rxr->rx_base[i];
4259		status = cur->status;
4260		mp = sendmp = NULL;
4261
4262		if ((status & E1000_RXD_STAT_DD) == 0)
4263			break;
4264
4265		len = le16toh(cur->length);
4266		eop = (status & E1000_RXD_STAT_EOP) != 0;
4267
4268		if ((cur->errors & E1000_RXD_ERR_FRAME_ERR_MASK) ||
4269		    (rxr->discard == TRUE)) {
4270			ifp->if_ierrors++;
4271			++rxr->rx_discarded;
4272			if (!eop) /* Catch subsequent segs */
4273				rxr->discard = TRUE;
4274			else
4275				rxr->discard = FALSE;
4276			em_rx_discard(rxr, i);
4277			goto next_desc;
4278		}
4279
4280		/* Assign correct length to the current fragment */
4281		mp = rxr->rx_buffers[i].m_head;
4282		mp->m_len = len;
4283
4284		/* Trigger for refresh */
4285		rxr->rx_buffers[i].m_head = NULL;
4286
4287		/* First segment? */
4288		if (rxr->fmp == NULL) {
4289			mp->m_pkthdr.len = len;
4290			rxr->fmp = rxr->lmp = mp;
4291		} else {
4292			/* Chain mbuf's together */
4293			mp->m_flags &= ~M_PKTHDR;
4294			rxr->lmp->m_next = mp;
4295			rxr->lmp = mp;
4296			rxr->fmp->m_pkthdr.len += len;
4297		}
4298
4299		if (eop) {
4300			--count;
4301			sendmp = rxr->fmp;
4302			sendmp->m_pkthdr.rcvif = ifp;
4303			ifp->if_ipackets++;
4304			em_receive_checksum(cur, sendmp);
4305#ifndef __NO_STRICT_ALIGNMENT
4306			if (adapter->max_frame_size >
4307			    (MCLBYTES - ETHER_ALIGN) &&
4308			    em_fixup_rx(rxr) != 0)
4309				goto skip;
4310#endif
4311			if (status & E1000_RXD_STAT_VP) {
4312				sendmp->m_pkthdr.ether_vtag =
4313				    (le16toh(cur->special) &
4314				    E1000_RXD_SPC_VLAN_MASK);
4315				sendmp->m_flags |= M_VLANTAG;
4316			}
4317#ifdef EM_MULTIQUEUE
4318			sendmp->m_pkthdr.flowid = rxr->msix;
4319			sendmp->m_flags |= M_FLOWID;
4320#endif
4321#ifndef __NO_STRICT_ALIGNMENT
4322skip:
4323#endif
4324			rxr->fmp = rxr->lmp = NULL;
4325		}
4326next_desc:
4327		/* Zero out the receive descriptors status. */
4328		cur->status = 0;
4329		++rxdone;	/* cumulative for POLL */
4330		++processed;
4331
4332		/* Advance our pointers to the next descriptor. */
4333		if (++i == adapter->num_rx_desc)
4334			i = 0;
4335
4336		/* Send to the stack */
4337		if (sendmp != NULL) {
4338			rxr->next_to_check = i;
4339			EM_RX_UNLOCK(rxr);
4340			(*ifp->if_input)(ifp, sendmp);
4341			EM_RX_LOCK(rxr);
4342			i = rxr->next_to_check;
4343		}
4344
4345		/* Only refresh mbufs every 8 descriptors */
4346		if (processed == 8) {
4347			em_refresh_mbufs(rxr, i);
4348			processed = 0;
4349		}
4350	}
4351
4352	/* Catch any remaining refresh work */
4353	if (e1000_rx_unrefreshed(rxr))
4354		em_refresh_mbufs(rxr, i);
4355
4356	rxr->next_to_check = i;
4357	if (done != NULL)
4358		*done = rxdone;
4359	EM_RX_UNLOCK(rxr);
4360
4361	return ((status & E1000_RXD_STAT_DD) ? TRUE : FALSE);
4362}
4363
4364static __inline void
4365em_rx_discard(struct rx_ring *rxr, int i)
4366{
4367	struct em_buffer	*rbuf;
4368
4369	rbuf = &rxr->rx_buffers[i];
4370	/* Free any previous pieces */
4371	if (rxr->fmp != NULL) {
4372		rxr->fmp->m_flags |= M_PKTHDR;
4373		m_freem(rxr->fmp);
4374		rxr->fmp = NULL;
4375		rxr->lmp = NULL;
4376	}
4377	/*
4378	** Free buffer and allow em_refresh_mbufs()
4379	** to clean up and recharge buffer.
4380	*/
4381	if (rbuf->m_head) {
4382		m_free(rbuf->m_head);
4383		rbuf->m_head = NULL;
4384	}
4385	return;
4386}
4387
4388#ifndef __NO_STRICT_ALIGNMENT
4389/*
4390 * When jumbo frames are enabled we should realign entire payload on
4391 * architecures with strict alignment. This is serious design mistake of 8254x
4392 * as it nullifies DMA operations. 8254x just allows RX buffer size to be
4393 * 2048/4096/8192/16384. What we really want is 2048 - ETHER_ALIGN to align its
4394 * payload. On architecures without strict alignment restrictions 8254x still
4395 * performs unaligned memory access which would reduce the performance too.
4396 * To avoid copying over an entire frame to align, we allocate a new mbuf and
4397 * copy ethernet header to the new mbuf. The new mbuf is prepended into the
4398 * existing mbuf chain.
4399 *
4400 * Be aware, best performance of the 8254x is achived only when jumbo frame is
4401 * not used at all on architectures with strict alignment.
4402 */
4403static int
4404em_fixup_rx(struct rx_ring *rxr)
4405{
4406	struct adapter *adapter = rxr->adapter;
4407	struct mbuf *m, *n;
4408	int error;
4409
4410	error = 0;
4411	m = rxr->fmp;
4412	if (m->m_len <= (MCLBYTES - ETHER_HDR_LEN)) {
4413		bcopy(m->m_data, m->m_data + ETHER_HDR_LEN, m->m_len);
4414		m->m_data += ETHER_HDR_LEN;
4415	} else {
4416		MGETHDR(n, M_DONTWAIT, MT_DATA);
4417		if (n != NULL) {
4418			bcopy(m->m_data, n->m_data, ETHER_HDR_LEN);
4419			m->m_data += ETHER_HDR_LEN;
4420			m->m_len -= ETHER_HDR_LEN;
4421			n->m_len = ETHER_HDR_LEN;
4422			M_MOVE_PKTHDR(n, m);
4423			n->m_next = m;
4424			rxr->fmp = n;
4425		} else {
4426			adapter->dropped_pkts++;
4427			m_freem(rxr->fmp);
4428			rxr->fmp = NULL;
4429			error = ENOMEM;
4430		}
4431	}
4432
4433	return (error);
4434}
4435#endif
4436
4437/*********************************************************************
4438 *
4439 *  Verify that the hardware indicated that the checksum is valid.
4440 *  Inform the stack about the status of checksum so that stack
4441 *  doesn't spend time verifying the checksum.
4442 *
4443 *********************************************************************/
4444static void
4445em_receive_checksum(struct e1000_rx_desc *rx_desc, struct mbuf *mp)
4446{
4447	/* Ignore Checksum bit is set */
4448	if (rx_desc->status & E1000_RXD_STAT_IXSM) {
4449		mp->m_pkthdr.csum_flags = 0;
4450		return;
4451	}
4452
4453	if (rx_desc->status & E1000_RXD_STAT_IPCS) {
4454		/* Did it pass? */
4455		if (!(rx_desc->errors & E1000_RXD_ERR_IPE)) {
4456			/* IP Checksum Good */
4457			mp->m_pkthdr.csum_flags = CSUM_IP_CHECKED;
4458			mp->m_pkthdr.csum_flags |= CSUM_IP_VALID;
4459
4460		} else {
4461			mp->m_pkthdr.csum_flags = 0;
4462		}
4463	}
4464
4465	if (rx_desc->status & E1000_RXD_STAT_TCPCS) {
4466		/* Did it pass? */
4467		if (!(rx_desc->errors & E1000_RXD_ERR_TCPE)) {
4468			mp->m_pkthdr.csum_flags |=
4469			(CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
4470			mp->m_pkthdr.csum_data = htons(0xffff);
4471		}
4472	}
4473}
4474
4475/*
4476 * This routine is run via an vlan
4477 * config EVENT
4478 */
4479static void
4480em_register_vlan(void *arg, struct ifnet *ifp, u16 vtag)
4481{
4482	struct adapter	*adapter = ifp->if_softc;
4483	u32		index, bit;
4484
4485	if (ifp->if_softc !=  arg)   /* Not our event */
4486		return;
4487
4488	if ((vtag == 0) || (vtag > 4095))       /* Invalid ID */
4489                return;
4490
4491	EM_CORE_LOCK(adapter);
4492	index = (vtag >> 5) & 0x7F;
4493	bit = vtag & 0x1F;
4494	adapter->shadow_vfta[index] |= (1 << bit);
4495	++adapter->num_vlans;
4496	/* Re-init to load the changes */
4497	if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
4498		em_init_locked(adapter);
4499	EM_CORE_UNLOCK(adapter);
4500}
4501
4502/*
4503 * This routine is run via an vlan
4504 * unconfig EVENT
4505 */
4506static void
4507em_unregister_vlan(void *arg, struct ifnet *ifp, u16 vtag)
4508{
4509	struct adapter	*adapter = ifp->if_softc;
4510	u32		index, bit;
4511
4512	if (ifp->if_softc !=  arg)
4513		return;
4514
4515	if ((vtag == 0) || (vtag > 4095))       /* Invalid */
4516                return;
4517
4518	EM_CORE_LOCK(adapter);
4519	index = (vtag >> 5) & 0x7F;
4520	bit = vtag & 0x1F;
4521	adapter->shadow_vfta[index] &= ~(1 << bit);
4522	--adapter->num_vlans;
4523	/* Re-init to load the changes */
4524	if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
4525		em_init_locked(adapter);
4526	EM_CORE_UNLOCK(adapter);
4527}
4528
4529static void
4530em_setup_vlan_hw_support(struct adapter *adapter)
4531{
4532	struct e1000_hw *hw = &adapter->hw;
4533	u32             reg;
4534
4535	/*
4536	** We get here thru init_locked, meaning
4537	** a soft reset, this has already cleared
4538	** the VFTA and other state, so if there
4539	** have been no vlan's registered do nothing.
4540	*/
4541	if (adapter->num_vlans == 0)
4542                return;
4543
4544	/*
4545	** A soft reset zero's out the VFTA, so
4546	** we need to repopulate it now.
4547	*/
4548	for (int i = 0; i < EM_VFTA_SIZE; i++)
4549                if (adapter->shadow_vfta[i] != 0)
4550			E1000_WRITE_REG_ARRAY(hw, E1000_VFTA,
4551                            i, adapter->shadow_vfta[i]);
4552
4553	reg = E1000_READ_REG(hw, E1000_CTRL);
4554	reg |= E1000_CTRL_VME;
4555	E1000_WRITE_REG(hw, E1000_CTRL, reg);
4556
4557	/* Enable the Filter Table */
4558	reg = E1000_READ_REG(hw, E1000_RCTL);
4559	reg &= ~E1000_RCTL_CFIEN;
4560	reg |= E1000_RCTL_VFE;
4561	E1000_WRITE_REG(hw, E1000_RCTL, reg);
4562}
4563
4564static void
4565em_enable_intr(struct adapter *adapter)
4566{
4567	struct e1000_hw *hw = &adapter->hw;
4568	u32 ims_mask = IMS_ENABLE_MASK;
4569
4570	if (hw->mac.type == e1000_82574) {
4571		E1000_WRITE_REG(hw, EM_EIAC, EM_MSIX_MASK);
4572		ims_mask |= EM_MSIX_MASK;
4573	}
4574	E1000_WRITE_REG(hw, E1000_IMS, ims_mask);
4575}
4576
4577static void
4578em_disable_intr(struct adapter *adapter)
4579{
4580	struct e1000_hw *hw = &adapter->hw;
4581
4582	if (hw->mac.type == e1000_82574)
4583		E1000_WRITE_REG(hw, EM_EIAC, 0);
4584	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
4585}
4586
4587/*
4588 * Bit of a misnomer, what this really means is
4589 * to enable OS management of the system... aka
4590 * to disable special hardware management features
4591 */
4592static void
4593em_init_manageability(struct adapter *adapter)
4594{
4595	/* A shared code workaround */
4596#define E1000_82542_MANC2H E1000_MANC2H
4597	if (adapter->has_manage) {
4598		int manc2h = E1000_READ_REG(&adapter->hw, E1000_MANC2H);
4599		int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
4600
4601		/* disable hardware interception of ARP */
4602		manc &= ~(E1000_MANC_ARP_EN);
4603
4604                /* enable receiving management packets to the host */
4605		manc |= E1000_MANC_EN_MNG2HOST;
4606#define E1000_MNG2HOST_PORT_623 (1 << 5)
4607#define E1000_MNG2HOST_PORT_664 (1 << 6)
4608		manc2h |= E1000_MNG2HOST_PORT_623;
4609		manc2h |= E1000_MNG2HOST_PORT_664;
4610		E1000_WRITE_REG(&adapter->hw, E1000_MANC2H, manc2h);
4611		E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
4612	}
4613}
4614
4615/*
4616 * Give control back to hardware management
4617 * controller if there is one.
4618 */
4619static void
4620em_release_manageability(struct adapter *adapter)
4621{
4622	if (adapter->has_manage) {
4623		int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
4624
4625		/* re-enable hardware interception of ARP */
4626		manc |= E1000_MANC_ARP_EN;
4627		manc &= ~E1000_MANC_EN_MNG2HOST;
4628
4629		E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
4630	}
4631}
4632
4633/*
4634 * em_get_hw_control sets the {CTRL_EXT|FWSM}:DRV_LOAD bit.
4635 * For ASF and Pass Through versions of f/w this means
4636 * that the driver is loaded. For AMT version type f/w
4637 * this means that the network i/f is open.
4638 */
4639static void
4640em_get_hw_control(struct adapter *adapter)
4641{
4642	u32 ctrl_ext, swsm;
4643
4644	if (adapter->hw.mac.type == e1000_82573) {
4645		swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM);
4646		E1000_WRITE_REG(&adapter->hw, E1000_SWSM,
4647		    swsm | E1000_SWSM_DRV_LOAD);
4648		return;
4649	}
4650	/* else */
4651	ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4652	E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
4653	    ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
4654	return;
4655}
4656
4657/*
4658 * em_release_hw_control resets {CTRL_EXT|FWSM}:DRV_LOAD bit.
4659 * For ASF and Pass Through versions of f/w this means that
4660 * the driver is no longer loaded. For AMT versions of the
4661 * f/w this means that the network i/f is closed.
4662 */
4663static void
4664em_release_hw_control(struct adapter *adapter)
4665{
4666	u32 ctrl_ext, swsm;
4667
4668	if (!adapter->has_manage)
4669		return;
4670
4671	if (adapter->hw.mac.type == e1000_82573) {
4672		swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM);
4673		E1000_WRITE_REG(&adapter->hw, E1000_SWSM,
4674		    swsm & ~E1000_SWSM_DRV_LOAD);
4675		return;
4676	}
4677	/* else */
4678	ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4679	E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
4680	    ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
4681	return;
4682}
4683
4684static int
4685em_is_valid_ether_addr(u8 *addr)
4686{
4687	char zero_addr[6] = { 0, 0, 0, 0, 0, 0 };
4688
4689	if ((addr[0] & 1) || (!bcmp(addr, zero_addr, ETHER_ADDR_LEN))) {
4690		return (FALSE);
4691	}
4692
4693	return (TRUE);
4694}
4695
4696/*
4697** Parse the interface capabilities with regard
4698** to both system management and wake-on-lan for
4699** later use.
4700*/
4701static void
4702em_get_wakeup(device_t dev)
4703{
4704	struct adapter	*adapter = device_get_softc(dev);
4705	u16		eeprom_data = 0, device_id, apme_mask;
4706
4707	adapter->has_manage = e1000_enable_mng_pass_thru(&adapter->hw);
4708	apme_mask = EM_EEPROM_APME;
4709
4710	switch (adapter->hw.mac.type) {
4711	case e1000_82573:
4712	case e1000_82583:
4713		adapter->has_amt = TRUE;
4714		/* Falls thru */
4715	case e1000_82571:
4716	case e1000_82572:
4717	case e1000_80003es2lan:
4718		if (adapter->hw.bus.func == 1) {
4719			e1000_read_nvm(&adapter->hw,
4720			    NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data);
4721			break;
4722		} else
4723			e1000_read_nvm(&adapter->hw,
4724			    NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
4725		break;
4726	case e1000_ich8lan:
4727	case e1000_ich9lan:
4728	case e1000_ich10lan:
4729	case e1000_pchlan:
4730	case e1000_pch2lan:
4731		apme_mask = E1000_WUC_APME;
4732		adapter->has_amt = TRUE;
4733		eeprom_data = E1000_READ_REG(&adapter->hw, E1000_WUC);
4734		break;
4735	default:
4736		e1000_read_nvm(&adapter->hw,
4737		    NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
4738		break;
4739	}
4740	if (eeprom_data & apme_mask)
4741		adapter->wol = (E1000_WUFC_MAG | E1000_WUFC_MC);
4742	/*
4743         * We have the eeprom settings, now apply the special cases
4744         * where the eeprom may be wrong or the board won't support
4745         * wake on lan on a particular port
4746	 */
4747	device_id = pci_get_device(dev);
4748        switch (device_id) {
4749	case E1000_DEV_ID_82571EB_FIBER:
4750		/* Wake events only supported on port A for dual fiber
4751		 * regardless of eeprom setting */
4752		if (E1000_READ_REG(&adapter->hw, E1000_STATUS) &
4753		    E1000_STATUS_FUNC_1)
4754			adapter->wol = 0;
4755		break;
4756	case E1000_DEV_ID_82571EB_QUAD_COPPER:
4757	case E1000_DEV_ID_82571EB_QUAD_FIBER:
4758	case E1000_DEV_ID_82571EB_QUAD_COPPER_LP:
4759                /* if quad port adapter, disable WoL on all but port A */
4760		if (global_quad_port_a != 0)
4761			adapter->wol = 0;
4762		/* Reset for multiple quad port adapters */
4763		if (++global_quad_port_a == 4)
4764			global_quad_port_a = 0;
4765                break;
4766	}
4767	return;
4768}
4769
4770
4771/*
4772 * Enable PCI Wake On Lan capability
4773 */
4774static void
4775em_enable_wakeup(device_t dev)
4776{
4777	struct adapter	*adapter = device_get_softc(dev);
4778	struct ifnet	*ifp = adapter->ifp;
4779	u32		pmc, ctrl, ctrl_ext, rctl;
4780	u16     	status;
4781
4782	if ((pci_find_cap(dev, PCIY_PMG, &pmc) != 0))
4783		return;
4784
4785	/* Advertise the wakeup capability */
4786	ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
4787	ctrl |= (E1000_CTRL_SWDPIN2 | E1000_CTRL_SWDPIN3);
4788	E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
4789	E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
4790
4791	if ((adapter->hw.mac.type == e1000_ich8lan) ||
4792	    (adapter->hw.mac.type == e1000_pchlan) ||
4793	    (adapter->hw.mac.type == e1000_ich9lan) ||
4794	    (adapter->hw.mac.type == e1000_ich10lan))
4795		e1000_disable_gig_wol_ich8lan(&adapter->hw);
4796
4797	/* Keep the laser running on Fiber adapters */
4798	if (adapter->hw.phy.media_type == e1000_media_type_fiber ||
4799	    adapter->hw.phy.media_type == e1000_media_type_internal_serdes) {
4800		ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4801		ctrl_ext |= E1000_CTRL_EXT_SDP3_DATA;
4802		E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, ctrl_ext);
4803	}
4804
4805	/*
4806	** Determine type of Wakeup: note that wol
4807	** is set with all bits on by default.
4808	*/
4809	if ((ifp->if_capenable & IFCAP_WOL_MAGIC) == 0)
4810		adapter->wol &= ~E1000_WUFC_MAG;
4811
4812	if ((ifp->if_capenable & IFCAP_WOL_MCAST) == 0)
4813		adapter->wol &= ~E1000_WUFC_MC;
4814	else {
4815		rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
4816		rctl |= E1000_RCTL_MPE;
4817		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, rctl);
4818	}
4819
4820	if ((adapter->hw.mac.type == e1000_pchlan) ||
4821	    (adapter->hw.mac.type == e1000_pch2lan)) {
4822		if (em_enable_phy_wakeup(adapter))
4823			return;
4824	} else {
4825		E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
4826		E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
4827	}
4828
4829	if (adapter->hw.phy.type == e1000_phy_igp_3)
4830		e1000_igp3_phy_powerdown_workaround_ich8lan(&adapter->hw);
4831
4832        /* Request PME */
4833        status = pci_read_config(dev, pmc + PCIR_POWER_STATUS, 2);
4834	status &= ~(PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE);
4835	if (ifp->if_capenable & IFCAP_WOL)
4836		status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE;
4837        pci_write_config(dev, pmc + PCIR_POWER_STATUS, status, 2);
4838
4839	return;
4840}
4841
4842/*
4843** WOL in the newer chipset interfaces (pchlan)
4844** require thing to be copied into the phy
4845*/
4846static int
4847em_enable_phy_wakeup(struct adapter *adapter)
4848{
4849	struct e1000_hw *hw = &adapter->hw;
4850	u32 mreg, ret = 0;
4851	u16 preg;
4852
4853	/* copy MAC RARs to PHY RARs */
4854	e1000_copy_rx_addrs_to_phy_ich8lan(hw);
4855
4856	/* copy MAC MTA to PHY MTA */
4857	for (int i = 0; i < adapter->hw.mac.mta_reg_count; i++) {
4858		mreg = E1000_READ_REG_ARRAY(hw, E1000_MTA, i);
4859		e1000_write_phy_reg(hw, BM_MTA(i), (u16)(mreg & 0xFFFF));
4860		e1000_write_phy_reg(hw, BM_MTA(i) + 1,
4861		    (u16)((mreg >> 16) & 0xFFFF));
4862	}
4863
4864	/* configure PHY Rx Control register */
4865	e1000_read_phy_reg(&adapter->hw, BM_RCTL, &preg);
4866	mreg = E1000_READ_REG(hw, E1000_RCTL);
4867	if (mreg & E1000_RCTL_UPE)
4868		preg |= BM_RCTL_UPE;
4869	if (mreg & E1000_RCTL_MPE)
4870		preg |= BM_RCTL_MPE;
4871	preg &= ~(BM_RCTL_MO_MASK);
4872	if (mreg & E1000_RCTL_MO_3)
4873		preg |= (((mreg & E1000_RCTL_MO_3) >> E1000_RCTL_MO_SHIFT)
4874				<< BM_RCTL_MO_SHIFT);
4875	if (mreg & E1000_RCTL_BAM)
4876		preg |= BM_RCTL_BAM;
4877	if (mreg & E1000_RCTL_PMCF)
4878		preg |= BM_RCTL_PMCF;
4879	mreg = E1000_READ_REG(hw, E1000_CTRL);
4880	if (mreg & E1000_CTRL_RFCE)
4881		preg |= BM_RCTL_RFCE;
4882	e1000_write_phy_reg(&adapter->hw, BM_RCTL, preg);
4883
4884	/* enable PHY wakeup in MAC register */
4885	E1000_WRITE_REG(hw, E1000_WUC,
4886	    E1000_WUC_PHY_WAKE | E1000_WUC_PME_EN);
4887	E1000_WRITE_REG(hw, E1000_WUFC, adapter->wol);
4888
4889	/* configure and enable PHY wakeup in PHY registers */
4890	e1000_write_phy_reg(&adapter->hw, BM_WUFC, adapter->wol);
4891	e1000_write_phy_reg(&adapter->hw, BM_WUC, E1000_WUC_PME_EN);
4892
4893	/* activate PHY wakeup */
4894	ret = hw->phy.ops.acquire(hw);
4895	if (ret) {
4896		printf("Could not acquire PHY\n");
4897		return ret;
4898	}
4899	e1000_write_phy_reg_mdic(hw, IGP01E1000_PHY_PAGE_SELECT,
4900	                         (BM_WUC_ENABLE_PAGE << IGP_PAGE_SHIFT));
4901	ret = e1000_read_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, &preg);
4902	if (ret) {
4903		printf("Could not read PHY page 769\n");
4904		goto out;
4905	}
4906	preg |= BM_WUC_ENABLE_BIT | BM_WUC_HOST_WU_BIT;
4907	ret = e1000_write_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, preg);
4908	if (ret)
4909		printf("Could not set PHY Host Wakeup bit\n");
4910out:
4911	hw->phy.ops.release(hw);
4912
4913	return ret;
4914}
4915
4916static void
4917em_led_func(void *arg, int onoff)
4918{
4919	struct adapter	*adapter = arg;
4920
4921	EM_CORE_LOCK(adapter);
4922	if (onoff) {
4923		e1000_setup_led(&adapter->hw);
4924		e1000_led_on(&adapter->hw);
4925	} else {
4926		e1000_led_off(&adapter->hw);
4927		e1000_cleanup_led(&adapter->hw);
4928	}
4929	EM_CORE_UNLOCK(adapter);
4930}
4931
4932/*
4933** Disable the L0S and L1 LINK states
4934*/
4935static void
4936em_disable_aspm(struct adapter *adapter)
4937{
4938	int		base, reg;
4939	u16		link_cap,link_ctrl;
4940	device_t	dev = adapter->dev;
4941
4942	switch (adapter->hw.mac.type) {
4943		case e1000_82573:
4944		case e1000_82574:
4945		case e1000_82583:
4946			break;
4947		default:
4948			return;
4949	}
4950	if (pci_find_cap(dev, PCIY_EXPRESS, &base) != 0)
4951		return;
4952	reg = base + PCIR_EXPRESS_LINK_CAP;
4953	link_cap = pci_read_config(dev, reg, 2);
4954	if ((link_cap & PCIM_LINK_CAP_ASPM) == 0)
4955		return;
4956	reg = base + PCIR_EXPRESS_LINK_CTL;
4957	link_ctrl = pci_read_config(dev, reg, 2);
4958	link_ctrl &= 0xFFFC; /* turn off bit 1 and 2 */
4959	pci_write_config(dev, reg, link_ctrl, 2);
4960	return;
4961}
4962
4963/**********************************************************************
4964 *
4965 *  Update the board statistics counters.
4966 *
4967 **********************************************************************/
4968static void
4969em_update_stats_counters(struct adapter *adapter)
4970{
4971	struct ifnet   *ifp;
4972
4973	if(adapter->hw.phy.media_type == e1000_media_type_copper ||
4974	   (E1000_READ_REG(&adapter->hw, E1000_STATUS) & E1000_STATUS_LU)) {
4975		adapter->stats.symerrs += E1000_READ_REG(&adapter->hw, E1000_SYMERRS);
4976		adapter->stats.sec += E1000_READ_REG(&adapter->hw, E1000_SEC);
4977	}
4978	adapter->stats.crcerrs += E1000_READ_REG(&adapter->hw, E1000_CRCERRS);
4979	adapter->stats.mpc += E1000_READ_REG(&adapter->hw, E1000_MPC);
4980	adapter->stats.scc += E1000_READ_REG(&adapter->hw, E1000_SCC);
4981	adapter->stats.ecol += E1000_READ_REG(&adapter->hw, E1000_ECOL);
4982
4983	adapter->stats.mcc += E1000_READ_REG(&adapter->hw, E1000_MCC);
4984	adapter->stats.latecol += E1000_READ_REG(&adapter->hw, E1000_LATECOL);
4985	adapter->stats.colc += E1000_READ_REG(&adapter->hw, E1000_COLC);
4986	adapter->stats.dc += E1000_READ_REG(&adapter->hw, E1000_DC);
4987	adapter->stats.rlec += E1000_READ_REG(&adapter->hw, E1000_RLEC);
4988	adapter->stats.xonrxc += E1000_READ_REG(&adapter->hw, E1000_XONRXC);
4989	adapter->stats.xontxc += E1000_READ_REG(&adapter->hw, E1000_XONTXC);
4990	/*
4991	** For watchdog management we need to know if we have been
4992	** paused during the last interval, so capture that here.
4993	*/
4994	adapter->pause_frames = E1000_READ_REG(&adapter->hw, E1000_XOFFRXC);
4995	adapter->stats.xoffrxc += adapter->pause_frames;
4996	adapter->stats.xofftxc += E1000_READ_REG(&adapter->hw, E1000_XOFFTXC);
4997	adapter->stats.fcruc += E1000_READ_REG(&adapter->hw, E1000_FCRUC);
4998	adapter->stats.prc64 += E1000_READ_REG(&adapter->hw, E1000_PRC64);
4999	adapter->stats.prc127 += E1000_READ_REG(&adapter->hw, E1000_PRC127);
5000	adapter->stats.prc255 += E1000_READ_REG(&adapter->hw, E1000_PRC255);
5001	adapter->stats.prc511 += E1000_READ_REG(&adapter->hw, E1000_PRC511);
5002	adapter->stats.prc1023 += E1000_READ_REG(&adapter->hw, E1000_PRC1023);
5003	adapter->stats.prc1522 += E1000_READ_REG(&adapter->hw, E1000_PRC1522);
5004	adapter->stats.gprc += E1000_READ_REG(&adapter->hw, E1000_GPRC);
5005	adapter->stats.bprc += E1000_READ_REG(&adapter->hw, E1000_BPRC);
5006	adapter->stats.mprc += E1000_READ_REG(&adapter->hw, E1000_MPRC);
5007	adapter->stats.gptc += E1000_READ_REG(&adapter->hw, E1000_GPTC);
5008
5009	/* For the 64-bit byte counters the low dword must be read first. */
5010	/* Both registers clear on the read of the high dword */
5011
5012	adapter->stats.gorc += E1000_READ_REG(&adapter->hw, E1000_GORCL) +
5013	    ((u64)E1000_READ_REG(&adapter->hw, E1000_GORCH) << 32);
5014	adapter->stats.gotc += E1000_READ_REG(&adapter->hw, E1000_GOTCL) +
5015	    ((u64)E1000_READ_REG(&adapter->hw, E1000_GOTCH) << 32);
5016
5017	adapter->stats.rnbc += E1000_READ_REG(&adapter->hw, E1000_RNBC);
5018	adapter->stats.ruc += E1000_READ_REG(&adapter->hw, E1000_RUC);
5019	adapter->stats.rfc += E1000_READ_REG(&adapter->hw, E1000_RFC);
5020	adapter->stats.roc += E1000_READ_REG(&adapter->hw, E1000_ROC);
5021	adapter->stats.rjc += E1000_READ_REG(&adapter->hw, E1000_RJC);
5022
5023	adapter->stats.tor += E1000_READ_REG(&adapter->hw, E1000_TORH);
5024	adapter->stats.tot += E1000_READ_REG(&adapter->hw, E1000_TOTH);
5025
5026	adapter->stats.tpr += E1000_READ_REG(&adapter->hw, E1000_TPR);
5027	adapter->stats.tpt += E1000_READ_REG(&adapter->hw, E1000_TPT);
5028	adapter->stats.ptc64 += E1000_READ_REG(&adapter->hw, E1000_PTC64);
5029	adapter->stats.ptc127 += E1000_READ_REG(&adapter->hw, E1000_PTC127);
5030	adapter->stats.ptc255 += E1000_READ_REG(&adapter->hw, E1000_PTC255);
5031	adapter->stats.ptc511 += E1000_READ_REG(&adapter->hw, E1000_PTC511);
5032	adapter->stats.ptc1023 += E1000_READ_REG(&adapter->hw, E1000_PTC1023);
5033	adapter->stats.ptc1522 += E1000_READ_REG(&adapter->hw, E1000_PTC1522);
5034	adapter->stats.mptc += E1000_READ_REG(&adapter->hw, E1000_MPTC);
5035	adapter->stats.bptc += E1000_READ_REG(&adapter->hw, E1000_BPTC);
5036
5037	/* Interrupt Counts */
5038
5039	adapter->stats.iac += E1000_READ_REG(&adapter->hw, E1000_IAC);
5040	adapter->stats.icrxptc += E1000_READ_REG(&adapter->hw, E1000_ICRXPTC);
5041	adapter->stats.icrxatc += E1000_READ_REG(&adapter->hw, E1000_ICRXATC);
5042	adapter->stats.ictxptc += E1000_READ_REG(&adapter->hw, E1000_ICTXPTC);
5043	adapter->stats.ictxatc += E1000_READ_REG(&adapter->hw, E1000_ICTXATC);
5044	adapter->stats.ictxqec += E1000_READ_REG(&adapter->hw, E1000_ICTXQEC);
5045	adapter->stats.ictxqmtc += E1000_READ_REG(&adapter->hw, E1000_ICTXQMTC);
5046	adapter->stats.icrxdmtc += E1000_READ_REG(&adapter->hw, E1000_ICRXDMTC);
5047	adapter->stats.icrxoc += E1000_READ_REG(&adapter->hw, E1000_ICRXOC);
5048
5049	if (adapter->hw.mac.type >= e1000_82543) {
5050		adapter->stats.algnerrc +=
5051		E1000_READ_REG(&adapter->hw, E1000_ALGNERRC);
5052		adapter->stats.rxerrc +=
5053		E1000_READ_REG(&adapter->hw, E1000_RXERRC);
5054		adapter->stats.tncrs +=
5055		E1000_READ_REG(&adapter->hw, E1000_TNCRS);
5056		adapter->stats.cexterr +=
5057		E1000_READ_REG(&adapter->hw, E1000_CEXTERR);
5058		adapter->stats.tsctc +=
5059		E1000_READ_REG(&adapter->hw, E1000_TSCTC);
5060		adapter->stats.tsctfc +=
5061		E1000_READ_REG(&adapter->hw, E1000_TSCTFC);
5062	}
5063	ifp = adapter->ifp;
5064
5065	ifp->if_collisions = adapter->stats.colc;
5066
5067	/* Rx Errors */
5068	ifp->if_ierrors = adapter->dropped_pkts + adapter->stats.rxerrc +
5069	    adapter->stats.crcerrs + adapter->stats.algnerrc +
5070	    adapter->stats.ruc + adapter->stats.roc +
5071	    adapter->stats.mpc + adapter->stats.cexterr;
5072
5073	/* Tx Errors */
5074	ifp->if_oerrors = adapter->stats.ecol +
5075	    adapter->stats.latecol + adapter->watchdog_events;
5076}
5077
5078/* Export a single 32-bit register via a read-only sysctl. */
5079static int
5080em_sysctl_reg_handler(SYSCTL_HANDLER_ARGS)
5081{
5082	struct adapter *adapter;
5083	u_int val;
5084
5085	adapter = oidp->oid_arg1;
5086	val = E1000_READ_REG(&adapter->hw, oidp->oid_arg2);
5087	return (sysctl_handle_int(oidp, &val, 0, req));
5088}
5089
5090/*
5091 * Add sysctl variables, one per statistic, to the system.
5092 */
5093static void
5094em_add_hw_stats(struct adapter *adapter)
5095{
5096	device_t dev = adapter->dev;
5097
5098	struct tx_ring *txr = adapter->tx_rings;
5099	struct rx_ring *rxr = adapter->rx_rings;
5100
5101	struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev);
5102	struct sysctl_oid *tree = device_get_sysctl_tree(dev);
5103	struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree);
5104	struct e1000_hw_stats *stats = &adapter->stats;
5105
5106	struct sysctl_oid *stat_node, *queue_node, *int_node;
5107	struct sysctl_oid_list *stat_list, *queue_list, *int_list;
5108
5109#define QUEUE_NAME_LEN 32
5110	char namebuf[QUEUE_NAME_LEN];
5111
5112	/* Driver Statistics */
5113	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "link_irq",
5114			CTLFLAG_RD, &adapter->link_irq,
5115			"Link MSIX IRQ Handled");
5116	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "mbuf_alloc_fail",
5117			 CTLFLAG_RD, &adapter->mbuf_alloc_failed,
5118			 "Std mbuf failed");
5119	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "cluster_alloc_fail",
5120			 CTLFLAG_RD, &adapter->mbuf_cluster_failed,
5121			 "Std mbuf cluster failed");
5122	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "dropped",
5123			CTLFLAG_RD, &adapter->dropped_pkts,
5124			"Driver dropped packets");
5125	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_dma_fail",
5126			CTLFLAG_RD, &adapter->no_tx_dma_setup,
5127			"Driver tx dma failure in xmit");
5128	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_overruns",
5129			CTLFLAG_RD, &adapter->rx_overruns,
5130			"RX overruns");
5131	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "watchdog_timeouts",
5132			CTLFLAG_RD, &adapter->watchdog_events,
5133			"Watchdog timeouts");
5134
5135	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "device_control",
5136			CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_CTRL,
5137			em_sysctl_reg_handler, "IU",
5138			"Device Control Register");
5139	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rx_control",
5140			CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_RCTL,
5141			em_sysctl_reg_handler, "IU",
5142			"Receiver Control Register");
5143	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_high_water",
5144			CTLFLAG_RD, &adapter->hw.fc.high_water, 0,
5145			"Flow Control High Watermark");
5146	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_low_water",
5147			CTLFLAG_RD, &adapter->hw.fc.low_water, 0,
5148			"Flow Control Low Watermark");
5149
5150	for (int i = 0; i < adapter->num_queues; i++, rxr++, txr++) {
5151		snprintf(namebuf, QUEUE_NAME_LEN, "queue%d", i);
5152		queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
5153					    CTLFLAG_RD, NULL, "Queue Name");
5154		queue_list = SYSCTL_CHILDREN(queue_node);
5155
5156		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_head",
5157				CTLTYPE_UINT | CTLFLAG_RD, adapter,
5158				E1000_TDH(txr->me),
5159				em_sysctl_reg_handler, "IU",
5160 				"Transmit Descriptor Head");
5161		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_tail",
5162				CTLTYPE_UINT | CTLFLAG_RD, adapter,
5163				E1000_TDT(txr->me),
5164				em_sysctl_reg_handler, "IU",
5165 				"Transmit Descriptor Tail");
5166		SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "tx_irq",
5167				CTLFLAG_RD, &txr->tx_irq,
5168				"Queue MSI-X Transmit Interrupts");
5169		SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "no_desc_avail",
5170				CTLFLAG_RD, &txr->no_desc_avail,
5171				"Queue No Descriptor Available");
5172
5173		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_head",
5174				CTLTYPE_UINT | CTLFLAG_RD, adapter,
5175				E1000_RDH(rxr->me),
5176				em_sysctl_reg_handler, "IU",
5177				"Receive Descriptor Head");
5178		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_tail",
5179				CTLTYPE_UINT | CTLFLAG_RD, adapter,
5180				E1000_RDT(rxr->me),
5181				em_sysctl_reg_handler, "IU",
5182				"Receive Descriptor Tail");
5183		SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "rx_irq",
5184				CTLFLAG_RD, &rxr->rx_irq,
5185				"Queue MSI-X Receive Interrupts");
5186	}
5187
5188	/* MAC stats get their own sub node */
5189
5190	stat_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "mac_stats",
5191				    CTLFLAG_RD, NULL, "Statistics");
5192	stat_list = SYSCTL_CHILDREN(stat_node);
5193
5194	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "excess_coll",
5195			CTLFLAG_RD, &stats->ecol,
5196			"Excessive collisions");
5197	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "single_coll",
5198			CTLFLAG_RD, &stats->scc,
5199			"Single collisions");
5200	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "multiple_coll",
5201			CTLFLAG_RD, &stats->mcc,
5202			"Multiple collisions");
5203	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "late_coll",
5204			CTLFLAG_RD, &stats->latecol,
5205			"Late collisions");
5206	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "collision_count",
5207			CTLFLAG_RD, &stats->colc,
5208			"Collision Count");
5209	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "symbol_errors",
5210			CTLFLAG_RD, &adapter->stats.symerrs,
5211			"Symbol Errors");
5212	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "sequence_errors",
5213			CTLFLAG_RD, &adapter->stats.sec,
5214			"Sequence Errors");
5215	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "defer_count",
5216			CTLFLAG_RD, &adapter->stats.dc,
5217			"Defer Count");
5218	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "missed_packets",
5219			CTLFLAG_RD, &adapter->stats.mpc,
5220			"Missed Packets");
5221	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_no_buff",
5222			CTLFLAG_RD, &adapter->stats.rnbc,
5223			"Receive No Buffers");
5224	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_undersize",
5225			CTLFLAG_RD, &adapter->stats.ruc,
5226			"Receive Undersize");
5227	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_fragmented",
5228			CTLFLAG_RD, &adapter->stats.rfc,
5229			"Fragmented Packets Received ");
5230	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_oversize",
5231			CTLFLAG_RD, &adapter->stats.roc,
5232			"Oversized Packets Received");
5233	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_jabber",
5234			CTLFLAG_RD, &adapter->stats.rjc,
5235			"Recevied Jabber");
5236	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_errs",
5237			CTLFLAG_RD, &adapter->stats.rxerrc,
5238			"Receive Errors");
5239	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "crc_errs",
5240			CTLFLAG_RD, &adapter->stats.crcerrs,
5241			"CRC errors");
5242	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "alignment_errs",
5243			CTLFLAG_RD, &adapter->stats.algnerrc,
5244			"Alignment Errors");
5245	/* On 82575 these are collision counts */
5246	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "coll_ext_errs",
5247			CTLFLAG_RD, &adapter->stats.cexterr,
5248			"Collision/Carrier extension errors");
5249	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_recvd",
5250			CTLFLAG_RD, &adapter->stats.xonrxc,
5251			"XON Received");
5252	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_txd",
5253			CTLFLAG_RD, &adapter->stats.xontxc,
5254			"XON Transmitted");
5255	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_recvd",
5256			CTLFLAG_RD, &adapter->stats.xoffrxc,
5257			"XOFF Received");
5258	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_txd",
5259			CTLFLAG_RD, &adapter->stats.xofftxc,
5260			"XOFF Transmitted");
5261
5262	/* Packet Reception Stats */
5263	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_recvd",
5264			CTLFLAG_RD, &adapter->stats.tpr,
5265			"Total Packets Received ");
5266	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd",
5267			CTLFLAG_RD, &adapter->stats.gprc,
5268			"Good Packets Received");
5269	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_recvd",
5270			CTLFLAG_RD, &adapter->stats.bprc,
5271			"Broadcast Packets Received");
5272	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd",
5273			CTLFLAG_RD, &adapter->stats.mprc,
5274			"Multicast Packets Received");
5275	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_64",
5276			CTLFLAG_RD, &adapter->stats.prc64,
5277			"64 byte frames received ");
5278	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_65_127",
5279			CTLFLAG_RD, &adapter->stats.prc127,
5280			"65-127 byte frames received");
5281	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_128_255",
5282			CTLFLAG_RD, &adapter->stats.prc255,
5283			"128-255 byte frames received");
5284	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_256_511",
5285			CTLFLAG_RD, &adapter->stats.prc511,
5286			"256-511 byte frames received");
5287	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_512_1023",
5288			CTLFLAG_RD, &adapter->stats.prc1023,
5289			"512-1023 byte frames received");
5290	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_1024_1522",
5291			CTLFLAG_RD, &adapter->stats.prc1522,
5292			"1023-1522 byte frames received");
5293 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd",
5294 			CTLFLAG_RD, &adapter->stats.gorc,
5295 			"Good Octets Received");
5296
5297	/* Packet Transmission Stats */
5298 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_txd",
5299 			CTLFLAG_RD, &adapter->stats.gotc,
5300 			"Good Octets Transmitted");
5301	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_txd",
5302			CTLFLAG_RD, &adapter->stats.tpt,
5303			"Total Packets Transmitted");
5304	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
5305			CTLFLAG_RD, &adapter->stats.gptc,
5306			"Good Packets Transmitted");
5307	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_txd",
5308			CTLFLAG_RD, &adapter->stats.bptc,
5309			"Broadcast Packets Transmitted");
5310	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_txd",
5311			CTLFLAG_RD, &adapter->stats.mptc,
5312			"Multicast Packets Transmitted");
5313	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_64",
5314			CTLFLAG_RD, &adapter->stats.ptc64,
5315			"64 byte frames transmitted ");
5316	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_65_127",
5317			CTLFLAG_RD, &adapter->stats.ptc127,
5318			"65-127 byte frames transmitted");
5319	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_128_255",
5320			CTLFLAG_RD, &adapter->stats.ptc255,
5321			"128-255 byte frames transmitted");
5322	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_256_511",
5323			CTLFLAG_RD, &adapter->stats.ptc511,
5324			"256-511 byte frames transmitted");
5325	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_512_1023",
5326			CTLFLAG_RD, &adapter->stats.ptc1023,
5327			"512-1023 byte frames transmitted");
5328	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_1024_1522",
5329			CTLFLAG_RD, &adapter->stats.ptc1522,
5330			"1024-1522 byte frames transmitted");
5331	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tso_txd",
5332			CTLFLAG_RD, &adapter->stats.tsctc,
5333			"TSO Contexts Transmitted");
5334	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tso_ctx_fail",
5335			CTLFLAG_RD, &adapter->stats.tsctfc,
5336			"TSO Contexts Failed");
5337
5338
5339	/* Interrupt Stats */
5340
5341	int_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "interrupts",
5342				    CTLFLAG_RD, NULL, "Interrupt Statistics");
5343	int_list = SYSCTL_CHILDREN(int_node);
5344
5345	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "asserts",
5346			CTLFLAG_RD, &adapter->stats.iac,
5347			"Interrupt Assertion Count");
5348
5349	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_pkt_timer",
5350			CTLFLAG_RD, &adapter->stats.icrxptc,
5351			"Interrupt Cause Rx Pkt Timer Expire Count");
5352
5353	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_abs_timer",
5354			CTLFLAG_RD, &adapter->stats.icrxatc,
5355			"Interrupt Cause Rx Abs Timer Expire Count");
5356
5357	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_pkt_timer",
5358			CTLFLAG_RD, &adapter->stats.ictxptc,
5359			"Interrupt Cause Tx Pkt Timer Expire Count");
5360
5361	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_abs_timer",
5362			CTLFLAG_RD, &adapter->stats.ictxatc,
5363			"Interrupt Cause Tx Abs Timer Expire Count");
5364
5365	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_queue_empty",
5366			CTLFLAG_RD, &adapter->stats.ictxqec,
5367			"Interrupt Cause Tx Queue Empty Count");
5368
5369	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_queue_min_thresh",
5370			CTLFLAG_RD, &adapter->stats.ictxqmtc,
5371			"Interrupt Cause Tx Queue Min Thresh Count");
5372
5373	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_desc_min_thresh",
5374			CTLFLAG_RD, &adapter->stats.icrxdmtc,
5375			"Interrupt Cause Rx Desc Min Thresh Count");
5376
5377	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_overrun",
5378			CTLFLAG_RD, &adapter->stats.icrxoc,
5379			"Interrupt Cause Receiver Overrun Count");
5380}
5381
5382/**********************************************************************
5383 *
5384 *  This routine provides a way to dump out the adapter eeprom,
5385 *  often a useful debug/service tool. This only dumps the first
5386 *  32 words, stuff that matters is in that extent.
5387 *
5388 **********************************************************************/
5389static int
5390em_sysctl_nvm_info(SYSCTL_HANDLER_ARGS)
5391{
5392	struct adapter *adapter;
5393	int error;
5394	int result;
5395
5396	result = -1;
5397	error = sysctl_handle_int(oidp, &result, 0, req);
5398
5399	if (error || !req->newptr)
5400		return (error);
5401
5402	/*
5403	 * This value will cause a hex dump of the
5404	 * first 32 16-bit words of the EEPROM to
5405	 * the screen.
5406	 */
5407	if (result == 1) {
5408		adapter = (struct adapter *)arg1;
5409		em_print_nvm_info(adapter);
5410        }
5411
5412	return (error);
5413}
5414
5415static void
5416em_print_nvm_info(struct adapter *adapter)
5417{
5418	u16	eeprom_data;
5419	int	i, j, row = 0;
5420
5421	/* Its a bit crude, but it gets the job done */
5422	printf("\nInterface EEPROM Dump:\n");
5423	printf("Offset\n0x0000  ");
5424	for (i = 0, j = 0; i < 32; i++, j++) {
5425		if (j == 8) { /* Make the offset block */
5426			j = 0; ++row;
5427			printf("\n0x00%x0  ",row);
5428		}
5429		e1000_read_nvm(&adapter->hw, i, 1, &eeprom_data);
5430		printf("%04x ", eeprom_data);
5431	}
5432	printf("\n");
5433}
5434
5435static int
5436em_sysctl_int_delay(SYSCTL_HANDLER_ARGS)
5437{
5438	struct em_int_delay_info *info;
5439	struct adapter *adapter;
5440	u32 regval;
5441	int error, usecs, ticks;
5442
5443	info = (struct em_int_delay_info *)arg1;
5444	usecs = info->value;
5445	error = sysctl_handle_int(oidp, &usecs, 0, req);
5446	if (error != 0 || req->newptr == NULL)
5447		return (error);
5448	if (usecs < 0 || usecs > EM_TICKS_TO_USECS(65535))
5449		return (EINVAL);
5450	info->value = usecs;
5451	ticks = EM_USECS_TO_TICKS(usecs);
5452
5453	adapter = info->adapter;
5454
5455	EM_CORE_LOCK(adapter);
5456	regval = E1000_READ_OFFSET(&adapter->hw, info->offset);
5457	regval = (regval & ~0xffff) | (ticks & 0xffff);
5458	/* Handle a few special cases. */
5459	switch (info->offset) {
5460	case E1000_RDTR:
5461		break;
5462	case E1000_TIDV:
5463		if (ticks == 0) {
5464			adapter->txd_cmd &= ~E1000_TXD_CMD_IDE;
5465			/* Don't write 0 into the TIDV register. */
5466			regval++;
5467		} else
5468			adapter->txd_cmd |= E1000_TXD_CMD_IDE;
5469		break;
5470	}
5471	E1000_WRITE_OFFSET(&adapter->hw, info->offset, regval);
5472	EM_CORE_UNLOCK(adapter);
5473	return (0);
5474}
5475
5476static void
5477em_add_int_delay_sysctl(struct adapter *adapter, const char *name,
5478	const char *description, struct em_int_delay_info *info,
5479	int offset, int value)
5480{
5481	info->adapter = adapter;
5482	info->offset = offset;
5483	info->value = value;
5484	SYSCTL_ADD_PROC(device_get_sysctl_ctx(adapter->dev),
5485	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
5486	    OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW,
5487	    info, 0, em_sysctl_int_delay, "I", description);
5488}
5489
5490static void
5491em_set_sysctl_value(struct adapter *adapter, const char *name,
5492	const char *description, int *limit, int value)
5493{
5494	*limit = value;
5495	SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
5496	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
5497	    OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW, limit, value, description);
5498}
5499
5500static int
5501em_sysctl_debug_info(SYSCTL_HANDLER_ARGS)
5502{
5503	struct adapter *adapter;
5504	int error;
5505	int result;
5506
5507	result = -1;
5508	error = sysctl_handle_int(oidp, &result, 0, req);
5509
5510	if (error || !req->newptr)
5511		return (error);
5512
5513	if (result == 1) {
5514		adapter = (struct adapter *)arg1;
5515		em_print_debug_info(adapter);
5516        }
5517
5518	return (error);
5519}
5520
5521/*
5522** This routine is meant to be fluid, add whatever is
5523** needed for debugging a problem.  -jfv
5524*/
5525static void
5526em_print_debug_info(struct adapter *adapter)
5527{
5528	device_t dev = adapter->dev;
5529	struct tx_ring *txr = adapter->tx_rings;
5530	struct rx_ring *rxr = adapter->rx_rings;
5531
5532	if (adapter->ifp->if_drv_flags & IFF_DRV_RUNNING)
5533		printf("Interface is RUNNING ");
5534	else
5535		printf("Interface is NOT RUNNING\n");
5536	if (adapter->ifp->if_drv_flags & IFF_DRV_OACTIVE)
5537		printf("and ACTIVE\n");
5538	else
5539		printf("and INACTIVE\n");
5540
5541	device_printf(dev, "hw tdh = %d, hw tdt = %d\n",
5542	    E1000_READ_REG(&adapter->hw, E1000_TDH(0)),
5543	    E1000_READ_REG(&adapter->hw, E1000_TDT(0)));
5544	device_printf(dev, "hw rdh = %d, hw rdt = %d\n",
5545	    E1000_READ_REG(&adapter->hw, E1000_RDH(0)),
5546	    E1000_READ_REG(&adapter->hw, E1000_RDT(0)));
5547	device_printf(dev, "Tx Queue Status = %d\n", txr->queue_status);
5548	device_printf(dev, "TX descriptors avail = %d\n",
5549	    txr->tx_avail);
5550	device_printf(dev, "Tx Descriptors avail failure = %ld\n",
5551	    txr->no_desc_avail);
5552	device_printf(dev, "RX discarded packets = %ld\n",
5553	    rxr->rx_discarded);
5554	device_printf(dev, "RX Next to Check = %d\n", rxr->next_to_check);
5555	device_printf(dev, "RX Next to Refresh = %d\n", rxr->next_to_refresh);
5556}
5557