/******************************************************************************

  Copyright (c) 2001-2010, Intel Corporation
  All rights reserved.

  Redistribution and use in source and binary forms, with or without
  modification, are permitted provided that the following conditions are met:

   1. Redistributions of source code must retain the above copyright notice,
      this list of conditions and the following disclaimer.

   2. Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.

   3. Neither the name of the Intel Corporation nor the names of its
      contributors may be used to endorse or promote products derived from
      this software without specific prior written permission.

  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  POSSIBILITY OF SUCH DAMAGE.

******************************************************************************/
/*$FreeBSD: head/sys/dev/e1000/if_em.c 206429 2010-04-09 18:42:15Z jfv $*/

#ifdef HAVE_KERNEL_OPTION_HEADERS
#include "opt_device_polling.h"
#include "opt_inet.h"
#endif

#include <sys/param.h>
#include <sys/systm.h>
#if __FreeBSD_version >= 800000
#include <sys/buf_ring.h>
#endif
#include <sys/bus.h>
#include <sys/endian.h>
#include <sys/kernel.h>
#include <sys/kthread.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/module.h>
#include <sys/rman.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/sysctl.h>
#include <sys/taskqueue.h>
#include <sys/eventhandler.h>
#include <machine/bus.h>
#include <machine/resource.h>

#include <net/bpf.h>
#include <net/ethernet.h>
#include <net/if.h>
#include <net/if_arp.h>
#include <net/if_dl.h>
#include <net/if_media.h>

#include <net/if_types.h>
#include <net/if_vlan_var.h>

#include <netinet/in_systm.h>
#include <netinet/in.h>
#include <netinet/if_ether.h>
#include <netinet/ip.h>
#include <netinet/ip6.h>
#include <netinet/tcp.h>
#include <netinet/udp.h>

#include <machine/in_cksum.h>
#include <dev/led/led.h>
#include <dev/pci/pcivar.h>
#include <dev/pci/pcireg.h>

#include "e1000_api.h"
#include "e1000_82571.h"
#include "if_em.h"

/*********************************************************************
 *  Set this to one to display debug statistics
 *********************************************************************/
int	em_display_debug_stats = 0;

/*********************************************************************
 *  Driver version:
 *********************************************************************/
char em_driver_version[] = "7.0.3";


/*********************************************************************
 *  PCI Device ID Table
 *
 *  Used by probe to select which devices to load on.
 *  The last field stores an index into em_strings.
 *  The last entry must be all 0s.
 *
 *  { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
 *********************************************************************/

static em_vendor_info_t em_vendor_info_array[] =
{
	/* Intel(R) PRO/1000 Network Connection */
	{ 0x8086, E1000_DEV_ID_82571EB_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_SERDES_DUAL,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_SERDES_QUAD,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER_LP,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_FIBER,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571PT_QUAD_COPPER,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82572EI_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82572EI_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82572EI_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82572EI,		PCI_ANY_ID, PCI_ANY_ID, 0},

	{ 0x8086, E1000_DEV_ID_82573E,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82573E_IAMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82573L,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82583V,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_SPT,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_SPT,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_DPT,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_DPT,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IGP_M_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IGP_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IGP_C,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IFE,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IFE_GT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IFE_G,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IGP_M,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_82567V_3,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IGP_M_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IGP_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IGP_C,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IGP_M,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IGP_M_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IFE,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IFE_GT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IFE_G,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_BM,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82574L,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82574LA,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_R_BM_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_R_BM_LF,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_R_BM_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_D_BM_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_D_BM_LF,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_M_HV_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_M_HV_LC,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_D_HV_DM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_D_HV_DC,	PCI_ANY_ID, PCI_ANY_ID, 0},
	/* required last entry */
	{ 0, 0, 0, 0, 0}
};

/*********************************************************************
 *  Table of branding strings for all supported NICs.
 *********************************************************************/

static char *em_strings[] = {
	"Intel(R) PRO/1000 Network Connection"
};

/*********************************************************************
 *  Function prototypes
 *********************************************************************/
static int	em_probe(device_t);
static int	em_attach(device_t);
static int	em_detach(device_t);
static int	em_shutdown(device_t);
static int	em_suspend(device_t);
static int	em_resume(device_t);
static void	em_start(struct ifnet *);
static void	em_start_locked(struct ifnet *, struct tx_ring *);
#ifdef EM_MULTIQUEUE
static int	em_mq_start(struct ifnet *, struct mbuf *);
static int	em_mq_start_locked(struct ifnet *,
		    struct tx_ring *, struct mbuf *);
static void	em_qflush(struct ifnet *);
#endif
static int	em_ioctl(struct ifnet *, u_long, caddr_t);
static void	em_init(void *);
static void	em_init_locked(struct adapter *);
static void	em_stop(void *);
static void	em_media_status(struct ifnet *, struct ifmediareq *);
static int	em_media_change(struct ifnet *);
static void	em_identify_hardware(struct adapter *);
static int	em_allocate_pci_resources(struct adapter *);
static int	em_allocate_legacy(struct adapter *);
static int	em_allocate_msix(struct adapter *);
static int	em_allocate_queues(struct adapter *);
static int	em_setup_msix(struct adapter *);
static void	em_free_pci_resources(struct adapter *);
static void	em_local_timer(void *);
static void	em_reset(struct adapter *);
static void	em_setup_interface(device_t, struct adapter *);

static void	em_setup_transmit_structures(struct adapter *);
static void	em_initialize_transmit_unit(struct adapter *);
static int	em_allocate_transmit_buffers(struct tx_ring *);
static void	em_free_transmit_structures(struct adapter *);
static void	em_free_transmit_buffers(struct tx_ring *);

static int	em_setup_receive_structures(struct adapter *);
static int	em_allocate_receive_buffers(struct rx_ring *);
static void	em_initialize_receive_unit(struct adapter *);
static void	em_free_receive_structures(struct adapter *);
static void	em_free_receive_buffers(struct rx_ring *);

static void	em_enable_intr(struct adapter *);
static void	em_disable_intr(struct adapter *);
static void	em_update_stats_counters(struct adapter *);
static bool	em_txeof(struct tx_ring *);
static int	em_rxeof(struct rx_ring *, int);
#ifndef __NO_STRICT_ALIGNMENT
static int	em_fixup_rx(struct rx_ring *);
#endif
static void	em_receive_checksum(struct e1000_rx_desc *, struct mbuf *);
static void	em_transmit_checksum_setup(struct tx_ring *, struct mbuf *,
		    u32 *, u32 *);
static bool	em_tso_setup(struct tx_ring *, struct mbuf *, u32 *, u32 *);
static void	em_set_promisc(struct adapter *);
static void	em_disable_promisc(struct adapter *);
static void	em_set_multi(struct adapter *);
static void	em_print_hw_stats(struct adapter *);
static void	em_update_link_status(struct adapter *);
static void	em_refresh_mbufs(struct rx_ring *, int);
static void	em_register_vlan(void *, struct ifnet *, u16);
static void	em_unregister_vlan(void *, struct ifnet *, u16);
static void	em_setup_vlan_hw_support(struct adapter *);
static int	em_xmit(struct tx_ring *, struct mbuf **);
static int	em_dma_malloc(struct adapter *, bus_size_t,
		    struct em_dma_alloc *, int);
static void	em_dma_free(struct adapter *, struct em_dma_alloc *);
static void	em_print_debug_info(struct adapter *);
static void	em_print_nvm_info(struct adapter *);
static int 	em_is_valid_ether_addr(u8 *);
static int	em_sysctl_stats(SYSCTL_HANDLER_ARGS);
static int	em_sysctl_debug_info(SYSCTL_HANDLER_ARGS);
static int	em_sysctl_int_delay(SYSCTL_HANDLER_ARGS);
static void	em_add_int_delay_sysctl(struct adapter *, const char *,
		    const char *, struct em_int_delay_info *, int, int);
/* Management and WOL Support */
static void	em_init_manageability(struct adapter *);
static void	em_release_manageability(struct adapter *);
static void     em_get_hw_control(struct adapter *);
static void     em_release_hw_control(struct adapter *);
static void	em_get_wakeup(device_t);
static void     em_enable_wakeup(device_t);
static int	em_enable_phy_wakeup(struct adapter *);
static void	em_led_func(void *, int);

static int	em_irq_fast(void *);

/* MSIX handlers */
static void	em_msix_tx(void *);
static void	em_msix_rx(void *);
static void	em_msix_link(void *);
static void	em_handle_tx(void *context, int pending);
static void	em_handle_rx(void *context, int pending);
static void	em_handle_link(void *context, int pending);

static void	em_add_rx_process_limit(struct adapter *, const char *,
		    const char *, int *, int);

#ifdef DEVICE_POLLING
static poll_handler_t em_poll;
288#endif /* POLLING */
289
290/*********************************************************************
291 *  FreeBSD Device Interface Entry Points
292 *********************************************************************/
293
294static device_method_t em_methods[] = {
295	/* Device interface */
296	DEVMETHOD(device_probe, em_probe),
297	DEVMETHOD(device_attach, em_attach),
298	DEVMETHOD(device_detach, em_detach),
299	DEVMETHOD(device_shutdown, em_shutdown),
300	DEVMETHOD(device_suspend, em_suspend),
301	DEVMETHOD(device_resume, em_resume),
302	{0, 0}
303};
304
305static driver_t em_driver = {
306	"em", em_methods, sizeof(struct adapter),
307};
308
309devclass_t em_devclass;
310DRIVER_MODULE(em, pci, em_driver, em_devclass, 0, 0);
311MODULE_DEPEND(em, pci, 1, 1, 1);
312MODULE_DEPEND(em, ether, 1, 1, 1);
313
314/*********************************************************************
315 *  Tunable default values.
316 *********************************************************************/
317
318#define EM_TICKS_TO_USECS(ticks)	((1024 * (ticks) + 500) / 1000)
319#define EM_USECS_TO_TICKS(usecs)	((1000 * (usecs) + 512) / 1024)
320#define M_TSO_LEN			66
321
322/* Allow common code without TSO */
323#ifndef CSUM_TSO
324#define CSUM_TSO	0
325#endif
326
327static int em_tx_int_delay_dflt = EM_TICKS_TO_USECS(EM_TIDV);
328static int em_rx_int_delay_dflt = EM_TICKS_TO_USECS(EM_RDTR);
329TUNABLE_INT("hw.em.tx_int_delay", &em_tx_int_delay_dflt);
330TUNABLE_INT("hw.em.rx_int_delay", &em_rx_int_delay_dflt);
331
332static int em_tx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_TADV);
333static int em_rx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_RADV);
334TUNABLE_INT("hw.em.tx_abs_int_delay", &em_tx_abs_int_delay_dflt);
335TUNABLE_INT("hw.em.rx_abs_int_delay", &em_rx_abs_int_delay_dflt);
336
337static int em_rxd = EM_DEFAULT_RXD;
338static int em_txd = EM_DEFAULT_TXD;
339TUNABLE_INT("hw.em.rxd", &em_rxd);
340TUNABLE_INT("hw.em.txd", &em_txd);
341
342static int em_smart_pwr_down = FALSE;
343TUNABLE_INT("hw.em.smart_pwr_down", &em_smart_pwr_down);
344
345/* Controls whether promiscuous also shows bad packets */
346static int em_debug_sbp = FALSE;
347TUNABLE_INT("hw.em.sbp", &em_debug_sbp);
348
349/* Local controls for MSI/MSIX */
350static int em_enable_msix = TRUE;
351static int em_msix_queues = 2; /* for 82574, can be 1 or 2 */
352TUNABLE_INT("hw.em.enable_msix", &em_enable_msix);
353TUNABLE_INT("hw.em.msix_queues", &em_msix_queues);
354
355/* How many packets rxeof tries to clean at a time */
356static int em_rx_process_limit = 100;
357TUNABLE_INT("hw.em.rx_process_limit", &em_rx_process_limit);
358
359/* Flow control setting - default to FULL */
360static int em_fc_setting = e1000_fc_full;
361TUNABLE_INT("hw.em.fc_setting", &em_fc_setting);
362
363/*
364** Shadow VFTA table, this is needed because
365** the real vlan filter table gets cleared during
366** a soft reset and the driver needs to be able
367** to repopulate it.
368*/
369static u32 em_shadow_vfta[EM_VFTA_SIZE];
370
371/* Global used in WOL setup with multiport cards */
372static int global_quad_port_a = 0;
373
374/*********************************************************************
375 *  Device identification routine
376 *
377 *  em_probe determines if the driver should be loaded on
378 *  adapter based on PCI vendor/device id of the adapter.
379 *
380 *  return BUS_PROBE_DEFAULT on success, positive on failure
381 *********************************************************************/
382
383static int
384em_probe(device_t dev)
385{
386	char		adapter_name[60];
387	u16		pci_vendor_id = 0;
388	u16		pci_device_id = 0;
389	u16		pci_subvendor_id = 0;
390	u16		pci_subdevice_id = 0;
391	em_vendor_info_t *ent;
392
393	INIT_DEBUGOUT("em_probe: begin");
394
395	pci_vendor_id = pci_get_vendor(dev);
396	if (pci_vendor_id != EM_VENDOR_ID)
397		return (ENXIO);
398
399	pci_device_id = pci_get_device(dev);
400	pci_subvendor_id = pci_get_subvendor(dev);
401	pci_subdevice_id = pci_get_subdevice(dev);
402
403	ent = em_vendor_info_array;
404	while (ent->vendor_id != 0) {
405		if ((pci_vendor_id == ent->vendor_id) &&
406		    (pci_device_id == ent->device_id) &&
407
408		    ((pci_subvendor_id == ent->subvendor_id) ||
409		    (ent->subvendor_id == PCI_ANY_ID)) &&
410
411		    ((pci_subdevice_id == ent->subdevice_id) ||
412		    (ent->subdevice_id == PCI_ANY_ID))) {
413			sprintf(adapter_name, "%s %s",
414				em_strings[ent->index],
415				em_driver_version);
416			device_set_desc_copy(dev, adapter_name);
417			return (BUS_PROBE_DEFAULT);
418		}
419		ent++;
420	}
421
422	return (ENXIO);
423}
424
425/*********************************************************************
426 *  Device initialization routine
427 *
428 *  The attach entry point is called when the driver is being loaded.
429 *  This routine identifies the type of hardware, allocates all resources
430 *  and initializes the hardware.
431 *
432 *  return 0 on success, positive on failure
433 *********************************************************************/
434
435static int
436em_attach(device_t dev)
437{
438	struct adapter	*adapter;
439	int		error = 0;
440
441	INIT_DEBUGOUT("em_attach: begin");
442
443	adapter = device_get_softc(dev);
444	adapter->dev = adapter->osdep.dev = dev;
445	EM_CORE_LOCK_INIT(adapter, device_get_nameunit(dev));
446
447	/* SYSCTL stuff */
448	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
449	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
450	    OID_AUTO, "debug", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
451	    em_sysctl_debug_info, "I", "Debug Information");
452
453	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
454	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
455	    OID_AUTO, "stats", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
456	    em_sysctl_stats, "I", "Statistics");
457
458	callout_init_mtx(&adapter->timer, &adapter->core_mtx, 0);
459
460	/* Determine hardware and mac info */
461	em_identify_hardware(adapter);
462
463	/* Setup PCI resources */
464	if (em_allocate_pci_resources(adapter)) {
465		device_printf(dev, "Allocation of PCI resources failed\n");
466		error = ENXIO;
467		goto err_pci;
468	}
469
470	/*
471	** For ICH8 and family we need to
472	** map the flash memory, and this
473	** must happen after the MAC is
474	** identified
475	*/
476	if ((adapter->hw.mac.type == e1000_ich8lan) ||
477	    (adapter->hw.mac.type == e1000_pchlan) ||
478	    (adapter->hw.mac.type == e1000_ich9lan) ||
479	    (adapter->hw.mac.type == e1000_ich10lan)) {
480		int rid = EM_BAR_TYPE_FLASH;
481		adapter->flash = bus_alloc_resource_any(dev,
482		    SYS_RES_MEMORY, &rid, RF_ACTIVE);
483		if (adapter->flash == NULL) {
484			device_printf(dev, "Mapping of Flash failed\n");
485			error = ENXIO;
486			goto err_pci;
487		}
488		/* This is used in the shared code */
489		adapter->hw.flash_address = (u8 *)adapter->flash;
490		adapter->osdep.flash_bus_space_tag =
491		    rman_get_bustag(adapter->flash);
492		adapter->osdep.flash_bus_space_handle =
493		    rman_get_bushandle(adapter->flash);
494	}
495
496	/* Do Shared Code initialization */
497	if (e1000_setup_init_funcs(&adapter->hw, TRUE)) {
498		device_printf(dev, "Setup of Shared code failed\n");
499		error = ENXIO;
500		goto err_pci;
501	}
502
503	e1000_get_bus_info(&adapter->hw);
504
505	/* Set up some sysctls for the tunable interrupt delays */
506	em_add_int_delay_sysctl(adapter, "rx_int_delay",
507	    "receive interrupt delay in usecs", &adapter->rx_int_delay,
508	    E1000_REGISTER(&adapter->hw, E1000_RDTR), em_rx_int_delay_dflt);
509	em_add_int_delay_sysctl(adapter, "tx_int_delay",
510	    "transmit interrupt delay in usecs", &adapter->tx_int_delay,
511	    E1000_REGISTER(&adapter->hw, E1000_TIDV), em_tx_int_delay_dflt);
512	em_add_int_delay_sysctl(adapter, "rx_abs_int_delay",
513	    "receive interrupt delay limit in usecs",
514	    &adapter->rx_abs_int_delay,
515	    E1000_REGISTER(&adapter->hw, E1000_RADV),
516	    em_rx_abs_int_delay_dflt);
517	em_add_int_delay_sysctl(adapter, "tx_abs_int_delay",
518	    "transmit interrupt delay limit in usecs",
519	    &adapter->tx_abs_int_delay,
520	    E1000_REGISTER(&adapter->hw, E1000_TADV),
521	    em_tx_abs_int_delay_dflt);
522
523	/* Sysctls for limiting the amount of work done in the taskqueue */
524	em_add_rx_process_limit(adapter, "rx_processing_limit",
525	    "max number of rx packets to process", &adapter->rx_process_limit,
526	    em_rx_process_limit);
527
	/*
	 * Validate the number of transmit and receive descriptors.  They
	 * must not exceed the hardware maximum and must be a multiple
	 * of EM_DBA_ALIGN.
	 */
	if (((em_txd * sizeof(struct e1000_tx_desc)) % EM_DBA_ALIGN) != 0 ||
	    (em_txd > EM_MAX_TXD) || (em_txd < EM_MIN_TXD)) {
		device_printf(dev, "Using %d TX descriptors instead of %d!\n",
		    EM_DEFAULT_TXD, em_txd);
		adapter->num_tx_desc = EM_DEFAULT_TXD;
	} else
		adapter->num_tx_desc = em_txd;

	if (((em_rxd * sizeof(struct e1000_rx_desc)) % EM_DBA_ALIGN) != 0 ||
	    (em_rxd > EM_MAX_RXD) || (em_rxd < EM_MIN_RXD)) {
		device_printf(dev, "Using %d RX descriptors instead of %d!\n",
		    EM_DEFAULT_RXD, em_rxd);
		adapter->num_rx_desc = EM_DEFAULT_RXD;
	} else
		adapter->num_rx_desc = em_rxd;

	adapter->hw.mac.autoneg = DO_AUTO_NEG;
	adapter->hw.phy.autoneg_wait_to_complete = FALSE;
	adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;

	/* Copper options */
	if (adapter->hw.phy.media_type == e1000_media_type_copper) {
		adapter->hw.phy.mdix = AUTO_ALL_MODES;
		adapter->hw.phy.disable_polarity_correction = FALSE;
		adapter->hw.phy.ms_type = EM_MASTER_SLAVE;
	}

	/*
	 * Set the frame limits assuming
	 * standard ethernet sized frames.
	 */
	adapter->max_frame_size = ETHERMTU + ETHER_HDR_LEN + ETHERNET_FCS_SIZE;
	adapter->min_frame_size = ETH_ZLEN + ETHERNET_FCS_SIZE;

	/*
	 * This controls when hardware reports transmit completion
	 * status.
	 */
	adapter->hw.mac.report_tx_early = 1;

	/*
	** Get queue/ring memory
	*/
	if (em_allocate_queues(adapter)) {
		error = ENOMEM;
		goto err_pci;
	}

	/*
	** Start from a known state; this is
	** important for reading the NVM and
	** MAC address from the hardware.
	*/
	e1000_reset_hw(&adapter->hw);

	/* Make sure we have a good EEPROM before we read from it */
	if (e1000_validate_nvm_checksum(&adapter->hw) < 0) {
		/*
		** Some PCI-E parts fail the first check due to
		** the link being in a sleep state; call it again.
		** If it fails a second time, it's a real issue.
		*/
		if (e1000_validate_nvm_checksum(&adapter->hw) < 0) {
			device_printf(dev,
			    "The EEPROM Checksum Is Not Valid\n");
			error = EIO;
			goto err_late;
		}
	}

	/* Copy the permanent MAC address out of the EEPROM */
	if (e1000_read_mac_addr(&adapter->hw) < 0) {
		device_printf(dev, "EEPROM read error while reading MAC"
		    " address\n");
		error = EIO;
		goto err_late;
	}

	if (!em_is_valid_ether_addr(adapter->hw.mac.addr)) {
		device_printf(dev, "Invalid MAC address\n");
		error = EIO;
		goto err_late;
	}

	/*
	**  Do interrupt configuration
	*/
	if (adapter->msix > 1) /* Do MSIX */
		error = em_allocate_msix(adapter);
	else  /* MSI or Legacy */
		error = em_allocate_legacy(adapter);
	if (error)
		goto err_late;

	/*
	 * Get Wake-on-Lan and Management info for later use
	 */
	em_get_wakeup(dev);

	/* Setup OS specific network interface */
	em_setup_interface(dev, adapter);

	em_reset(adapter);

	/* Initialize statistics */
	em_update_stats_counters(adapter);

	adapter->hw.mac.get_link_status = 1;
	em_update_link_status(adapter);

	/* Indicate SOL/IDER usage */
	if (e1000_check_reset_block(&adapter->hw))
		device_printf(dev,
		    "PHY reset is blocked due to SOL/IDER session.\n");

	/* Register for VLAN events */
	adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
	    em_register_vlan, adapter, EVENTHANDLER_PRI_FIRST);
	adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
	    em_unregister_vlan, adapter, EVENTHANDLER_PRI_FIRST);

	/* Non-AMT based hardware can now take control from firmware */
	if (adapter->has_manage && !adapter->has_amt)
		em_get_hw_control(adapter);

	/* Tell the stack that the interface is not active */
	adapter->ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);

	adapter->led_dev = led_create(em_led_func, adapter,
	    device_get_nameunit(dev));

	INIT_DEBUGOUT("em_attach: end");

	return (0);

err_late:
	em_free_transmit_structures(adapter);
	em_free_receive_structures(adapter);
	em_release_hw_control(adapter);
err_pci:
	em_free_pci_resources(adapter);
	EM_CORE_LOCK_DESTROY(adapter);

	return (error);
}

/*********************************************************************
 *  Device removal routine
 *
 *  The detach entry point is called when the driver is being removed.
 *  This routine stops the adapter and deallocates all the resources
 *  that were allocated for driver operation.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/

static int
em_detach(device_t dev)
{
	struct adapter	*adapter = device_get_softc(dev);
	struct ifnet	*ifp = adapter->ifp;

	INIT_DEBUGOUT("em_detach: begin");

	/* Make sure VLANS are not using driver */
	if (adapter->ifp->if_vlantrunk != NULL) {
		device_printf(dev,"Vlan in use, detach first\n");
		return (EBUSY);
	}

#ifdef DEVICE_POLLING
	if (ifp->if_capenable & IFCAP_POLLING)
		ether_poll_deregister(ifp);
#endif

	EM_CORE_LOCK(adapter);
	adapter->in_detach = 1;
	em_stop(adapter);
	EM_CORE_UNLOCK(adapter);
	EM_CORE_LOCK_DESTROY(adapter);

	e1000_phy_hw_reset(&adapter->hw);

	em_release_manageability(adapter);
	em_release_hw_control(adapter);

	/* Unregister VLAN events */
	if (adapter->vlan_attach != NULL)
		EVENTHANDLER_DEREGISTER(vlan_config, adapter->vlan_attach);
	if (adapter->vlan_detach != NULL)
		EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach);

	ether_ifdetach(adapter->ifp);
	callout_drain(&adapter->timer);

	em_free_pci_resources(adapter);
	bus_generic_detach(dev);
	if_free(ifp);

	em_free_transmit_structures(adapter);
	em_free_receive_structures(adapter);

	em_release_hw_control(adapter);

	return (0);
}

/*********************************************************************
 *
 *  Shutdown entry point
 *
 **********************************************************************/

static int
em_shutdown(device_t dev)
{
	return em_suspend(dev);
}

/*
 * Suspend/resume device methods.
 */
static int
em_suspend(device_t dev)
{
	struct adapter *adapter = device_get_softc(dev);

	EM_CORE_LOCK(adapter);

	em_release_manageability(adapter);
	em_release_hw_control(adapter);
	em_enable_wakeup(dev);

	EM_CORE_UNLOCK(adapter);

	return bus_generic_suspend(dev);
}

static int
em_resume(device_t dev)
{
	struct adapter *adapter = device_get_softc(dev);
	struct ifnet *ifp = adapter->ifp;

	if (adapter->led_dev != NULL)
		led_destroy(adapter->led_dev);

	EM_CORE_LOCK(adapter);
	em_init_locked(adapter);
	em_init_manageability(adapter);
	EM_CORE_UNLOCK(adapter);
	em_start(ifp);

	return bus_generic_resume(dev);
}


/*********************************************************************
 *  Transmit entry point
 *
 *  em_start is called by the stack to initiate a transmit.
 *  The driver will remain in this routine as long as there are
 *  packets to transmit and transmit resources are available.
 *  If resources are not available, the stack is notified and
 *  the packet is requeued.
 **********************************************************************/

#ifdef EM_MULTIQUEUE
static int
em_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr, struct mbuf *m)
{
	struct adapter  *adapter = txr->adapter;
	struct mbuf     *next;
	int             err = 0, enq = 0;

	if ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) !=
	    IFF_DRV_RUNNING || adapter->link_active == 0) {
		if (m != NULL)
			err = drbr_enqueue(ifp, txr->br, m);
		return (err);
	}

	/* Call cleanup if number of TX descriptors low */
	if (txr->tx_avail <= EM_TX_CLEANUP_THRESHOLD)
		em_txeof(txr);

	enq = 0;
	if (m == NULL) {
		next = drbr_dequeue(ifp, txr->br);
	} else if (drbr_needs_enqueue(ifp, txr->br)) {
		if ((err = drbr_enqueue(ifp, txr->br, m)) != 0)
			return (err);
		next = drbr_dequeue(ifp, txr->br);
	} else
		next = m;

	/* Process the queue */
	while (next != NULL) {
		if ((err = em_xmit(txr, &next)) != 0) {
			if (next != NULL)
				err = drbr_enqueue(ifp, txr->br, next);
			break;
		}
		enq++;
		drbr_stats_update(ifp, next->m_pkthdr.len, next->m_flags);
		ETHER_BPF_MTAP(ifp, next);
		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
			break;
		if (txr->tx_avail < EM_MAX_SCATTER) {
			ifp->if_drv_flags |= IFF_DRV_OACTIVE;
			break;
		}
		next = drbr_dequeue(ifp, txr->br);
	}

	if (enq > 0) {
		/* Set the watchdog */
		txr->watchdog_check = TRUE;
		txr->watchdog_time = ticks;
	}
	return (err);
}

/*
** Multiqueue-capable stack interface; this is not
** yet truly multiqueue, but that is coming...
*/
static int
em_mq_start(struct ifnet *ifp, struct mbuf *m)
{
	struct adapter	*adapter = ifp->if_softc;
	struct tx_ring	*txr;
	int 		i, error = 0;

	/* Which queue to use */
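	/*
	 * (Packets carrying an M_FLOWID mark, e.g. those hashed by
	 * RSS-capable hardware on receive, are pinned to a ring by
	 * their flowid; everything else is spread by the sending CPU.)
	 */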
	if ((m->m_flags & M_FLOWID) != 0)
		i = m->m_pkthdr.flowid % adapter->num_queues;
	else
		i = curcpu % adapter->num_queues;

	txr = &adapter->tx_rings[i];

	if (EM_TX_TRYLOCK(txr)) {
		error = em_mq_start_locked(ifp, txr, m);
		EM_TX_UNLOCK(txr);
	} else
		error = drbr_enqueue(ifp, txr->br, m);

	return (error);
}

/*
** Flush all ring buffers
*/
static void
em_qflush(struct ifnet *ifp)
{
	struct adapter  *adapter = ifp->if_softc;
	struct tx_ring  *txr = adapter->tx_rings;
	struct mbuf     *m;

	for (int i = 0; i < adapter->num_queues; i++, txr++) {
		EM_TX_LOCK(txr);
		while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
			m_freem(m);
		EM_TX_UNLOCK(txr);
	}
	if_qflush(ifp);
}

#endif /* EM_MULTIQUEUE */

static void
em_start_locked(struct ifnet *ifp, struct tx_ring *txr)
{
	struct adapter	*adapter = ifp->if_softc;
	struct mbuf	*m_head;

	EM_TX_LOCK_ASSERT(txr);

	if ((ifp->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
	    IFF_DRV_RUNNING)
		return;

	if (!adapter->link_active)
		return;

	/* Call cleanup if number of TX descriptors low */
	if (txr->tx_avail <= EM_TX_CLEANUP_THRESHOLD)
		em_txeof(txr);

	while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
		if (txr->tx_avail < EM_MAX_SCATTER) {
			ifp->if_drv_flags |= IFF_DRV_OACTIVE;
			break;
		}
		IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
		if (m_head == NULL)
			break;
		/*
		 *  Encapsulation can modify our pointer, and/or make it
		 *  NULL on failure.  In that event, we can't requeue.
		 */
		if (em_xmit(txr, &m_head)) {
			if (m_head == NULL)
				break;
			ifp->if_drv_flags |= IFF_DRV_OACTIVE;
			IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
			break;
		}

		/* Send a copy of the frame to the BPF listener */
		ETHER_BPF_MTAP(ifp, m_head);

		/* Set timeout in case hardware has problems transmitting. */
		txr->watchdog_time = ticks;
		txr->watchdog_check = TRUE;
	}

	return;
}

static void
em_start(struct ifnet *ifp)
{
	struct adapter	*adapter = ifp->if_softc;
	struct tx_ring	*txr = adapter->tx_rings;

	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
		EM_TX_LOCK(txr);
		em_start_locked(ifp, txr);
		EM_TX_UNLOCK(txr);
	}
	return;
}

/*********************************************************************
 *  Ioctl entry point
 *
 *  em_ioctl is called when the user wants to configure the
 *  interface.
 *
 *  return 0 on success, positive on failure
 **********************************************************************/

static int
em_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
{
	struct adapter	*adapter = ifp->if_softc;
	struct ifreq *ifr = (struct ifreq *)data;
#ifdef INET
	struct ifaddr *ifa = (struct ifaddr *)data;
#endif
	int error = 0;

	if (adapter->in_detach)
		return (error);

	switch (command) {
	case SIOCSIFADDR:
#ifdef INET
		if (ifa->ifa_addr->sa_family == AF_INET) {
			/*
			 * XXX
			 * Since resetting hardware takes a very long time
			 * and results in link renegotiation, we initialize
			 * the hardware only when it is absolutely
			 * required.
			 */
			ifp->if_flags |= IFF_UP;
			if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) {
				EM_CORE_LOCK(adapter);
				em_init_locked(adapter);
				EM_CORE_UNLOCK(adapter);
			}
			arp_ifinit(ifp, ifa);
		} else
#endif
			error = ether_ioctl(ifp, command, data);
		break;
	case SIOCSIFMTU:
	    {
		int max_frame_size;

		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)");

		EM_CORE_LOCK(adapter);
		switch (adapter->hw.mac.type) {
		case e1000_82571:
		case e1000_82572:
		case e1000_ich9lan:
		case e1000_ich10lan:
		case e1000_82574:
		case e1000_80003es2lan:	/* 9K Jumbo Frame size */
			max_frame_size = 9234;
			break;
		case e1000_pchlan:
			max_frame_size = 4096;
			break;
			/* Adapters that do not support jumbo frames */
		case e1000_82583:
		case e1000_ich8lan:
			max_frame_size = ETHER_MAX_LEN;
			break;
		default:
			max_frame_size = MAX_JUMBO_FRAME_SIZE;
		}
		if (ifr->ifr_mtu > max_frame_size - ETHER_HDR_LEN -
		    ETHER_CRC_LEN) {
			EM_CORE_UNLOCK(adapter);
			error = EINVAL;
			break;
		}

		ifp->if_mtu = ifr->ifr_mtu;
		adapter->max_frame_size =
		    ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
		em_init_locked(adapter);
		EM_CORE_UNLOCK(adapter);
		break;
	    }
	case SIOCSIFFLAGS:
		IOCTL_DEBUGOUT("ioctl rcv'd:\
		    SIOCSIFFLAGS (Set Interface Flags)");
		EM_CORE_LOCK(adapter);
		if (ifp->if_flags & IFF_UP) {
			if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) {
				if ((ifp->if_flags ^ adapter->if_flags) &
				    (IFF_PROMISC | IFF_ALLMULTI)) {
					em_disable_promisc(adapter);
					em_set_promisc(adapter);
				}
			} else
				em_init_locked(adapter);
		} else
			if (ifp->if_drv_flags & IFF_DRV_RUNNING)
				em_stop(adapter);
		adapter->if_flags = ifp->if_flags;
		EM_CORE_UNLOCK(adapter);
		break;
	case SIOCADDMULTI:
	case SIOCDELMULTI:
		IOCTL_DEBUGOUT("ioctl rcv'd: SIOC(ADD|DEL)MULTI");
		if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
			EM_CORE_LOCK(adapter);
			em_disable_intr(adapter);
			em_set_multi(adapter);
#ifdef DEVICE_POLLING
			if (!(ifp->if_capenable & IFCAP_POLLING))
#endif
				em_enable_intr(adapter);
			EM_CORE_UNLOCK(adapter);
		}
		break;
	case SIOCSIFMEDIA:
		/* Check SOL/IDER usage */
		EM_CORE_LOCK(adapter);
		if (e1000_check_reset_block(&adapter->hw)) {
			EM_CORE_UNLOCK(adapter);
			device_printf(adapter->dev, "Media change is"
			    " blocked due to SOL/IDER session.\n");
			break;
		}
		EM_CORE_UNLOCK(adapter);
	case SIOCGIFMEDIA:
		IOCTL_DEBUGOUT("ioctl rcv'd: \
		    SIOCxIFMEDIA (Get/Set Interface Media)");
		error = ifmedia_ioctl(ifp, ifr, &adapter->media, command);
		break;
	case SIOCSIFCAP:
	    {
		int mask, reinit;

		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFCAP (Set Capabilities)");
		reinit = 0;
		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
#ifdef DEVICE_POLLING
		if (mask & IFCAP_POLLING) {
			if (ifr->ifr_reqcap & IFCAP_POLLING) {
				error = ether_poll_register(em_poll, ifp);
				if (error)
					return (error);
				EM_CORE_LOCK(adapter);
				em_disable_intr(adapter);
				ifp->if_capenable |= IFCAP_POLLING;
				EM_CORE_UNLOCK(adapter);
			} else {
				error = ether_poll_deregister(ifp);
				/* Enable interrupt even in error case */
				EM_CORE_LOCK(adapter);
				em_enable_intr(adapter);
				ifp->if_capenable &= ~IFCAP_POLLING;
				EM_CORE_UNLOCK(adapter);
			}
		}
#endif
		if (mask & IFCAP_HWCSUM) {
			ifp->if_capenable ^= IFCAP_HWCSUM;
			reinit = 1;
		}
		if (mask & IFCAP_TSO4) {
			ifp->if_capenable ^= IFCAP_TSO4;
			reinit = 1;
		}
		if (mask & IFCAP_VLAN_HWTAGGING) {
			ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
			reinit = 1;
		}
		if (mask & IFCAP_VLAN_HWFILTER) {
			ifp->if_capenable ^= IFCAP_VLAN_HWFILTER;
			reinit = 1;
		}
		if ((mask & IFCAP_WOL) &&
		    (ifp->if_capabilities & IFCAP_WOL) != 0) {
			if (mask & IFCAP_WOL_MCAST)
				ifp->if_capenable ^= IFCAP_WOL_MCAST;
			if (mask & IFCAP_WOL_MAGIC)
				ifp->if_capenable ^= IFCAP_WOL_MAGIC;
		}
		if (reinit && (ifp->if_drv_flags & IFF_DRV_RUNNING))
			em_init(adapter);
		VLAN_CAPABILITIES(ifp);
		break;
	    }

	default:
		error = ether_ioctl(ifp, command, data);
		break;
	}

	return (error);
}


/*********************************************************************
 *  Init entry point
 *
 *  This routine is used in two ways.  It is used by the stack as
 *  the init entry point in the network interface structure.  It is
 *  also used by the driver as a hw/sw initialization routine to get
 *  the hardware and software to a consistent state.
 *
 **********************************************************************/

static void
em_init_locked(struct adapter *adapter)
{
	struct ifnet	*ifp = adapter->ifp;
	device_t	dev = adapter->dev;
	u32		pba;

	INIT_DEBUGOUT("em_init: begin");

	EM_CORE_LOCK_ASSERT(adapter);

	em_disable_intr(adapter);
	callout_stop(&adapter->timer);

	/*
	 * Packet Buffer Allocation (PBA)
	 * Writing PBA sets the receive portion of the buffer;
	 * the remainder is used for the transmit buffer.
	 */
	switch (adapter->hw.mac.type) {
	/* Total Packet Buffer on these is 48K */
	case e1000_82571:
	case e1000_82572:
	case e1000_80003es2lan:
			pba = E1000_PBA_32K; /* 32K for Rx, 16K for Tx */
		break;
	case e1000_82573: /* 82573: Total Packet Buffer is 32K */
			pba = E1000_PBA_12K; /* 12K for Rx, 20K for Tx */
		break;
	case e1000_82574:
	case e1000_82583:
			pba = E1000_PBA_20K; /* 20K for Rx, 20K for Tx */
		break;
	case e1000_ich9lan:
	case e1000_ich10lan:
	case e1000_pchlan:
		pba = E1000_PBA_10K;
		break;
	case e1000_ich8lan:
		pba = E1000_PBA_8K;
		break;
	default:
		if (adapter->max_frame_size > 8192)
			pba = E1000_PBA_40K; /* 40K for Rx, 24K for Tx */
		else
			pba = E1000_PBA_48K; /* 48K for Rx, 16K for Tx */
	}

	INIT_DEBUGOUT1("em_init: pba=%dK",pba);
	E1000_WRITE_REG(&adapter->hw, E1000_PBA, pba);

	/* Get the latest mac address, User can use a LAA */
	bcopy(IF_LLADDR(adapter->ifp), adapter->hw.mac.addr,
	      ETHER_ADDR_LEN);

	/* Put the address into the Receive Address Array */
	e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);

	/*
	 * With the 82571 adapter, RAR[0] may be overwritten
	 * when the other port is reset, so we make a duplicate
	 * in RAR[14] for that eventuality; this assures
	 * the interface continues to function.
	 */
	if (adapter->hw.mac.type == e1000_82571) {
		e1000_set_laa_state_82571(&adapter->hw, TRUE);
		e1000_rar_set(&adapter->hw, adapter->hw.mac.addr,
		    E1000_RAR_ENTRIES - 1);
	}

	/* Initialize the hardware */
	em_reset(adapter);
	em_update_link_status(adapter);

	/* Setup VLAN support, basic and offload if available */
	E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);

	/* Use real VLAN Filter support? */
	if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) {
		if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
			/* Use real VLAN Filter support */
			em_setup_vlan_hw_support(adapter);
		else {
			u32 ctrl;
			ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
			ctrl |= E1000_CTRL_VME;
			E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
		}
	}

	/* Set hardware offload abilities */
	ifp->if_hwassist = 0;
	if (ifp->if_capenable & IFCAP_TXCSUM)
		ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
	if (ifp->if_capenable & IFCAP_TSO4)
		ifp->if_hwassist |= CSUM_TSO;

	/* Configure for OS presence */
	em_init_manageability(adapter);

	/* Prepare transmit descriptors and buffers */
	em_setup_transmit_structures(adapter);
	em_initialize_transmit_unit(adapter);

	/* Setup Multicast table */
	em_set_multi(adapter);

	/* Prepare receive descriptors and buffers */
	if (em_setup_receive_structures(adapter)) {
		device_printf(dev, "Could not setup receive structures\n");
		em_stop(adapter);
		return;
	}
	em_initialize_receive_unit(adapter);

	/* Don't lose promiscuous settings */
	em_set_promisc(adapter);

	ifp->if_drv_flags |= IFF_DRV_RUNNING;
	ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;

	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
	e1000_clear_hw_cntrs_base_generic(&adapter->hw);

	/* MSI/X configuration for 82574 */
	if (adapter->hw.mac.type == e1000_82574) {
		int tmp;
		tmp = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
		tmp |= E1000_CTRL_EXT_PBA_CLR;
		E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, tmp);
		/* Set the IVAR - interrupt vector routing. */
		E1000_WRITE_REG(&adapter->hw, E1000_IVAR, adapter->ivars);
	}

#ifdef DEVICE_POLLING
	/*
	 * Only enable interrupts if we are not polling; make sure
	 * they are off otherwise.
	 */
	if (ifp->if_capenable & IFCAP_POLLING)
		em_disable_intr(adapter);
	else
#endif /* DEVICE_POLLING */
		em_enable_intr(adapter);

	/* AMT based hardware can now take control from firmware */
	if (adapter->has_manage && adapter->has_amt)
		em_get_hw_control(adapter);

	/* Don't reset the phy next time init gets called */
	adapter->hw.phy.reset_disable = TRUE;
}

static void
em_init(void *arg)
{
	struct adapter *adapter = arg;

	EM_CORE_LOCK(adapter);
	em_init_locked(adapter);
	EM_CORE_UNLOCK(adapter);
}


#ifdef DEVICE_POLLING
/*********************************************************************
 *
 *  Legacy polling routine: note this only works with a single queue
 *
 *********************************************************************/
static int
em_poll(struct ifnet *ifp, enum poll_cmd cmd, int count)
{
	struct adapter *adapter = ifp->if_softc;
	struct tx_ring	*txr = adapter->tx_rings;
	struct rx_ring	*rxr = adapter->rx_rings;
	u32		reg_icr, rx_done = 0;

	EM_CORE_LOCK(adapter);
	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
		EM_CORE_UNLOCK(adapter);
		return (rx_done);
	}

	if (cmd == POLL_AND_CHECK_STATUS) {
		reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
		if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
			callout_stop(&adapter->timer);
			adapter->hw.mac.get_link_status = 1;
			em_update_link_status(adapter);
			callout_reset(&adapter->timer, hz,
			    em_local_timer, adapter);
		}
	}
	EM_CORE_UNLOCK(adapter);

	rx_done = em_rxeof(rxr, count);

	EM_TX_LOCK(txr);
	em_txeof(txr);
#ifdef EM_MULTIQUEUE
	if (!drbr_empty(ifp, txr->br))
		em_mq_start_locked(ifp, txr, NULL);
#else
	if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
		em_start_locked(ifp, txr);
#endif
	EM_TX_UNLOCK(txr);

	return (rx_done);
}
#endif /* DEVICE_POLLING */


/*********************************************************************
 *
 *  Fast Legacy/MSI Combined Interrupt Service routine
 *
 *********************************************************************/
static int
em_irq_fast(void *arg)
{
	struct adapter	*adapter = arg;
	struct ifnet	*ifp;
	u32		reg_icr;

	ifp = adapter->ifp;

	reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);

	/* Hot eject?  */
	if (reg_icr == 0xffffffff)
		return FILTER_STRAY;

	/* Definitely not our interrupt.  */
	if (reg_icr == 0x0)
		return FILTER_STRAY;

	/*
	 * Starting with the 82571 chip, bit 31 should be used to
	 * determine whether the interrupt belongs to us.
	 */
	if (adapter->hw.mac.type >= e1000_82571 &&
	    (reg_icr & E1000_ICR_INT_ASSERTED) == 0)
		return FILTER_STRAY;

	em_disable_intr(adapter);
	taskqueue_enqueue(adapter->tq, &adapter->que_task);

	/* Link status change */
	if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
		adapter->hw.mac.get_link_status = 1;
		taskqueue_enqueue(taskqueue_fast, &adapter->link_task);
	}

	if (reg_icr & E1000_ICR_RXO)
		adapter->rx_overruns++;
	return FILTER_HANDLED;
}

/* Combined RX/TX handler, used by Legacy and MSI */
static void
em_handle_que(void *context, int pending)
{
	struct adapter	*adapter = context;
	struct ifnet	*ifp = adapter->ifp;
	struct tx_ring	*txr = adapter->tx_rings;
	struct rx_ring	*rxr = adapter->rx_rings;
	bool		more_rx;


	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
		more_rx = em_rxeof(rxr, adapter->rx_process_limit);
		EM_TX_LOCK(txr);
		em_txeof(txr);
#ifdef EM_MULTIQUEUE
		if (!drbr_empty(ifp, txr->br))
			em_mq_start_locked(ifp, txr, NULL);
#else
		if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
			em_start_locked(ifp, txr);
#endif
		EM_TX_UNLOCK(txr);
		if (more_rx)
			taskqueue_enqueue(adapter->tq, &adapter->que_task);
	}

	em_enable_intr(adapter);
	return;
}


/*********************************************************************
 *
 *  MSIX Interrupt Service Routines
 *
 **********************************************************************/
static void
em_msix_tx(void *arg)
{
	struct tx_ring *txr = arg;
	struct adapter *adapter = txr->adapter;

	++txr->tx_irq;
	EM_TX_LOCK(txr);
	em_txeof(txr);
	EM_TX_UNLOCK(txr);
	E1000_WRITE_REG(&adapter->hw, E1000_IMS, txr->ims);
	return;
}

/*********************************************************************
 *
 *  MSIX RX Interrupt Service routine
 *
 **********************************************************************/

static void
em_msix_rx(void *arg)
{
	struct rx_ring	*rxr = arg;
	struct adapter	*adapter = rxr->adapter;
	bool		more;

	++rxr->rx_irq;
	more = em_rxeof(rxr, adapter->rx_process_limit);
	if (more)
		taskqueue_enqueue(rxr->tq, &rxr->rx_task);
	else
		/* Reenable this interrupt */
		E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxr->ims);
	return;
}

/*********************************************************************
 *
 *  MSIX Link Fast Interrupt Service routine
 *
 **********************************************************************/
static void
em_msix_link(void *arg)
{
	struct adapter	*adapter = arg;
	u32		reg_icr;

	++adapter->link_irq;
	reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);

	if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
		adapter->hw.mac.get_link_status = 1;
		taskqueue_enqueue(taskqueue_fast, &adapter->link_task);
	} else
		E1000_WRITE_REG(&adapter->hw, E1000_IMS,
		    EM_MSIX_LINK | E1000_IMS_LSC);
	return;
}

static void
em_handle_rx(void *context, int pending)
{
	struct rx_ring	*rxr = context;
	struct adapter	*adapter = rxr->adapter;
	bool		more;

	more = em_rxeof(rxr, adapter->rx_process_limit);
	if (more)
		taskqueue_enqueue(rxr->tq, &rxr->rx_task);
	else
		/* Reenable this interrupt */
		E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxr->ims);
}

static void
em_handle_tx(void *context, int pending)
{
	struct tx_ring	*txr = context;
	struct adapter	*adapter = txr->adapter;
	struct ifnet	*ifp = adapter->ifp;

	if (!EM_TX_TRYLOCK(txr))
		return;

	em_txeof(txr);

#ifdef EM_MULTIQUEUE
	if (!drbr_empty(ifp, txr->br))
		em_mq_start_locked(ifp, txr, NULL);
#else
	if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
		em_start_locked(ifp, txr);
#endif
	E1000_WRITE_REG(&adapter->hw, E1000_IMS, txr->ims);
	EM_TX_UNLOCK(txr);
}

static void
em_handle_link(void *context, int pending)
{
	struct adapter	*adapter = context;
	struct ifnet *ifp = adapter->ifp;

	if (!(ifp->if_drv_flags & IFF_DRV_RUNNING))
		return;

	EM_CORE_LOCK(adapter);
	callout_stop(&adapter->timer);
	em_update_link_status(adapter);
	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
	E1000_WRITE_REG(&adapter->hw, E1000_IMS,
	    EM_MSIX_LINK | E1000_IMS_LSC);
	EM_CORE_UNLOCK(adapter);
}


/*********************************************************************
 *
 *  Media Ioctl callback
 *
 *  This routine is called whenever the user queries the status of
 *  the interface using ifconfig.
 *
 **********************************************************************/
static void
em_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
{
	struct adapter *adapter = ifp->if_softc;
	u_char fiber_type = IFM_1000_SX;

	INIT_DEBUGOUT("em_media_status: begin");

	EM_CORE_LOCK(adapter);
	em_update_link_status(adapter);

	ifmr->ifm_status = IFM_AVALID;
	ifmr->ifm_active = IFM_ETHER;

	if (!adapter->link_active) {
		EM_CORE_UNLOCK(adapter);
		return;
	}

	ifmr->ifm_status |= IFM_ACTIVE;

	if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
	    (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
		ifmr->ifm_active |= fiber_type | IFM_FDX;
	} else {
		switch (adapter->link_speed) {
		case 10:
			ifmr->ifm_active |= IFM_10_T;
			break;
		case 100:
			ifmr->ifm_active |= IFM_100_TX;
			break;
		case 1000:
			ifmr->ifm_active |= IFM_1000_T;
			break;
		}
		if (adapter->link_duplex == FULL_DUPLEX)
			ifmr->ifm_active |= IFM_FDX;
		else
			ifmr->ifm_active |= IFM_HDX;
	}
	EM_CORE_UNLOCK(adapter);
}

/*********************************************************************
 *
 *  Media Ioctl callback
 *
 *  This routine is called when the user changes speed/duplex using
 *  the media/mediaopt options with ifconfig.
 *
 **********************************************************************/
static int
em_media_change(struct ifnet *ifp)
{
	struct adapter *adapter = ifp->if_softc;
	struct ifmedia  *ifm = &adapter->media;

	INIT_DEBUGOUT("em_media_change: begin");

	if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
		return (EINVAL);

	EM_CORE_LOCK(adapter);
	switch (IFM_SUBTYPE(ifm->ifm_media)) {
	case IFM_AUTO:
		adapter->hw.mac.autoneg = DO_AUTO_NEG;
		adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
		break;
	case IFM_1000_LX:
	case IFM_1000_SX:
	case IFM_1000_T:
		adapter->hw.mac.autoneg = DO_AUTO_NEG;
		adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
		break;
	case IFM_100_TX:
		adapter->hw.mac.autoneg = FALSE;
		adapter->hw.phy.autoneg_advertised = 0;
		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
			adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_FULL;
		else
			adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_HALF;
		break;
	case IFM_10_T:
		adapter->hw.mac.autoneg = FALSE;
		adapter->hw.phy.autoneg_advertised = 0;
		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
			adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_FULL;
		else
			adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_HALF;
		break;
	default:
		device_printf(adapter->dev, "Unsupported media type\n");
	}

1695	/* As the speed/duplex settings may have changed we need to
1696	 * reset the PHY.
1697	 */
1698	adapter->hw.phy.reset_disable = FALSE;
1699
1700	em_init_locked(adapter);
1701	EM_CORE_UNLOCK(adapter);
1702
1703	return (0);
1704}
1705
1706/*********************************************************************
1707 *
1708 *  This routine maps the mbufs to tx descriptors.
1709 *
1710 *  return 0 on success, positive on failure
1711 **********************************************************************/
1712
1713static int
1714em_xmit(struct tx_ring *txr, struct mbuf **m_headp)
1715{
1716	struct adapter		*adapter = txr->adapter;
1717	bus_dma_segment_t	segs[EM_MAX_SCATTER];
1718	bus_dmamap_t		map;
1719	struct em_buffer	*tx_buffer, *tx_buffer_mapped;
1720	struct e1000_tx_desc	*ctxd = NULL;
1721	struct mbuf		*m_head;
1722	u32			txd_upper, txd_lower, txd_used, txd_saved;
1723	int			nsegs, i, j, first, last = 0;
1724	int			error, do_tso, tso_desc = 0;
1725
1726	m_head = *m_headp;
1727	txd_upper = txd_lower = txd_used = txd_saved = 0;
1728	do_tso = ((m_head->m_pkthdr.csum_flags & CSUM_TSO) != 0);
1729
1730	/*
1731	 * TSO workaround:
1732	 *  If this mbuf contains only a header, we need
1733	 *     to pull 4 bytes of payload data into it.
1734	 */
1735	if (do_tso && (m_head->m_len <= M_TSO_LEN)) {
1736		m_head = m_pullup(m_head, M_TSO_LEN + 4);
1737		*m_headp = m_head;
1738		if (m_head == NULL)
1739			return (ENOBUFS);
1740	}
1741
1742	/*
1743	 * Map the packet for DMA
1744	 *
1745	 * Capture the first descriptor index,
1746	 * this descriptor will have the index
1747	 * of the EOP which is the only one that
1748	 * now gets a DONE bit writeback.
1749	 */
1750	first = txr->next_avail_desc;
1751	tx_buffer = &txr->tx_buffers[first];
1752	tx_buffer_mapped = tx_buffer;
1753	map = tx_buffer->map;
1754
1755	error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
1756	    *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
1757
1758	/*
1759	 * There are two types of errors we can (try) to handle:
1760	 * - EFBIG means the mbuf chain was too long and bus_dma ran
1761	 *   out of segments.  Defragment the mbuf chain and try again.
1762	 * - ENOMEM means bus_dma could not obtain enough bounce buffers
1763	 *   at this point in time.  Defer sending and try again later.
1764	 * All other errors, in particular EINVAL, are fatal and prevent the
1765	 * mbuf chain from ever going through.  Drop it and report error.
1766	 */
1767	if (error == EFBIG) {
1768		struct mbuf *m;
1769
1770		m = m_defrag(*m_headp, M_DONTWAIT);
1771		if (m == NULL) {
1772			adapter->mbuf_alloc_failed++;
1773			m_freem(*m_headp);
1774			*m_headp = NULL;
1775			return (ENOBUFS);
1776		}
1777		*m_headp = m;
1778
1779		/* Try it again */
1780		error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
1781		    *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
1782
1783		if (error) {
1784			adapter->no_tx_dma_setup++;
1785			m_freem(*m_headp);
1786			*m_headp = NULL;
1787			return (error);
1788		}
1789	} else if (error != 0) {
1790		adapter->no_tx_dma_setup++;
		if (error != ENOMEM) {
			/* Fatal (e.g. EINVAL): drop the chain as documented above. */
			m_freem(*m_headp);
			*m_headp = NULL;
		}
1791		return (error);
1792	}
1793
1794	/*
1795	 * TSO Hardware workaround, if this packet is not
1796	 * TSO, and is only a single descriptor long, and
1797	 * it follows a TSO burst, then we need to add a
1798	 * sentinel descriptor to prevent premature writeback.
1799	 */
1800	if ((do_tso == 0) && (txr->tx_tso == TRUE)) {
1801		if (nsegs == 1)
1802			tso_desc = TRUE;
1803		txr->tx_tso = FALSE;
1804	}
1805
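	/*
	 * Keep two descriptors in reserve beyond the data segments,
	 * for a possible offload context descriptor and the TSO
	 * sentinel split below.
	 */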
1806	if (nsegs > (txr->tx_avail - 2)) {
1807		txr->no_desc_avail++;
1808		bus_dmamap_unload(txr->txtag, map);
1809		return (ENOBUFS);
1810	}
1811	m_head = *m_headp;
1812
1813	/* Do hardware assists */
1814#if __FreeBSD_version >= 700000
1815	if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
1816		error = em_tso_setup(txr, m_head, &txd_upper, &txd_lower);
1817		if (error != TRUE) {
			bus_dmamap_unload(txr->txtag, map);
1818			return (ENXIO); /* something foobar */
		}
1819		/* we need to make a final sentinel transmit desc */
1820		tso_desc = TRUE;
1821	} else
1822#endif
1823	if (m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD)
1824		em_transmit_checksum_setup(txr,  m_head,
1825		    &txd_upper, &txd_lower);
1826
1827	i = txr->next_avail_desc;
1828
1829	/* Set up our transmit descriptors */
1830	for (j = 0; j < nsegs; j++) {
1831		bus_size_t seg_len;
1832		bus_addr_t seg_addr;
1833
1834		tx_buffer = &txr->tx_buffers[i];
1835		ctxd = &txr->tx_base[i];
1836		seg_addr = segs[j].ds_addr;
1837		seg_len  = segs[j].ds_len;
1838		/*
1839		** TSO Workaround:
1840		** If this is the last descriptor, we want to
1841		** split it so we have a small final sentinel
1842		*/
1843		if (tso_desc && (j == (nsegs - 1)) && (seg_len > 8)) {
1844			seg_len -= 4;
1845			ctxd->buffer_addr = htole64(seg_addr);
1846			ctxd->lower.data = htole32(
1847			    adapter->txd_cmd | txd_lower | seg_len);
1848			ctxd->upper.data =
1849			    htole32(txd_upper);
1850			if (++i == adapter->num_tx_desc)
1851				i = 0;
1852			/* Now make the sentinel */
1853			++txd_used; /* using an extra txd */
1854			ctxd = &txr->tx_base[i];
1855			tx_buffer = &txr->tx_buffers[i];
1856			ctxd->buffer_addr =
1857			    htole64(seg_addr + seg_len);
1858			ctxd->lower.data = htole32(
1859			    adapter->txd_cmd | txd_lower | 4);
1860			ctxd->upper.data =
1861			    htole32(txd_upper);
1862			last = i;
1863			if (++i == adapter->num_tx_desc)
1864				i = 0;
1865		} else {
1866			ctxd->buffer_addr = htole64(seg_addr);
1867			ctxd->lower.data = htole32(
1868			    adapter->txd_cmd | txd_lower | seg_len);
1869			ctxd->upper.data =
1870			    htole32(txd_upper);
1871			last = i;
1872			if (++i == adapter->num_tx_desc)
1873				i = 0;
1874		}
1875		tx_buffer->m_head = NULL;
1876		tx_buffer->next_eop = -1;
1877	}
1878
1879	txr->next_avail_desc = i;
1880	txr->tx_avail -= nsegs;
1881	if (tso_desc) /* TSO used an extra for sentinel */
1882		txr->tx_avail -= txd_used;
1883
1884	if (m_head->m_flags & M_VLANTAG) {
1885		/* Set the vlan id. */
1886		ctxd->upper.fields.special =
1887		    htole16(m_head->m_pkthdr.ether_vtag);
1888		/* Tell hardware to add tag */
1889		ctxd->lower.data |= htole32(E1000_TXD_CMD_VLE);
1890	}
1891
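	/*
	 * The DMA load above used the map that belongs to the first
	 * descriptor slot, while the mbuf is stored with the last (EOP)
	 * slot; swap the maps so the unload at cleanup time operates on
	 * the map that actually carries this load.
	 */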
1892	tx_buffer->m_head = m_head;
1893	tx_buffer_mapped->map = tx_buffer->map;
1894	tx_buffer->map = map;
1895	bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);
1896
1897	/*
1898	 * Last Descriptor of Packet
1899	 * needs End Of Packet (EOP)
1900	 * and Report Status (RS)
1901	 */
1902	ctxd->lower.data |=
1903	    htole32(E1000_TXD_CMD_EOP | E1000_TXD_CMD_RS);
1904	/*
1905	 * Keep track in the first buffer which
1906	 * descriptor will be written back
1907	 */
1908	tx_buffer = &txr->tx_buffers[first];
1909	tx_buffer->next_eop = last;
1910
1911	/*
1912	 * Advance the Transmit Descriptor Tail (TDT), this tells the E1000
1913	 * that this frame is available to transmit.
1914	 */
1915	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
1916	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1917	E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), i);
1918
1919	return (0);
1920}
1921
1922static void
1923em_set_promisc(struct adapter *adapter)
1924{
1925	struct ifnet	*ifp = adapter->ifp;
1926	u32		reg_rctl;
1927
1928	reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
1929
1930	if (ifp->if_flags & IFF_PROMISC) {
1931		reg_rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
1932		/* Turn this on if you want to see bad packets */
1933		if (em_debug_sbp)
1934			reg_rctl |= E1000_RCTL_SBP;
1935		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
1936	} else if (ifp->if_flags & IFF_ALLMULTI) {
1937		reg_rctl |= E1000_RCTL_MPE;
1938		reg_rctl &= ~E1000_RCTL_UPE;
1939		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
1940	}
1941}
1942
1943static void
1944em_disable_promisc(struct adapter *adapter)
1945{
1946	u32	reg_rctl;
1947
1948	reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
1949
1950	reg_rctl &=  (~E1000_RCTL_UPE);
1951	reg_rctl &=  (~E1000_RCTL_MPE);
1952	reg_rctl &=  (~E1000_RCTL_SBP);
1953	E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
1954}
1955
1956
1957/*********************************************************************
1958 *  Multicast Update
1959 *
1960 *  This routine is called whenever multicast address list is updated.
1961 *  This routine is called whenever the multicast address list is updated.
1962 **********************************************************************/
1963
1964static void
1965em_set_multi(struct adapter *adapter)
1966{
1967	struct ifnet	*ifp = adapter->ifp;
1968	struct ifmultiaddr *ifma;
1969	u32 reg_rctl = 0;
1970	u8  *mta; /* Multicast array memory */
1971	int mcnt = 0;
1972
1973	IOCTL_DEBUGOUT("em_set_multi: begin");
1974
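	/*
	 * The 82542 rev 2.0 must have its receiver held in reset (with
	 * MWI disabled) while the multicast table is rewritten; it is
	 * restored after the update completes below.
	 */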
1975	if (adapter->hw.mac.type == e1000_82542 &&
1976	    adapter->hw.revision_id == E1000_REVISION_2) {
1977		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
1978		if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
1979			e1000_pci_clear_mwi(&adapter->hw);
1980		reg_rctl |= E1000_RCTL_RST;
1981		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
1982		msec_delay(5);
1983	}
1984
1985	/* Allocate temporary memory to setup array */
1986	mta = malloc(sizeof(u8) *
1987	    (ETH_ADDR_LEN * MAX_NUM_MULTICAST_ADDRESSES),
1988	    M_DEVBUF, M_NOWAIT | M_ZERO);
1989	if (mta == NULL)
1990		panic("em_set_multi memory failure\n");
1991
1992#if __FreeBSD_version < 800000
1993	IF_ADDR_LOCK(ifp);
1994#else
1995	if_maddr_rlock(ifp);
1996#endif
1997	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
1998		if (ifma->ifma_addr->sa_family != AF_LINK)
1999			continue;
2000
2001		if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
2002			break;
2003
2004		bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
2005		    &mta[mcnt * ETH_ADDR_LEN], ETH_ADDR_LEN);
2006		mcnt++;
2007	}
2008#if __FreeBSD_version < 800000
2009	IF_ADDR_UNLOCK(ifp);
2010#else
2011	if_maddr_runlock(ifp);
2012#endif
2013	if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) {
2014		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2015		reg_rctl |= E1000_RCTL_MPE;
2016		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2017	} else
2018		e1000_update_mc_addr_list(&adapter->hw, mta, mcnt);
2019
2020	if (adapter->hw.mac.type == e1000_82542 &&
2021	    adapter->hw.revision_id == E1000_REVISION_2) {
2022		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2023		reg_rctl &= ~E1000_RCTL_RST;
2024		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2025		msec_delay(5);
2026		if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
2027			e1000_pci_set_mwi(&adapter->hw);
2028	}
2029	free(mta, M_DEVBUF);
2030}
2031
2032
2033/*********************************************************************
2034 *  Timer routine
2035 *
2036 *  This routine checks for link status and updates statistics.
2037 *
2038 **********************************************************************/
2039
2040static void
2041em_local_timer(void *arg)
2042{
2043	struct adapter	*adapter = arg;
2044	struct ifnet	*ifp = adapter->ifp;
2045	struct tx_ring	*txr = adapter->tx_rings;
2046
2047	EM_CORE_LOCK_ASSERT(adapter);
2048
2049	em_update_link_status(adapter);
2050	em_update_stats_counters(adapter);
2051
2052	/* Reset LAA into RAR[0] on 82571 */
2053	if (e1000_get_laa_state_82571(&adapter->hw) == TRUE)
2054		e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);
2055
2056	if (em_display_debug_stats && (ifp->if_drv_flags & IFF_DRV_RUNNING))
2057		em_print_hw_stats(adapter);
2058
2059	/*
2060	** Check for time since any descriptor was cleaned
2061	*/
2062	for (int i = 0; i < adapter->num_queues; i++, txr++) {
2063		EM_TX_LOCK(txr);
2064		if (txr->watchdog_check == FALSE) {
2065			EM_TX_UNLOCK(txr);
2066			continue;
2067		}
2068		if ((ticks - txr->watchdog_time) > EM_WATCHDOG)
2069			goto hung;
2070		EM_TX_UNLOCK(txr);
2071	}
2072
2073	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
2074	return;
2075hung:
2076	device_printf(adapter->dev, "Watchdog timeout -- resetting\n");
2077	ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2078	adapter->watchdog_events++;
2079	EM_TX_UNLOCK(txr);
2080	em_init_locked(adapter);
2081}
2082
2083
2084static void
2085em_update_link_status(struct adapter *adapter)
2086{
2087	struct e1000_hw *hw = &adapter->hw;
2088	struct ifnet *ifp = adapter->ifp;
2089	device_t dev = adapter->dev;
2090	u32 link_check = 0;
2091
2092	/* Get the cached link value or read phy for real */
2093	switch (hw->phy.media_type) {
2094	case e1000_media_type_copper:
2095		if (hw->mac.get_link_status) {
2096			/* Do the work to read phy */
2097			e1000_check_for_link(hw);
2098			link_check = !hw->mac.get_link_status;
2099			if (link_check) /* ESB2 fix */
2100				e1000_cfg_on_link_up(hw);
2101		} else
2102			link_check = TRUE;
2103		break;
2104	case e1000_media_type_fiber:
2105		e1000_check_for_link(hw);
2106		link_check = (E1000_READ_REG(hw, E1000_STATUS) &
2107		    E1000_STATUS_LU);
2108		break;
2109	case e1000_media_type_internal_serdes:
2110		e1000_check_for_link(hw);
2111		link_check = adapter->hw.mac.serdes_has_link;
2112		break;
2113	default:
2114	case e1000_media_type_unknown:
2115		break;
2116	}
2117
2118	/* Now check for a transition */
2119	if (link_check && (adapter->link_active == 0)) {
2120		e1000_get_speed_and_duplex(hw, &adapter->link_speed,
2121		    &adapter->link_duplex);
2122		/* Check if we must disable SPEED_MODE bit on PCI-E */
2123		if ((adapter->link_speed != SPEED_1000) &&
2124		    ((hw->mac.type == e1000_82571) ||
2125		    (hw->mac.type == e1000_82572))) {
2126			int tarc0;
2127			tarc0 = E1000_READ_REG(hw, E1000_TARC(0));
2128			tarc0 &= ~SPEED_MODE_BIT;
2129			E1000_WRITE_REG(hw, E1000_TARC(0), tarc0);
2130		}
2131		if (bootverbose)
2132			device_printf(dev, "Link is up %d Mbps %s\n",
2133			    adapter->link_speed,
2134			    ((adapter->link_duplex == FULL_DUPLEX) ?
2135			    "Full Duplex" : "Half Duplex"));
2136		adapter->link_active = 1;
2137		adapter->smartspeed = 0;
2138		ifp->if_baudrate = adapter->link_speed * 1000000;
2139		if_link_state_change(ifp, LINK_STATE_UP);
2140	} else if (!link_check && (adapter->link_active == 1)) {
2141		ifp->if_baudrate = adapter->link_speed = 0;
2142		adapter->link_duplex = 0;
2143		if (bootverbose)
2144			device_printf(dev, "Link is Down\n");
2145		adapter->link_active = 0;
2146		/* Link down, disable watchdog */
2147		// JFV change later
2148		//adapter->watchdog_check = FALSE;
2149		if_link_state_change(ifp, LINK_STATE_DOWN);
2150	}
2151}
2152
2153/*********************************************************************
2154 *
2155 *  This routine disables all traffic on the adapter by issuing a
2156 *  global reset on the MAC and deallocates TX/RX buffers.
2157 *
2158 *  This routine should always be called with BOTH the CORE
2159 *  and TX locks.
2160 **********************************************************************/
2161
2162static void
2163em_stop(void *arg)
2164{
2165	struct adapter	*adapter = arg;
2166	struct ifnet	*ifp = adapter->ifp;
2167	struct tx_ring	*txr = adapter->tx_rings;
2168
2169	EM_CORE_LOCK_ASSERT(adapter);
2170
2171	INIT_DEBUGOUT("em_stop: begin");
2172
2173	em_disable_intr(adapter);
2174	callout_stop(&adapter->timer);
2175
2176	/* Tell the stack that the interface is no longer active */
2177	ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
2178
2179	/* Unarm watchdog timer. */
2180	for (int i = 0; i < adapter->num_queues; i++, txr++) {
2181		EM_TX_LOCK(txr);
2182		txr->watchdog_check = FALSE;
2183		EM_TX_UNLOCK(txr);
2184	}
2185
2186	e1000_reset_hw(&adapter->hw);
2187	E1000_WRITE_REG(&adapter->hw, E1000_WUC, 0);
2188
2189	e1000_led_off(&adapter->hw);
2190	e1000_cleanup_led(&adapter->hw);
2191}
2192
2193
2194/*********************************************************************
2195 *
2196 *  Determine hardware revision.
2197 *
2198 **********************************************************************/
2199static void
2200em_identify_hardware(struct adapter *adapter)
2201{
2202	device_t dev = adapter->dev;
2203
2204	/* Make sure our PCI config space has the necessary stuff set */
2205	adapter->hw.bus.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2);
2206	if (!((adapter->hw.bus.pci_cmd_word & PCIM_CMD_BUSMASTEREN) &&
2207	    (adapter->hw.bus.pci_cmd_word & PCIM_CMD_MEMEN))) {
2208		device_printf(dev, "Memory Access and/or Bus Master bits "
2209		    "were not set!\n");
2210		adapter->hw.bus.pci_cmd_word |=
2211		(PCIM_CMD_BUSMASTEREN | PCIM_CMD_MEMEN);
2212		pci_write_config(dev, PCIR_COMMAND,
2213		    adapter->hw.bus.pci_cmd_word, 2);
2214	}
2215
2216	/* Save off the information about this board */
2217	adapter->hw.vendor_id = pci_get_vendor(dev);
2218	adapter->hw.device_id = pci_get_device(dev);
2219	adapter->hw.revision_id = pci_read_config(dev, PCIR_REVID, 1);
2220	adapter->hw.subsystem_vendor_id =
2221	    pci_read_config(dev, PCIR_SUBVEND_0, 2);
2222	adapter->hw.subsystem_device_id =
2223	    pci_read_config(dev, PCIR_SUBDEV_0, 2);
2224
2225	/* Do Shared Code Init and Setup */
2226	if (e1000_set_mac_type(&adapter->hw)) {
2227		device_printf(dev, "Setup init failure\n");
2228		return;
2229	}
2230}
2231
2232static int
2233em_allocate_pci_resources(struct adapter *adapter)
2234{
2235	device_t	dev = adapter->dev;
2236	int		rid;
2237
2238	rid = PCIR_BAR(0);
2239	adapter->memory = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
2240	    &rid, RF_ACTIVE);
2241	if (adapter->memory == NULL) {
2242		device_printf(dev, "Unable to allocate bus resource: memory\n");
2243		return (ENXIO);
2244	}
2245	adapter->osdep.mem_bus_space_tag =
2246	    rman_get_bustag(adapter->memory);
2247	adapter->osdep.mem_bus_space_handle =
2248	    rman_get_bushandle(adapter->memory);
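	/*
	 * Note: hw_addr points at the bus space handle, not a mapped
	 * address; the shared code's register macros go through the
	 * osdep tag/handle pair, so this should not be dereferenced
	 * as a plain pointer.
	 */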
2249	adapter->hw.hw_addr = (u8 *)&adapter->osdep.mem_bus_space_handle;
2250
2251	/* Default to a single queue */
2252	adapter->num_queues = 1;
2253
2254	/*
2255	 * Setup MSI/X or MSI if PCI Express
2256	 */
2257	adapter->msix = em_setup_msix(adapter);
2258
2259	adapter->hw.back = &adapter->osdep;
2260
2261	return (0);
2262}
2263
2264/*********************************************************************
2265 *
2266 *  Setup the Legacy or MSI Interrupt handler
2267 *
2268 **********************************************************************/
2269int
2270em_allocate_legacy(struct adapter *adapter)
2271{
2272	device_t dev = adapter->dev;
2273	int error, rid = 0;
2274
2275	/* Manually turn off all interrupts */
2276	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2277
2278	if (adapter->msix == 1) /* using MSI */
2279		rid = 1;
2280	/* We allocate a single interrupt resource */
2281	adapter->res = bus_alloc_resource_any(dev,
2282	    SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2283	if (adapter->res == NULL) {
2284		device_printf(dev, "Unable to allocate bus resource: "
2285		    "interrupt\n");
2286		return (ENXIO);
2287	}
2288
2289	/*
2290	 * Allocate a fast interrupt and the associated
2291	 * deferred processing contexts.
2292	 */
2293	TASK_INIT(&adapter->que_task, 0, em_handle_que, adapter);
2294	TASK_INIT(&adapter->link_task, 0, em_handle_link, adapter);
2295	adapter->tq = taskqueue_create_fast("em_taskq", M_NOWAIT,
2296	    taskqueue_thread_enqueue, &adapter->tq);
2297	taskqueue_start_threads(&adapter->tq, 1, PI_NET, "%s taskq",
2298	    device_get_nameunit(adapter->dev));
2299	if ((error = bus_setup_intr(dev, adapter->res, INTR_TYPE_NET,
2300	    em_irq_fast, NULL, adapter, &adapter->tag)) != 0) {
2301		device_printf(dev, "Failed to register fast interrupt "
2302			    "handler: %d\n", error);
2303		taskqueue_free(adapter->tq);
2304		adapter->tq = NULL;
2305		return (error);
2306	}
2307
2308	return (0);
2309}
2310
2311/*********************************************************************
2312 *
2313 *  Setup the MSIX Interrupt handlers
2314 *   This is not really Multiqueue, rather
2315 *   it's just multiple interrupt vectors.
2316 *
2317 **********************************************************************/
2318int
2319em_allocate_msix(struct adapter *adapter)
2320{
2321	device_t	dev = adapter->dev;
2322	struct		tx_ring *txr = adapter->tx_rings;
2323	struct		rx_ring *rxr = adapter->rx_rings;
2324	int		error, rid, vector = 0;
2325
2326
2327	/* Make sure all interrupts are disabled */
2328	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2329
2330	/* First set up ring resources */
2331	for (int i = 0; i < adapter->num_queues; i++, txr++, rxr++) {
2332
2333		/* RX ring */
2334		rid = vector + 1;
2335
2336		rxr->res = bus_alloc_resource_any(dev,
2337		    SYS_RES_IRQ, &rid, RF_ACTIVE);
2338		if (rxr->res == NULL) {
2339			device_printf(dev,
2340			    "Unable to allocate bus resource: "
2341			    "RX MSIX Interrupt %d\n", i);
2342			return (ENXIO);
2343		}
2344		if ((error = bus_setup_intr(dev, rxr->res,
2345		    INTR_TYPE_NET | INTR_MPSAFE, NULL, em_msix_rx,
2346		    rxr, &rxr->tag)) != 0) {
2347			device_printf(dev, "Failed to register RX handler\n");
2348			return (error);
2349		}
2350		rxr->msix = vector++; /* NOTE increment vector for TX */
2351		TASK_INIT(&rxr->rx_task, 0, em_handle_rx, rxr);
2352		rxr->tq = taskqueue_create_fast("em_rxq", M_NOWAIT,
2353		    taskqueue_thread_enqueue, &rxr->tq);
2354		taskqueue_start_threads(&rxr->tq, 1, PI_NET, "%s rxq",
2355		    device_get_nameunit(adapter->dev));
2356		/*
2357		** Set the bit to enable interrupt
2358		** in E1000_IMS -- bits 20 and 21
2359		** are for RX0 and RX1, note this has
2360		** NOTHING to do with the MSIX vector
2361		*/
2362		rxr->ims = 1 << (20 + i);
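		/*
		** Build the IVAR shadow value: each 4-bit field holds
		** the 3-bit MSIX vector for a cause plus a valid bit
		** (0x8); the RX queue fields sit in the low two nibbles.
		*/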
2363		adapter->ivars |= (8 | rxr->msix) << (i * 4);
2364
2365		/* TX ring */
2366		rid = vector + 1;
2367		txr->res = bus_alloc_resource_any(dev,
2368		    SYS_RES_IRQ, &rid, RF_ACTIVE);
2369		if (txr->res == NULL) {
2370			device_printf(dev,
2371			    "Unable to allocate bus resource: "
2372			    "TX MSIX Interrupt %d\n", i);
2373			return (ENXIO);
2374		}
2375		if ((error = bus_setup_intr(dev, txr->res,
2376		    INTR_TYPE_NET | INTR_MPSAFE, NULL, em_msix_tx,
2377		    txr, &txr->tag)) != 0) {
2378			device_printf(dev, "Failed to register TX handler\n");
2379			return (error);
2380		}
2381		txr->msix = vector++; /* Increment vector for next pass */
2382		TASK_INIT(&txr->tx_task, 0, em_handle_tx, txr);
2383		txr->tq = taskqueue_create_fast("em_txq", M_NOWAIT,
2384		    taskqueue_thread_enqueue, &txr->tq);
2385		taskqueue_start_threads(&txr->tq, 1, PI_NET, "%s txq",
2386		    device_get_nameunit(adapter->dev));
2387		/*
2388		** Set the bit to enable interrupt
2389		** in E1000_IMS -- bits 22 and 23
2390		** are for TX0 and TX1, note this has
2391		** NOTHING to do with the MSIX vector
2392		*/
2393		txr->ims = 1 << (22 + i);
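		/* TX queue fields of the IVAR shadow sit in nibbles 2 and 3. */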
2394		adapter->ivars |= (8 | txr->msix) << (8 + (i * 4));
2395	}
2396
2397	/* Link interrupt */
2398	++rid;
2399	adapter->res = bus_alloc_resource_any(dev,
2400	    SYS_RES_IRQ, &rid, RF_ACTIVE);
2401	if (!adapter->res) {
2402		device_printf(dev, "Unable to allocate "
2403		    "bus resource: Link interrupt [%d]\n", rid);
2404		return (ENXIO);
2405	}
2406	/* Set the link handler function */
2407	error = bus_setup_intr(dev, adapter->res,
2408	    INTR_TYPE_NET | INTR_MPSAFE, NULL,
2409	    em_msix_link, adapter, &adapter->tag);
2410	if (error) {
2411		adapter->res = NULL;
2412		device_printf(dev, "Failed to register LINK handler\n");
2413		return (error);
2414	}
2415	adapter->linkvec = vector;
2416	adapter->ivars |=  (8 | vector) << 16;
2417	adapter->ivars |= 0x80000000;
2418	TASK_INIT(&adapter->link_task, 0, em_handle_link, adapter);
2419	adapter->tq = taskqueue_create_fast("em_link", M_NOWAIT,
2420	    taskqueue_thread_enqueue, &adapter->tq);
2421	taskqueue_start_threads(&adapter->tq, 1, PI_NET, "%s linkq",
2422	    device_get_nameunit(adapter->dev));
2423
2424	return (0);
2425}
2426
2427
2428static void
2429em_free_pci_resources(struct adapter *adapter)
2430{
2431	device_t	dev = adapter->dev;
2432	struct tx_ring	*txr;
2433	struct rx_ring	*rxr;
2434	int		rid;
2435
2436
2437	/*
2438	** Release all the queue interrupt resources:
2439	*/
2440	for (int i = 0; i < adapter->num_queues; i++) {
2441		txr = &adapter->tx_rings[i];
2442		rxr = &adapter->rx_rings[i];
2443		rid = txr->msix + 1;
2444		if (txr->tag != NULL) {
2445			bus_teardown_intr(dev, txr->res, txr->tag);
2446			txr->tag = NULL;
2447		}
2448		if (txr->res != NULL)
2449			bus_release_resource(dev, SYS_RES_IRQ,
2450			    rid, txr->res);
2451		rid = rxr->msix + 1;
2452		if (rxr->tag != NULL) {
2453			bus_teardown_intr(dev, rxr->res, rxr->tag);
2454			rxr->tag = NULL;
2455		}
2456		if (rxr->res != NULL)
2457			bus_release_resource(dev, SYS_RES_IRQ,
2458			    rid, rxr->res);
2459	}
2460
2461	if (adapter->linkvec) /* we are doing MSIX */
2462		rid = adapter->linkvec + 1;
2463	else
2464		rid = (adapter->msix != 0) ? 1 : 0;
2465
2466	if (adapter->tag != NULL) {
2467		bus_teardown_intr(dev, adapter->res, adapter->tag);
2468		adapter->tag = NULL;
2469	}
2470
2471	if (adapter->res != NULL)
2472		bus_release_resource(dev, SYS_RES_IRQ, rid, adapter->res);
2473
2474
2475	if (adapter->msix)
2476		pci_release_msi(dev);
2477
2478	if (adapter->msix_mem != NULL)
2479		bus_release_resource(dev, SYS_RES_MEMORY,
2480		    PCIR_BAR(EM_MSIX_BAR), adapter->msix_mem);
2481
2482	if (adapter->memory != NULL)
2483		bus_release_resource(dev, SYS_RES_MEMORY,
2484		    PCIR_BAR(0), adapter->memory);
2485
2486	if (adapter->flash != NULL)
2487		bus_release_resource(dev, SYS_RES_MEMORY,
2488		    EM_FLASH, adapter->flash);
2489}
2490
2491/*
2492 * Setup MSI or MSI/X
2493 */
2494static int
2495em_setup_msix(struct adapter *adapter)
2496{
2497	device_t dev = adapter->dev;
2498	int val = 0;
2499
2500
2501	/* Setup MSI/X for Hartwell */
2502	if ((adapter->hw.mac.type == e1000_82574) &&
2503	    (em_enable_msix == TRUE)) {
2504		/* Map the MSIX BAR */
2505		int rid = PCIR_BAR(EM_MSIX_BAR);
2506		adapter->msix_mem = bus_alloc_resource_any(dev,
2507		    SYS_RES_MEMORY, &rid, RF_ACTIVE);
2508		if (!adapter->msix_mem) {
2509			/* May not be enabled */
2510			device_printf(adapter->dev,
2511			    "Unable to map MSIX table\n");
2512			goto msi;
2513		}
2514		val = pci_msix_count(dev);
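		/* The 82574 provides 5 MSIX vectors: 2 RX, 2 TX and 1 link. */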
2515		if (val != 5) {
2516			bus_release_resource(dev, SYS_RES_MEMORY,
2517			    PCIR_BAR(EM_MSIX_BAR), adapter->msix_mem);
2518			adapter->msix_mem = NULL;
2519			device_printf(adapter->dev,
2520			    "MSIX vectors wrong, using MSI\n");
2521			goto msi;
2522		}
2523		if (em_msix_queues == 2) {
2524			val = 5;
2525			adapter->num_queues = 2;
2526		} else {
2527			val = 3;
2528			adapter->num_queues = 1;
2529		}
2530		if (pci_alloc_msix(dev, &val) == 0) {
2531			device_printf(adapter->dev,
2532			    "Using MSIX interrupts "
2533			    "with %d vectors\n", val);
2534		}
2535
2536		return (val);
2537	}
2538msi:
2539	val = pci_msi_count(dev);
2540	if (val == 1 && pci_alloc_msi(dev, &val) == 0) {
2541		adapter->msix = 1;
2542		device_printf(adapter->dev, "Using MSI interrupt\n");
2543		return (val);
2544	}
2545	/* Should only happen due to manual intervention */
2546	device_printf(adapter->dev, "Setup MSIX failure\n");
2547	return (0);
2548}
2549
2550
2551/*********************************************************************
2552 *
2553 *  Initialize the hardware to a configuration
2554 *  as specified by the adapter structure.
2555 *
2556 **********************************************************************/
2557static void
2558em_reset(struct adapter *adapter)
2559{
2560	device_t	dev = adapter->dev;
2561	struct e1000_hw	*hw = &adapter->hw;
2562	u16		rx_buffer_size;
2563
2564	INIT_DEBUGOUT("em_reset: begin");
2565
2566	/* Set up smart power down as default off on newer adapters. */
2567	if (!em_smart_pwr_down && (hw->mac.type == e1000_82571 ||
2568	    hw->mac.type == e1000_82572)) {
2569		u16 phy_tmp = 0;
2570
2571		/* Speed up time to link by disabling smart power down. */
2572		e1000_read_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, &phy_tmp);
2573		phy_tmp &= ~IGP02E1000_PM_SPD;
2574		e1000_write_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, phy_tmp);
2575	}
2576
2577	/*
2578	 * These parameters control the automatic generation (Tx) and
2579	 * response (Rx) to Ethernet PAUSE frames.
2580	 * - High water mark should allow for at least two frames to be
2581	 *   received after sending an XOFF.
2582	 * - Low water mark works best when it is very near the high water mark.
2583	 *   This allows the receiver to restart by sending XON when it has
2584	 *   drained a bit. Here we use an arbitrary value of 1500 which will
2585	 *   restart after one full frame is pulled from the buffer. There
2586	 *   could be several smaller frames in the buffer and if so they will
2587	 *   not trigger the XON until their total number reduces the buffer
2588	 *   by 1500.
2589	 * - The pause time is fairly large at 1000 x 512ns = 512 usec.
2590	 */
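	/* The low word of E1000_PBA is the RX packet buffer size in KB. */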
2591	rx_buffer_size = ((E1000_READ_REG(hw, E1000_PBA) & 0xffff) << 10);
2592
2593	hw->fc.high_water = rx_buffer_size -
2594	    roundup2(adapter->max_frame_size, 1024);
2595	hw->fc.low_water = hw->fc.high_water - 1500;
2596
2597	if (hw->mac.type == e1000_80003es2lan)
2598		hw->fc.pause_time = 0xFFFF;
2599	else
2600		hw->fc.pause_time = EM_FC_PAUSE_TIME;
2601
2602	hw->fc.send_xon = TRUE;
2603
2604	/* Set flow control, use the tunable value if sane */
2605	if ((em_fc_setting >= 0) && (em_fc_setting < 4))
2606		hw->fc.requested_mode = em_fc_setting;
2607	else
2608		hw->fc.requested_mode = e1000_fc_none;
2609
2610	/* Override - workaround for PCHLAN issue */
2611	if (hw->mac.type == e1000_pchlan)
2612		hw->fc.requested_mode = e1000_fc_rx_pause;
2613
2614	/* Issue a global reset */
2615	e1000_reset_hw(hw);
2616	E1000_WRITE_REG(hw, E1000_WUC, 0);
2617
2618	if (e1000_init_hw(hw) < 0) {
2619		device_printf(dev, "Hardware Initialization Failed\n");
2620		return;
2621	}
2622
2623	E1000_WRITE_REG(hw, E1000_VET, ETHERTYPE_VLAN);
2624	e1000_get_phy_info(hw);
2625	e1000_check_for_link(hw);
2626	return;
2627}
2628
2629/*********************************************************************
2630 *
2631 *  Setup networking device structure and register an interface.
2632 *
2633 **********************************************************************/
2634static void
2635em_setup_interface(device_t dev, struct adapter *adapter)
2636{
2637	struct ifnet   *ifp;
2638
2639	INIT_DEBUGOUT("em_setup_interface: begin");
2640
2641	ifp = adapter->ifp = if_alloc(IFT_ETHER);
2642	if (ifp == NULL)
2643		panic("%s: can not if_alloc()", device_get_nameunit(dev));
2644	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
2645	ifp->if_mtu = ETHERMTU;
2646	ifp->if_init =  em_init;
2647	ifp->if_softc = adapter;
2648	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
2649	ifp->if_ioctl = em_ioctl;
2650	ifp->if_start = em_start;
2651	IFQ_SET_MAXLEN(&ifp->if_snd, adapter->num_tx_desc - 1);
2652	ifp->if_snd.ifq_drv_maxlen = adapter->num_tx_desc - 1;
2653	IFQ_SET_READY(&ifp->if_snd);
2654
2655	ether_ifattach(ifp, adapter->hw.mac.addr);
2656
2657	ifp->if_capabilities = ifp->if_capenable = 0;
2658
2659#ifdef EM_MULTIQUEUE
2660	/* Multiqueue tx functions */
2661	ifp->if_transmit = em_mq_start;
2662	ifp->if_qflush = em_qflush;
2663#endif
2664
2665	ifp->if_capabilities |= IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM;
2666	ifp->if_capenable |= IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM;
2667
2668	/* Enable TSO by default, can disable with ifconfig */
2669	ifp->if_capabilities |= IFCAP_TSO4;
2670	ifp->if_capenable |= IFCAP_TSO4;
2671
2672	/*
2673	 * Tell the upper layer(s) we
2674	 * support full VLAN capability
2675	 */
2676	ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);
2677	ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU;
2678	ifp->if_capenable |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU;
2679
2680	/*
2681	** Don't turn this on by default: if vlans are
2682	** created on another pseudo device (e.g. lagg)
2683	** then vlan events are not passed through, breaking
2684	** that operation, but with HW FILTER off it works. If
2685	** you are using vlans directly on the em driver you can
2686	** enable this and get full hardware tag filtering.
2687	*/
2688	ifp->if_capabilities |= IFCAP_VLAN_HWFILTER;
2689
2690#ifdef DEVICE_POLLING
2691	ifp->if_capabilities |= IFCAP_POLLING;
2692#endif
2693
2694	/* Enable All WOL methods by default */
2695	if (adapter->wol) {
2696		ifp->if_capabilities |= IFCAP_WOL;
2697		ifp->if_capenable |= IFCAP_WOL;
2698	}
2699
2700	/*
2701	 * Specify the media types supported by this adapter and register
2702	 * callbacks to update media and link information
2703	 */
2704	ifmedia_init(&adapter->media, IFM_IMASK,
2705	    em_media_change, em_media_status);
2706	if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
2707	    (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
2708		u_char fiber_type = IFM_1000_SX;	/* default type */
2709
2710		ifmedia_add(&adapter->media, IFM_ETHER | fiber_type | IFM_FDX,
2711			    0, NULL);
2712		ifmedia_add(&adapter->media, IFM_ETHER | fiber_type, 0, NULL);
2713	} else {
2714		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T, 0, NULL);
2715		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T | IFM_FDX,
2716			    0, NULL);
2717		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX,
2718			    0, NULL);
2719		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
2720			    0, NULL);
2721		if (adapter->hw.phy.type != e1000_phy_ife) {
2722			ifmedia_add(&adapter->media,
2723				IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
2724			ifmedia_add(&adapter->media,
2725				IFM_ETHER | IFM_1000_T, 0, NULL);
2726		}
2727	}
2728	ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
2729	ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
2730}
2731
2732
2733/*
2734 * Manage DMA'able memory.
2735 */
2736static void
2737em_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
2738{
2739	if (error)
2740		return;
2741	*(bus_addr_t *) arg = segs[0].ds_addr;
2742}
2743
2744static int
2745em_dma_malloc(struct adapter *adapter, bus_size_t size,
2746        struct em_dma_alloc *dma, int mapflags)
2747{
2748	int error;
2749
2750	error = bus_dma_tag_create(bus_get_dma_tag(adapter->dev), /* parent */
2751				EM_DBA_ALIGN, 0,	/* alignment, bounds */
2752				BUS_SPACE_MAXADDR,	/* lowaddr */
2753				BUS_SPACE_MAXADDR,	/* highaddr */
2754				NULL, NULL,		/* filter, filterarg */
2755				size,			/* maxsize */
2756				1,			/* nsegments */
2757				size,			/* maxsegsize */
2758				0,			/* flags */
2759				NULL,			/* lockfunc */
2760				NULL,			/* lockarg */
2761				&dma->dma_tag);
2762	if (error) {
2763		device_printf(adapter->dev,
2764		    "%s: bus_dma_tag_create failed: %d\n",
2765		    __func__, error);
2766		goto fail_0;
2767	}
2768
2769	error = bus_dmamem_alloc(dma->dma_tag, (void**) &dma->dma_vaddr,
2770	    BUS_DMA_NOWAIT | BUS_DMA_COHERENT, &dma->dma_map);
2771	if (error) {
2772		device_printf(adapter->dev,
2773		    "%s: bus_dmamem_alloc(%ju) failed: %d\n",
2774		    __func__, (uintmax_t)size, error);
2775		goto fail_2;
2776	}
2777
2778	dma->dma_paddr = 0;
2779	error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
2780	    size, em_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT);
2781	if (error || dma->dma_paddr == 0) {
2782		device_printf(adapter->dev,
2783		    "%s: bus_dmamap_load failed: %d\n",
2784		    __func__, error);
2785		goto fail_3;
2786	}
2787
2788	return (0);
2789
2790fail_3:
2791	bus_dmamap_unload(dma->dma_tag, dma->dma_map);
2792fail_2:
2793	bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
2794	bus_dma_tag_destroy(dma->dma_tag);
2795fail_0:
2796	dma->dma_map = NULL;
2797	dma->dma_tag = NULL;
2798
2799	return (error);
2800}
2801
2802static void
2803em_dma_free(struct adapter *adapter, struct em_dma_alloc *dma)
2804{
2805	if (dma->dma_tag == NULL)
2806		return;
2807	if (dma->dma_map != NULL) {
2808		bus_dmamap_sync(dma->dma_tag, dma->dma_map,
2809		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
2810		bus_dmamap_unload(dma->dma_tag, dma->dma_map);
2811		bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
2812		dma->dma_map = NULL;
2813	}
2814	bus_dma_tag_destroy(dma->dma_tag);
2815	dma->dma_tag = NULL;
2816}
2817
2818
2819/*********************************************************************
2820 *
2821 *  Allocate memory for the transmit and receive rings, and then
2822 *  the descriptors associated with each, called only once at attach.
2823 *
2824 **********************************************************************/
2825static int
2826em_allocate_queues(struct adapter *adapter)
2827{
2828	device_t		dev = adapter->dev;
2829	struct tx_ring		*txr = NULL;
2830	struct rx_ring		*rxr = NULL;
2831	int rsize, tsize, error = E1000_SUCCESS;
2832	int txconf = 0, rxconf = 0;
2833
2834
2835	/* Allocate the TX ring struct memory */
2836	if (!(adapter->tx_rings =
2837	    (struct tx_ring *) malloc(sizeof(struct tx_ring) *
2838	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2839		device_printf(dev, "Unable to allocate TX ring memory\n");
2840		error = ENOMEM;
2841		goto fail;
2842	}
2843
2844	/* Now allocate the RX */
2845	if (!(adapter->rx_rings =
2846	    (struct rx_ring *) malloc(sizeof(struct rx_ring) *
2847	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2848		device_printf(dev, "Unable to allocate RX ring memory\n");
2849		error = ENOMEM;
2850		goto rx_fail;
2851	}
2852
2853	tsize = roundup2(adapter->num_tx_desc *
2854	    sizeof(struct e1000_tx_desc), EM_DBA_ALIGN);
2855	/*
2856	 * Now set up the TX queues, txconf is needed to handle the
2857	 * possibility that things fail midcourse and we need to
2858	 * undo memory gracefully
2859	 */
2860	for (int i = 0; i < adapter->num_queues; i++, txconf++) {
2861		/* Set up some basics */
2862		txr = &adapter->tx_rings[i];
2863		txr->adapter = adapter;
2864		txr->me = i;
2865
2866		/* Initialize the TX lock */
2867		snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
2868		    device_get_nameunit(dev), txr->me);
2869		mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF);
2870
2871		if (em_dma_malloc(adapter, tsize,
2872			&txr->txdma, BUS_DMA_NOWAIT)) {
2873			device_printf(dev,
2874			    "Unable to allocate TX Descriptor memory\n");
2875			error = ENOMEM;
2876			goto err_tx_desc;
2877		}
2878		txr->tx_base = (struct e1000_tx_desc *)txr->txdma.dma_vaddr;
2879		bzero((void *)txr->tx_base, tsize);
2880
2881		if (em_allocate_transmit_buffers(txr)) {
2882			device_printf(dev,
2883			    "Critical Failure setting up transmit buffers\n");
2884			error = ENOMEM;
2885			goto err_tx_desc;
2886		}
2887#if __FreeBSD_version >= 800000
2888		/* Allocate a buf ring */
2889		txr->br = buf_ring_alloc(4096, M_DEVBUF,
2890		    M_WAITOK, &txr->tx_mtx);
2891#endif
2892	}
2893
2894	/*
2895	 * Next the RX queues...
2896	 */
2897	rsize = roundup2(adapter->num_rx_desc *
2898	    sizeof(struct e1000_rx_desc), EM_DBA_ALIGN);
2899	for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
2900		rxr = &adapter->rx_rings[i];
2901		rxr->adapter = adapter;
2902		rxr->me = i;
2903
2904		/* Initialize the RX lock */
2905		snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
2906		    device_get_nameunit(dev), rxr->me);
2907		mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF);
2908
2909		if (em_dma_malloc(adapter, rsize,
2910			&rxr->rxdma, BUS_DMA_NOWAIT)) {
2911			device_printf(dev,
2912			    "Unable to allocate RxDescriptor memory\n");
2913			error = ENOMEM;
2914			goto err_rx_desc;
2915		}
2916		rxr->rx_base = (struct e1000_rx_desc *)rxr->rxdma.dma_vaddr;
2917		bzero((void *)rxr->rx_base, rsize);
2918
2919		/* Allocate receive buffers for the ring */
2920		if (em_allocate_receive_buffers(rxr)) {
2921			device_printf(dev,
2922			    "Critical Failure setting up receive buffers\n");
2923			error = ENOMEM;
2924			goto err_rx_desc;
2925		}
2926	}
2927
2928	return (0);
2929
2930err_rx_desc:
2931	for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
2932		em_dma_free(adapter, &rxr->rxdma);
2933err_tx_desc:
2934	for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
2935		em_dma_free(adapter, &txr->txdma);
2936	free(adapter->rx_rings, M_DEVBUF);
2937rx_fail:
#if __FreeBSD_version >= 800000
	if (txr != NULL && txr->br != NULL)
2938		buf_ring_free(txr->br, M_DEVBUF);
#endif
2939	free(adapter->tx_rings, M_DEVBUF);
2940fail:
2941	return (error);
2942}
2943
2944
2945/*********************************************************************
2946 *
2947 *  Allocate memory for tx_buffer structures. The tx_buffer stores all
2948 *  the information needed to transmit a packet on the wire. This is
2949 *  called only once at attach, setup is done every reset.
2950 *
2951 **********************************************************************/
2952static int
2953em_allocate_transmit_buffers(struct tx_ring *txr)
2954{
2955	struct adapter *adapter = txr->adapter;
2956	device_t dev = adapter->dev;
2957	struct em_buffer *txbuf;
2958	int error, i;
2959
2960	/*
2961	 * Setup DMA descriptor areas.
2962	 */
2963	if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
2964			       1, 0,			/* alignment, bounds */
2965			       BUS_SPACE_MAXADDR,	/* lowaddr */
2966			       BUS_SPACE_MAXADDR,	/* highaddr */
2967			       NULL, NULL,		/* filter, filterarg */
2968			       EM_TSO_SIZE,		/* maxsize */
2969			       EM_MAX_SCATTER,		/* nsegments */
2970			       PAGE_SIZE,		/* maxsegsize */
2971			       0,			/* flags */
2972			       NULL,			/* lockfunc */
2973			       NULL,			/* lockfuncarg */
2974			       &txr->txtag))) {
2975		device_printf(dev, "Unable to allocate TX DMA tag\n");
2976		goto fail;
2977	}
2978
2979	if (!(txr->tx_buffers =
2980	    (struct em_buffer *) malloc(sizeof(struct em_buffer) *
2981	    adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2982		device_printf(dev, "Unable to allocate tx_buffer memory\n");
2983		error = ENOMEM;
2984		goto fail;
2985	}
2986
2987	/* Create the descriptor buffer dma maps */
2988	txbuf = txr->tx_buffers;
2989	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
2990		error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
2991		if (error != 0) {
2992			device_printf(dev, "Unable to create TX DMA map\n");
2993			goto fail;
2994		}
2995	}
2996
2997	return (0);
2998fail:
2999	/* We free everything; this handles the case where we fail in the middle */
3000	em_free_transmit_structures(adapter);
3001	return (error);
3002}
3003
3004/*********************************************************************
3005 *
3006 *  Initialize a transmit ring.
3007 *
3008 **********************************************************************/
3009static void
3010em_setup_transmit_ring(struct tx_ring *txr)
3011{
3012	struct adapter *adapter = txr->adapter;
3013	struct em_buffer *txbuf;
3014	int i;
3015
3016	/* Clear the old descriptor contents */
3017	EM_TX_LOCK(txr);
3018	bzero((void *)txr->tx_base,
3019	      (sizeof(struct e1000_tx_desc)) * adapter->num_tx_desc);
3020	/* Reset indices */
3021	txr->next_avail_desc = 0;
3022	txr->next_to_clean = 0;
3023
3024	/* Free any existing tx buffers. */
3025	txbuf = txr->tx_buffers;
3026	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3027		if (txbuf->m_head != NULL) {
3028			bus_dmamap_sync(txr->txtag, txbuf->map,
3029			    BUS_DMASYNC_POSTWRITE);
3030			bus_dmamap_unload(txr->txtag, txbuf->map);
3031			m_freem(txbuf->m_head);
3032			txbuf->m_head = NULL;
3033		}
3034		/* clear the watch index */
3035		txbuf->next_eop = -1;
3036	}
3037
3038	/* Set number of descriptors available */
3039	txr->tx_avail = adapter->num_tx_desc;
3040
3041	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3042	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3043	EM_TX_UNLOCK(txr);
3044}
3045
3046/*********************************************************************
3047 *
3048 *  Initialize all transmit rings.
3049 *
3050 **********************************************************************/
3051static void
3052em_setup_transmit_structures(struct adapter *adapter)
3053{
3054	struct tx_ring *txr = adapter->tx_rings;
3055
3056	for (int i = 0; i < adapter->num_queues; i++, txr++)
3057		em_setup_transmit_ring(txr);
3058
3059	return;
3060}
3061
3062/*********************************************************************
3063 *
3064 *  Enable transmit unit.
3065 *
3066 **********************************************************************/
3067static void
3068em_initialize_transmit_unit(struct adapter *adapter)
3069{
3070	struct tx_ring	*txr = adapter->tx_rings;
3071	struct e1000_hw	*hw = &adapter->hw;
3072	u32	tctl, tarc, tipg = 0;
3073
3074	INIT_DEBUGOUT("em_initialize_transmit_unit: begin");
3075
3076	for (int i = 0; i < adapter->num_queues; i++, txr++) {
3077		u64 bus_addr = txr->txdma.dma_paddr;
3078		/* Base and Len of TX Ring */
3079		E1000_WRITE_REG(hw, E1000_TDLEN(i),
3080	    	    adapter->num_tx_desc * sizeof(struct e1000_tx_desc));
3081		E1000_WRITE_REG(hw, E1000_TDBAH(i),
3082	    	    (u32)(bus_addr >> 32));
3083		E1000_WRITE_REG(hw, E1000_TDBAL(i),
3084	    	    (u32)bus_addr);
3085		/* Init the HEAD/TAIL indices */
3086		E1000_WRITE_REG(hw, E1000_TDT(i), 0);
3087		E1000_WRITE_REG(hw, E1000_TDH(i), 0);
3088
3089		HW_DEBUGOUT2("Base = %x, Length = %x\n",
3090		    E1000_READ_REG(&adapter->hw, E1000_TDBAL(i)),
3091		    E1000_READ_REG(&adapter->hw, E1000_TDLEN(i)));
3092
3093		txr->watchdog_check = FALSE;
3094	}
3095
3096	/* Set the default values for the Tx Inter Packet Gap timer */
3097	switch (adapter->hw.mac.type) {
3098	case e1000_82542:
3099		tipg = DEFAULT_82542_TIPG_IPGT;
3100		tipg |= DEFAULT_82542_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT;
3101		tipg |= DEFAULT_82542_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT;
3102		break;
3103	case e1000_80003es2lan:
3104		tipg = DEFAULT_82543_TIPG_IPGR1;
3105		tipg |= DEFAULT_80003ES2LAN_TIPG_IPGR2 <<
3106		    E1000_TIPG_IPGR2_SHIFT;
3107		break;
3108	default:
3109		if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
3110		    (adapter->hw.phy.media_type ==
3111		    e1000_media_type_internal_serdes))
3112			tipg = DEFAULT_82543_TIPG_IPGT_FIBER;
3113		else
3114			tipg = DEFAULT_82543_TIPG_IPGT_COPPER;
3115		tipg |= DEFAULT_82543_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT;
3116		tipg |= DEFAULT_82543_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT;
3117	}
3118
3119	E1000_WRITE_REG(&adapter->hw, E1000_TIPG, tipg);
3120	E1000_WRITE_REG(&adapter->hw, E1000_TIDV, adapter->tx_int_delay.value);
3121
3122	if (adapter->hw.mac.type >= e1000_82540)
3123		E1000_WRITE_REG(&adapter->hw, E1000_TADV,
3124		    adapter->tx_abs_int_delay.value);
3125
3126	if ((adapter->hw.mac.type == e1000_82571) ||
3127	    (adapter->hw.mac.type == e1000_82572)) {
3128		tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
3129		tarc |= SPEED_MODE_BIT;
3130		E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3131	} else if (adapter->hw.mac.type == e1000_80003es2lan) {
3132		tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
3133		tarc |= 1;
3134		E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3135		tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(1));
3136		tarc |= 1;
3137		E1000_WRITE_REG(&adapter->hw, E1000_TARC(1), tarc);
3138	}
3139
3140	adapter->txd_cmd = E1000_TXD_CMD_IFCS;
3141	if (adapter->tx_int_delay.value > 0)
3142		adapter->txd_cmd |= E1000_TXD_CMD_IDE;
3143
3144	/* Program the Transmit Control Register */
3145	tctl = E1000_READ_REG(&adapter->hw, E1000_TCTL);
3146	tctl &= ~E1000_TCTL_CT;
3147	tctl |= (E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN |
3148		   (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT));
3149
3150	if (adapter->hw.mac.type >= e1000_82571)
3151		tctl |= E1000_TCTL_MULR;
3152
3153	/* This write will effectively turn on the transmit unit. */
3154	E1000_WRITE_REG(&adapter->hw, E1000_TCTL, tctl);
3155
3156}
3157
3158
3159/*********************************************************************
3160 *
3161 *  Free all transmit rings.
3162 *
3163 **********************************************************************/
3164static void
3165em_free_transmit_structures(struct adapter *adapter)
3166{
3167	struct tx_ring *txr = adapter->tx_rings;
3168
3169	for (int i = 0; i < adapter->num_queues; i++, txr++) {
3170		EM_TX_LOCK(txr);
3171		em_free_transmit_buffers(txr);
3172		em_dma_free(adapter, &txr->txdma);
3173		EM_TX_UNLOCK(txr);
3174		EM_TX_LOCK_DESTROY(txr);
3175	}
3176
3177	free(adapter->tx_rings, M_DEVBUF);
3178}
3179
3180/*********************************************************************
3181 *
3182 *  Free transmit ring related data structures.
3183 *
3184 **********************************************************************/
3185static void
3186em_free_transmit_buffers(struct tx_ring *txr)
3187{
3188	struct adapter		*adapter = txr->adapter;
3189	struct em_buffer	*txbuf;
3190
3191	INIT_DEBUGOUT("free_transmit_ring: begin");
3192
3193	if (txr->tx_buffers == NULL)
3194		return;
3195
3196	for (int i = 0; i < adapter->num_tx_desc; i++) {
3197		txbuf = &txr->tx_buffers[i];
3198		if (txbuf->m_head != NULL) {
3199			bus_dmamap_sync(txr->txtag, txbuf->map,
3200			    BUS_DMASYNC_POSTWRITE);
3201			bus_dmamap_unload(txr->txtag,
3202			    txbuf->map);
3203			m_freem(txbuf->m_head);
3204			txbuf->m_head = NULL;
3205			if (txbuf->map != NULL) {
3206				bus_dmamap_destroy(txr->txtag,
3207				    txbuf->map);
3208				txbuf->map = NULL;
3209			}
3210		} else if (txbuf->map != NULL) {
3211			bus_dmamap_unload(txr->txtag,
3212			    txbuf->map);
3213			bus_dmamap_destroy(txr->txtag,
3214			    txbuf->map);
3215			txbuf->map = NULL;
3216		}
3217	}
3218#if __FreeBSD_version >= 800000
3219	if (txr->br != NULL)
3220		buf_ring_free(txr->br, M_DEVBUF);
3221#endif
3222	if (txr->tx_buffers != NULL) {
3223		free(txr->tx_buffers, M_DEVBUF);
3224		txr->tx_buffers = NULL;
3225	}
3226	if (txr->txtag != NULL) {
3227		bus_dma_tag_destroy(txr->txtag);
3228		txr->txtag = NULL;
3229	}
3230	return;
3231}
3232
3233
3234/*********************************************************************
3235 *
3236 *  The offload context needs to be set when we transfer the first
3237 *  packet of a particular protocol (TCP/UDP). This routine has been
3238 *  enhanced to deal with inserted VLAN headers and IPv6 (not complete).
3239 *
3240 *  Added back the old method of keeping the current context type
3241 *  and not setting if unnecessary, as this is reported to be a
3242 *  big performance win.  -jfv
3243 **********************************************************************/
3244static void
3245em_transmit_checksum_setup(struct tx_ring *txr, struct mbuf *mp,
3246    u32 *txd_upper, u32 *txd_lower)
3247{
3248	struct adapter			*adapter = txr->adapter;
3249	struct e1000_context_desc	*TXD = NULL;
3250	struct em_buffer *tx_buffer;
3251	struct ether_vlan_header *eh;
3252	struct ip *ip = NULL;
3253	struct ip6_hdr *ip6;
3254	int cur, ehdrlen;
3255	u32 cmd, hdr_len, ip_hlen;
3256	u16 etype;
3257	u8 ipproto;
3258
3259
3260	cmd = hdr_len = ipproto = 0;
3261	cur = txr->next_avail_desc;
3262
3263	/*
3264	 * Determine where frame payload starts.
3265	 * Jump over vlan headers if already present,
3266	 * helpful for QinQ too.
3267	 */
3268	eh = mtod(mp, struct ether_vlan_header *);
3269	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
3270		etype = ntohs(eh->evl_proto);
3271		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3272	} else {
3273		etype = ntohs(eh->evl_encap_proto);
3274		ehdrlen = ETHER_HDR_LEN;
3275	}
3276
3277	/*
3278	 * We only support TCP/UDP for IPv4 and IPv6 for the moment.
3279	 * TODO: Support SCTP too when it hits the tree.
3280	 */
3281	switch (etype) {
3282	case ETHERTYPE_IP:
3283		ip = (struct ip *)(mp->m_data + ehdrlen);
3284		ip_hlen = ip->ip_hl << 2;
3285
3286		/* Setup of IP header checksum. */
3287		if (mp->m_pkthdr.csum_flags & CSUM_IP) {
3288			/*
3289			 * Start offset for header checksum calculation.
3290			 * End offset for header checksum calculation.
3291			 * Offset of place to put the checksum.
3292			 */
3293			TXD = (struct e1000_context_desc *)
3294			    &txr->tx_base[cur];
3295			TXD->lower_setup.ip_fields.ipcss = ehdrlen;
3296			TXD->lower_setup.ip_fields.ipcse =
3297			    htole16(ehdrlen + ip_hlen);
3298			TXD->lower_setup.ip_fields.ipcso =
3299			    ehdrlen + offsetof(struct ip, ip_sum);
3300			cmd |= E1000_TXD_CMD_IP;
3301			*txd_upper |= E1000_TXD_POPTS_IXSM << 8;
3302		}
3303
3304		if (mp->m_len < ehdrlen + ip_hlen)
3305			return;	/* failure */
3306
3307		hdr_len = ehdrlen + ip_hlen;
3308		ipproto = ip->ip_p;
3309
3310		break;
3311	case ETHERTYPE_IPV6:
3312		ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
3313		ip_hlen = sizeof(struct ip6_hdr); /* XXX: No header stacking. */
3314
3315		if (mp->m_len < ehdrlen + ip_hlen)
3316			return;	/* failure */
3317
3318		/* IPv6 doesn't have a header checksum. */
3319
3320		hdr_len = ehdrlen + ip_hlen;
3321		ipproto = ip6->ip6_nxt;
3322
3323		break;
3324	default:
3325		*txd_upper = 0;
3326		*txd_lower = 0;
3327		return;
3328	}
3329
3330	switch (ipproto) {
3331	case IPPROTO_TCP:
3332		if (mp->m_pkthdr.csum_flags & CSUM_TCP) {
3333			*txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
3334			*txd_upper |= E1000_TXD_POPTS_TXSM << 8;
3335			/* no need for context if already set */
3336			if (txr->last_hw_offload == CSUM_TCP)
3337				return;
3338			txr->last_hw_offload = CSUM_TCP;
3339			/*
3340			 * Start offset for payload checksum calculation.
3341			 * End offset for payload checksum calculation.
3342			 * Offset of place to put the checksum.
3343			 */
3344			TXD = (struct e1000_context_desc *)
3345			    &txr->tx_base[cur];
3346			TXD->upper_setup.tcp_fields.tucss = hdr_len;
3347			TXD->upper_setup.tcp_fields.tucse = htole16(0);
3348			TXD->upper_setup.tcp_fields.tucso =
3349			    hdr_len + offsetof(struct tcphdr, th_sum);
3350			cmd |= E1000_TXD_CMD_TCP;
3351		}
3352		break;
3353	case IPPROTO_UDP:
3354	{
3355		if (mp->m_pkthdr.csum_flags & CSUM_UDP) {
3356			*txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
3357			*txd_upper |= E1000_TXD_POPTS_TXSM << 8;
3358			/* no need for context if already set */
3359			if (txr->last_hw_offload == CSUM_UDP)
3360				return;
3361			txr->last_hw_offload = CSUM_UDP;
3362			/*
3363			 * Start offset for header checksum calculation.
3364			 * End offset for header checksum calculation.
3365			 * Offset of place to put the checksum.
3366			 */
3367			TXD = (struct e1000_context_desc *)
3368			    &txr->tx_base[cur];
3369			TXD->upper_setup.tcp_fields.tucss = hdr_len;
3370			TXD->upper_setup.tcp_fields.tucse = htole16(0);
3371			TXD->upper_setup.tcp_fields.tucso =
3372			    hdr_len + offsetof(struct udphdr, uh_sum);
3373		}
3374		/* Fall Thru */
3375	}
3376	default:
3377		break;
3378	}
3379
3380	TXD->tcp_seg_setup.data = htole32(0);
3381	TXD->cmd_and_length =
3382	    htole32(adapter->txd_cmd | E1000_TXD_CMD_DEXT | cmd);
3383	tx_buffer = &txr->tx_buffers[cur];
3384	tx_buffer->m_head = NULL;
3385	tx_buffer->next_eop = -1;
3386
3387	if (++cur == adapter->num_tx_desc)
3388		cur = 0;
3389
3390	txr->tx_avail--;
3391	txr->next_avail_desc = cur;
3392}
3393
3394
3395/**********************************************************************
3396 *
3397 *  Setup work for hardware segmentation offload (TSO)
3398 *
3399 **********************************************************************/
3400static bool
3401em_tso_setup(struct tx_ring *txr, struct mbuf *mp, u32 *txd_upper,
3402   u32 *txd_lower)
3403{
3404	struct adapter			*adapter = txr->adapter;
3405	struct e1000_context_desc	*TXD;
3406	struct em_buffer		*tx_buffer;
3407	struct ether_vlan_header	*eh;
3408	struct ip			*ip;
3409	struct ip6_hdr			*ip6;
3410	struct tcphdr			*th;
3411	int cur, ehdrlen, hdr_len, ip_hlen, isip6;
3412	u16 etype;
3413
3414	/*
3415	 * This function could/should be extended to support IP/IPv6
3416	 * fragmentation as well.  But as they say, one step at a time.
3417	 */
3418
3419	/*
3420	 * Determine where frame payload starts.
3421	 * Jump over vlan headers if already present,
3422	 * helpful for QinQ too.
3423	 */
3424	eh = mtod(mp, struct ether_vlan_header *);
3425	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
3426		etype = ntohs(eh->evl_proto);
3427		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3428	} else {
3429		etype = ntohs(eh->evl_encap_proto);
3430		ehdrlen = ETHER_HDR_LEN;
3431	}
3432
3433	/* Ensure we have at least the IP+TCP header in the first mbuf. */
3434	if (mp->m_len < ehdrlen + sizeof(struct ip) + sizeof(struct tcphdr))
3435		return FALSE;	/* -1 */
3436
3437	/*
3438	 * We only support TCP for IPv4 at the moment; IPv6 is not yet done.
3439	 * TODO: Support SCTP too when it hits the tree.
3440	 */
3441	switch (etype) {
3442	case ETHERTYPE_IP:
3443		isip6 = 0;
3444		ip = (struct ip *)(mp->m_data + ehdrlen);
3445		if (ip->ip_p != IPPROTO_TCP)
3446			return FALSE;	/* 0 */
3447		ip->ip_len = 0;
3448		ip->ip_sum = 0;
3449		ip_hlen = ip->ip_hl << 2;
3450		if (mp->m_len < ehdrlen + ip_hlen + sizeof(struct tcphdr))
3451			return FALSE;	/* -1 */
3452		th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
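		/*
		 * Seed th_sum with the pseudo-header checksum over the
		 * addresses and protocol only; the hardware folds in the
		 * length of each segment it generates during TSO.
		 */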
3453#if 1
3454		th->th_sum = in_pseudo(ip->ip_src.s_addr,
3455		    ip->ip_dst.s_addr, htons(IPPROTO_TCP));
3456#else
3457		th->th_sum = mp->m_pkthdr.csum_data;
3458#endif
3459		break;
3460	case ETHERTYPE_IPV6:
3461		isip6 = 1;
3462		return FALSE;			/* Not supported yet. */
3463		ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
3464		if (ip6->ip6_nxt != IPPROTO_TCP)
3465			return FALSE;	/* 0 */
3466		ip6->ip6_plen = 0;
3467		ip_hlen = sizeof(struct ip6_hdr); /* XXX: no header stacking. */
3468		if (mp->m_len < ehdrlen + ip_hlen + sizeof(struct tcphdr))
3469			return FALSE;	/* -1 */
3470		th = (struct tcphdr *)((caddr_t)ip6 + ip_hlen);
3471#if 0
		th->th_sum = in6_pseudo(ip6->ip6_src, ip6->ip6_dst,
		    htons(IPPROTO_TCP));	/* XXX: function notyet. */
3474#else
3475		th->th_sum = mp->m_pkthdr.csum_data;
3476#endif
3477		break;
3478	default:
3479		return FALSE;
3480	}
3481	hdr_len = ehdrlen + ip_hlen + (th->th_off << 2);
3482
3483	*txd_lower = (E1000_TXD_CMD_DEXT |	/* Extended descr type */
3484		      E1000_TXD_DTYP_D |	/* Data descr type */
3485		      E1000_TXD_CMD_TSE);	/* Do TSE on this packet */
3486
3487	/* IP and/or TCP header checksum calculation and insertion. */
3488	*txd_upper = ((isip6 ? 0 : E1000_TXD_POPTS_IXSM) |
3489		      E1000_TXD_POPTS_TXSM) << 8;
3490
3491	cur = txr->next_avail_desc;
3492	tx_buffer = &txr->tx_buffers[cur];
3493	TXD = (struct e1000_context_desc *) &txr->tx_base[cur];
3494
3495	/* IPv6 doesn't have a header checksum. */
3496	if (!isip6) {
3497		/*
3498		 * Start offset for header checksum calculation.
3499		 * End offset for header checksum calculation.
		 * Offset of place to put the checksum.
3501		 */
3502		TXD->lower_setup.ip_fields.ipcss = ehdrlen;
3503		TXD->lower_setup.ip_fields.ipcse =
3504		    htole16(ehdrlen + ip_hlen - 1);
3505		TXD->lower_setup.ip_fields.ipcso =
3506		    ehdrlen + offsetof(struct ip, ip_sum);
3507	}
3508	/*
3509	 * Start offset for payload checksum calculation.
3510	 * End offset for payload checksum calculation.
3511	 * Offset of place to put the checksum.
3512	 */
3513	TXD->upper_setup.tcp_fields.tucss =
3514	    ehdrlen + ip_hlen;
3515	TXD->upper_setup.tcp_fields.tucse = 0;
3516	TXD->upper_setup.tcp_fields.tucso =
3517	    ehdrlen + ip_hlen + offsetof(struct tcphdr, th_sum);
3518	/*
3519	 * Payload size per packet w/o any headers.
3520	 * Length of all headers up to payload.
3521	 */
3522	TXD->tcp_seg_setup.fields.mss = htole16(mp->m_pkthdr.tso_segsz);
3523	TXD->tcp_seg_setup.fields.hdr_len = hdr_len;
3524
3525	TXD->cmd_and_length = htole32(adapter->txd_cmd |
3526				E1000_TXD_CMD_DEXT |	/* Extended descr */
3527				E1000_TXD_CMD_TSE |	/* TSE context */
3528				(isip6 ? 0 : E1000_TXD_CMD_IP) |
3529				E1000_TXD_CMD_TCP |	/* Do TCP checksum */
3530				(mp->m_pkthdr.len - (hdr_len))); /* Total len */
3531
3532	tx_buffer->m_head = NULL;
3533	tx_buffer->next_eop = -1;
3534
3535	if (++cur == adapter->num_tx_desc)
3536		cur = 0;
3537
3538	txr->tx_avail--;
3539	txr->next_avail_desc = cur;
3540	txr->tx_tso = TRUE;
3541
3542	return TRUE;
3543}
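
/*
 * Note on the pseudo-header seeding above: with TSO the hardware
 * regenerates the TCP length for every segment it creates, so th_sum
 * must be seeded with a pseudo-header sum that deliberately excludes
 * the length.  A minimal portable sketch of that partial sum, in the
 * spirit of the in_pseudo() call above (illustration only; inputs are
 * assumed to already be in network byte order):
 *
 *	static uint16_t
 *	pseudo_sum_no_len(uint32_t src, uint32_t dst, uint32_t proto)
 *	{
 *		uint64_t sum = (uint64_t)src + dst + proto;
 *
 *		while (sum >> 16)
 *			sum = (sum & 0xffff) + (sum >> 16);
 *		return ((uint16_t)sum);
 *	}
 */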
3544
3545
3546/**********************************************************************
3547 *
3548 *  Examine each tx_buffer in the used queue. If the hardware is done
3549 *  processing the packet then free associated resources. The
3550 *  tx_buffer is put back on the free queue.
3551 *
3552 **********************************************************************/
static bool
em_txeof(struct tx_ring *txr)
{
	struct adapter	*adapter = txr->adapter;
	int		first, last, done, num_avail;
	struct em_buffer *tx_buffer;
	struct e1000_tx_desc *tx_desc, *eop_desc;
	struct ifnet	*ifp = adapter->ifp;

	EM_TX_LOCK_ASSERT(txr);

	if (txr->tx_avail == adapter->num_tx_desc)
		return (FALSE);

	num_avail = txr->tx_avail;
	first = txr->next_to_clean;
	tx_desc = &txr->tx_base[first];
	tx_buffer = &txr->tx_buffers[first];
	last = tx_buffer->next_eop;
	eop_desc = &txr->tx_base[last];

	/*
	 * Get the index of the first descriptor AFTER the EOP
	 * of the first packet, so that the inner while loop
	 * can use a simple comparison.
	 */
	if (++last == adapter->num_tx_desc)
		last = 0;
	done = last;

	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
	    BUS_DMASYNC_POSTREAD);

	while (eop_desc->upper.fields.status & E1000_TXD_STAT_DD) {
		/* We clean the range of the packet */
		while (first != done) {
			tx_desc->upper.data = 0;
			tx_desc->lower.data = 0;
			tx_desc->buffer_addr = 0;
			++num_avail;

			if (tx_buffer->m_head) {
				ifp->if_opackets++;
				bus_dmamap_sync(txr->txtag,
				    tx_buffer->map,
				    BUS_DMASYNC_POSTWRITE);
				bus_dmamap_unload(txr->txtag,
				    tx_buffer->map);

				m_freem(tx_buffer->m_head);
				tx_buffer->m_head = NULL;
			}
			tx_buffer->next_eop = -1;
			txr->watchdog_time = ticks;

			if (++first == adapter->num_tx_desc)
				first = 0;

			tx_buffer = &txr->tx_buffers[first];
			tx_desc = &txr->tx_base[first];
		}
		/* See if we can continue to the next packet */
		last = tx_buffer->next_eop;
		if (last != -1) {
			eop_desc = &txr->tx_base[last];
			/* Get new done point */
			if (++last == adapter->num_tx_desc)
				last = 0;
			done = last;
		} else
			break;
	}
	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);

	txr->next_to_clean = first;

	/*
	 * If we have enough room, clear IFF_DRV_OACTIVE to
	 * tell the stack that it is OK to send packets.
	 * If there are no pending descriptors, clear the watchdog.
	 */
	if (num_avail > EM_TX_CLEANUP_THRESHOLD) {
		ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
		if (num_avail == adapter->num_tx_desc) {
			txr->watchdog_check = FALSE;
			txr->tx_avail = num_avail;
			return (FALSE);
		}
	}

	txr->tx_avail = num_avail;
	return (TRUE);
}
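
/*
 * Aside (illustration only): em_txeof and the setup routines above all
 * advance ring indices with the same wrap-on-overflow step.  Written
 * out as a helper it is simply:
 *
 *	static inline int
 *	ring_next(int cur, int size)
 *	{
 *		return (++cur == size ? 0 : cur);
 *	}
 *
 * i.e. the "if (++cur == adapter->num_tx_desc) cur = 0;" pattern used
 * throughout.  A power-of-two ring size would also allow a simple mask,
 * but the descriptor counts here are not required to be powers of two.
 */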
3647
3648
3649/*********************************************************************
3650 *
3651 *  Refresh RX descriptor mbufs from system mbuf buffer pool.
3652 *
3653 **********************************************************************/
3654static void
3655em_refresh_mbufs(struct rx_ring *rxr, int limit)
3656{
3657	struct adapter		*adapter = rxr->adapter;
3658	struct mbuf		*m;
3659	bus_dma_segment_t	segs[1];
3660	bus_dmamap_t		map;
3661	struct em_buffer	*rxbuf;
3662	int			i, error, nsegs, cleaned;
3663
3664	i = rxr->next_to_refresh;
3665	cleaned = -1;
3666	while (i != limit) {
3667		m = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR);
3668		if (m == NULL)
3669			goto update;
3670		m->m_len = m->m_pkthdr.len = MCLBYTES;
3671
3672		if (adapter->max_frame_size <= (MCLBYTES - ETHER_ALIGN))
3673			m_adj(m, ETHER_ALIGN);
3674
3675		/*
3676		 * Using memory from the mbuf cluster pool, invoke the
3677		 * bus_dma machinery to arrange the memory mapping.
3678		 */
3679		error = bus_dmamap_load_mbuf_sg(rxr->rxtag, rxr->rx_sparemap,
3680		    m, segs, &nsegs, BUS_DMA_NOWAIT);
3681		if (error != 0) {
3682			m_free(m);
3683			goto update;
3684		}
3685
3686		/* If nsegs is wrong then the stack is corrupt. */
3687		KASSERT(nsegs == 1, ("Too many segments returned!"));
3688
3689		rxbuf = &rxr->rx_buffers[i];
3690		if (rxbuf->m_head != NULL)
3691			bus_dmamap_unload(rxr->rxtag, rxbuf->map);
3692
3693		map = rxbuf->map;
3694		rxbuf->map = rxr->rx_sparemap;
3695		rxr->rx_sparemap = map;
3696		bus_dmamap_sync(rxr->rxtag,
3697		    rxbuf->map, BUS_DMASYNC_PREREAD);
3698		rxbuf->m_head = m;
3699		rxr->rx_base[i].buffer_addr = htole64(segs[0].ds_addr);
3700
3701		cleaned = i;
3702		/* Calculate next index */
3703		if (++i == adapter->num_rx_desc)
3704			i = 0;
3705		/* This is the work marker for refresh */
3706		rxr->next_to_refresh = i;
3707	}
3708update:
3709	bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
3710	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3711	if (cleaned != -1) /* Update tail index */
3712		E1000_WRITE_REG(&adapter->hw,
3713		    E1000_RDT(rxr->me), cleaned);
3714
3715	return;
3716}
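
/*
 * Design note on the spare map used above: the fresh mbuf is loaded
 * into rx_sparemap first, and only on success is it swapped with the
 * descriptor's own map, so a failed load leaves the old mapping
 * untouched.  The swap itself is a plain pointer exchange (sketch,
 * using the same types as above):
 *
 *	bus_dmamap_t tmp;
 *
 *	tmp = rxbuf->map;
 *	rxbuf->map = rxr->rx_sparemap;
 *	rxr->rx_sparemap = tmp;
 */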
3717
3718
3719/*********************************************************************
3720 *
3721 *  Allocate memory for rx_buffer structures. Since we use one
 *  rx_buffer per received packet, the maximum number of rx_buffers
3723 *  that we'll need is equal to the number of receive descriptors
3724 *  that we've allocated.
3725 *
3726 **********************************************************************/
3727static int
3728em_allocate_receive_buffers(struct rx_ring *rxr)
3729{
3730	struct adapter		*adapter = rxr->adapter;
3731	device_t		dev = adapter->dev;
3732	struct em_buffer	*rxbuf;
3733	int			error;
3734
3735	rxr->rx_buffers = malloc(sizeof(struct em_buffer) *
3736	    adapter->num_rx_desc, M_DEVBUF, M_NOWAIT | M_ZERO);
3737	if (rxr->rx_buffers == NULL) {
3738		device_printf(dev, "Unable to allocate rx_buffer memory\n");
3739		return (ENOMEM);
3740	}
3741
3742	error = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */
3743				1, 0,			/* alignment, bounds */
3744				BUS_SPACE_MAXADDR,	/* lowaddr */
3745				BUS_SPACE_MAXADDR,	/* highaddr */
3746				NULL, NULL,		/* filter, filterarg */
3747				MCLBYTES,		/* maxsize */
3748				1,			/* nsegments */
3749				MCLBYTES,		/* maxsegsize */
3750				0,			/* flags */
3751				NULL,			/* lockfunc */
3752				NULL,			/* lockarg */
3753				&rxr->rxtag);
3754	if (error) {
3755		device_printf(dev, "%s: bus_dma_tag_create failed %d\n",
3756		    __func__, error);
3757		goto fail;
3758	}
3759
3760	/* Create the spare map (used by getbuf) */
3761	error = bus_dmamap_create(rxr->rxtag, BUS_DMA_NOWAIT,
3762	     &rxr->rx_sparemap);
3763	if (error) {
3764		device_printf(dev, "%s: bus_dmamap_create failed: %d\n",
3765		    __func__, error);
3766		goto fail;
3767	}
3768
3769	rxbuf = rxr->rx_buffers;
3770	for (int i = 0; i < adapter->num_rx_desc; i++, rxbuf++) {
3772		error = bus_dmamap_create(rxr->rxtag, BUS_DMA_NOWAIT,
3773		    &rxbuf->map);
3774		if (error) {
3775			device_printf(dev, "%s: bus_dmamap_create failed: %d\n",
3776			    __func__, error);
3777			goto fail;
3778		}
3779	}
3780
3781	return (0);
3782
3783fail:
3784	em_free_receive_structures(adapter);
3785	return (error);
3786}
3787
3788
3789/*********************************************************************
3790 *
3791 *  Initialize a receive ring and its buffers.
3792 *
3793 **********************************************************************/
3794static int
3795em_setup_receive_ring(struct rx_ring *rxr)
3796{
3797	struct	adapter 	*adapter = rxr->adapter;
3798	struct em_buffer	*rxbuf;
3799	bus_dma_segment_t	seg[1];
3800	int			rsize, nsegs, error;
3801
3802
3803	/* Clear the ring contents */
3804	EM_RX_LOCK(rxr);
3805	rsize = roundup2(adapter->num_rx_desc *
3806	    sizeof(struct e1000_rx_desc), EM_DBA_ALIGN);
3807	bzero((void *)rxr->rx_base, rsize);
3808
3809	/*
3810	** Free current RX buffer structs and their mbufs
3811	*/
3812	for (int i = 0; i < adapter->num_rx_desc; i++) {
3813		rxbuf = &rxr->rx_buffers[i];
3814		if (rxbuf->m_head != NULL) {
3815			bus_dmamap_sync(rxr->rxtag, rxbuf->map,
3816			    BUS_DMASYNC_POSTREAD);
3817			bus_dmamap_unload(rxr->rxtag, rxbuf->map);
3818			m_freem(rxbuf->m_head);
3819		}
3820	}
3821
3822	/* Now replenish the mbufs */
3823	for (int j = 0; j != adapter->num_rx_desc; ++j) {
3824
3825		rxbuf = &rxr->rx_buffers[j];
3826		rxbuf->m_head = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR);
3827		if (rxbuf->m_head == NULL)
3828			panic("RX ring hdr initialization failed!\n");
3829		rxbuf->m_head->m_len = MCLBYTES;
3830		rxbuf->m_head->m_flags &= ~M_HASFCS; /* we strip it */
3831		rxbuf->m_head->m_pkthdr.len = MCLBYTES;
3832
3833		/* Get the memory mapping */
3834		error = bus_dmamap_load_mbuf_sg(rxr->rxtag,
3835		    rxbuf->map, rxbuf->m_head, seg,
3836		    &nsegs, BUS_DMA_NOWAIT);
3837		if (error != 0)
3838			panic("RX ring dma initialization failed!\n");
3839		bus_dmamap_sync(rxr->rxtag,
3840		    rxbuf->map, BUS_DMASYNC_PREREAD);
3841
3842		/* Update descriptor */
3843		rxr->rx_base[j].buffer_addr = htole64(seg[0].ds_addr);
3844	}
3845
3846
3847	/* Setup our descriptor indices */
3848	rxr->next_to_check = 0;
3849	rxr->next_to_refresh = 0;
3850
3851	bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
3852	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3853
3854	EM_RX_UNLOCK(rxr);
3855	return (0);
3856}
3857
3858/*********************************************************************
3859 *
3860 *  Initialize all receive rings.
3861 *
3862 **********************************************************************/
3863static int
3864em_setup_receive_structures(struct adapter *adapter)
3865{
3866	struct rx_ring *rxr = adapter->rx_rings;
3867	int j;
3868
3869	for (j = 0; j < adapter->num_queues; j++, rxr++)
3870		if (em_setup_receive_ring(rxr))
3871			goto fail;
3872
3873	return (0);
3874fail:
	/*
	 * Free RX buffers allocated so far; we will only handle
	 * the rings that completed, since the failing case will
	 * have cleaned up for itself. 'j' failed, so it's the terminus.
	 */
3880	for (int i = 0; i < j; ++i) {
3881		rxr = &adapter->rx_rings[i];
3882		for (int n = 0; n < adapter->num_rx_desc; n++) {
3883			struct em_buffer *rxbuf;
3884			rxbuf = &rxr->rx_buffers[n];
3885			if (rxbuf->m_head != NULL) {
3886				bus_dmamap_sync(rxr->rxtag, rxbuf->map,
3887			  	  BUS_DMASYNC_POSTREAD);
3888				bus_dmamap_unload(rxr->rxtag, rxbuf->map);
3889				m_freem(rxbuf->m_head);
3890				rxbuf->m_head = NULL;
3891			}
3892		}
3893	}
3894
3895	return (ENOBUFS);
3896}
3897
3898/*********************************************************************
3899 *
3900 *  Free all receive rings.
3901 *
3902 **********************************************************************/
3903static void
3904em_free_receive_structures(struct adapter *adapter)
3905{
3906	struct rx_ring *rxr = adapter->rx_rings;
3907
3908	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
3909		em_free_receive_buffers(rxr);
3910		/* Free the ring memory as well */
3911		em_dma_free(adapter, &rxr->rxdma);
3912		EM_RX_LOCK_DESTROY(rxr);
3913	}
3914
3915	free(adapter->rx_rings, M_DEVBUF);
3916}
3917
3918
3919/*********************************************************************
3920 *
3921 *  Free receive ring data structures
3922 *
3923 **********************************************************************/
3924static void
3925em_free_receive_buffers(struct rx_ring *rxr)
3926{
3927	struct adapter		*adapter = rxr->adapter;
3928	struct em_buffer	*rxbuf = NULL;
3929
3930	INIT_DEBUGOUT("free_receive_buffers: begin");
3931
3932	if (rxr->rx_sparemap) {
3933		bus_dmamap_destroy(rxr->rxtag, rxr->rx_sparemap);
3934		rxr->rx_sparemap = NULL;
3935	}
3936
3937	if (rxr->rx_buffers != NULL) {
3938		for (int i = 0; i < adapter->num_rx_desc; i++) {
3939			rxbuf = &rxr->rx_buffers[i];
3940			if (rxbuf->map != NULL) {
3941				bus_dmamap_sync(rxr->rxtag, rxbuf->map,
3942				    BUS_DMASYNC_POSTREAD);
3943				bus_dmamap_unload(rxr->rxtag, rxbuf->map);
3944				bus_dmamap_destroy(rxr->rxtag, rxbuf->map);
3945			}
3946			if (rxbuf->m_head != NULL) {
3947				m_freem(rxbuf->m_head);
3948				rxbuf->m_head = NULL;
3949			}
3950		}
3951		free(rxr->rx_buffers, M_DEVBUF);
3952		rxr->rx_buffers = NULL;
3953	}
3954
3955	if (rxr->rxtag != NULL) {
3956		bus_dma_tag_destroy(rxr->rxtag);
3957		rxr->rxtag = NULL;
3958	}
3959
3960	return;
3961}
3962
3963
3964/*********************************************************************
3965 *
3966 *  Enable receive unit.
3967 *
3968 **********************************************************************/
3969#define MAX_INTS_PER_SEC	8000
#define DEFAULT_ITR	     (1000000000/(MAX_INTS_PER_SEC * 256))
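
/*
 * Worked example: the ITR register counts in 256 ns units, so for
 * MAX_INTS_PER_SEC = 8000 the value written is
 * 1000000000 / (8000 * 256) = 1000000000 / 2048000, which truncates
 * to 488 units, i.e. roughly 125 us between interrupts.
 */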
3971
3972static void
3973em_initialize_receive_unit(struct adapter *adapter)
3974{
3975	struct rx_ring	*rxr = adapter->rx_rings;
3976	struct ifnet	*ifp = adapter->ifp;
3977	struct e1000_hw	*hw = &adapter->hw;
3978	u64	bus_addr;
3979	u32	rctl, rxcsum;
3980
3981	INIT_DEBUGOUT("em_initialize_receive_units: begin");
3982
3983	/*
3984	 * Make sure receives are disabled while setting
3985	 * up the descriptor ring
3986	 */
3987	rctl = E1000_READ_REG(hw, E1000_RCTL);
3988	E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
3989
3990	E1000_WRITE_REG(&adapter->hw, E1000_RADV,
3991	    adapter->rx_abs_int_delay.value);
3992	/*
3993	 * Set the interrupt throttling rate. Value is calculated
3994	 * as DEFAULT_ITR = 1/(MAX_INTS_PER_SEC * 256ns)
3995	 */
3996	E1000_WRITE_REG(hw, E1000_ITR, DEFAULT_ITR);
3997
3998	/*
3999	** When using MSIX interrupts we need to throttle
4000	** using the EITR register (82574 only)
4001	*/
4002	if (adapter->msix)
4003		for (int i = 0; i < 4; i++)
4004			E1000_WRITE_REG(hw, E1000_EITR_82574(i),
4005			    DEFAULT_ITR);
4006
	/* Disable accelerated acknowledgement */
4008	if (adapter->hw.mac.type == e1000_82574)
4009		E1000_WRITE_REG(hw, E1000_RFCTL, E1000_RFCTL_ACK_DIS);
4010
4011	if (ifp->if_capenable & IFCAP_RXCSUM) {
4012		rxcsum = E1000_READ_REG(hw, E1000_RXCSUM);
4013		rxcsum |= (E1000_RXCSUM_IPOFL | E1000_RXCSUM_TUOFL);
4014		E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum);
4015	}
4016
	/*
	** XXX TEMPORARY WORKAROUND: on some systems with the 82573,
	** long latencies are observed, e.g. on the Lenovo X60. This
	** change eliminates the problem, but since having positive
	** values in RDTR is a known source of problems on other
	** platforms another solution is being sought.
	*/
4024	if (hw->mac.type == e1000_82573)
4025		E1000_WRITE_REG(hw, E1000_RDTR, 0x20);
4026
4027	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4028		/* Setup the Base and Length of the Rx Descriptor Ring */
4029		bus_addr = rxr->rxdma.dma_paddr;
4030		E1000_WRITE_REG(hw, E1000_RDLEN(i),
4031		    adapter->num_rx_desc * sizeof(struct e1000_rx_desc));
4032		E1000_WRITE_REG(hw, E1000_RDBAH(i), (u32)(bus_addr >> 32));
4033		E1000_WRITE_REG(hw, E1000_RDBAL(i), (u32)bus_addr);
4034		/* Setup the Head and Tail Descriptor Pointers */
4035		E1000_WRITE_REG(hw, E1000_RDH(i), 0);
4036		E1000_WRITE_REG(hw, E1000_RDT(i), adapter->num_rx_desc - 1);
4037	}
4038
4039	/* Setup the Receive Control Register */
4040	rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
4041	rctl |= E1000_RCTL_EN | E1000_RCTL_BAM |
4042	    E1000_RCTL_LBM_NO | E1000_RCTL_RDMTS_HALF |
4043	    (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
4044
	/* Strip the CRC */
	rctl |= E1000_RCTL_SECRC;

	/* Make sure VLAN Filters are off */
	rctl &= ~E1000_RCTL_VFE;
4050	rctl &= ~E1000_RCTL_SBP;
4051	rctl |= E1000_RCTL_SZ_2048;
4052	if (ifp->if_mtu > ETHERMTU)
4053		rctl |= E1000_RCTL_LPE;
4054	else
4055		rctl &= ~E1000_RCTL_LPE;
4056
4057	/* Write out the settings */
4058	E1000_WRITE_REG(hw, E1000_RCTL, rctl);
4059
4060	return;
4061}
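
/*
 * Aside (illustration only): the RDBAH/RDBAL writes above split the
 * 64-bit physical ring address into two 32-bit register writes.  The
 * split is just:
 *
 *	uint32_t lo = (uint32_t)bus_addr;
 *	uint32_t hi = (uint32_t)(bus_addr >> 32);
 *
 * On systems without memory above 4GB the high half is simply zero.
 */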
4062
4063
4064/*********************************************************************
4065 *
 *  This routine executes in interrupt context. It replenishes
 *  the mbufs in the descriptor ring and sends data which has been
 *  dma'ed into host memory to the upper layer.
4069 *
4070 *  We loop at most count times if count is > 0, or until done if
4071 *  count < 0.
4072 *
4073 *  For polling we also now return the number of cleaned packets
4074 *********************************************************************/
4075static int
4076em_rxeof(struct rx_ring *rxr, int count)
4077{
4078	struct adapter		*adapter = rxr->adapter;
4079	struct ifnet		*ifp = adapter->ifp;
4080	struct mbuf		*mp, *sendmp;
4081	u8			status = 0;
4082	u16 			len;
4083	int			i, processed, rxdone = 0;
4084	bool			eop;
4085	struct e1000_rx_desc	*cur;
4086
4087	EM_RX_LOCK(rxr);
4088
4089	for (i = rxr->next_to_check, processed = 0; count != 0;) {
4090
4091		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
4092			break;
4093
4094		bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4095		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
4096
4097		cur = &rxr->rx_base[i];
4098		status = cur->status;
4099		mp = sendmp = NULL;
4100
4101		if ((status & E1000_RXD_STAT_DD) == 0)
4102			break;
4103
4104		len = le16toh(cur->length);
4105		eop = (status & E1000_RXD_STAT_EOP) != 0;
4106		count--;
4107
4108		if ((cur->errors & E1000_RXD_ERR_FRAME_ERR_MASK) == 0) {
4109
4110			/* Assign correct length to the current fragment */
4111			mp = rxr->rx_buffers[i].m_head;
4112			mp->m_len = len;
4113
4114			if (rxr->fmp == NULL) {
4115				mp->m_pkthdr.len = len;
4116				rxr->fmp = mp; /* Store the first mbuf */
4117				rxr->lmp = mp;
4118			} else {
4119				/* Chain mbuf's together */
4120				mp->m_flags &= ~M_PKTHDR;
4121				rxr->lmp->m_next = mp;
4122				rxr->lmp = rxr->lmp->m_next;
4123				rxr->fmp->m_pkthdr.len += len;
4124			}
4125
4126			if (eop) {
4127				rxr->fmp->m_pkthdr.rcvif = ifp;
4128				ifp->if_ipackets++;
4129				em_receive_checksum(cur, rxr->fmp);
4130#ifndef __NO_STRICT_ALIGNMENT
4131				if (adapter->max_frame_size >
4132				    (MCLBYTES - ETHER_ALIGN) &&
4133				    em_fixup_rx(rxr) != 0)
4134					goto skip;
4135#endif
4136				if (status & E1000_RXD_STAT_VP) {
4137					rxr->fmp->m_pkthdr.ether_vtag =
4138					    (le16toh(cur->special) &
4139					    E1000_RXD_SPC_VLAN_MASK);
4140					rxr->fmp->m_flags |= M_VLANTAG;
4141				}
4142#ifdef EM_MULTIQUEUE
4143				rxr->fmp->m_pkthdr.flowid = curcpu;
4144				rxr->fmp->m_flags |= M_FLOWID;
4145#endif
4146#ifndef __NO_STRICT_ALIGNMENT
4147skip:
4148#endif
4149				sendmp = rxr->fmp;
4150				rxr->fmp = NULL;
4151				rxr->lmp = NULL;
4152			}
4153		} else {
4154			ifp->if_ierrors++;
4155			/* Reuse loaded DMA map and just update mbuf chain */
4156			mp = rxr->rx_buffers[i].m_head;
4157			mp->m_len = mp->m_pkthdr.len = MCLBYTES;
4158			mp->m_data = mp->m_ext.ext_buf;
4159			mp->m_next = NULL;
4160			if (adapter->max_frame_size <=
4161			    (MCLBYTES - ETHER_ALIGN))
4162				m_adj(mp, ETHER_ALIGN);
4163			if (rxr->fmp != NULL) {
4164				m_freem(rxr->fmp);
4165				rxr->fmp = NULL;
4166				rxr->lmp = NULL;
4167			}
4168			sendmp = NULL;
4169		}
4170
		/* Zero out the receive descriptor's status. */
4172		cur->status = 0;
4173		++rxdone;	/* cumulative for POLL */
4174		++processed;
4175
4176		/* Advance our pointers to the next descriptor. */
4177		if (++i == adapter->num_rx_desc)
4178			i = 0;
4179
4180		/* Send to the stack */
4181		if (sendmp != NULL)
4182			(*ifp->if_input)(ifp, sendmp);
4183
4184		/* Only refresh mbufs every 8 descriptors */
4185		if (processed == 8) {
4186			em_refresh_mbufs(rxr, i);
4187			processed = 0;
4188		}
4189	}
4190
4191	/* Catch any remaining refresh work */
4192	if (processed != 0) {
4193		em_refresh_mbufs(rxr, i);
4194		processed = 0;
4195	}
4196
4197	rxr->next_to_check = i;
4198	EM_RX_UNLOCK(rxr);
4199
4200#ifdef DEVICE_POLLING
4201	return (rxdone);
4202#else
4203	return ((status & E1000_RXD_STAT_DD) ? TRUE : FALSE);
4204#endif
4205}
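
/*
 * Aside on the fragment chaining in em_rxeof above (illustration
 * only): a packet spanning several descriptors is accumulated by
 * linking each fragment mbuf onto the tail and growing the packet
 * header length, exactly as a generic append would:
 *
 *	static void
 *	chain_append(struct mbuf **head, struct mbuf **tail,
 *	    struct mbuf *m)
 *	{
 *		if (*head == NULL) {
 *			(*head) = *tail = m;
 *			(*head)->m_pkthdr.len = m->m_len;
 *		} else {
 *			m->m_flags &= ~M_PKTHDR;
 *			(*tail)->m_next = m;
 *			*tail = m;
 *			(*head)->m_pkthdr.len += m->m_len;
 *		}
 *	}
 *
 * Only the mbuf carrying the packet header keeps M_PKTHDR set.
 */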
4206
4207#ifndef __NO_STRICT_ALIGNMENT
4208/*
 * When jumbo frames are enabled we should realign the entire payload on
 * architectures with strict alignment. This is a serious design mistake
 * of the 8254x, as it nullifies DMA operations: the 8254x only allows RX
 * buffer sizes of 2048/4096/8192/16384, while what we really want is
 * 2048 - ETHER_ALIGN to align the payload. On architectures without
 * strict alignment restrictions the 8254x still performs unaligned memory
 * accesses, which reduces performance as well.
 * To avoid copying an entire frame just to align it, we allocate a new
 * mbuf and copy the ethernet header into it. The new mbuf is then
 * prepended to the existing mbuf chain.
 *
 * Be aware, the best performance of the 8254x is achieved only when
 * jumbo frames are not used at all on architectures with strict
 * alignment.
4221 */
4222static int
4223em_fixup_rx(struct rx_ring *rxr)
4224{
4225	struct adapter *adapter = rxr->adapter;
4226	struct mbuf *m, *n;
4227	int error;
4228
4229	error = 0;
4230	m = rxr->fmp;
4231	if (m->m_len <= (MCLBYTES - ETHER_HDR_LEN)) {
4232		bcopy(m->m_data, m->m_data + ETHER_HDR_LEN, m->m_len);
4233		m->m_data += ETHER_HDR_LEN;
4234	} else {
4235		MGETHDR(n, M_DONTWAIT, MT_DATA);
4236		if (n != NULL) {
4237			bcopy(m->m_data, n->m_data, ETHER_HDR_LEN);
4238			m->m_data += ETHER_HDR_LEN;
4239			m->m_len -= ETHER_HDR_LEN;
4240			n->m_len = ETHER_HDR_LEN;
4241			M_MOVE_PKTHDR(n, m);
4242			n->m_next = m;
4243			rxr->fmp = n;
4244		} else {
4245			adapter->dropped_pkts++;
4246			m_freem(rxr->fmp);
4247			rxr->fmp = NULL;
4248			error = ENOMEM;
4249		}
4250	}
4251
4252	return (error);
4253}
4254#endif
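
/*
 * Worked example for the alignment fixup above: an Ethernet header is
 * 14 bytes, so if a frame starts on a 4-byte boundary the IP header
 * lands on a 2-byte one.  Receiving into (buffer + ETHER_ALIGN), i.e.
 * buffer + 2, restores 4-byte alignment of the IP header, which is why
 * 2048 - ETHER_ALIGN would have been the ideal RX buffer size; the
 * 8254x only supports 2048, hence em_fixup_rx().
 */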
4255
4256/*********************************************************************
4257 *
4258 *  Verify that the hardware indicated that the checksum is valid.
4259 *  Inform the stack about the status of checksum so that stack
4260 *  doesn't spend time verifying the checksum.
4261 *
4262 *********************************************************************/
4263static void
4264em_receive_checksum(struct e1000_rx_desc *rx_desc, struct mbuf *mp)
4265{
4266	/* Ignore Checksum bit is set */
4267	if (rx_desc->status & E1000_RXD_STAT_IXSM) {
4268		mp->m_pkthdr.csum_flags = 0;
4269		return;
4270	}
4271
4272	if (rx_desc->status & E1000_RXD_STAT_IPCS) {
4273		/* Did it pass? */
4274		if (!(rx_desc->errors & E1000_RXD_ERR_IPE)) {
4275			/* IP Checksum Good */
4276			mp->m_pkthdr.csum_flags = CSUM_IP_CHECKED;
4277			mp->m_pkthdr.csum_flags |= CSUM_IP_VALID;
4278
4279		} else {
4280			mp->m_pkthdr.csum_flags = 0;
4281		}
4282	}
4283
4284	if (rx_desc->status & E1000_RXD_STAT_TCPCS) {
4285		/* Did it pass? */
4286		if (!(rx_desc->errors & E1000_RXD_ERR_TCPE)) {
4287			mp->m_pkthdr.csum_flags |=
4288			(CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
4289			mp->m_pkthdr.csum_data = htons(0xffff);
4290		}
4291	}
4292}
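
/*
 * Usage note (illustration only): setting csum_data to 0xffff together
 * with CSUM_DATA_VALID | CSUM_PSEUDO_HDR is the mbuf convention for
 * "full checksum, including the pseudo header, verified in hardware".
 * A consumer in the stack can then skip its software check along the
 * lines of:
 *
 *	if ((m->m_pkthdr.csum_flags &
 *	    (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) ==
 *	    (CSUM_DATA_VALID | CSUM_PSEUDO_HDR))
 *		sum = m->m_pkthdr.csum_data ^ 0xffff;
 *
 * so a sum of zero means the packet checks out.
 */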
4293
4294/*
 * This routine is run via a vlan
4296 * config EVENT
4297 */
4298static void
4299em_register_vlan(void *arg, struct ifnet *ifp, u16 vtag)
4300{
4301	struct adapter	*adapter = ifp->if_softc;
4302	u32		index, bit;
4303
4304	if (ifp->if_softc !=  arg)   /* Not our event */
4305		return;
4306
	if ((vtag == 0) || (vtag > 4095))	/* Invalid ID */
		return;
4309
4310	index = (vtag >> 5) & 0x7F;
4311	bit = vtag & 0x1F;
4312	em_shadow_vfta[index] |= (1 << bit);
4313	++adapter->num_vlans;
4314	/* Re-init to load the changes */
4315	em_init(adapter);
4316}
4317
4318/*
 * This routine is run via a vlan
4320 * unconfig EVENT
4321 */
4322static void
4323em_unregister_vlan(void *arg, struct ifnet *ifp, u16 vtag)
4324{
4325	struct adapter	*adapter = ifp->if_softc;
4326	u32		index, bit;
4327
4328	if (ifp->if_softc !=  arg)
4329		return;
4330
	if ((vtag == 0) || (vtag > 4095))	/* Invalid */
		return;
4333
4334	index = (vtag >> 5) & 0x7F;
4335	bit = vtag & 0x1F;
4336	em_shadow_vfta[index] &= ~(1 << bit);
4337	--adapter->num_vlans;
4338	/* Re-init to load the changes */
4339	em_init(adapter);
4340}
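
/*
 * Worked example for the index/bit split used by both vlan handlers
 * above: the VFTA is an array of 128 32-bit words covering the 4096
 * possible vlan IDs.  For vtag = 100:
 *
 *	index = (100 >> 5) & 0x7F = 3
 *	bit   =  100 & 0x1F      = 4
 *
 * so vlan 100 is represented by bit 4 of em_shadow_vfta[3].
 */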
4341
4342static void
4343em_setup_vlan_hw_support(struct adapter *adapter)
4344{
4345	struct e1000_hw *hw = &adapter->hw;
4346	u32             reg;
4347
	/*
	** We get here thru init_locked, meaning
	** a soft reset; this has already cleared
	** the VFTA and other state, so if no
	** vlans have been registered, do nothing.
	*/
	if (adapter->num_vlans == 0)
		return;
4356
	/*
	** A soft reset zeroes out the VFTA, so
	** we need to repopulate it now.
	*/
	for (int i = 0; i < EM_VFTA_SIZE; i++)
		if (em_shadow_vfta[i] != 0)
			E1000_WRITE_REG_ARRAY(hw, E1000_VFTA,
			    i, em_shadow_vfta[i]);
4365
4366	reg = E1000_READ_REG(hw, E1000_CTRL);
4367	reg |= E1000_CTRL_VME;
4368	E1000_WRITE_REG(hw, E1000_CTRL, reg);
4369
4370	/* Enable the Filter Table */
4371	reg = E1000_READ_REG(hw, E1000_RCTL);
4372	reg &= ~E1000_RCTL_CFIEN;
4373	reg |= E1000_RCTL_VFE;
4374	E1000_WRITE_REG(hw, E1000_RCTL, reg);
4375
4376	/* Update the frame size */
4377	E1000_WRITE_REG(&adapter->hw, E1000_RLPML,
4378	    adapter->max_frame_size + VLAN_TAG_SIZE);
4379}
4380
4381static void
4382em_enable_intr(struct adapter *adapter)
4383{
4384	struct e1000_hw *hw = &adapter->hw;
4385	u32 ims_mask = IMS_ENABLE_MASK;
4386
4387	if (adapter->msix) {
4388		E1000_WRITE_REG(hw, EM_EIAC, EM_MSIX_MASK);
4389		ims_mask |= EM_MSIX_MASK;
4390	}
4391	E1000_WRITE_REG(hw, E1000_IMS, ims_mask);
4392}
4393
4394static void
4395em_disable_intr(struct adapter *adapter)
4396{
4397	struct e1000_hw *hw = &adapter->hw;
4398
4399	if (adapter->msix)
4400		E1000_WRITE_REG(hw, EM_EIAC, 0);
4401	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
4402}
4403
/*
 * A bit of a misnomer: what this really means is
 * to enable OS management of the system, i.e.
 * to disable the special hardware management features.
 */
4409static void
4410em_init_manageability(struct adapter *adapter)
4411{
4412	/* A shared code workaround */
4413#define E1000_82542_MANC2H E1000_MANC2H
4414	if (adapter->has_manage) {
4415		int manc2h = E1000_READ_REG(&adapter->hw, E1000_MANC2H);
4416		int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
4417
4418		/* disable hardware interception of ARP */
4419		manc &= ~(E1000_MANC_ARP_EN);
4420
4421                /* enable receiving management packets to the host */
4422		manc |= E1000_MANC_EN_MNG2HOST;
4423#define E1000_MNG2HOST_PORT_623 (1 << 5)
4424#define E1000_MNG2HOST_PORT_664 (1 << 6)
4425		manc2h |= E1000_MNG2HOST_PORT_623;
4426		manc2h |= E1000_MNG2HOST_PORT_664;
4427		E1000_WRITE_REG(&adapter->hw, E1000_MANC2H, manc2h);
4428		E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
4429	}
4430}
4431
4432/*
4433 * Give control back to hardware management
4434 * controller if there is one.
4435 */
4436static void
4437em_release_manageability(struct adapter *adapter)
4438{
4439	if (adapter->has_manage) {
4440		int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
4441
4442		/* re-enable hardware interception of ARP */
4443		manc |= E1000_MANC_ARP_EN;
4444		manc &= ~E1000_MANC_EN_MNG2HOST;
4445
4446		E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
4447	}
4448}
4449
4450/*
4451 * em_get_hw_control sets the {CTRL_EXT|FWSM}:DRV_LOAD bit.
4452 * For ASF and Pass Through versions of f/w this means
 * that the driver is loaded. For AMT type f/w
4454 * this means that the network i/f is open.
4455 */
4456static void
4457em_get_hw_control(struct adapter *adapter)
4458{
4459	u32 ctrl_ext, swsm;
4460
4461	if (adapter->hw.mac.type == e1000_82573) {
4462		swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM);
4463		E1000_WRITE_REG(&adapter->hw, E1000_SWSM,
4464		    swsm | E1000_SWSM_DRV_LOAD);
4465		return;
4466	}
4467	/* else */
4468	ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4469	E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
4470	    ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
4471	return;
4472}
4473
4474/*
4475 * em_release_hw_control resets {CTRL_EXT|FWSM}:DRV_LOAD bit.
4476 * For ASF and Pass Through versions of f/w this means that
4477 * the driver is no longer loaded. For AMT versions of the
4478 * f/w this means that the network i/f is closed.
4479 */
4480static void
4481em_release_hw_control(struct adapter *adapter)
4482{
4483	u32 ctrl_ext, swsm;
4484
4485	if (!adapter->has_manage)
4486		return;
4487
4488	if (adapter->hw.mac.type == e1000_82573) {
4489		swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM);
4490		E1000_WRITE_REG(&adapter->hw, E1000_SWSM,
4491		    swsm & ~E1000_SWSM_DRV_LOAD);
4492		return;
4493	}
4494	/* else */
4495	ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4496	E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
4497	    ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
4498	return;
4499}
4500
4501static int
4502em_is_valid_ether_addr(u8 *addr)
4503{
4504	char zero_addr[6] = { 0, 0, 0, 0, 0, 0 };
4505
4506	if ((addr[0] & 1) || (!bcmp(addr, zero_addr, ETHER_ADDR_LEN))) {
4507		return (FALSE);
4508	}
4509
4510	return (TRUE);
4511}
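
/*
 * Aside: the (addr[0] & 1) test above checks the I/G bit of the first
 * octet, which is set for group (multicast/broadcast) addresses.  For
 * example 01:00:5e:00:00:01 is rejected as multicast, while
 * 00:1b:21:xx:xx:xx passes; the all-zero address is rejected by the
 * bcmp() against zero_addr.
 */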
4512
4513/*
4514** Parse the interface capabilities with regard
4515** to both system management and wake-on-lan for
4516** later use.
4517*/
4518static void
4519em_get_wakeup(device_t dev)
4520{
4521	struct adapter	*adapter = device_get_softc(dev);
4522	u16		eeprom_data = 0, device_id, apme_mask;
4523
4524	adapter->has_manage = e1000_enable_mng_pass_thru(&adapter->hw);
4525	apme_mask = EM_EEPROM_APME;
4526
4527	switch (adapter->hw.mac.type) {
4528	case e1000_82573:
4529	case e1000_82583:
4530		adapter->has_amt = TRUE;
4531		/* Falls thru */
4532	case e1000_82571:
4533	case e1000_82572:
4534	case e1000_80003es2lan:
4535		if (adapter->hw.bus.func == 1) {
4536			e1000_read_nvm(&adapter->hw,
4537			    NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data);
4538			break;
4539		} else
4540			e1000_read_nvm(&adapter->hw,
4541			    NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
4542		break;
4543	case e1000_ich8lan:
4544	case e1000_ich9lan:
4545	case e1000_ich10lan:
4546	case e1000_pchlan:
4547		apme_mask = E1000_WUC_APME;
4548		adapter->has_amt = TRUE;
4549		eeprom_data = E1000_READ_REG(&adapter->hw, E1000_WUC);
4550		break;
4551	default:
4552		e1000_read_nvm(&adapter->hw,
4553		    NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
4554		break;
4555	}
4556	if (eeprom_data & apme_mask)
4557		adapter->wol = (E1000_WUFC_MAG | E1000_WUFC_MC);
	/*
	 * We have the eeprom settings; now apply the special cases
	 * where the eeprom may be wrong or the board won't support
	 * wake on lan on a particular port.
	 */
	device_id = pci_get_device(dev);
	switch (device_id) {
	case E1000_DEV_ID_82571EB_FIBER:
		/* Wake events only supported on port A for dual fiber
		 * regardless of eeprom setting */
		if (E1000_READ_REG(&adapter->hw, E1000_STATUS) &
		    E1000_STATUS_FUNC_1)
			adapter->wol = 0;
		break;
	case E1000_DEV_ID_82571EB_QUAD_COPPER:
	case E1000_DEV_ID_82571EB_QUAD_FIBER:
	case E1000_DEV_ID_82571EB_QUAD_COPPER_LP:
		/* if quad port adapter, disable WoL on all but port A */
		if (global_quad_port_a != 0)
			adapter->wol = 0;
		/* Reset for multiple quad port adapters */
		if (++global_quad_port_a == 4)
			global_quad_port_a = 0;
		break;
	}
4583	return;
4584}
4585
4586
4587/*
4588 * Enable PCI Wake On Lan capability
4589 */
4590static void
4591em_enable_wakeup(device_t dev)
4592{
4593	struct adapter	*adapter = device_get_softc(dev);
4594	struct ifnet	*ifp = adapter->ifp;
4595	u32		pmc, ctrl, ctrl_ext, rctl;
4596	u16     	status;
4597
	if (pci_find_extcap(dev, PCIY_PMG, &pmc) != 0)
4599		return;
4600
4601	/* Advertise the wakeup capability */
4602	ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
4603	ctrl |= (E1000_CTRL_SWDPIN2 | E1000_CTRL_SWDPIN3);
4604	E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
4605	E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
4606
4607	if ((adapter->hw.mac.type == e1000_ich8lan) ||
4608	    (adapter->hw.mac.type == e1000_pchlan) ||
4609	    (adapter->hw.mac.type == e1000_ich9lan) ||
4610	    (adapter->hw.mac.type == e1000_ich10lan)) {
4611		e1000_disable_gig_wol_ich8lan(&adapter->hw);
4612		e1000_hv_phy_powerdown_workaround_ich8lan(&adapter->hw);
4613	}
4614
4615	/* Keep the laser running on Fiber adapters */
4616	if (adapter->hw.phy.media_type == e1000_media_type_fiber ||
4617	    adapter->hw.phy.media_type == e1000_media_type_internal_serdes) {
4618		ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4619		ctrl_ext |= E1000_CTRL_EXT_SDP3_DATA;
4620		E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, ctrl_ext);
4621	}
4622
4623	/*
4624	** Determine type of Wakeup: note that wol
4625	** is set with all bits on by default.
4626	*/
4627	if ((ifp->if_capenable & IFCAP_WOL_MAGIC) == 0)
4628		adapter->wol &= ~E1000_WUFC_MAG;
4629
4630	if ((ifp->if_capenable & IFCAP_WOL_MCAST) == 0)
4631		adapter->wol &= ~E1000_WUFC_MC;
4632	else {
4633		rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
4634		rctl |= E1000_RCTL_MPE;
4635		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, rctl);
4636	}
4637
4638	if (adapter->hw.mac.type == e1000_pchlan) {
4639		if (em_enable_phy_wakeup(adapter))
4640			return;
4641	} else {
4642		E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
4643		E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
4644	}
4645
4646	if (adapter->hw.phy.type == e1000_phy_igp_3)
4647		e1000_igp3_phy_powerdown_workaround_ich8lan(&adapter->hw);
4648
	/* Request PME */
	status = pci_read_config(dev, pmc + PCIR_POWER_STATUS, 2);
	status &= ~(PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE);
	if (ifp->if_capenable & IFCAP_WOL)
		status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE;
	pci_write_config(dev, pmc + PCIR_POWER_STATUS, status, 2);
4655
4656	return;
4657}
4658
4659/*
4660** WOL in the newer chipset interfaces (pchlan)
** requires things to be copied into the phy
4662*/
4663static int
4664em_enable_phy_wakeup(struct adapter *adapter)
4665{
4666	struct e1000_hw *hw = &adapter->hw;
4667	u32 mreg, ret = 0;
4668	u16 preg;
4669
4670	/* copy MAC RARs to PHY RARs */
4671	for (int i = 0; i < adapter->hw.mac.rar_entry_count; i++) {
4672		mreg = E1000_READ_REG(hw, E1000_RAL(i));
4673		e1000_write_phy_reg(hw, BM_RAR_L(i), (u16)(mreg & 0xFFFF));
4674		e1000_write_phy_reg(hw, BM_RAR_M(i),
4675		    (u16)((mreg >> 16) & 0xFFFF));
4676		mreg = E1000_READ_REG(hw, E1000_RAH(i));
4677		e1000_write_phy_reg(hw, BM_RAR_H(i), (u16)(mreg & 0xFFFF));
4678		e1000_write_phy_reg(hw, BM_RAR_CTRL(i),
4679		    (u16)((mreg >> 16) & 0xFFFF));
4680	}
4681
4682	/* copy MAC MTA to PHY MTA */
4683	for (int i = 0; i < adapter->hw.mac.mta_reg_count; i++) {
4684		mreg = E1000_READ_REG_ARRAY(hw, E1000_MTA, i);
4685		e1000_write_phy_reg(hw, BM_MTA(i), (u16)(mreg & 0xFFFF));
4686		e1000_write_phy_reg(hw, BM_MTA(i) + 1,
4687		    (u16)((mreg >> 16) & 0xFFFF));
4688	}
4689
4690	/* configure PHY Rx Control register */
4691	e1000_read_phy_reg(&adapter->hw, BM_RCTL, &preg);
4692	mreg = E1000_READ_REG(hw, E1000_RCTL);
4693	if (mreg & E1000_RCTL_UPE)
4694		preg |= BM_RCTL_UPE;
4695	if (mreg & E1000_RCTL_MPE)
4696		preg |= BM_RCTL_MPE;
4697	preg &= ~(BM_RCTL_MO_MASK);
4698	if (mreg & E1000_RCTL_MO_3)
4699		preg |= (((mreg & E1000_RCTL_MO_3) >> E1000_RCTL_MO_SHIFT)
4700				<< BM_RCTL_MO_SHIFT);
4701	if (mreg & E1000_RCTL_BAM)
4702		preg |= BM_RCTL_BAM;
4703	if (mreg & E1000_RCTL_PMCF)
4704		preg |= BM_RCTL_PMCF;
4705	mreg = E1000_READ_REG(hw, E1000_CTRL);
4706	if (mreg & E1000_CTRL_RFCE)
4707		preg |= BM_RCTL_RFCE;
4708	e1000_write_phy_reg(&adapter->hw, BM_RCTL, preg);
4709
4710	/* enable PHY wakeup in MAC register */
4711	E1000_WRITE_REG(hw, E1000_WUC,
4712	    E1000_WUC_PHY_WAKE | E1000_WUC_PME_EN);
4713	E1000_WRITE_REG(hw, E1000_WUFC, adapter->wol);
4714
4715	/* configure and enable PHY wakeup in PHY registers */
4716	e1000_write_phy_reg(&adapter->hw, BM_WUFC, adapter->wol);
4717	e1000_write_phy_reg(&adapter->hw, BM_WUC, E1000_WUC_PME_EN);
4718
4719	/* activate PHY wakeup */
4720	ret = hw->phy.ops.acquire(hw);
4721	if (ret) {
4722		printf("Could not acquire PHY\n");
4723		return ret;
4724	}
4725	e1000_write_phy_reg_mdic(hw, IGP01E1000_PHY_PAGE_SELECT,
4726	                         (BM_WUC_ENABLE_PAGE << IGP_PAGE_SHIFT));
4727	ret = e1000_read_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, &preg);
4728	if (ret) {
4729		printf("Could not read PHY page 769\n");
4730		goto out;
4731	}
4732	preg |= BM_WUC_ENABLE_BIT | BM_WUC_HOST_WU_BIT;
4733	ret = e1000_write_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, preg);
4734	if (ret)
4735		printf("Could not set PHY Host Wakeup bit\n");
4736out:
4737	hw->phy.ops.release(hw);
4738
4739	return ret;
4740}
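
/*
 * Aside on the RAR/MTA copying above (illustration only): the MAC
 * registers are 32 bits wide while the BM_* PHY registers take 16, so
 * each MAC register is written out as two halves:
 *
 *	uint16_t lo = (uint16_t)(mreg & 0xFFFF);
 *	uint16_t hi = (uint16_t)((mreg >> 16) & 0xFFFF);
 *
 * which is exactly the (u16) masking/shifting pattern used in the
 * loops above.
 */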
4741
4742static void
4743em_led_func(void *arg, int onoff)
4744{
4745	struct adapter	*adapter = arg;
4746
4747	EM_CORE_LOCK(adapter);
4748	if (onoff) {
4749		e1000_setup_led(&adapter->hw);
4750		e1000_led_on(&adapter->hw);
4751	} else {
4752		e1000_led_off(&adapter->hw);
4753		e1000_cleanup_led(&adapter->hw);
4754	}
4755	EM_CORE_UNLOCK(adapter);
4756}
4757
4758/**********************************************************************
4759 *
4760 *  Update the board statistics counters.
4761 *
4762 **********************************************************************/
4763static void
4764em_update_stats_counters(struct adapter *adapter)
4765{
4766	struct ifnet   *ifp;
4767
	if (adapter->hw.phy.media_type == e1000_media_type_copper ||
4769	   (E1000_READ_REG(&adapter->hw, E1000_STATUS) & E1000_STATUS_LU)) {
4770		adapter->stats.symerrs += E1000_READ_REG(&adapter->hw, E1000_SYMERRS);
4771		adapter->stats.sec += E1000_READ_REG(&adapter->hw, E1000_SEC);
4772	}
4773	adapter->stats.crcerrs += E1000_READ_REG(&adapter->hw, E1000_CRCERRS);
4774	adapter->stats.mpc += E1000_READ_REG(&adapter->hw, E1000_MPC);
4775	adapter->stats.scc += E1000_READ_REG(&adapter->hw, E1000_SCC);
4776	adapter->stats.ecol += E1000_READ_REG(&adapter->hw, E1000_ECOL);
4777
4778	adapter->stats.mcc += E1000_READ_REG(&adapter->hw, E1000_MCC);
4779	adapter->stats.latecol += E1000_READ_REG(&adapter->hw, E1000_LATECOL);
4780	adapter->stats.colc += E1000_READ_REG(&adapter->hw, E1000_COLC);
4781	adapter->stats.dc += E1000_READ_REG(&adapter->hw, E1000_DC);
4782	adapter->stats.rlec += E1000_READ_REG(&adapter->hw, E1000_RLEC);
4783	adapter->stats.xonrxc += E1000_READ_REG(&adapter->hw, E1000_XONRXC);
4784	adapter->stats.xontxc += E1000_READ_REG(&adapter->hw, E1000_XONTXC);
4785	adapter->stats.xoffrxc += E1000_READ_REG(&adapter->hw, E1000_XOFFRXC);
4786	adapter->stats.xofftxc += E1000_READ_REG(&adapter->hw, E1000_XOFFTXC);
4787	adapter->stats.fcruc += E1000_READ_REG(&adapter->hw, E1000_FCRUC);
4788	adapter->stats.prc64 += E1000_READ_REG(&adapter->hw, E1000_PRC64);
4789	adapter->stats.prc127 += E1000_READ_REG(&adapter->hw, E1000_PRC127);
4790	adapter->stats.prc255 += E1000_READ_REG(&adapter->hw, E1000_PRC255);
4791	adapter->stats.prc511 += E1000_READ_REG(&adapter->hw, E1000_PRC511);
4792	adapter->stats.prc1023 += E1000_READ_REG(&adapter->hw, E1000_PRC1023);
4793	adapter->stats.prc1522 += E1000_READ_REG(&adapter->hw, E1000_PRC1522);
4794	adapter->stats.gprc += E1000_READ_REG(&adapter->hw, E1000_GPRC);
4795	adapter->stats.bprc += E1000_READ_REG(&adapter->hw, E1000_BPRC);
4796	adapter->stats.mprc += E1000_READ_REG(&adapter->hw, E1000_MPRC);
4797	adapter->stats.gptc += E1000_READ_REG(&adapter->hw, E1000_GPTC);
4798
4799	/* For the 64-bit byte counters the low dword must be read first. */
4800	/* Both registers clear on the read of the high dword */
4801
4802	adapter->stats.gorc += E1000_READ_REG(&adapter->hw, E1000_GORCH);
4803	adapter->stats.gotc += E1000_READ_REG(&adapter->hw, E1000_GOTCH);
4804
4805	adapter->stats.rnbc += E1000_READ_REG(&adapter->hw, E1000_RNBC);
4806	adapter->stats.ruc += E1000_READ_REG(&adapter->hw, E1000_RUC);
4807	adapter->stats.rfc += E1000_READ_REG(&adapter->hw, E1000_RFC);
4808	adapter->stats.roc += E1000_READ_REG(&adapter->hw, E1000_ROC);
4809	adapter->stats.rjc += E1000_READ_REG(&adapter->hw, E1000_RJC);
4810
4811	adapter->stats.tor += E1000_READ_REG(&adapter->hw, E1000_TORH);
4812	adapter->stats.tot += E1000_READ_REG(&adapter->hw, E1000_TOTH);
4813
4814	adapter->stats.tpr += E1000_READ_REG(&adapter->hw, E1000_TPR);
4815	adapter->stats.tpt += E1000_READ_REG(&adapter->hw, E1000_TPT);
4816	adapter->stats.ptc64 += E1000_READ_REG(&adapter->hw, E1000_PTC64);
4817	adapter->stats.ptc127 += E1000_READ_REG(&adapter->hw, E1000_PTC127);
4818	adapter->stats.ptc255 += E1000_READ_REG(&adapter->hw, E1000_PTC255);
4819	adapter->stats.ptc511 += E1000_READ_REG(&adapter->hw, E1000_PTC511);
4820	adapter->stats.ptc1023 += E1000_READ_REG(&adapter->hw, E1000_PTC1023);
4821	adapter->stats.ptc1522 += E1000_READ_REG(&adapter->hw, E1000_PTC1522);
4822	adapter->stats.mptc += E1000_READ_REG(&adapter->hw, E1000_MPTC);
4823	adapter->stats.bptc += E1000_READ_REG(&adapter->hw, E1000_BPTC);
4824
	if (adapter->hw.mac.type >= e1000_82543) {
		adapter->stats.algnerrc +=
		    E1000_READ_REG(&adapter->hw, E1000_ALGNERRC);
		adapter->stats.rxerrc +=
		    E1000_READ_REG(&adapter->hw, E1000_RXERRC);
		adapter->stats.tncrs +=
		    E1000_READ_REG(&adapter->hw, E1000_TNCRS);
		adapter->stats.cexterr +=
		    E1000_READ_REG(&adapter->hw, E1000_CEXTERR);
		adapter->stats.tsctc +=
		    E1000_READ_REG(&adapter->hw, E1000_TSCTC);
		adapter->stats.tsctfc +=
		    E1000_READ_REG(&adapter->hw, E1000_TSCTFC);
	}
4839	ifp = adapter->ifp;
4840
4841	ifp->if_collisions = adapter->stats.colc;
4842
4843	/* Rx Errors */
4844	ifp->if_ierrors = adapter->dropped_pkts + adapter->stats.rxerrc +
4845	    adapter->stats.crcerrs + adapter->stats.algnerrc +
4846	    adapter->stats.ruc + adapter->stats.roc +
4847	    adapter->stats.mpc + adapter->stats.cexterr;
4848
4849	/* Tx Errors */
4850	ifp->if_oerrors = adapter->stats.ecol +
4851	    adapter->stats.latecol + adapter->watchdog_events;
4852}
4853
4854
4855/**********************************************************************
4856 *
4857 *  This routine is called only when em_display_debug_stats is enabled.
 *  It provides a way to take a look at important statistics
4859 *  maintained by the driver and hardware.
4860 *
4861 **********************************************************************/
4862static void
4863em_print_debug_info(struct adapter *adapter)
4864{
4865	device_t dev = adapter->dev;
4866	u8 *hw_addr = adapter->hw.hw_addr;
4867	struct rx_ring *rxr = adapter->rx_rings;
4868	struct tx_ring *txr = adapter->tx_rings;
4869
	device_printf(dev, "Adapter hardware address = %p\n", hw_addr);
	device_printf(dev, "CTRL = 0x%x RCTL = 0x%x\n",
	    E1000_READ_REG(&adapter->hw, E1000_CTRL),
	    E1000_READ_REG(&adapter->hw, E1000_RCTL));
	device_printf(dev, "Packet buffer = Tx=%dk Rx=%dk\n",
	    ((E1000_READ_REG(&adapter->hw, E1000_PBA) & 0xffff0000) >> 16),
	    (E1000_READ_REG(&adapter->hw, E1000_PBA) & 0xffff));
4877	device_printf(dev, "Flow control watermarks high = %d low = %d\n",
4878	    adapter->hw.fc.high_water,
4879	    adapter->hw.fc.low_water);
4880	device_printf(dev, "tx_int_delay = %d, tx_abs_int_delay = %d\n",
4881	    E1000_READ_REG(&adapter->hw, E1000_TIDV),
4882	    E1000_READ_REG(&adapter->hw, E1000_TADV));
4883	device_printf(dev, "rx_int_delay = %d, rx_abs_int_delay = %d\n",
4884	    E1000_READ_REG(&adapter->hw, E1000_RDTR),
4885	    E1000_READ_REG(&adapter->hw, E1000_RADV));
4886
4887	for (int i = 0; i < adapter->num_queues; i++, txr++) {
4888		device_printf(dev, "Queue(%d) tdh = %d, tdt = %d\n", i,
4889		    E1000_READ_REG(&adapter->hw, E1000_TDH(i)),
4890		    E1000_READ_REG(&adapter->hw, E1000_TDT(i)));
4891		device_printf(dev, "TX(%d) no descriptors avail event = %ld\n",
4892		    txr->me, txr->no_desc_avail);
4893		device_printf(dev, "TX(%d) MSIX IRQ Handled = %ld\n",
4894		    txr->me, txr->tx_irq);
4895		device_printf(dev, "Num Tx descriptors avail = %d\n",
4896		    txr->tx_avail);
4897		device_printf(dev, "Tx Descriptors not avail1 = %ld\n",
4898		    txr->no_desc_avail);
4899	}
4900	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4901		device_printf(dev, "RX(%d) MSIX IRQ Handled = %ld\n",
4902		    rxr->me, rxr->rx_irq);
4903		device_printf(dev, "hw rdh = %d, hw rdt = %d\n",
4904		    E1000_READ_REG(&adapter->hw, E1000_RDH(i)),
4905		    E1000_READ_REG(&adapter->hw, E1000_RDT(i)));
4906	}
4907	device_printf(dev, "Std mbuf failed = %ld\n",
4908	    adapter->mbuf_alloc_failed);
4909	device_printf(dev, "Std mbuf cluster failed = %ld\n",
4910	    adapter->mbuf_cluster_failed);
4911	device_printf(dev, "Driver dropped packets = %ld\n",
4912	    adapter->dropped_pkts);
4913}
4914
4915static void
4916em_print_hw_stats(struct adapter *adapter)
4917{
4918	device_t dev = adapter->dev;
4919
4920	device_printf(dev, "Excessive collisions = %lld\n",
4921	    (long long)adapter->stats.ecol);
#if	(DEBUG_HW > 0)  /* Don't output these errors normally */
4923	device_printf(dev, "Symbol errors = %lld\n",
4924	    (long long)adapter->stats.symerrs);
4925#endif
4926	device_printf(dev, "Sequence errors = %lld\n",
4927	    (long long)adapter->stats.sec);
4928	device_printf(dev, "Defer count = %lld\n",
4929	    (long long)adapter->stats.dc);
4930	device_printf(dev, "Missed Packets = %lld\n",
4931	    (long long)adapter->stats.mpc);
4932	device_printf(dev, "Receive No Buffers = %lld\n",
4933	    (long long)adapter->stats.rnbc);
	/* RLEC is inaccurate on some hardware, so we calculate our own. */
4935	device_printf(dev, "Receive Length Errors = %lld\n",
4936	    ((long long)adapter->stats.roc + (long long)adapter->stats.ruc));
4937	device_printf(dev, "Receive errors = %lld\n",
4938	    (long long)adapter->stats.rxerrc);
4939	device_printf(dev, "Crc errors = %lld\n",
4940	    (long long)adapter->stats.crcerrs);
4941	device_printf(dev, "Alignment errors = %lld\n",
4942	    (long long)adapter->stats.algnerrc);
4943	device_printf(dev, "Collision/Carrier extension errors = %lld\n",
4944	    (long long)adapter->stats.cexterr);
4945	device_printf(dev, "watchdog timeouts = %ld\n",
4946	    adapter->watchdog_events);
4947	device_printf(dev, "XON Rcvd = %lld\n",
4948	    (long long)adapter->stats.xonrxc);
4949	device_printf(dev, "XON Xmtd = %lld\n",
4950	    (long long)adapter->stats.xontxc);
4951	device_printf(dev, "XOFF Rcvd = %lld\n",
4952	    (long long)adapter->stats.xoffrxc);
4953	device_printf(dev, "XOFF Xmtd = %lld\n",
4954	    (long long)adapter->stats.xofftxc);
4955	device_printf(dev, "Good Packets Rcvd = %lld\n",
4956	    (long long)adapter->stats.gprc);
4957	device_printf(dev, "Good Packets Xmtd = %lld\n",
4958	    (long long)adapter->stats.gptc);
4959	device_printf(dev, "TSO Contexts Xmtd = %lld\n",
4960	    (long long)adapter->stats.tsctc);
4961	device_printf(dev, "TSO Contexts Failed = %lld\n",
4962	    (long long)adapter->stats.tsctfc);
4963}
4964
4965/**********************************************************************
4966 *
 *  This routine provides a way to dump out the adapter eeprom,
 *  often a useful debug/service tool. This only dumps the first
 *  32 words; the stuff that matters is within that extent.
4970 *
4971 **********************************************************************/
4972static void
4973em_print_nvm_info(struct adapter *adapter)
4974{
4975	u16	eeprom_data;
4976	int	i, j, row = 0;
4977
	/* It's a bit crude, but it gets the job done */
4979	printf("\nInterface EEPROM Dump:\n");
4980	printf("Offset\n0x0000  ");
4981	for (i = 0, j = 0; i < 32; i++, j++) {
		if (j == 8) { /* Make the offset block */
			j = 0;
			++row;
			printf("\n0x00%x0  ", row);
		}
4986		e1000_read_nvm(&adapter->hw, i, 1, &eeprom_data);
4987		printf("%04x ", eeprom_data);
4988	}
4989	printf("\n");
4990}
4991
4992static int
4993em_sysctl_debug_info(SYSCTL_HANDLER_ARGS)
4994{
4995	struct adapter *adapter;
4996	int error;
4997	int result;
4998
4999	result = -1;
5000	error = sysctl_handle_int(oidp, &result, 0, req);
5001
5002	if (error || !req->newptr)
5003		return (error);
5004
5005	if (result == 1) {
5006		adapter = (struct adapter *)arg1;
5007		em_print_debug_info(adapter);
5008	}
5009	/*
5010	 * This value will cause a hex dump of the
5011	 * first 32 16-bit words of the EEPROM to
5012	 * the screen.
5013	 */
5014	if (result == 2) {
5015		adapter = (struct adapter *)arg1;
5016		em_print_nvm_info(adapter);
	}
5018
5019	return (error);
5020}
5021
5022
5023static int
5024em_sysctl_stats(SYSCTL_HANDLER_ARGS)
5025{
5026	struct adapter *adapter;
5027	int error;
5028	int result;
5029
5030	result = -1;
5031	error = sysctl_handle_int(oidp, &result, 0, req);
5032
5033	if (error || !req->newptr)
5034		return (error);
5035
5036	if (result == 1) {
5037		adapter = (struct adapter *)arg1;
5038		em_print_hw_stats(adapter);
5039	}
5040
5041	return (error);
5042}
5043
5044static int
5045em_sysctl_int_delay(SYSCTL_HANDLER_ARGS)
5046{
5047	struct em_int_delay_info *info;
5048	struct adapter *adapter;
5049	u32 regval;
5050	int error, usecs, ticks;
5051
5052	info = (struct em_int_delay_info *)arg1;
5053	usecs = info->value;
5054	error = sysctl_handle_int(oidp, &usecs, 0, req);
5055	if (error != 0 || req->newptr == NULL)
5056		return (error);
5057	if (usecs < 0 || usecs > EM_TICKS_TO_USECS(65535))
5058		return (EINVAL);
5059	info->value = usecs;
5060	ticks = EM_USECS_TO_TICKS(usecs);
5061
5062	adapter = info->adapter;
5063
5064	EM_CORE_LOCK(adapter);
5065	regval = E1000_READ_OFFSET(&adapter->hw, info->offset);
5066	regval = (regval & ~0xffff) | (ticks & 0xffff);
5067	/* Handle a few special cases. */
5068	switch (info->offset) {
5069	case E1000_RDTR:
5070		break;
5071	case E1000_TIDV:
5072		if (ticks == 0) {
5073			adapter->txd_cmd &= ~E1000_TXD_CMD_IDE;
5074			/* Don't write 0 into the TIDV register. */
5075			regval++;
5076		} else
5077			adapter->txd_cmd |= E1000_TXD_CMD_IDE;
5078		break;
5079	}
5080	E1000_WRITE_OFFSET(&adapter->hw, info->offset, regval);
5081	EM_CORE_UNLOCK(adapter);
5082	return (0);
5083}
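
/*
 * Aside (illustration only): the delay registers hold their timer
 * value in the low 16 bits, so the update above preserves the upper
 * bits while replacing only the count:
 *
 *	regval = (regval & ~0xffff) | (ticks & 0xffff);
 *
 * e.g. with regval = 0x00010042 and ticks = 0x0100 the new value is
 * 0x00010100.
 */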
5084
5085static void
5086em_add_int_delay_sysctl(struct adapter *adapter, const char *name,
5087	const char *description, struct em_int_delay_info *info,
5088	int offset, int value)
5089{
5090	info->adapter = adapter;
5091	info->offset = offset;
5092	info->value = value;
5093	SYSCTL_ADD_PROC(device_get_sysctl_ctx(adapter->dev),
5094	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
5095	    OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW,
5096	    info, 0, em_sysctl_int_delay, "I", description);
5097}
5098
5099static void
5100em_add_rx_process_limit(struct adapter *adapter, const char *name,
5101	const char *description, int *limit, int value)
5102{
5103	*limit = value;
	SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
	    OID_AUTO, name, CTLFLAG_RW, limit, value, description);
5107}
5108
5109
5110