/**************************************************************************

Copyright (c) 2001-2007, Intel Corporation
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

 1. Redistributions of source code must retain the above copyright notice,
    this list of conditions and the following disclaimer.

 2. Redistributions in binary form must reproduce the above copyright
    notice, this list of conditions and the following disclaimer in the
    documentation and/or other materials provided with the distribution.

 3. Neither the name of the Intel Corporation nor the names of its
    contributors may be used to endorse or promote products derived from
    this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.

***************************************************************************/

/*$FreeBSD: head/sys/dev/em/if_em.c 171624 2007-07-27 14:48:05Z cognet $*/

#ifdef HAVE_KERNEL_OPTION_HEADERS
#include "opt_device_polling.h"
#endif

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/bus.h>
#include <sys/endian.h>
#include <sys/kernel.h>
#include <sys/kthread.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/module.h>
#include <sys/rman.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/sysctl.h>
#include <sys/taskqueue.h>

#include <machine/bus.h>
#include <machine/resource.h>

#include <net/bpf.h>
#include <net/ethernet.h>
#include <net/if.h>
#include <net/if_arp.h>
#include <net/if_dl.h>
#include <net/if_media.h>

#include <net/if_types.h>
#include <net/if_vlan_var.h>

#include <netinet/in_systm.h>
#include <netinet/in.h>
#include <netinet/if_ether.h>
#include <netinet/ip.h>
#include <netinet/ip6.h>
#include <netinet/tcp.h>
#include <netinet/udp.h>

#include <machine/in_cksum.h>
#include <dev/pci/pcivar.h>
#include <dev/pci/pcireg.h>

#include "e1000_api.h"
#include "e1000_82575.h"
#include "if_em.h"
/*********************************************************************
 *  Set this to one to display debug statistics
 *********************************************************************/
int	em_display_debug_stats = 0;

/*********************************************************************
 *  Driver version:
 *********************************************************************/
char em_driver_version[] = "Version - 6.5.3";

/*********************************************************************
 *  PCI Device ID Table
 *
 *  Used by probe to select the devices to load on.
 *  The last field stores an index into em_strings.
 *  The last entry must be all 0s.
 *
 *  { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
 *********************************************************************/

static em_vendor_info_t em_vendor_info_array[] =
{
	/* Intel(R) PRO/1000 Network Connection */
	{ 0x8086, E1000_DEV_ID_82540EM,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82540EM_LOM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82540EP,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82540EP_LOM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82540EP_LP,	PCI_ANY_ID, PCI_ANY_ID, 0},

	{ 0x8086, E1000_DEV_ID_82541EI,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82541ER,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82541ER_LOM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82541EI_MOBILE,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82541GI,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82541GI_LF,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82541GI_MOBILE,	PCI_ANY_ID, PCI_ANY_ID, 0},

	{ 0x8086, E1000_DEV_ID_82542,		PCI_ANY_ID, PCI_ANY_ID, 0},

	{ 0x8086, E1000_DEV_ID_82543GC_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82543GC_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},

	{ 0x8086, E1000_DEV_ID_82544EI_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82544EI_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82544GC_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82544GC_LOM,	PCI_ANY_ID, PCI_ANY_ID, 0},

	{ 0x8086, E1000_DEV_ID_82545EM_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82545EM_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82545GM_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82545GM_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82545GM_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},

	{ 0x8086, E1000_DEV_ID_82546EB_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82546EB_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82546EB_QUAD_COPPER, PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82546GB_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82546GB_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82546GB_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82546GB_PCIE,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82546GB_QUAD_COPPER, PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82546GB_QUAD_COPPER_KSP3,
						PCI_ANY_ID, PCI_ANY_ID, 0},

	{ 0x8086, E1000_DEV_ID_82547EI,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82547EI_MOBILE,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82547GI,		PCI_ANY_ID, PCI_ANY_ID, 0},

	{ 0x8086, E1000_DEV_ID_82571EB_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER_LP,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_FIBER,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82572EI_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82572EI_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82572EI_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82572EI,		PCI_ANY_ID, PCI_ANY_ID, 0},

	{ 0x8086, E1000_DEV_ID_82573E,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82573E_IAMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82573L,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_SPT,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_SPT,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_DPT,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_DPT,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IGP_M_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IGP_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IGP_C,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IFE,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IFE_GT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IFE_G,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IGP_M,	PCI_ANY_ID, PCI_ANY_ID, 0},

	{ 0x8086, E1000_DEV_ID_ICH9_IGP_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IGP_C,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IFE,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IFE_GT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IFE_G,	PCI_ANY_ID, PCI_ANY_ID, 0},

	{ 0x8086, E1000_DEV_ID_82575EB_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82575EB_FIBER_SERDES,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82575EM_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82575EM_FIBER_SERDES,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82575GB_QUAD_COPPER,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	/* required last entry */
	{ 0, 0, 0, 0, 0}
};

/*********************************************************************
 *  Table of branding strings for all supported NICs.
 *********************************************************************/

static char *em_strings[] = {
	"Intel(R) PRO/1000 Network Connection"
};

/*********************************************************************
 *  Function prototypes
 *********************************************************************/
static int	em_probe(device_t);
static int	em_attach(device_t);
static int	em_detach(device_t);
static int	em_shutdown(device_t);
static int	em_suspend(device_t);
static int	em_resume(device_t);
static void	em_start(struct ifnet *);
static void	em_start_locked(struct ifnet *ifp);
static int	em_ioctl(struct ifnet *, u_long, caddr_t);
static void	em_watchdog(struct adapter *);
static void	em_init(void *);
static void	em_init_locked(struct adapter *);
static void	em_stop(void *);
static void	em_media_status(struct ifnet *, struct ifmediareq *);
static int	em_media_change(struct ifnet *);
static void	em_identify_hardware(struct adapter *);
static int	em_allocate_pci_resources(struct adapter *);
static int	em_allocate_intr(struct adapter *);
static void	em_free_intr(struct adapter *);
static void	em_free_pci_resources(struct adapter *);
static void	em_local_timer(void *);
static int	em_hardware_init(struct adapter *);
static void	em_setup_interface(device_t, struct adapter *);
static int	em_setup_transmit_structures(struct adapter *);
static void	em_initialize_transmit_unit(struct adapter *);
static int	em_setup_receive_structures(struct adapter *);
static void	em_initialize_receive_unit(struct adapter *);
static void	em_enable_intr(struct adapter *);
static void	em_disable_intr(struct adapter *);
static void	em_free_transmit_structures(struct adapter *);
static void	em_free_receive_structures(struct adapter *);
static void	em_update_stats_counters(struct adapter *);
static void	em_txeof(struct adapter *);
static int	em_allocate_receive_structures(struct adapter *);
static int	em_allocate_transmit_structures(struct adapter *);
static int	em_rxeof(struct adapter *, int);
#ifndef __NO_STRICT_ALIGNMENT
static int	em_fixup_rx(struct adapter *);
#endif
static void	em_receive_checksum(struct adapter *, struct e1000_rx_desc *,
		    struct mbuf *);
static void	em_transmit_checksum_setup(struct adapter *, struct mbuf *,
		    uint32_t *, uint32_t *);
static boolean_t em_tx_adv_ctx_setup(struct adapter *, struct mbuf *);
static boolean_t em_tso_setup(struct adapter *, struct mbuf *, uint32_t *,
		    uint32_t *);
static boolean_t em_tso_adv_setup(struct adapter *, struct mbuf *, uint32_t *);
static void	em_set_promisc(struct adapter *);
static void	em_disable_promisc(struct adapter *);
static void	em_set_multi(struct adapter *);
static void	em_print_hw_stats(struct adapter *);
static void	em_update_link_status(struct adapter *);
static int	em_get_buf(struct adapter *, int);
static void	em_enable_vlans(struct adapter *);
static int	em_encap(struct adapter *, struct mbuf **);
static int	em_adv_encap(struct adapter *, struct mbuf **);
static void	em_smartspeed(struct adapter *);
static int	em_82547_fifo_workaround(struct adapter *, int);
static void	em_82547_update_fifo_head(struct adapter *, int);
static int	em_82547_tx_fifo_reset(struct adapter *);
static void	em_82547_move_tail(void *);
static int	em_dma_malloc(struct adapter *, bus_size_t,
		    struct em_dma_alloc *, int);
static void	em_dma_free(struct adapter *, struct em_dma_alloc *);
static void	em_print_debug_info(struct adapter *);
static int	em_is_valid_ether_addr(uint8_t *);
static int	em_sysctl_stats(SYSCTL_HANDLER_ARGS);
static int	em_sysctl_debug_info(SYSCTL_HANDLER_ARGS);
static uint32_t	em_fill_descriptors(bus_addr_t address, uint32_t length,
		    PDESC_ARRAY desc_array);
static int	em_sysctl_int_delay(SYSCTL_HANDLER_ARGS);
static void	em_add_int_delay_sysctl(struct adapter *, const char *,
		    const char *, struct em_int_delay_info *, int, int);
/* Management and WOL Support */
static void	em_init_manageability(struct adapter *);
static void	em_release_manageability(struct adapter *);
static void	em_get_hw_control(struct adapter *);
static void	em_release_hw_control(struct adapter *);
static void	em_enable_wakeup(device_t);

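/*
 * With DEVICE_POLLING compiled in, the legacy interrupt handler is
 * used; otherwise a fast (filter) handler is installed and the RX/TX
 * and link-state work is deferred to taskqueues.
 */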
#ifdef DEVICE_POLLING
static poll_handler_t em_poll;
static void	em_intr(void *);
#else
static int	em_intr_fast(void *);
static void	em_add_rx_process_limit(struct adapter *, const char *,
		    const char *, int *, int);
static void	em_handle_rxtx(void *context, int pending);
static void	em_handle_link(void *context, int pending);
#endif

/*********************************************************************
 *  FreeBSD Device Interface Entry Points
 *********************************************************************/

static device_method_t em_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe, em_probe),
	DEVMETHOD(device_attach, em_attach),
	DEVMETHOD(device_detach, em_detach),
	DEVMETHOD(device_shutdown, em_shutdown),
	DEVMETHOD(device_suspend, em_suspend),
	DEVMETHOD(device_resume, em_resume),
	{0, 0}
};

static driver_t em_driver = {
	"em", em_methods, sizeof(struct adapter),
};

static devclass_t em_devclass;
DRIVER_MODULE(em, pci, em_driver, em_devclass, 0, 0);
MODULE_DEPEND(em, pci, 1, 1, 1);
MODULE_DEPEND(em, ether, 1, 1, 1);

/*********************************************************************
 *  Tunable default values.
 *********************************************************************/

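/*
 * The hardware interrupt delay registers tick in 1.024 usec units, so
 * these macros convert between those ticks and microseconds, rounding
 * to nearest: e.g. EM_TICKS_TO_USECS(64) = (1024 * 64 + 500) / 1000 = 66.
 */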
#define EM_TICKS_TO_USECS(ticks)	((1024 * (ticks) + 500) / 1000)
#define EM_USECS_TO_TICKS(usecs)	((1000 * (usecs) + 512) / 1024)
#define M_TSO_LEN			66

static int em_tx_int_delay_dflt = EM_TICKS_TO_USECS(EM_TIDV);
static int em_rx_int_delay_dflt = EM_TICKS_TO_USECS(EM_RDTR);
static int em_tx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_TADV);
static int em_rx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_RADV);
static int em_rxd = EM_DEFAULT_RXD;
static int em_txd = EM_DEFAULT_TXD;
static int em_smart_pwr_down = FALSE;

TUNABLE_INT("hw.em.tx_int_delay", &em_tx_int_delay_dflt);
TUNABLE_INT("hw.em.rx_int_delay", &em_rx_int_delay_dflt);
TUNABLE_INT("hw.em.tx_abs_int_delay", &em_tx_abs_int_delay_dflt);
TUNABLE_INT("hw.em.rx_abs_int_delay", &em_rx_abs_int_delay_dflt);
TUNABLE_INT("hw.em.rxd", &em_rxd);
TUNABLE_INT("hw.em.txd", &em_txd);
TUNABLE_INT("hw.em.smart_pwr_down", &em_smart_pwr_down);
#ifndef DEVICE_POLLING
/* How many packets rxeof tries to clean at a time */
static int em_rx_process_limit = 100;
TUNABLE_INT("hw.em.rx_process_limit", &em_rx_process_limit);
#endif
/* Global used in WOL setup with multiport cards */
static int global_quad_port_a = 0;

/*********************************************************************
 *  Device identification routine
 *
 *  em_probe determines if the driver should be loaded on
 *  an adapter based on the PCI vendor/device ID of the adapter.
 *
 *  return BUS_PROBE_DEFAULT on success, positive on failure
 *********************************************************************/

static int
em_probe(device_t dev)
{
	char		adapter_name[60];
	uint16_t	pci_vendor_id = 0;
	uint16_t	pci_device_id = 0;
	uint16_t	pci_subvendor_id = 0;
	uint16_t	pci_subdevice_id = 0;
	em_vendor_info_t *ent;

	INIT_DEBUGOUT("em_probe: begin");

	pci_vendor_id = pci_get_vendor(dev);
	if (pci_vendor_id != EM_VENDOR_ID)
		return (ENXIO);

	pci_device_id = pci_get_device(dev);
	pci_subvendor_id = pci_get_subvendor(dev);
	pci_subdevice_id = pci_get_subdevice(dev);

	ent = em_vendor_info_array;
	while (ent->vendor_id != 0) {
		if ((pci_vendor_id == ent->vendor_id) &&
		    (pci_device_id == ent->device_id) &&
		    ((pci_subvendor_id == ent->subvendor_id) ||
		    (ent->subvendor_id == PCI_ANY_ID)) &&
		    ((pci_subdevice_id == ent->subdevice_id) ||
		    (ent->subdevice_id == PCI_ANY_ID))) {
			sprintf(adapter_name, "%s %s",
				em_strings[ent->index],
				em_driver_version);
			device_set_desc_copy(dev, adapter_name);
			return (BUS_PROBE_DEFAULT);
		}
		ent++;
	}

	return (ENXIO);
}

/*********************************************************************
 *  Device initialization routine
 *
 *  The attach entry point is called when the driver is being loaded.
 *  This routine identifies the type of hardware, allocates all resources
 *  and initializes the hardware.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/

static int
em_attach(device_t dev)
{
	struct adapter	*adapter;
	int		tsize, rsize;
	int		error = 0;
	u16		eeprom_data, device_id;

	INIT_DEBUGOUT("em_attach: begin");

	adapter = device_get_softc(dev);
	adapter->dev = adapter->osdep.dev = dev;
	EM_LOCK_INIT(adapter, device_get_nameunit(dev));

	/* SYSCTL stuff */
	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
	    OID_AUTO, "debug_info", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
	    em_sysctl_debug_info, "I", "Debug Information");

	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
	    OID_AUTO, "stats", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
	    em_sysctl_stats, "I", "Statistics");
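	/*
	 * Note: these handlers print their report to the console when
	 * 1 is written to the node, e.g. "sysctl dev.em.0.stats=1"
	 * dumps the hardware statistics.
	 */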

	callout_init_mtx(&adapter->timer, &adapter->mtx, 0);
	callout_init_mtx(&adapter->tx_fifo_timer, &adapter->mtx, 0);

	/* Determine hardware and MAC info */
	em_identify_hardware(adapter);

	/* Setup PCI resources */
	if (em_allocate_pci_resources(adapter)) {
		device_printf(dev, "Allocation of PCI resources failed\n");
		error = ENXIO;
		goto err_pci;
	}

	/*
	** For ICH8 and family we need to map the flash memory,
	** and this must happen after the MAC is identified.
	*/
	if ((adapter->hw.mac.type == e1000_ich8lan) ||
	    (adapter->hw.mac.type == e1000_ich9lan)) {
		int rid = EM_BAR_TYPE_FLASH;
		adapter->flash_mem = bus_alloc_resource_any(dev,
		    SYS_RES_MEMORY, &rid, RF_ACTIVE);
		/* This is used in the shared code */
		adapter->hw.flash_address = (u8 *)adapter->flash_mem;
		adapter->osdep.flash_bus_space_tag =
		    rman_get_bustag(adapter->flash_mem);
		adapter->osdep.flash_bus_space_handle =
		    rman_get_bushandle(adapter->flash_mem);
	}

	/* Do Shared Code initialization */
	if (e1000_setup_init_funcs(&adapter->hw, TRUE)) {
		device_printf(dev, "Setup of Shared code failed\n");
		error = ENXIO;
		goto err_pci;
	}

	e1000_get_bus_info(&adapter->hw);

	/* Set up some sysctls for the tunable interrupt delays */
	em_add_int_delay_sysctl(adapter, "rx_int_delay",
	    "receive interrupt delay in usecs", &adapter->rx_int_delay,
	    E1000_REGISTER(&adapter->hw, E1000_RDTR), em_rx_int_delay_dflt);
	em_add_int_delay_sysctl(adapter, "tx_int_delay",
	    "transmit interrupt delay in usecs", &adapter->tx_int_delay,
	    E1000_REGISTER(&adapter->hw, E1000_TIDV), em_tx_int_delay_dflt);
	if (adapter->hw.mac.type >= e1000_82540) {
		em_add_int_delay_sysctl(adapter, "rx_abs_int_delay",
		    "receive interrupt delay limit in usecs",
		    &adapter->rx_abs_int_delay,
		    E1000_REGISTER(&adapter->hw, E1000_RADV),
		    em_rx_abs_int_delay_dflt);
		em_add_int_delay_sysctl(adapter, "tx_abs_int_delay",
		    "transmit interrupt delay limit in usecs",
		    &adapter->tx_abs_int_delay,
		    E1000_REGISTER(&adapter->hw, E1000_TADV),
		    em_tx_abs_int_delay_dflt);
	}

#ifndef DEVICE_POLLING
	/* Sysctls for limiting the amount of work done in the taskqueue */
	em_add_rx_process_limit(adapter, "rx_processing_limit",
	    "max number of rx packets to process", &adapter->rx_process_limit,
	    em_rx_process_limit);
#endif

	/*
	 * Validate the number of transmit and receive descriptors. It
	 * must not exceed the hardware maximum, and must be a multiple
	 * of EM_DBA_ALIGN.
	 */
	if (((em_txd * sizeof(struct e1000_tx_desc)) % EM_DBA_ALIGN) != 0 ||
	    (adapter->hw.mac.type >= e1000_82544 && em_txd > EM_MAX_TXD) ||
	    (adapter->hw.mac.type < e1000_82544 && em_txd > EM_MAX_TXD_82543) ||
	    (em_txd < EM_MIN_TXD)) {
		device_printf(dev, "Using %d TX descriptors instead of %d!\n",
		    EM_DEFAULT_TXD, em_txd);
		adapter->num_tx_desc = EM_DEFAULT_TXD;
	} else
		adapter->num_tx_desc = em_txd;
	if (((em_rxd * sizeof(struct e1000_rx_desc)) % EM_DBA_ALIGN) != 0 ||
	    (adapter->hw.mac.type >= e1000_82544 && em_rxd > EM_MAX_RXD) ||
	    (adapter->hw.mac.type < e1000_82544 && em_rxd > EM_MAX_RXD_82543) ||
	    (em_rxd < EM_MIN_RXD)) {
		device_printf(dev, "Using %d RX descriptors instead of %d!\n",
		    EM_DEFAULT_RXD, em_rxd);
		adapter->num_rx_desc = EM_DEFAULT_RXD;
	} else
		adapter->num_rx_desc = em_rxd;

	adapter->hw.mac.autoneg = DO_AUTO_NEG;
	adapter->hw.phy.wait_for_link = FALSE;
	adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
	adapter->rx_buffer_len = 2048;

	e1000_init_script_state_82541(&adapter->hw, TRUE);
	e1000_set_tbi_compatibility_82543(&adapter->hw, TRUE);

	/* Copper options */
	if (adapter->hw.media_type == e1000_media_type_copper) {
		adapter->hw.phy.mdix = AUTO_ALL_MODES;
		adapter->hw.phy.disable_polarity_correction = FALSE;
		adapter->hw.phy.ms_type = EM_MASTER_SLAVE;
	}

	/*
	 * Set the max frame size assuming standard Ethernet-sized
	 * frames.
	 */
	adapter->hw.mac.max_frame_size =
	    ETHERMTU + ETHER_HDR_LEN + ETHERNET_FCS_SIZE;

	adapter->hw.mac.min_frame_size = ETH_ZLEN + ETHERNET_FCS_SIZE;

	/*
	 * This controls when hardware reports transmit completion
	 * status.
	 */
	adapter->hw.mac.report_tx_early = 1;

	tsize = roundup2(adapter->num_tx_desc * sizeof(struct e1000_tx_desc),
	    EM_DBA_ALIGN);

	/* Allocate Transmit Descriptor ring */
	if (em_dma_malloc(adapter, tsize, &adapter->txdma, BUS_DMA_NOWAIT)) {
		device_printf(dev, "Unable to allocate tx_desc memory\n");
		error = ENOMEM;
		goto err_tx_desc;
	}
	adapter->tx_desc_base =
	    (struct e1000_tx_desc *)adapter->txdma.dma_vaddr;

	rsize = roundup2(adapter->num_rx_desc * sizeof(struct e1000_rx_desc),
	    EM_DBA_ALIGN);

	/* Allocate Receive Descriptor ring */
	if (em_dma_malloc(adapter, rsize, &adapter->rxdma, BUS_DMA_NOWAIT)) {
		device_printf(dev, "Unable to allocate rx_desc memory\n");
		error = ENOMEM;
		goto err_rx_desc;
	}
	adapter->rx_desc_base =
	    (struct e1000_rx_desc *)adapter->rxdma.dma_vaddr;

	/* Make sure we have a good EEPROM before we read from it */
	if (e1000_validate_nvm_checksum(&adapter->hw) < 0) {
		/*
		** Some PCI-E parts fail the first check due to
		** the link being in sleep state; call it again.
		** If it fails a second time, it's a real issue.
		*/
		if (e1000_validate_nvm_checksum(&adapter->hw) < 0) {
			device_printf(dev,
			    "The EEPROM Checksum Is Not Valid\n");
			error = EIO;
			goto err_hw_init;
		}
	}

	if (e1000_read_part_num(&adapter->hw, &(adapter->part_num)) < 0) {
		device_printf(dev, "EEPROM read error "
		    "reading part number\n");
		error = EIO;
		goto err_hw_init;
	}

	/* Initialize the hardware */
	if (em_hardware_init(adapter)) {
		device_printf(dev, "Unable to initialize the hardware\n");
		error = EIO;
		goto err_hw_init;
	}

	/* Copy the permanent MAC address out of the EEPROM */
	if (e1000_read_mac_addr(&adapter->hw) < 0) {
		device_printf(dev, "EEPROM read error while reading MAC"
		    " address\n");
		error = EIO;
		goto err_hw_init;
	}

	if (!em_is_valid_ether_addr(adapter->hw.mac.addr)) {
		device_printf(dev, "Invalid MAC address\n");
		error = EIO;
		goto err_hw_init;
	}

	/* Setup OS specific network interface */
	em_setup_interface(dev, adapter);

	em_allocate_intr(adapter);

	/* Initialize statistics */
	em_update_stats_counters(adapter);

	adapter->hw.mac.get_link_status = 1;
	em_update_link_status(adapter);

	/* Indicate SOL/IDER usage */
	if (e1000_check_reset_block(&adapter->hw))
		device_printf(dev,
		    "PHY reset is blocked due to SOL/IDER session.\n");

	/* Determine if we have to control management hardware */
	adapter->has_manage = e1000_enable_mng_pass_thru(&adapter->hw);

	/*
	 * Set up Wake-on-LAN.
	 */
	switch (adapter->hw.mac.type) {
	case e1000_82542:
	case e1000_82543:
		break;
	case e1000_82546:
	case e1000_82546_rev_3:
	case e1000_82571:
	case e1000_80003es2lan:
		if (adapter->hw.bus.func == 1)
			e1000_read_nvm(&adapter->hw,
			    NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data);
		else
			e1000_read_nvm(&adapter->hw,
			    NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
		eeprom_data &= EM_EEPROM_APME;
		break;
	default:
		/* APME bit in EEPROM is mapped to WUC.APME */
		eeprom_data = E1000_READ_REG(&adapter->hw, E1000_WUC) &
		    E1000_WUC_APME;
		break;
	}
	if (eeprom_data)
		adapter->wol = E1000_WUFC_MAG;
	/*
	 * We have the EEPROM settings; now apply the special cases
	 * where the EEPROM may be wrong or the board won't support
	 * Wake On LAN on a particular port.
	 */
	device_id = pci_get_device(dev);
	switch (device_id) {
	case E1000_DEV_ID_82546GB_PCIE:
		adapter->wol = 0;
		break;
	case E1000_DEV_ID_82546EB_FIBER:
	case E1000_DEV_ID_82546GB_FIBER:
	case E1000_DEV_ID_82571EB_FIBER:
		/* Wake events only supported on port A for dual fiber
		 * regardless of eeprom setting */
		if (E1000_READ_REG(&adapter->hw, E1000_STATUS) &
		    E1000_STATUS_FUNC_1)
			adapter->wol = 0;
		break;
	case E1000_DEV_ID_82546GB_QUAD_COPPER_KSP3:
	case E1000_DEV_ID_82571EB_QUAD_COPPER:
	case E1000_DEV_ID_82571EB_QUAD_FIBER:
	case E1000_DEV_ID_82571EB_QUAD_COPPER_LP:
		/* If quad port adapter, disable WoL on all but port A */
		if (global_quad_port_a != 0)
			adapter->wol = 0;
		/* Reset for multiple quad port adapters */
		if (++global_quad_port_a == 4)
			global_quad_port_a = 0;
		break;
	}

	/*
	 * Do we need the workaround for the 82544 PCI-X adapter?
	 * If so, em_encap() will split problematic DMA buffers across
	 * extra descriptors via em_fill_descriptors().
	 */
	if (adapter->hw.bus.type == e1000_bus_type_pcix &&
	    adapter->hw.mac.type == e1000_82544)
		adapter->pcix_82544 = TRUE;
	else
		adapter->pcix_82544 = FALSE;

	/* Tell the stack that the interface is not active */
	adapter->ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);

	INIT_DEBUGOUT("em_attach: end");

	return (0);

err_hw_init:
	em_release_hw_control(adapter);
	e1000_remove_device(&adapter->hw);
	em_dma_free(adapter, &adapter->rxdma);
err_rx_desc:
	em_dma_free(adapter, &adapter->txdma);
err_tx_desc:
err_pci:
	em_free_intr(adapter);
	em_free_pci_resources(adapter);
	EM_LOCK_DESTROY(adapter);

	return (error);
}

/*********************************************************************
 *  Device removal routine
 *
 *  The detach entry point is called when the driver is being removed.
 *  This routine stops the adapter and deallocates all the resources
 *  that were allocated for driver operation.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/

static int
em_detach(device_t dev)
{
	struct adapter	*adapter = device_get_softc(dev);
	struct ifnet	*ifp = adapter->ifp;

	INIT_DEBUGOUT("em_detach: begin");

#ifdef DEVICE_POLLING
	if (ifp->if_capenable & IFCAP_POLLING)
		ether_poll_deregister(ifp);
#endif

	em_disable_intr(adapter);
	em_free_intr(adapter);
	EM_LOCK(adapter);
	adapter->in_detach = 1;
	em_stop(adapter);
	e1000_phy_hw_reset(&adapter->hw);

	em_release_manageability(adapter);

	if (((adapter->hw.mac.type == e1000_82573) ||
	    (adapter->hw.mac.type == e1000_ich8lan) ||
	    (adapter->hw.mac.type == e1000_ich9lan)) &&
	    e1000_check_mng_mode(&adapter->hw))
		em_release_hw_control(adapter);

	if (adapter->wol) {
		E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
		E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
		em_enable_wakeup(dev);
	}

	EM_UNLOCK(adapter);
	ether_ifdetach(adapter->ifp);

	callout_drain(&adapter->timer);
	callout_drain(&adapter->tx_fifo_timer);

	em_free_pci_resources(adapter);
	bus_generic_detach(dev);
	if_free(ifp);

	e1000_remove_device(&adapter->hw);
	em_free_transmit_structures(adapter);
	em_free_receive_structures(adapter);

	/* Free Transmit Descriptor ring */
	if (adapter->tx_desc_base) {
		em_dma_free(adapter, &adapter->txdma);
		adapter->tx_desc_base = NULL;
	}

	/* Free Receive Descriptor ring */
	if (adapter->rx_desc_base) {
		em_dma_free(adapter, &adapter->rxdma);
		adapter->rx_desc_base = NULL;
	}

	EM_LOCK_DESTROY(adapter);

	return (0);
}

/*********************************************************************
 *
 *  Shutdown entry point
 *
 **********************************************************************/

static int
em_shutdown(device_t dev)
{
	return em_suspend(dev);
}

/*
 * Suspend/resume device methods.
 */
static int
em_suspend(device_t dev)
{
	struct adapter *adapter = device_get_softc(dev);

	EM_LOCK(adapter);
	em_stop(adapter);

	em_release_manageability(adapter);

	if (((adapter->hw.mac.type == e1000_82573) ||
	    (adapter->hw.mac.type == e1000_ich8lan) ||
	    (adapter->hw.mac.type == e1000_ich9lan)) &&
	    e1000_check_mng_mode(&adapter->hw))
		em_release_hw_control(adapter);

	if (adapter->wol) {
		E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
		E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
		em_enable_wakeup(dev);
	}

	EM_UNLOCK(adapter);

	return bus_generic_suspend(dev);
}

static int
em_resume(device_t dev)
{
	struct adapter *adapter = device_get_softc(dev);
	struct ifnet *ifp = adapter->ifp;

	EM_LOCK(adapter);
	em_init_locked(adapter);
	em_init_manageability(adapter);

	if ((ifp->if_flags & IFF_UP) &&
	    (ifp->if_drv_flags & IFF_DRV_RUNNING))
		em_start_locked(ifp);

	EM_UNLOCK(adapter);

	return bus_generic_resume(dev);
}
880
881/*********************************************************************
882 *  Transmit entry point
883 *
884 *  em_start is called by the stack to initiate a transmit.
885 *  The driver will remain in this routine as long as there are
886 *  packets to transmit and transmit resources are available.
887 *  In case resources are not available stack is notified and
888 *  the packet is requeued.
889 **********************************************************************/
890
891static void
892em_start_locked(struct ifnet *ifp)
893{
894	struct adapter	*adapter = ifp->if_softc;
895	struct mbuf	*m_head;
896
897	EM_LOCK_ASSERT(adapter);
898
899	if ((ifp->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
900	    IFF_DRV_RUNNING)
901		return;
902	if (!adapter->link_active)
903		return;
904
905	while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
906
907		IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
908		if (m_head == NULL)
909			break;
910		/*
911		 *  Encapsulation can modify our pointer, and or make it
912		 *  NULL on failure.  In that event, we can't requeue.
913		 *
914		 *  We now use a pointer to accomodate legacy and
915		 *  advanced transmit functions.
916		 */
917		if (adapter->em_xmit(adapter, &m_head)) {
918			if (m_head == NULL)
919				break;
920			ifp->if_drv_flags |= IFF_DRV_OACTIVE;
921			IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
922			break;
923		}
924
925		/* Send a copy of the frame to the BPF listener */
926		ETHER_BPF_MTAP(ifp, m_head);
927
928		/* Set timeout in case hardware has problems transmitting. */
929		adapter->watchdog_timer = EM_TX_TIMEOUT;
930	}
931}
932
933static void
934em_start(struct ifnet *ifp)
935{
936	struct adapter *adapter = ifp->if_softc;
937
938	EM_LOCK(adapter);
939	if (ifp->if_drv_flags & IFF_DRV_RUNNING)
940		em_start_locked(ifp);
941	EM_UNLOCK(adapter);
942}

/*********************************************************************
 *  Ioctl entry point
 *
 *  em_ioctl is called when the user wants to configure the
 *  interface.
 *
 *  return 0 on success, positive on failure
 **********************************************************************/

static int
em_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
{
	struct adapter	*adapter = ifp->if_softc;
	struct ifreq *ifr = (struct ifreq *)data;
	struct ifaddr *ifa = (struct ifaddr *)data;
	int error = 0;

	if (adapter->in_detach)
		return (error);

	switch (command) {
	case SIOCSIFADDR:
		if (ifa->ifa_addr->sa_family == AF_INET) {
			/*
			 * XXX
			 * Since resetting hardware takes a very long time
			 * and results in link renegotiation, we only
			 * initialize the hardware when it is absolutely
			 * required.
			 */
			ifp->if_flags |= IFF_UP;
			if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) {
				EM_LOCK(adapter);
				em_init_locked(adapter);
				EM_UNLOCK(adapter);
			}
			arp_ifinit(ifp, ifa);
		} else
			error = ether_ioctl(ifp, command, data);
		break;
	case SIOCSIFMTU:
	    {
		int max_frame_size;
		uint16_t eeprom_data = 0;

		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)");

		EM_LOCK(adapter);
		switch (adapter->hw.mac.type) {
		case e1000_82573:
			/*
			 * 82573 only supports jumbo frames
			 * if ASPM is disabled.
			 */
			e1000_read_nvm(&adapter->hw,
			    NVM_INIT_3GIO_3, 1, &eeprom_data);
			if (eeprom_data & NVM_WORD1A_ASPM_MASK) {
				max_frame_size = ETHER_MAX_LEN;
				break;
			}
			/* Allow Jumbo frames - fall thru */
		case e1000_82571:
		case e1000_82572:
		case e1000_ich9lan:
		case e1000_82575:
		case e1000_80003es2lan:	/* Limit Jumbo Frame size */
			max_frame_size = 9234;
			break;
			/* Adapters that do not support jumbo frames */
		case e1000_82542:
		case e1000_ich8lan:
			max_frame_size = ETHER_MAX_LEN;
			break;
		default:
			max_frame_size = MAX_JUMBO_FRAME_SIZE;
		}
		if (ifr->ifr_mtu > max_frame_size - ETHER_HDR_LEN -
		    ETHER_CRC_LEN) {
			EM_UNLOCK(adapter);
			error = EINVAL;
			break;
		}

		ifp->if_mtu = ifr->ifr_mtu;
		adapter->hw.mac.max_frame_size =
		    ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
		em_init_locked(adapter);
		EM_UNLOCK(adapter);
		break;
	    }
	case SIOCSIFFLAGS:
		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFFLAGS (Set Interface Flags)");
		EM_LOCK(adapter);
		if (ifp->if_flags & IFF_UP) {
			if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) {
				if ((ifp->if_flags ^ adapter->if_flags) &
				    IFF_PROMISC) {
					em_disable_promisc(adapter);
					em_set_promisc(adapter);
				}
			} else
				em_init_locked(adapter);
		} else
			if (ifp->if_drv_flags & IFF_DRV_RUNNING)
				em_stop(adapter);
		adapter->if_flags = ifp->if_flags;
		EM_UNLOCK(adapter);
		break;
	case SIOCADDMULTI:
	case SIOCDELMULTI:
		IOCTL_DEBUGOUT("ioctl rcv'd: SIOC(ADD|DEL)MULTI");
		if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
			EM_LOCK(adapter);
			em_disable_intr(adapter);
			em_set_multi(adapter);
			if (adapter->hw.mac.type == e1000_82542 &&
			    adapter->hw.revision_id == E1000_REVISION_2) {
				em_initialize_receive_unit(adapter);
			}
#ifdef DEVICE_POLLING
			if (!(ifp->if_capenable & IFCAP_POLLING))
#endif
				em_enable_intr(adapter);
			EM_UNLOCK(adapter);
		}
		break;
	case SIOCSIFMEDIA:
		/* Check SOL/IDER usage */
		EM_LOCK(adapter);
		if (e1000_check_reset_block(&adapter->hw)) {
			EM_UNLOCK(adapter);
			device_printf(adapter->dev, "Media change is"
			    " blocked due to SOL/IDER session.\n");
			break;
		}
		EM_UNLOCK(adapter);
		/* FALLTHROUGH */
	case SIOCGIFMEDIA:
		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCxIFMEDIA (Get/Set Interface Media)");
		error = ifmedia_ioctl(ifp, ifr, &adapter->media, command);
		break;
	case SIOCSIFCAP:
	    {
		int mask, reinit;

		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFCAP (Set Capabilities)");
		reinit = 0;
		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
#ifdef DEVICE_POLLING
		if (mask & IFCAP_POLLING) {
			if (ifr->ifr_reqcap & IFCAP_POLLING) {
				error = ether_poll_register(em_poll, ifp);
				if (error)
					return (error);
				EM_LOCK(adapter);
				em_disable_intr(adapter);
				ifp->if_capenable |= IFCAP_POLLING;
				EM_UNLOCK(adapter);
			} else {
				error = ether_poll_deregister(ifp);
				/* Enable interrupt even in error case */
				EM_LOCK(adapter);
				em_enable_intr(adapter);
				ifp->if_capenable &= ~IFCAP_POLLING;
				EM_UNLOCK(adapter);
			}
		}
#endif
		if (mask & IFCAP_HWCSUM) {
			ifp->if_capenable ^= IFCAP_HWCSUM;
			reinit = 1;
		}
		if (mask & IFCAP_TSO4) {
			ifp->if_capenable ^= IFCAP_TSO4;
			reinit = 1;
		}
		if (mask & IFCAP_VLAN_HWTAGGING) {
			ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
			reinit = 1;
		}
		if (reinit && (ifp->if_drv_flags & IFF_DRV_RUNNING))
			em_init(adapter);
		VLAN_CAPABILITIES(ifp);
		break;
	    }
	default:
		error = ether_ioctl(ifp, command, data);
		break;
	}

	return (error);
}

/*********************************************************************
 *  Watchdog timer:
 *
 *  This routine is called from the local timer every second.
 *  As long as transmit descriptors are being cleaned the value
 *  is non-zero and we do nothing. Reaching 0 indicates a tx hang
 *  and we then reset the device.
 *
 **********************************************************************/

static void
em_watchdog(struct adapter *adapter)
{

	EM_LOCK_ASSERT(adapter);

	/*
	** The timer is set to 5 every time start queues a packet.
	** Then txeof keeps resetting it to 5 as long as it cleans at
	** least one descriptor.
	** Finally, anytime all descriptors are clean the timer is
	** set to 0.
	*/
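	/* Timer inactive (0) or still counting down: nothing to do yet. */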
	if (adapter->watchdog_timer == 0 || --adapter->watchdog_timer)
		return;

	/*
	 * If we are in this routine because of pause frames, then
	 * don't reset the hardware.
	 */
	if (E1000_READ_REG(&adapter->hw, E1000_STATUS) &
	    E1000_STATUS_TXOFF) {
		adapter->watchdog_timer = EM_TX_TIMEOUT;
		return;
	}

	if (e1000_check_for_link(&adapter->hw) == 0)
		device_printf(adapter->dev, "watchdog timeout -- resetting\n");
	adapter->ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
	adapter->watchdog_events++;

	em_init_locked(adapter);
}

/*********************************************************************
 *  Init entry point
 *
 *  This routine is used in two ways. It is used by the stack as
 *  the init entry point in the network interface structure. It is
 *  also used by the driver as a hw/sw initialization routine to get
 *  to a consistent state.
 *
 **********************************************************************/

static void
em_init_locked(struct adapter *adapter)
{
	struct ifnet	*ifp = adapter->ifp;
	device_t	dev = adapter->dev;
	uint32_t	pba;

	INIT_DEBUGOUT("em_init: begin");

	EM_LOCK_ASSERT(adapter);

	em_stop(adapter);

	/*
	 * Packet Buffer Allocation (PBA)
	 * Writing PBA sets the receive portion of the buffer;
	 * the remainder is used for the transmit buffer.
	 *
	 * Devices before the 82547 had a Packet Buffer of 64K.
	 *   Default allocation: PBA=48K for Rx, leaving 16K for Tx.
	 * After the 82547 the buffer was reduced to 40K.
	 *   Default allocation: PBA=30K for Rx, leaving 10K for Tx.
	 *   Note: the default does not leave enough room for Jumbo Frames >10k.
	 */
	switch (adapter->hw.mac.type) {
	case e1000_82547:
	case e1000_82547_rev_2: /* 82547: Total Packet Buffer is 40K */
		if (adapter->hw.mac.max_frame_size > 8192)
			pba = E1000_PBA_22K; /* 22K for Rx, 18K for Tx */
		else
			pba = E1000_PBA_30K; /* 30K for Rx, 10K for Tx */
		adapter->tx_fifo_head = 0;
		adapter->tx_head_addr = pba << EM_TX_HEAD_ADDR_SHIFT;
		adapter->tx_fifo_size =
		    (E1000_PBA_40K - pba) << EM_PBA_BYTES_SHIFT;
		break;
	/* Total Packet Buffer on these is 48K */
	case e1000_82571:
	case e1000_82572:
	case e1000_82575:
	case e1000_80003es2lan:
		pba = E1000_PBA_32K; /* 32K for Rx, 16K for Tx */
		break;
	case e1000_82573: /* 82573: Total Packet Buffer is 32K */
		pba = E1000_PBA_12K; /* 12K for Rx, 20K for Tx */
		break;
	case e1000_ich9lan:
#define E1000_PBA_10K	0x000A
		pba = E1000_PBA_10K;
		break;
	case e1000_ich8lan:
		pba = E1000_PBA_8K;
		break;
	default:
		/* Devices before 82547 had a Packet Buffer of 64K. */
		if (adapter->hw.mac.max_frame_size > 8192)
			pba = E1000_PBA_40K; /* 40K for Rx, 24K for Tx */
		else
			pba = E1000_PBA_48K; /* 48K for Rx, 16K for Tx */
	}

	INIT_DEBUGOUT1("em_init: pba=%dK", pba);
	E1000_WRITE_REG(&adapter->hw, E1000_PBA, pba);

	/* Get the latest MAC address; the user may have set an LAA */
	bcopy(IF_LLADDR(adapter->ifp), adapter->hw.mac.addr,
	    ETHER_ADDR_LEN);

	/* Put the address into the Receive Address Array */
	e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);

	/*
	 * With 82571 controllers, the LAA may be overwritten
	 * due to a controller reset from the other port.
	 */
	if (adapter->hw.mac.type == e1000_82571)
		e1000_set_laa_state_82571(&adapter->hw, TRUE);

	/* Initialize the hardware */
	if (em_hardware_init(adapter)) {
		device_printf(dev, "Unable to initialize the hardware\n");
		return;
	}
	em_update_link_status(adapter);

	if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING)
		em_enable_vlans(adapter);

	/* Set hardware offload abilities */
	ifp->if_hwassist = 0;
	if (adapter->hw.mac.type >= e1000_82543) {
		if (ifp->if_capenable & IFCAP_TXCSUM)
			ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
		if (ifp->if_capenable & IFCAP_TSO4)
			ifp->if_hwassist |= CSUM_TSO;
	}

	/* Configure for OS presence */
	em_init_manageability(adapter);

	/* Prepare transmit descriptors and buffers */
	if (em_setup_transmit_structures(adapter)) {
		device_printf(dev, "Could not setup transmit structures\n");
		em_stop(adapter);
		return;
	}
	em_initialize_transmit_unit(adapter);

	/* Setup Multicast table */
	em_set_multi(adapter);

	/* Prepare receive descriptors and buffers */
	if (em_setup_receive_structures(adapter)) {
		device_printf(dev, "Could not setup receive structures\n");
		em_stop(adapter);
		return;
	}
	em_initialize_receive_unit(adapter);

	/* Don't lose promiscuous settings */
	em_set_promisc(adapter);

	ifp->if_drv_flags |= IFF_DRV_RUNNING;
	ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;

	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
	e1000_clear_hw_cntrs_base_generic(&adapter->hw);

#ifdef DEVICE_POLLING
	/*
	 * Only enable interrupts if we are not polling; make sure
	 * they are off otherwise.
	 */
	if (ifp->if_capenable & IFCAP_POLLING)
		em_disable_intr(adapter);
	else
#endif /* DEVICE_POLLING */
		em_enable_intr(adapter);

	/* Don't reset the phy next time init gets called */
	adapter->hw.phy.reset_disable = TRUE;
}

static void
em_init(void *arg)
{
	struct adapter *adapter = arg;

	EM_LOCK(adapter);
	em_init_locked(adapter);
	EM_UNLOCK(adapter);
}

#ifdef DEVICE_POLLING
/*********************************************************************
 *
 *  Legacy polling routine
 *
 *********************************************************************/
static void
em_poll(struct ifnet *ifp, enum poll_cmd cmd, int count)
{
	struct adapter *adapter = ifp->if_softc;
	uint32_t reg_icr;

	EM_LOCK(adapter);
	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
		EM_UNLOCK(adapter);
		return;
	}

	if (cmd == POLL_AND_CHECK_STATUS) {
		reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
		if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
			callout_stop(&adapter->timer);
			adapter->hw.mac.get_link_status = 1;
			e1000_check_for_link(&adapter->hw);
			em_update_link_status(adapter);
			callout_reset(&adapter->timer, hz,
			    em_local_timer, adapter);
		}
	}
	em_rxeof(adapter, count);
	em_txeof(adapter);

	if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
		em_start_locked(ifp);
	EM_UNLOCK(adapter);
}

/*********************************************************************
 *
 *  Legacy Interrupt Service routine
 *
 *********************************************************************/

static void
em_intr(void *arg)
{
	struct adapter	*adapter = arg;
	struct ifnet	*ifp;
	uint32_t	reg_icr;

	EM_LOCK(adapter);
	ifp = adapter->ifp;

	if (ifp->if_capenable & IFCAP_POLLING) {
		EM_UNLOCK(adapter);
		return;
	}

	for (;;) {
		reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);

		if (adapter->hw.mac.type >= e1000_82571 &&
		    (reg_icr & E1000_ICR_INT_ASSERTED) == 0)
			break;
		else if (reg_icr == 0)
			break;

		/*
		 * XXX: some laptops trigger several spurious interrupts
		 * on em(4) when in the resume cycle. The ICR register
		 * reports all-ones value in this case. Processing such
		 * interrupts would lead to a freeze. I don't know why.
		 */
		if (reg_icr == 0xffffffff)
			break;

		if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
			em_rxeof(adapter, -1);
			em_txeof(adapter);
		}

		/* Link status change */
		if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
			callout_stop(&adapter->timer);
			adapter->hw.mac.get_link_status = 1;
			e1000_check_for_link(&adapter->hw);
			em_update_link_status(adapter);
			callout_reset(&adapter->timer, hz,
			    em_local_timer, adapter);
		}

		if (reg_icr & E1000_ICR_RXO)
			adapter->rx_overruns++;
	}

	if (ifp->if_drv_flags & IFF_DRV_RUNNING &&
	    !IFQ_DRV_IS_EMPTY(&ifp->if_snd))
		em_start_locked(ifp);
	EM_UNLOCK(adapter);
}

#else /* if not DEVICE_POLLING, then fast interrupt routines only */

static void
em_handle_link(void *context, int pending)
{
	struct adapter	*adapter = context;
	struct ifnet *ifp;

	ifp = adapter->ifp;

	EM_LOCK(adapter);
	if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) {
		EM_UNLOCK(adapter);
		return;
	}

	callout_stop(&adapter->timer);
	adapter->hw.mac.get_link_status = 1;
	e1000_check_for_link(&adapter->hw);
	em_update_link_status(adapter);
	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
	EM_UNLOCK(adapter);
}

static void
em_handle_rxtx(void *context, int pending)
{
	struct adapter	*adapter = context;
	struct ifnet	*ifp;

	NET_LOCK_GIANT();
	ifp = adapter->ifp;

	/*
	 * TODO:
	 * It should be possible to run the tx clean loop without the lock.
	 */
	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
		if (em_rxeof(adapter, adapter->rx_process_limit) != 0)
			taskqueue_enqueue(adapter->tq, &adapter->rxtx_task);
		EM_LOCK(adapter);
		em_txeof(adapter);

		if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
			em_start_locked(ifp);
		EM_UNLOCK(adapter);
	}

	em_enable_intr(adapter);
	NET_UNLOCK_GIANT();
}

/*********************************************************************
 *
 *  Fast Interrupt Service routine
 *
 *********************************************************************/
static int
em_intr_fast(void *arg)
{
	struct adapter	*adapter = arg;
	struct ifnet	*ifp;
	uint32_t	reg_icr;

	ifp = adapter->ifp;

	reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);

	/* Hot eject? */
	if (reg_icr == 0xffffffff)
		return (FILTER_STRAY);

	/* Definitely not our interrupt. */
	if (reg_icr == 0x0)
		return (FILTER_STRAY);

	/*
	 * Starting with the 82571 chip, bit 31 should be used to
	 * determine whether the interrupt belongs to us.
	 */
	if (adapter->hw.mac.type >= e1000_82571 &&
	    (reg_icr & E1000_ICR_INT_ASSERTED) == 0)
		return (FILTER_STRAY);

	/*
	 * Mask interrupts until the taskqueue is finished running.  This is
	 * cheap, just assume that it is needed.  This also works around the
	 * MSI message reordering errata on certain systems.
	 */
	em_disable_intr(adapter);
	taskqueue_enqueue(adapter->tq, &adapter->rxtx_task);

	/* Link status change */
	if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC))
		taskqueue_enqueue(taskqueue_fast, &adapter->link_task);

	if (reg_icr & E1000_ICR_RXO)
		adapter->rx_overruns++;
	return (FILTER_HANDLED);
}
#endif /* ! DEVICE_POLLING */

/*********************************************************************
 *
 *  Media Ioctl callback
 *
 *  This routine is called whenever the user queries the status of
 *  the interface using ifconfig.
 *
 **********************************************************************/
static void
em_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
{
	struct adapter *adapter = ifp->if_softc;
	u_char fiber_type = IFM_1000_SX;

	INIT_DEBUGOUT("em_media_status: begin");

	EM_LOCK(adapter);
	e1000_check_for_link(&adapter->hw);
	em_update_link_status(adapter);

	ifmr->ifm_status = IFM_AVALID;
	ifmr->ifm_active = IFM_ETHER;

	if (!adapter->link_active) {
		EM_UNLOCK(adapter);
		return;
	}

	ifmr->ifm_status |= IFM_ACTIVE;

	if ((adapter->hw.media_type == e1000_media_type_fiber) ||
	    (adapter->hw.media_type == e1000_media_type_internal_serdes)) {
		if (adapter->hw.mac.type == e1000_82545)
			fiber_type = IFM_1000_LX;
		ifmr->ifm_active |= fiber_type | IFM_FDX;
	} else {
		switch (adapter->link_speed) {
		case 10:
			ifmr->ifm_active |= IFM_10_T;
			break;
		case 100:
			ifmr->ifm_active |= IFM_100_TX;
			break;
		case 1000:
			ifmr->ifm_active |= IFM_1000_T;
			break;
		}
		if (adapter->link_duplex == FULL_DUPLEX)
			ifmr->ifm_active |= IFM_FDX;
		else
			ifmr->ifm_active |= IFM_HDX;
	}
	EM_UNLOCK(adapter);
}

/*********************************************************************
 *
 *  Media Ioctl callback
 *
 *  This routine is called when the user changes speed/duplex using
 *  the media/mediaopt options with ifconfig.
 *
 **********************************************************************/
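/*
 * Typical usage from userland:
 *	ifconfig em0 media 100baseTX mediaopt full-duplex
 *	ifconfig em0 media autoselect
 */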
static int
em_media_change(struct ifnet *ifp)
{
	struct adapter *adapter = ifp->if_softc;
	struct ifmedia  *ifm = &adapter->media;

	INIT_DEBUGOUT("em_media_change: begin");

	if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
		return (EINVAL);

	EM_LOCK(adapter);
	switch (IFM_SUBTYPE(ifm->ifm_media)) {
	case IFM_AUTO:
		adapter->hw.mac.autoneg = DO_AUTO_NEG;
		adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
		break;
	case IFM_1000_LX:
	case IFM_1000_SX:
	case IFM_1000_T:
		adapter->hw.mac.autoneg = DO_AUTO_NEG;
		adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
		break;
	case IFM_100_TX:
		adapter->hw.mac.autoneg = FALSE;
		adapter->hw.phy.autoneg_advertised = 0;
		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
			adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_FULL;
		else
			adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_HALF;
		break;
	case IFM_10_T:
		adapter->hw.mac.autoneg = FALSE;
		adapter->hw.phy.autoneg_advertised = 0;
		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
			adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_FULL;
		else
			adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_HALF;
		break;
	default:
		device_printf(adapter->dev, "Unsupported media type\n");
	}

	/*
	 * As the speed/duplex settings may have changed we need to
	 * reset the PHY.
	 */
	adapter->hw.phy.reset_disable = FALSE;

	em_init_locked(adapter);
	EM_UNLOCK(adapter);

	return (0);
}
1665
1666/*********************************************************************
1667 *
1668 *  This routine maps the mbufs to tx descriptors.
1669 *
1670 *  return 0 on success, positive on failure
1671 **********************************************************************/
1672
1673static int
1674em_encap(struct adapter *adapter, struct mbuf **m_headp)
1675{
1676	bus_dma_segment_t	segs[EM_MAX_SCATTER];
1677	bus_dmamap_t		map;
1678	struct em_buffer	*tx_buffer, *tx_buffer_mapped;
1679	struct e1000_tx_desc	*ctxd = NULL;
1680	struct mbuf		*m_head;
1681	uint32_t		txd_upper, txd_lower, txd_used, txd_saved;
1682	int			nsegs, i, j, first, last = 0;
1683	int			error, do_tso, tso_desc = 0;
1684
1685	m_head = *m_headp;
1686	txd_upper = txd_lower = txd_used = txd_saved = 0;
1687
1688	do_tso = ((m_head->m_pkthdr.csum_flags & CSUM_TSO) != 0);
1689
1690        /*
1691         * Force a cleanup if number of TX descriptors
1692         * available hits the threshold
1693         */
1694	if (adapter->num_tx_desc_avail <= EM_TX_CLEANUP_THRESHOLD) {
1695		em_txeof(adapter);
1696		/* Now do we at least have a minimal? */
1697		if (adapter->num_tx_desc_avail <= EM_TX_OP_THRESHOLD) {
1698			adapter->no_tx_desc_avail1++;
1699			return (ENOBUFS);
1700		}
1701	}
1702
1703
1704	/*
1705	 * TSO workaround:
1706	 *  If an mbuf contains only the header,
1707	 *  we need to pull 4 bytes of data into it.
1708	 */
1709	if (do_tso && (m_head->m_len <= M_TSO_LEN)) {
1710		m_head = m_pullup(m_head, M_TSO_LEN + 4);
1711		*m_headp = m_head;
1712		if (m_head == NULL)
1713			return (ENOBUFS);
1714	}
1715
1716	/*
1717	 * Map the packet for DMA
1718	 *
1719	 * Capture the first descriptor index,
1720	 * this descriptor will have the index
1721	 * of the EOP which is the only one that
1722	 * now gets a DONE bit writeback.
1723	 */
1724	first = adapter->next_avail_tx_desc;
1725	tx_buffer = &adapter->tx_buffer_area[first];
1726	tx_buffer_mapped = tx_buffer;
1727	map = tx_buffer->map;
1728
1729	error = bus_dmamap_load_mbuf_sg(adapter->txtag, map,
1730	    *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
1731
1732	/*
1733	 * There are two types of errors we can (try) to handle:
1734	 * - EFBIG means the mbuf chain was too long and bus_dma ran
1735	 *   out of segments.  Defragment the mbuf chain and try again.
1736	 * - ENOMEM means bus_dma could not obtain enough bounce buffers
1737	 *   at this point in time.  Defer sending and try again later.
1738	 * All other errors, in particular EINVAL, are fatal and prevent the
1739	 * mbuf chain from ever going through.  Drop it and report error.
1740	 */
1741	if (error == EFBIG) {
1742		struct mbuf *m;
1743
1744		m = m_defrag(*m_headp, M_DONTWAIT);
1745		if (m == NULL) {
1746			adapter->mbuf_alloc_failed++;
1747			m_freem(*m_headp);
1748			*m_headp = NULL;
1749			return (ENOBUFS);
1750		}
1751		*m_headp = m;
1752
1753		/* Try it again */
1754		error = bus_dmamap_load_mbuf_sg(adapter->txtag, map,
1755		    *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
1756
1757		if (error == ENOMEM) {
1758			adapter->no_tx_dma_setup++;
1759			return (error);
1760		} else if (error != 0) {
1761			adapter->no_tx_dma_setup++;
1762			m_freem(*m_headp);
1763			*m_headp = NULL;
1764			return (error);
1765		}
1766	} else if (error == ENOMEM) {
1767		adapter->no_tx_dma_setup++;
1768		return (error);
1769	} else if (error != 0) {
1770		adapter->no_tx_dma_setup++;
1771		m_freem(*m_headp);
1772		*m_headp = NULL;
1773		return (error);
1774	}
1775
1776	/*
1777	 * TSO Hardware workaround, if this packet is not
1778	 * TSO, and is only a single descriptor long, and
1779	 * it follows a TSO burst, then we need to add a
1780	 * sentinel descriptor to prevent premature writeback.
1781	 */
1782	if ((do_tso == 0) && (adapter->tx_tso == TRUE)) {
1783		if (nsegs == 1)
1784			tso_desc = TRUE;
1785		adapter->tx_tso = FALSE;
1786	}
1787
1788	if (nsegs > (adapter->num_tx_desc_avail - 2)) {
1789		adapter->no_tx_desc_avail2++;
1790		bus_dmamap_unload(adapter->txtag, map);
1791		return (ENOBUFS);
1792	}
1793	m_head = *m_headp;
1794
1795	/* Do hardware assists */
1796	if (em_tso_setup(adapter, m_head, &txd_upper, &txd_lower))
1797		/* we need to make a final sentinel transmit desc */
1798		tso_desc = TRUE;
1799	else if (m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD)
1800		em_transmit_checksum_setup(adapter,  m_head,
1801		    &txd_upper, &txd_lower);
1802
1803	i = adapter->next_avail_tx_desc;
1804	if (adapter->pcix_82544)
1805		txd_saved = i;
1806
1807	/* Set up our transmit descriptors */
1808	for (j = 0; j < nsegs; j++) {
1809		bus_size_t seg_len;
1810		bus_addr_t seg_addr;
1811		/* If the adapter is an 82544 on a PCI-X bus */
1812		if (adapter->pcix_82544) {
1813			DESC_ARRAY	desc_array;
1814			uint32_t	array_elements, counter;
1815			/*
1816			 * Check the Address and Length combination and
1817			 * split the data accordingly
1818			 */
1819			array_elements = em_fill_descriptors(segs[j].ds_addr,
1820			    segs[j].ds_len, &desc_array);
1821			for (counter = 0; counter < array_elements; counter++) {
1822				if (txd_used == adapter->num_tx_desc_avail) {
1823					adapter->next_avail_tx_desc = txd_saved;
1824					adapter->no_tx_desc_avail2++;
1825					bus_dmamap_unload(adapter->txtag, map);
1826					return (ENOBUFS);
1827				}
1828				tx_buffer = &adapter->tx_buffer_area[i];
1829				ctxd = &adapter->tx_desc_base[i];
1830				ctxd->buffer_addr = htole64(
1831				    desc_array.descriptor[counter].address);
1832				ctxd->lower.data = htole32(
1833				    (adapter->txd_cmd | txd_lower | (uint16_t)
1834				    desc_array.descriptor[counter].length));
1835				ctxd->upper.data =
1836				    htole32((txd_upper));
1837				last = i;
1838				if (++i == adapter->num_tx_desc)
1839					i = 0;
1840				tx_buffer->m_head = NULL;
1841				tx_buffer->next_eop = -1;
1842				txd_used++;
1843			}
1844		} else {
1845			tx_buffer = &adapter->tx_buffer_area[i];
1846			ctxd = &adapter->tx_desc_base[i];
1847			seg_addr = segs[j].ds_addr;
1848			seg_len  = segs[j].ds_len;
1849			/*
1850			** TSO Workaround:
1851			** If this is the last descriptor, we want to
1852			** split it so we have a small final sentinel
1853			*/
1854			if (tso_desc && (j == (nsegs - 1)) && (seg_len > 8)) {
1855				seg_len -= 4;
1856				ctxd->buffer_addr = htole64(seg_addr);
1857				ctxd->lower.data = htole32(
1858				adapter->txd_cmd | txd_lower | seg_len);
1859				ctxd->upper.data =
1860				    htole32(txd_upper);
1861				if (++i == adapter->num_tx_desc)
1862					i = 0;
1863				/* Now make the sentinel */
1864				++txd_used; /* using an extra txd */
1865				ctxd = &adapter->tx_desc_base[i];
1866				tx_buffer = &adapter->tx_buffer_area[i];
1867				ctxd->buffer_addr =
1868				    htole64(seg_addr + seg_len);
1869				ctxd->lower.data = htole32(
1870				adapter->txd_cmd | txd_lower | 4);
1871				ctxd->upper.data =
1872				    htole32(txd_upper);
1873				last = i;
1874				if (++i == adapter->num_tx_desc)
1875					i = 0;
1876			} else {
1877				ctxd->buffer_addr = htole64(seg_addr);
1878				ctxd->lower.data = htole32(
1879				adapter->txd_cmd | txd_lower | seg_len);
1880				ctxd->upper.data =
1881				    htole32(txd_upper);
1882				last = i;
1883				if (++i == adapter->num_tx_desc)
1884					i = 0;
1885			}
1886			tx_buffer->m_head = NULL;
1887			tx_buffer->next_eop = -1;
1888		}
1889	}
1890
1891	adapter->next_avail_tx_desc = i;
1892	if (adapter->pcix_82544)
1893		adapter->num_tx_desc_avail -= txd_used;
1894	else {
1895		adapter->num_tx_desc_avail -= nsegs;
1896		if (tso_desc) /* TSO used an extra for sentinel */
1897			adapter->num_tx_desc_avail -= txd_used;
1898	}
1899
1900	if (m_head->m_flags & M_VLANTAG) {
1901		/* Set the vlan id. */
1902		ctxd->upper.fields.special =
1903		    htole16(m_head->m_pkthdr.ether_vtag);
1904		/* Tell hardware to add tag */
1905		ctxd->lower.data |= htole32(E1000_TXD_CMD_VLE);
1906	}
1907
1908	tx_buffer->m_head = m_head;
1909	tx_buffer_mapped->map = tx_buffer->map;
1910	tx_buffer->map = map;
1911	bus_dmamap_sync(adapter->txtag, map, BUS_DMASYNC_PREWRITE);
1912
1913	/*
1914	 * Last Descriptor of Packet
1915	 * needs End Of Packet (EOP)
1916	 * and Report Status (RS)
1917	 */
1918	ctxd->lower.data |=
1919	    htole32(E1000_TXD_CMD_EOP | E1000_TXD_CMD_RS);
1920	/*
1921	 * Keep track in the first buffer which
1922	 * descriptor will be written back
1923	 */
1924	tx_buffer = &adapter->tx_buffer_area[first];
1925	tx_buffer->next_eop = last;
1926
1927	/*
1928	 * Advance the Transmit Descriptor Tail (TDT), this tells the E1000
1929	 * that this frame is available to transmit.
1930	 */
1931	bus_dmamap_sync(adapter->txdma.dma_tag, adapter->txdma.dma_map,
1932	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1933	if (adapter->hw.mac.type == e1000_82547 &&
1934	    adapter->link_duplex == HALF_DUPLEX)
1935		em_82547_move_tail(adapter);
1936	else {
1937		E1000_WRITE_REG(&adapter->hw, E1000_TDT, i);
1938		if (adapter->hw.mac.type == e1000_82547)
1939			em_82547_update_fifo_head(adapter,
1940			    m_head->m_pkthdr.len);
1941	}
1942
1943	return (0);
1944}
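/*
 * Illustrative only -- a minimal sketch, compiled out, of how a start
 * routine typically drives the encap path above: dequeue a chain, hand
 * it to em_encap() (which may defragment or free it), and on failure
 * requeue it and mark the queue active.  The function name below is
 * hypothetical and not part of this driver.
 */
#if 0
static void
example_start_locked(struct adapter *adapter, struct ifnet *ifp)
{
	struct mbuf *m_head;

	while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
		IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
		if (m_head == NULL)
			break;
		if (em_encap(adapter, &m_head) != 0) {
			/* Out of descriptors: requeue and stall. */
			if (m_head != NULL)
				IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
			ifp->if_drv_flags |= IFF_DRV_OACTIVE;
			break;
		}
		/* Let bpf listeners see the frame. */
		BPF_MTAP(ifp, m_head);
	}
}
#endif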
1945
1946/*********************************************************************
1947 *
1948 *  This routine maps the mbufs to Advanced TX descriptors,
1949 *  as used by the 82575 adapter.  It needs no workarounds.
1950 *
1951 **********************************************************************/
1952
1953static int
1954em_adv_encap(struct adapter *adapter, struct mbuf **m_headp)
1955{
1956	bus_dma_segment_t	segs[EM_MAX_SCATTER];
1957	bus_dmamap_t		map;
1958	struct em_buffer	*tx_buffer, *tx_buffer_mapped;
1959	union e1000_adv_tx_desc	*txd = NULL;
1960	struct mbuf		*m_head;
1961	u32			olinfo_status = 0, cmd_type_len = 0;
1962	u32			paylen = 0;
1963	int			nsegs, i, j, error, first, last = 0;
1964
1965	m_head = *m_headp;
1966
1967
1968	/* Set basic descriptor constants */
1969	cmd_type_len |= E1000_ADVTXD_DTYP_DATA;
1970	cmd_type_len |= E1000_ADVTXD_DCMD_IFCS | E1000_ADVTXD_DCMD_DEXT;
1971
1972	/*
1973	 * Force a cleanup if the number of available
1974	 * TX descriptors hits the threshold.
1975	 */
1976	if (adapter->num_tx_desc_avail <= EM_TX_CLEANUP_THRESHOLD) {
1977		em_txeof(adapter);
1978		/* Do we now have at least the minimum? */
1979		if (adapter->num_tx_desc_avail <= EM_TX_OP_THRESHOLD) {
1980			adapter->no_tx_desc_avail1++;
1981			return (ENOBUFS);
1982		}
1983	}
1984
1985	/*
1986	 * Map the packet for DMA.
1987	 *
1988	 * Capture the first descriptor index,
1989	 * this descriptor will have the index
1990	 * of the EOP which is the only one that
1991	 * now gets a DONE bit writeback.
1992	 */
1993	first = adapter->next_avail_tx_desc;
1994	tx_buffer = &adapter->tx_buffer_area[first];
1995	tx_buffer_mapped = tx_buffer;
1996	map = tx_buffer->map;
1997
1998	error = bus_dmamap_load_mbuf_sg(adapter->txtag, map,
1999	    *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
2000
2001	if (error == EFBIG) {
2002		struct mbuf *m;
2003
2004		m = m_defrag(*m_headp, M_DONTWAIT);
2005		if (m == NULL) {
2006			adapter->mbuf_alloc_failed++;
2007			m_freem(*m_headp);
2008			*m_headp = NULL;
2009			return (ENOBUFS);
2010		}
2011		*m_headp = m;
2012
2013		/* Try it again */
2014		error = bus_dmamap_load_mbuf_sg(adapter->txtag, map,
2015		    *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
2016
2017		if (error == ENOMEM) {
2018			adapter->no_tx_dma_setup++;
2019			return (error);
2020		} else if (error != 0) {
2021			adapter->no_tx_dma_setup++;
2022			m_freem(*m_headp);
2023			*m_headp = NULL;
2024			return (error);
2025		}
2026	} else if (error == ENOMEM) {
2027		adapter->no_tx_dma_setup++;
2028		return (error);
2029	} else if (error != 0) {
2030		adapter->no_tx_dma_setup++;
2031		m_freem(*m_headp);
2032		*m_headp = NULL;
2033		return (error);
2034	}
2035
2036	/* Check again to be sure we have enough descriptors */
2037	if (nsegs > (adapter->num_tx_desc_avail - 2)) {
2038		adapter->no_tx_desc_avail2++;
2039		bus_dmamap_unload(adapter->txtag, map);
2040		return (ENOBUFS);
2041	}
2042	m_head = *m_headp;
2043
2044	/*
2045	 * Set up the context descriptor:
2046	 * used when any hardware offload is done.
2047	 * This includes CSUM, VLAN, and TSO.  It
2048	 * will use the first descriptor.
2049	 */
2050	/* First try TSO */
2051	if (em_tso_adv_setup(adapter, m_head, &paylen)) {
2052		cmd_type_len |= E1000_ADVTXD_DCMD_TSE;
2053		olinfo_status |= E1000_TXD_POPTS_IXSM << 8;
2054		olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
2055		olinfo_status |= paylen << E1000_ADVTXD_PAYLEN_SHIFT;
2056	} else if (m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD) {
2057		if (em_tx_adv_ctx_setup(adapter, m_head))
2058			olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
2059	}
2060
2061	/* Set up our transmit descriptors */
2062	i = adapter->next_avail_tx_desc;
2063	for (j = 0; j < nsegs; j++) {
2064		bus_size_t seg_len;
2065		bus_addr_t seg_addr;
2066
2067		tx_buffer = &adapter->tx_buffer_area[i];
2068		txd = (union e1000_adv_tx_desc *)&adapter->tx_desc_base[i];
2069		seg_addr = segs[j].ds_addr;
2070		seg_len  = segs[j].ds_len;
2071
2072		txd->read.buffer_addr = htole64(seg_addr);
2073		txd->read.cmd_type_len = htole32(
2074		    adapter->txd_cmd | cmd_type_len | seg_len);
2075		txd->read.olinfo_status = htole32(olinfo_status);
2076		last = i;
2077		if (++i == adapter->num_tx_desc)
2078			i = 0;
2079		tx_buffer->m_head = NULL;
2080		tx_buffer->next_eop = -1;
2081	}
2082
2083	adapter->next_avail_tx_desc = i;
2084	adapter->num_tx_desc_avail -= nsegs;
2085
2086	tx_buffer->m_head = m_head;
2087	tx_buffer_mapped->map = tx_buffer->map;
2088	tx_buffer->map = map;
2089	bus_dmamap_sync(adapter->txtag, map, BUS_DMASYNC_PREWRITE);
2090
2091	/*
2092	 * Last Descriptor of Packet
2093	 * needs End Of Packet (EOP)
2094	 * and Report Status (RS)
2095	 */
2096	txd->read.cmd_type_len |=
2097	    htole32(E1000_TXD_CMD_EOP | E1000_TXD_CMD_RS);
2098	/*
2099	 * Keep track in the first buffer which
2100	 * descriptor will be written back
2101	 */
2102	tx_buffer = &adapter->tx_buffer_area[first];
2103	tx_buffer->next_eop = last;
2104
2105	/*
2106	 * Advance the Transmit Descriptor Tail (TDT), this tells the E1000
2107	 * that this frame is available to transmit.
2108	 */
2109	bus_dmamap_sync(adapter->txdma.dma_tag, adapter->txdma.dma_map,
2110	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
2111	E1000_WRITE_REG(&adapter->hw, E1000_TDT, i);
2112
2113	return (0);
2114
2115}
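/*
 * Worked example for the fields above (illustrative values): a
 * single-segment 1514-byte frame with TCP checksum offload ends up
 * with cmd_type_len = txd_cmd | E1000_ADVTXD_DTYP_DATA |
 * E1000_ADVTXD_DCMD_IFCS | E1000_ADVTXD_DCMD_DEXT | 1514, and
 * olinfo_status carrying E1000_TXD_POPTS_TXSM << 8 from the context
 * setup; EOP and RS are OR'd into the final (here, only) descriptor.
 */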
2116
2117/*********************************************************************
2118 *
2119 * 82547 workaround to avoid controller hang in half-duplex environment.
2120 * The workaround is to avoid queuing a large packet that would span
2121 * the internal Tx FIFO ring boundary. We need to reset the FIFO pointers
2122 * in this case. We do that only when the FIFO is quiescent.
2123 *
2124 **********************************************************************/
2125static void
2126em_82547_move_tail(void *arg)
2127{
2128	struct adapter *adapter = arg;
2129	uint16_t hw_tdt;
2130	uint16_t sw_tdt;
2131	struct e1000_tx_desc *tx_desc;
2132	uint16_t length = 0;
2133	boolean_t eop = 0;
2134
2135	EM_LOCK_ASSERT(adapter);
2136
2137	hw_tdt = E1000_READ_REG(&adapter->hw, E1000_TDT);
2138	sw_tdt = adapter->next_avail_tx_desc;
2139
2140	while (hw_tdt != sw_tdt) {
2141		tx_desc = &adapter->tx_desc_base[hw_tdt];
2142		length += tx_desc->lower.flags.length;
2143		eop = tx_desc->lower.data & E1000_TXD_CMD_EOP;
2144		if (++hw_tdt == adapter->num_tx_desc)
2145			hw_tdt = 0;
2146
2147		if (eop) {
2148			if (em_82547_fifo_workaround(adapter, length)) {
2149				adapter->tx_fifo_wrk_cnt++;
2150				callout_reset(&adapter->tx_fifo_timer, 1,
2151					em_82547_move_tail, adapter);
2152				break;
2153			}
2154			E1000_WRITE_REG(&adapter->hw, E1000_TDT, hw_tdt);
2155			em_82547_update_fifo_head(adapter, length);
2156			length = 0;
2157		}
2158	}
2159}
2160
2161static int
2162em_82547_fifo_workaround(struct adapter *adapter, int len)
2163{
2164	int fifo_space, fifo_pkt_len;
2165
2166	fifo_pkt_len = roundup2(len + EM_FIFO_HDR, EM_FIFO_HDR);
2167
2168	if (adapter->link_duplex == HALF_DUPLEX) {
2169		fifo_space = adapter->tx_fifo_size - adapter->tx_fifo_head;
2170
2171		if (fifo_pkt_len >= (EM_82547_PKT_THRESH + fifo_space)) {
2172			if (em_82547_tx_fifo_reset(adapter))
2173				return (0);
2174			else
2175				return (1);
2176		}
2177	}
2178
2179	return (0);
2180}
2181
2182static void
2183em_82547_update_fifo_head(struct adapter *adapter, int len)
2184{
2185	int fifo_pkt_len = roundup2(len + EM_FIFO_HDR, EM_FIFO_HDR);
2186
2187	/* tx_fifo_head is always 16 byte aligned */
2188	adapter->tx_fifo_head += fifo_pkt_len;
2189	if (adapter->tx_fifo_head >= adapter->tx_fifo_size) {
2190		adapter->tx_fifo_head -= adapter->tx_fifo_size;
2191	}
2192}
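/*
 * Worked example for the FIFO accounting above (illustrative, and
 * assuming EM_FIFO_HDR is 16 bytes): a 1514-byte frame consumes
 * fifo_pkt_len = roundup2(1514 + 16, 16) = 1536 bytes of the internal
 * Tx FIFO, so the head advances by 1536 and wraps modulo
 * tx_fifo_size.
 */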
2193
2194
2195static int
2196em_82547_tx_fifo_reset(struct adapter *adapter)
2197{
2198	uint32_t tctl;
2199
2200	if ((E1000_READ_REG(&adapter->hw, E1000_TDT) ==
2201	    E1000_READ_REG(&adapter->hw, E1000_TDH)) &&
2202	    (E1000_READ_REG(&adapter->hw, E1000_TDFT) ==
2203	    E1000_READ_REG(&adapter->hw, E1000_TDFH)) &&
2204	    (E1000_READ_REG(&adapter->hw, E1000_TDFTS) ==
2205	    E1000_READ_REG(&adapter->hw, E1000_TDFHS)) &&
2206	    (E1000_READ_REG(&adapter->hw, E1000_TDFPC) == 0)) {
2207		/* Disable TX unit */
2208		tctl = E1000_READ_REG(&adapter->hw, E1000_TCTL);
2209		E1000_WRITE_REG(&adapter->hw, E1000_TCTL,
2210		    tctl & ~E1000_TCTL_EN);
2211
2212		/* Reset FIFO pointers */
2213		E1000_WRITE_REG(&adapter->hw, E1000_TDFT,
2214		    adapter->tx_head_addr);
2215		E1000_WRITE_REG(&adapter->hw, E1000_TDFH,
2216		    adapter->tx_head_addr);
2217		E1000_WRITE_REG(&adapter->hw, E1000_TDFTS,
2218		    adapter->tx_head_addr);
2219		E1000_WRITE_REG(&adapter->hw, E1000_TDFHS,
2220		    adapter->tx_head_addr);
2221
2222		/* Re-enable TX unit */
2223		E1000_WRITE_REG(&adapter->hw, E1000_TCTL, tctl);
2224		E1000_WRITE_FLUSH(&adapter->hw);
2225
2226		adapter->tx_fifo_head = 0;
2227		adapter->tx_fifo_reset_cnt++;
2228
2229		return (TRUE);
2230	}
2231	else {
2232		return (FALSE);
2233	}
2234}
2235
2236static void
2237em_set_promisc(struct adapter *adapter)
2238{
2239	struct ifnet	*ifp = adapter->ifp;
2240	uint32_t	reg_rctl;
2241
2242	reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2243
2244	if (ifp->if_flags & IFF_PROMISC) {
2245		reg_rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
2246		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2247	} else if (ifp->if_flags & IFF_ALLMULTI) {
2248		reg_rctl |= E1000_RCTL_MPE;
2249		reg_rctl &= ~E1000_RCTL_UPE;
2250		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2251	}
2252}
2253
2254static void
2255em_disable_promisc(struct adapter *adapter)
2256{
2257	uint32_t	reg_rctl;
2258
2259	reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2260
2261	reg_rctl &=  (~E1000_RCTL_UPE);
2262	reg_rctl &=  (~E1000_RCTL_MPE);
2263	E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2264}
2265
2266
2267/*********************************************************************
2268 *  Multicast Update
2269 *
2270 *  This routine is called whenever the multicast address list is updated.
2271 *
2272 **********************************************************************/
2273
2274static void
2275em_set_multi(struct adapter *adapter)
2276{
2277	struct ifnet	*ifp = adapter->ifp;
2278	struct ifmultiaddr *ifma;
2279	uint32_t reg_rctl = 0;
2280	uint8_t  mta[512]; /* Largest MTS is 4096 bits */
2281	int mcnt = 0;
2282
2283	IOCTL_DEBUGOUT("em_set_multi: begin");
2284
2285	if (adapter->hw.mac.type == e1000_82542 &&
2286	    adapter->hw.revision_id == E1000_REVISION_2) {
2287		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2288		if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
2289			e1000_pci_clear_mwi(&adapter->hw);
2290		reg_rctl |= E1000_RCTL_RST;
2291		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2292		msec_delay(5);
2293	}
2294
2295	IF_ADDR_LOCK(ifp);
2296	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
2297		if (ifma->ifma_addr->sa_family != AF_LINK)
2298			continue;
2299
2300		if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
2301			break;
2302
2303		bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
2304		    &mta[mcnt * ETH_ADDR_LEN], ETH_ADDR_LEN);
2305		mcnt++;
2306	}
2307	IF_ADDR_UNLOCK(ifp);
2308
2309	if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) {
2310		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2311		reg_rctl |= E1000_RCTL_MPE;
2312		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2313	} else
2314		e1000_mc_addr_list_update(&adapter->hw, mta,
2315		    mcnt, 1, adapter->hw.mac.rar_entry_count);
2316
2317	if (adapter->hw.mac.type == e1000_82542 &&
2318	    adapter->hw.revision_id == E1000_REVISION_2) {
2319		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2320		reg_rctl &= ~E1000_RCTL_RST;
2321		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2322		msec_delay(5);
2323		if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
2324			e1000_pci_set_mwi(&adapter->hw);
2325	}
2326}
2327
2328
2329/*********************************************************************
2330 *  Timer routine
2331 *
2332 *  This routine checks for link status and updates statistics.
2333 *
2334 **********************************************************************/
2335
2336static void
2337em_local_timer(void *arg)
2338{
2339	struct adapter	*adapter = arg;
2340	struct ifnet	*ifp = adapter->ifp;
2341
2342	EM_LOCK_ASSERT(adapter);
2343
2344	e1000_check_for_link(&adapter->hw);
2345	em_update_link_status(adapter);
2346	em_update_stats_counters(adapter);
2347
2348	/* Check for 82571 LAA reset by other port */
2349	if (e1000_get_laa_state_82571(&adapter->hw) == TRUE)
2350		e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);
2351
2352	if (em_display_debug_stats && (ifp->if_drv_flags & IFF_DRV_RUNNING))
2353		em_print_hw_stats(adapter);
2354
2355	em_smartspeed(adapter);
2356
2357	/*
2358	 * Each second we check the watchdog to
2359	 * protect against hardware hangs.
2360	 */
2361	em_watchdog(adapter);
2362
2363	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
2364}
2365
2366static void
2367em_update_link_status(struct adapter *adapter)
2368{
2369	struct ifnet *ifp = adapter->ifp;
2370	device_t dev = adapter->dev;
2371
2372	if (E1000_READ_REG(&adapter->hw, E1000_STATUS) &
2373	    E1000_STATUS_LU) {
2374		if (adapter->link_active == 0) {
2375			e1000_get_speed_and_duplex(&adapter->hw,
2376			    &adapter->link_speed, &adapter->link_duplex);
2377			/* Check if we must disable SPEED_MODE bit on PCI-E */
2378			if ((adapter->link_speed != SPEED_1000) &&
2379			    ((adapter->hw.mac.type == e1000_82571) ||
2380			    (adapter->hw.mac.type == e1000_82572))) {
2381				int tarc0;
2382
2383				tarc0 = E1000_READ_REG(&adapter->hw,
2384				    E1000_TARC0);
2385				tarc0 &= ~SPEED_MODE_BIT;
2386				E1000_WRITE_REG(&adapter->hw,
2387				    E1000_TARC0, tarc0);
2388			}
2389			if (bootverbose)
2390				device_printf(dev, "Link is up %d Mbps %s\n",
2391				    adapter->link_speed,
2392				    ((adapter->link_duplex == FULL_DUPLEX) ?
2393				    "Full Duplex" : "Half Duplex"));
2394			adapter->link_active = 1;
2395			adapter->smartspeed = 0;
2396			ifp->if_baudrate = adapter->link_speed * 1000000;
2397			if_link_state_change(ifp, LINK_STATE_UP);
2398		}
2399	} else {
2400		if (adapter->link_active == 1) {
2401			ifp->if_baudrate = adapter->link_speed = 0;
2402			adapter->link_duplex = 0;
2403			if (bootverbose)
2404				device_printf(dev, "Link is Down\n");
2405			adapter->link_active = 0;
2406			if_link_state_change(ifp, LINK_STATE_DOWN);
2407		}
2408	}
2409}
2410
2411/*********************************************************************
2412 *
2413 *  This routine disables all traffic on the adapter by issuing a
2414 *  global reset on the MAC and deallocates TX/RX buffers.
2415 *
2416 **********************************************************************/
2417
2418static void
2419em_stop(void *arg)
2420{
2421	struct adapter	*adapter = arg;
2422	struct ifnet	*ifp = adapter->ifp;
2423
2424	EM_LOCK_ASSERT(adapter);
2425
2426	INIT_DEBUGOUT("em_stop: begin");
2427
2428	em_disable_intr(adapter);
2429	callout_stop(&adapter->timer);
2430	callout_stop(&adapter->tx_fifo_timer);
2431	em_free_transmit_structures(adapter);
2432	em_free_receive_structures(adapter);
2433
2434	/* Tell the stack that the interface is no longer active */
2435	ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
2436
2437	e1000_reset_hw(&adapter->hw);
2438	if (adapter->hw.mac.type >= e1000_82544)
2439		E1000_WRITE_REG(&adapter->hw, E1000_WUC, 0);
2440}
2441
2442
2443/*********************************************************************
2444 *
2445 *  Determine hardware revision.
2446 *
2447 **********************************************************************/
2448static void
2449em_identify_hardware(struct adapter *adapter)
2450{
2451	device_t dev = adapter->dev;
2452
2453	/* Make sure our PCI config space has the necessary stuff set */
2454	adapter->hw.bus.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2);
2455	if ((adapter->hw.bus.pci_cmd_word & PCIM_CMD_BUSMASTEREN) == 0 &&
2456	    (adapter->hw.bus.pci_cmd_word & PCIM_CMD_MEMEN)) {
2457		device_printf(dev, "Memory Access and/or Bus Master bits "
2458		    "were not set!\n");
2459		adapter->hw.bus.pci_cmd_word |=
2460		(PCIM_CMD_BUSMASTEREN | PCIM_CMD_MEMEN);
2461		pci_write_config(dev, PCIR_COMMAND,
2462		    adapter->hw.bus.pci_cmd_word, 2);
2463	}
2464
2465	/* Save off the information about this board */
2466	adapter->hw.vendor_id = pci_get_vendor(dev);
2467	adapter->hw.device_id = pci_get_device(dev);
2468	adapter->hw.revision_id = pci_read_config(dev, PCIR_REVID, 1);
2469	adapter->hw.subsystem_vendor_id =
2470	    pci_read_config(dev, PCIR_SUBVEND_0, 2);
2471	adapter->hw.subsystem_device_id = pci_read_config(dev, PCIR_SUBDEV_0, 2);
2472
2473	/* Do Shared Code Init and Setup */
2474	if (e1000_set_mac_type(&adapter->hw)) {
2475		device_printf(dev, "Setup init failure\n");
2476		return;
2477	}
2478}
2479
2480static int
2481em_allocate_pci_resources(struct adapter *adapter)
2482{
2483	device_t	dev = adapter->dev;
2484	int		val, rid;
2485
2486	rid = PCIR_BAR(0);
2487	adapter->res_memory = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
2488	    &rid, RF_ACTIVE);
2489	if (adapter->res_memory == NULL) {
2490		device_printf(dev, "Unable to allocate bus resource: memory\n");
2491		return (ENXIO);
2492	}
2493	adapter->osdep.mem_bus_space_tag =
2494	    rman_get_bustag(adapter->res_memory);
2495	adapter->osdep.mem_bus_space_handle =
2496	    rman_get_bushandle(adapter->res_memory);
2497	adapter->hw.hw_addr = (uint8_t *)&adapter->osdep.mem_bus_space_handle;
2498
2499	/* Only older adapters use IO mapping */
2500	if ((adapter->hw.mac.type > e1000_82542) &&
2501	    (adapter->hw.mac.type < e1000_82571)) {
2502		/* Figure out where our IO BAR is */
2503		for (rid = PCIR_BAR(0); rid < PCIR_CIS;) {
2504			val = pci_read_config(dev, rid, 4);
2505			if (EM_BAR_TYPE(val) == EM_BAR_TYPE_IO) {
2506				adapter->io_rid = rid;
2507				break;
2508			}
2509			rid += 4;
2510			/* check for 64bit BAR */
2511			if (EM_BAR_MEM_TYPE(val) == EM_BAR_MEM_TYPE_64BIT)
2512				rid += 4;
2513		}
2514		if (rid >= PCIR_CIS) {
2515			device_printf(dev, "Unable to locate IO BAR\n");
2516			return (ENXIO);
2517		}
2518		adapter->res_ioport = bus_alloc_resource_any(dev,
2519		    SYS_RES_IOPORT, &adapter->io_rid, RF_ACTIVE);
2520		if (adapter->res_ioport == NULL) {
2521			device_printf(dev, "Unable to allocate bus resource: "
2522			    "ioport\n");
2523			return (ENXIO);
2524		}
2525		adapter->hw.io_base = 0;
2526		adapter->osdep.io_bus_space_tag =
2527		    rman_get_bustag(adapter->res_ioport);
2528		adapter->osdep.io_bus_space_handle =
2529		    rman_get_bushandle(adapter->res_ioport);
2530	}
2531
2532	/*
2533	 * Setup MSI/X or MSI if PCI Express;
2534	 * only the newest adapters can use MSI/X,
2535	 * and real support for it is forthcoming.
2536	 */
2537	adapter->msi = 0; /* Set defaults */
2538	rid = 0x0;
2539	if (adapter->hw.mac.type >= e1000_82575) {
2540		/*
2541		 * Setup MSI/X
2542		 */
2543		rid = PCIR_BAR(EM_MSIX_BAR);
2544		adapter->msix_mem = bus_alloc_resource_any(dev,
2545		    SYS_RES_MEMORY, &rid, RF_ACTIVE);
2546		if (!adapter->msix_mem) {
2547			device_printf(dev, "Unable to map MSIX table\n");
2548			return (ENXIO);
2549		}
2550		/*
2551		 * Eventually this may be used
2552		 * for Multiqueue; for now we will
2553		 * just use one vector.
2554		 *
2555		 * val = pci_msix_count(dev);
2556		 */
2557		val = 1;
2558		if ((val) && pci_alloc_msix(dev, &val) == 0) {
2559			rid = 1;
2560			adapter->msi = 1;
2561		}
2562	} else if (adapter->hw.mac.type > e1000_82571) {
2563		val = pci_msi_count(dev);
2564		if (val == 1 && pci_alloc_msi(dev, &val) == 0) {
2565			rid = 1;
2566			adapter->msi = 1;
2567		}
2568	}
2569	adapter->res_interrupt = bus_alloc_resource_any(dev,
2570	    SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2571	if (adapter->res_interrupt == NULL) {
2572		device_printf(dev, "Unable to allocate bus resource: "
2573		    "interrupt\n");
2574		return (ENXIO);
2575	}
2576
2577	adapter->hw.back = &adapter->osdep;
2578
2579	return (0);
2580}
2581
2582/*********************************************************************
2583 *
2584 *  Setup the appropriate Interrupt handlers.
2585 *
2586 **********************************************************************/
2587int
2588em_allocate_intr(struct adapter *adapter)
2589{
2590	device_t dev = adapter->dev;
2591	int error;
2592
2593	/* Manually turn off all interrupts */
2594	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2595
2596#ifdef DEVICE_POLLING
2597	/* We do Legacy setup */
2598	if (adapter->int_handler_tag == NULL &&
2599	    (error = bus_setup_intr(dev, adapter->res_interrupt,
2600	    INTR_TYPE_NET | INTR_MPSAFE, NULL, em_intr, adapter,
2601	    &adapter->int_handler_tag)) != 0) {
2602		device_printf(dev, "Failed to register interrupt handler");
2603		return (error);
2604	}
2605
2606#else
2607	/*
2608	 * Try allocating a fast interrupt and the associated deferred
2609	 * processing contexts.
2610	 */
2611	TASK_INIT(&adapter->rxtx_task, 0, em_handle_rxtx, adapter);
2612	TASK_INIT(&adapter->link_task, 0, em_handle_link, adapter);
2613	adapter->tq = taskqueue_create_fast("em_taskq", M_NOWAIT,
2614	    taskqueue_thread_enqueue, &adapter->tq);
2615	taskqueue_start_threads(&adapter->tq, 1, PI_NET, "%s taskq",
2616	    device_get_nameunit(adapter->dev));
2617	if ((error = bus_setup_intr(dev, adapter->res_interrupt,
2618	    INTR_TYPE_NET, em_intr_fast, NULL, adapter,
2619	    &adapter->int_handler_tag)) != 0) {
2620		device_printf(dev, "Failed to register fast interrupt "
2621			    "handler: %d\n", error);
2622		taskqueue_free(adapter->tq);
2623		adapter->tq = NULL;
2624		return (error);
2625	}
2626#endif
2627
2628	em_enable_intr(adapter);
2629	return (0);
2630}
2631
2632static void
2633em_free_intr(struct adapter *adapter)
2634{
2635	device_t dev = adapter->dev;
2636
2637	if (adapter->res_interrupt != NULL) {
2638		bus_teardown_intr(dev, adapter->res_interrupt,
2639			adapter->int_handler_tag);
2640		adapter->int_handler_tag = NULL;
2641	}
2642	if (adapter->tq != NULL) {
2643		taskqueue_drain(adapter->tq, &adapter->rxtx_task);
2644		taskqueue_drain(taskqueue_fast, &adapter->link_task);
2645		taskqueue_free(adapter->tq);
2646		adapter->tq = NULL;
2647	}
2648}
2649
2650static void
2651em_free_pci_resources(struct adapter *adapter)
2652{
2653	device_t dev = adapter->dev;
2654
2655	if (adapter->res_interrupt != NULL)
2656		bus_release_resource(dev, SYS_RES_IRQ,
2657		    adapter->msi ? 1 : 0, adapter->res_interrupt);
2658
2659	if (adapter->msix_mem != NULL)
2660		bus_release_resource(dev, SYS_RES_MEMORY,
2661		    PCIR_BAR(EM_MSIX_BAR), adapter->msix_mem);
2662
2663	if (adapter->msi)
2664		pci_release_msi(dev);
2665
2666	if (adapter->res_memory != NULL)
2667		bus_release_resource(dev, SYS_RES_MEMORY,
2668		    PCIR_BAR(0), adapter->res_memory);
2669
2670	if (adapter->flash_mem != NULL)
2671		bus_release_resource(dev, SYS_RES_MEMORY,
2672		    EM_FLASH, adapter->flash_mem);
2673
2674	if (adapter->res_ioport != NULL)
2675		bus_release_resource(dev, SYS_RES_IOPORT,
2676		    adapter->io_rid, adapter->res_ioport);
2677}
2678
2679/*********************************************************************
2680 *
2681 *  Initialize the hardware to a configuration
2682 *  as specified by the adapter structure.
2683 *
2684 **********************************************************************/
2685static int
2686em_hardware_init(struct adapter *adapter)
2687{
2688	device_t dev = adapter->dev;
2689	uint16_t rx_buffer_size;
2690
2691	INIT_DEBUGOUT("em_hardware_init: begin");
2692
2693	/* Issue a global reset */
2694	e1000_reset_hw(&adapter->hw);
2695
2696	/* Get control from any management/hw control */
2697	if (((adapter->hw.mac.type == e1000_82573) ||
2698	    (adapter->hw.mac.type == e1000_ich8lan) ||
2699	    (adapter->hw.mac.type == e1000_ich9lan)) &&
2700	    e1000_check_mng_mode(&adapter->hw))
2701		em_get_hw_control(adapter);
2702
2703	/* When hardware is reset, fifo_head is also reset */
2704	adapter->tx_fifo_head = 0;
2705
2706	/* Set up smart power down as default off on newer adapters. */
2707	if (!em_smart_pwr_down && (adapter->hw.mac.type == e1000_82571 ||
2708	    adapter->hw.mac.type == e1000_82572)) {
2709		uint16_t phy_tmp = 0;
2710
2711		/* Speed up time to link by disabling smart power down. */
2712		e1000_read_phy_reg(&adapter->hw,
2713		    IGP02E1000_PHY_POWER_MGMT, &phy_tmp);
2714		phy_tmp &= ~IGP02E1000_PM_SPD;
2715		e1000_write_phy_reg(&adapter->hw,
2716		    IGP02E1000_PHY_POWER_MGMT, phy_tmp);
2717	}
2718
2719	/*
2720	 * These parameters control the automatic generation (Tx) and
2721	 * response (Rx) to Ethernet PAUSE frames.
2722	 * - High water mark should allow for at least two frames to be
2723	 *   received after sending an XOFF.
2724	 * - Low water mark works best when it is very near the high water mark.
2725	 *   This allows the receiver to restart by sending XON when it has
2726	 *   drained a bit. Here we use an arbitrary value of 1500 which will
2727	 *   restart after one full frame is pulled from the buffer. There
2728	 *   could be several smaller frames in the buffer and if so they will
2729	 *   not trigger the XON until their total number reduces the buffer
2730	 *   by 1500.
2731	 * - The pause time is fairly large at 1000 x 512ns = 512 usec.
2732	 */
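	/*
	 * Worked example (illustrative numbers only): if the PBA
	 * register's low word reads 48 (KB), rx_buffer_size =
	 * 48 << 10 = 49152.  With a 1522-byte max frame,
	 * roundup2(1522, 1024) = 2048, so fc_high_water =
	 * 49152 - 2048 = 47104 and fc_low_water = 47104 - 1500 = 45604.
	 */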
2733	rx_buffer_size = ((E1000_READ_REG(&adapter->hw, E1000_PBA) &
2734	    0xffff) << 10 );
2735
2736	adapter->hw.mac.fc_high_water = rx_buffer_size -
2737	    roundup2(adapter->hw.mac.max_frame_size, 1024);
2738	adapter->hw.mac.fc_low_water = adapter->hw.mac.fc_high_water - 1500;
2739	if (adapter->hw.mac.type == e1000_80003es2lan)
2740		adapter->hw.mac.fc_pause_time = 0xFFFF;
2741	else
2742		adapter->hw.mac.fc_pause_time = EM_FC_PAUSE_TIME;
2743	adapter->hw.mac.fc_send_xon = TRUE;
2744	adapter->hw.mac.fc = e1000_fc_full;
2745
2746	if (e1000_init_hw(&adapter->hw) < 0) {
2747		device_printf(dev, "Hardware Initialization Failed\n");
2748		return (EIO);
2749	}
2750
2751	e1000_check_for_link(&adapter->hw);
2752
2753	return (0);
2754}
2755
2756/*********************************************************************
2757 *
2758 *  Setup networking device structure and register an interface.
2759 *
2760 **********************************************************************/
2761static void
2762em_setup_interface(device_t dev, struct adapter *adapter)
2763{
2764	struct ifnet   *ifp;
2765
2766	INIT_DEBUGOUT("em_setup_interface: begin");
2767
2768	ifp = adapter->ifp = if_alloc(IFT_ETHER);
2769	if (ifp == NULL)
2770		panic("%s: can not if_alloc()", device_get_nameunit(dev));
2771	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
2772	ifp->if_mtu = ETHERMTU;
2773	ifp->if_init =  em_init;
2774	ifp->if_softc = adapter;
2775	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
2776	ifp->if_ioctl = em_ioctl;
2777	ifp->if_start = em_start;
2778	IFQ_SET_MAXLEN(&ifp->if_snd, adapter->num_tx_desc - 1);
2779	ifp->if_snd.ifq_drv_maxlen = adapter->num_tx_desc - 1;
2780	IFQ_SET_READY(&ifp->if_snd);
2781
2782	ether_ifattach(ifp, adapter->hw.mac.addr);
2783
2784	ifp->if_capabilities = ifp->if_capenable = 0;
2785
2786	if (adapter->hw.mac.type >= e1000_82543) {
2787		ifp->if_capabilities |= IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM;
2788		ifp->if_capenable |= IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM;
2789	}
2790
2791	/* Identify TSO capable adapters */
2792	if ((adapter->hw.mac.type > e1000_82544) &&
2793	    (adapter->hw.mac.type != e1000_82547))
2794		ifp->if_capabilities |= IFCAP_TSO4;
2795	/*
2796	 * By default only enable on PCI-E, this
2797	 * can be overridden by ifconfig.
2798	 */
2799	if (adapter->hw.mac.type >= e1000_82571)
2800		ifp->if_capenable |= IFCAP_TSO4;
2801
2802	/*
2803	 * Tell the upper layer(s) we support long frames.
2804	 */
2805	ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);
2806	ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU;
2807	ifp->if_capenable |= IFCAP_VLAN_MTU;
2808
2809#ifdef DEVICE_POLLING
2810	ifp->if_capabilities |= IFCAP_POLLING;
2811#endif
2812
2813	/*
2814	 * Specify the media types supported by this adapter and register
2815	 * callbacks to update media and link information
2816	 */
2817	ifmedia_init(&adapter->media, IFM_IMASK,
2818	    em_media_change, em_media_status);
2819	if ((adapter->hw.media_type == e1000_media_type_fiber) ||
2820	    (adapter->hw.media_type == e1000_media_type_internal_serdes)) {
2821		u_char fiber_type = IFM_1000_SX;	/* default type */
2822
2823		if (adapter->hw.mac.type == e1000_82545)
2824			fiber_type = IFM_1000_LX;
2825		ifmedia_add(&adapter->media, IFM_ETHER | fiber_type | IFM_FDX,
2826			    0, NULL);
2827		ifmedia_add(&adapter->media, IFM_ETHER | fiber_type, 0, NULL);
2828	} else {
2829		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T, 0, NULL);
2830		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T | IFM_FDX,
2831			    0, NULL);
2832		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX,
2833			    0, NULL);
2834		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
2835			    0, NULL);
2836		if (adapter->hw.phy.type != e1000_phy_ife) {
2837			ifmedia_add(&adapter->media,
2838				IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
2839			ifmedia_add(&adapter->media,
2840				IFM_ETHER | IFM_1000_T, 0, NULL);
2841		}
2842	}
2843	ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
2844	ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
2845}
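/*
 * Usage example (illustrative): the TSO default set above can be
 * changed from userland, e.g.
 *
 *	ifconfig em0 -tso	# disable TSO4
 *	ifconfig em0 tso	# enable it where supported
 */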
2846
2847
2848/*********************************************************************
2849 *
2850 *  Workaround for SmartSpeed on 82541 and 82547 controllers
2851 *
2852 **********************************************************************/
2853static void
2854em_smartspeed(struct adapter *adapter)
2855{
2856	uint16_t phy_tmp;
2857
2858	if (adapter->link_active || (adapter->hw.phy.type != e1000_phy_igp) ||
2859	    adapter->hw.mac.autoneg == 0 ||
2860	    (adapter->hw.phy.autoneg_advertised & ADVERTISE_1000_FULL) == 0)
2861		return;
2862
2863	if (adapter->smartspeed == 0) {
2864		/* If Master/Slave config fault is asserted twice,
2865		 * we assume back-to-back */
2866		e1000_read_phy_reg(&adapter->hw, PHY_1000T_STATUS, &phy_tmp);
2867		if (!(phy_tmp & SR_1000T_MS_CONFIG_FAULT))
2868			return;
2869		e1000_read_phy_reg(&adapter->hw, PHY_1000T_STATUS, &phy_tmp);
2870		if (phy_tmp & SR_1000T_MS_CONFIG_FAULT) {
2871			e1000_read_phy_reg(&adapter->hw, PHY_1000T_CTRL, &phy_tmp);
2872			if (phy_tmp & CR_1000T_MS_ENABLE) {
2873				phy_tmp &= ~CR_1000T_MS_ENABLE;
2874				e1000_write_phy_reg(&adapter->hw, PHY_1000T_CTRL,
2875				    phy_tmp);
2876				adapter->smartspeed++;
2877				if (adapter->hw.mac.autoneg &&
2878				   !e1000_phy_setup_autoneg(&adapter->hw) &&
2879				   !e1000_read_phy_reg(&adapter->hw, PHY_CONTROL,
2880				    &phy_tmp)) {
2881					phy_tmp |= (MII_CR_AUTO_NEG_EN |
2882						    MII_CR_RESTART_AUTO_NEG);
2883					e1000_write_phy_reg(&adapter->hw, PHY_CONTROL,
2884					    phy_tmp);
2885				}
2886			}
2887		}
2888		return;
2889	} else if (adapter->smartspeed == EM_SMARTSPEED_DOWNSHIFT) {
2890		/* If still no link, perhaps using 2/3 pair cable */
2891		e1000_read_phy_reg(&adapter->hw, PHY_1000T_CTRL, &phy_tmp);
2892		phy_tmp |= CR_1000T_MS_ENABLE;
2893		e1000_write_phy_reg(&adapter->hw, PHY_1000T_CTRL, phy_tmp);
2894		if (adapter->hw.mac.autoneg &&
2895		   !e1000_phy_setup_autoneg(&adapter->hw) &&
2896		   !e1000_read_phy_reg(&adapter->hw, PHY_CONTROL, &phy_tmp)) {
2897			phy_tmp |= (MII_CR_AUTO_NEG_EN |
2898				    MII_CR_RESTART_AUTO_NEG);
2899			e1000_write_phy_reg(&adapter->hw, PHY_CONTROL, phy_tmp);
2900		}
2901	}
2902	/* Restart process after EM_SMARTSPEED_MAX iterations */
2903	if (adapter->smartspeed++ == EM_SMARTSPEED_MAX)
2904		adapter->smartspeed = 0;
2905}
2906
2907
2908/*
2909 * Manage DMA'able memory.
2910 */
2911static void
2912em_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
2913{
2914	if (error)
2915		return;
2916	*(bus_addr_t *) arg = segs[0].ds_addr;
2917}
2918
2919static int
2920em_dma_malloc(struct adapter *adapter, bus_size_t size,
2921        struct em_dma_alloc *dma, int mapflags)
2922{
2923	int error;
2924
2925	error = bus_dma_tag_create(bus_get_dma_tag(adapter->dev), /* parent */
2926				EM_DBA_ALIGN, 0,	/* alignment, bounds */
2927				BUS_SPACE_MAXADDR,	/* lowaddr */
2928				BUS_SPACE_MAXADDR,	/* highaddr */
2929				NULL, NULL,		/* filter, filterarg */
2930				size,			/* maxsize */
2931				1,			/* nsegments */
2932				size,			/* maxsegsize */
2933				0,			/* flags */
2934				NULL,			/* lockfunc */
2935				NULL,			/* lockarg */
2936				&dma->dma_tag);
2937	if (error) {
2938		device_printf(adapter->dev,
2939		    "%s: bus_dma_tag_create failed: %d\n",
2940		    __func__, error);
2941		goto fail_0;
2942	}
2943
2944#ifdef __arm__
2945	error = bus_dmamem_alloc(dma->dma_tag, (void**) &dma->dma_vaddr,
2946	    BUS_DMA_NOWAIT | BUS_DMA_COHERENT, &dma->dma_map);
2947#else
2948	error = bus_dmamem_alloc(dma->dma_tag, (void**) &dma->dma_vaddr,
2949	    BUS_DMA_NOWAIT, &dma->dma_map);
2950#endif
2951	if (error) {
2952		device_printf(adapter->dev,
2953		    "%s: bus_dmamem_alloc(%ju) failed: %d\n",
2954		    __func__, (uintmax_t)size, error);
2955		goto fail_2;
2956	}
2957
2958	dma->dma_paddr = 0;
2959	error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
2960	    size, em_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT);
2961	if (error || dma->dma_paddr == 0) {
2962		device_printf(adapter->dev,
2963		    "%s: bus_dmamap_load failed: %d\n",
2964		    __func__, error);
2965		goto fail_3;
2966	}
2967
2968	return (0);
2969
2970fail_3:
2971	bus_dmamap_unload(dma->dma_tag, dma->dma_map);
2972fail_2:
2973	bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
2974	bus_dma_tag_destroy(dma->dma_tag);
2975fail_0:
2976	dma->dma_map = NULL;
2977	dma->dma_tag = NULL;
2978
2979	return (error);
2980}
2981
2982static void
2983em_dma_free(struct adapter *adapter, struct em_dma_alloc *dma)
2984{
2985	if (dma->dma_tag == NULL)
2986		return;
2987	if (dma->dma_map != NULL) {
2988		bus_dmamap_sync(dma->dma_tag, dma->dma_map,
2989		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
2990		bus_dmamap_unload(dma->dma_tag, dma->dma_map);
2991		bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
2992		dma->dma_map = NULL;
2993	}
2994	bus_dma_tag_destroy(dma->dma_tag);
2995	dma->dma_tag = NULL;
2996}
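/*
 * Illustrative only -- a minimal sketch, compiled out, of pairing
 * em_dma_malloc() with em_dma_free() for a DMA-able region.  The
 * function name and the 4 KB size below are hypothetical examples,
 * not driver requirements.
 */
#if 0
static int
example_dma_region(struct adapter *adapter)
{
	struct em_dma_alloc dma;
	int error;

	error = em_dma_malloc(adapter, 4096, &dma, BUS_DMA_NOWAIT);
	if (error != 0)
		return (error);
	/* ... use dma.dma_vaddr (KVA) and dma.dma_paddr (bus addr) ... */
	em_dma_free(adapter, &dma);
	return (0);
}
#endif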
2997
2998
2999/*********************************************************************
3000 *
3001 *  Allocate memory for tx_buffer structures. The tx_buffer stores all
3002 *  the information needed to transmit a packet on the wire.
3003 *
3004 **********************************************************************/
3005static int
3006em_allocate_transmit_structures(struct adapter *adapter)
3007{
3008	device_t dev = adapter->dev;
3009
3010	adapter->tx_buffer_area = malloc(sizeof(struct em_buffer) *
3011	    adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO);
3012	if (adapter->tx_buffer_area == NULL) {
3013		device_printf(dev, "Unable to allocate tx_buffer memory\n");
3014		return (ENOMEM);
3015	}
3016
3017	bzero(adapter->tx_buffer_area,	/* redundant with M_ZERO above */
3018	    (sizeof(struct em_buffer)) * adapter->num_tx_desc);
3019
3020	return (0);
3021}
3022
3023/*********************************************************************
3024 *
3025 *  Initialize transmit structures.
3026 *
3027 **********************************************************************/
3028static int
3029em_setup_transmit_structures(struct adapter *adapter)
3030{
3031	device_t dev = adapter->dev;
3032	struct em_buffer *tx_buffer;
3033	int error, i;
3034
3035	/*
3036	 * Create DMA tags for tx descriptors
3037	 */
3038	if ((error = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */
3039				1, 0,			/* alignment, bounds */
3040				BUS_SPACE_MAXADDR,	/* lowaddr */
3041				BUS_SPACE_MAXADDR,	/* highaddr */
3042				NULL, NULL,		/* filter, filterarg */
3043				EM_TSO_SIZE,		/* maxsize */
3044				EM_MAX_SCATTER,		/* nsegments */
3045				EM_TSO_SEG_SIZE,	/* maxsegsize */
3046				0,			/* flags */
3047				NULL,		/* lockfunc */
3048				NULL,		/* lockarg */
3049				&adapter->txtag)) != 0) {
3050		device_printf(dev, "Unable to allocate TX DMA tag\n");
3051		goto fail;
3052	}
3053
3054	if ((error = em_allocate_transmit_structures(adapter)) != 0)
3055		goto fail;
3056
3057	/* Clear the old ring contents */
3058	bzero(adapter->tx_desc_base,
3059	    (sizeof(struct e1000_tx_desc)) * adapter->num_tx_desc);
3060
3061	/* Create the descriptor buffer dma maps */
3062	tx_buffer = adapter->tx_buffer_area;
3063	for (i = 0; i < adapter->num_tx_desc; i++) {
3064		error = bus_dmamap_create(adapter->txtag, 0, &tx_buffer->map);
3065		if (error != 0) {
3066			device_printf(dev, "Unable to create TX DMA map\n");
3067			goto fail;
3068		}
3069		tx_buffer->next_eop = -1;
3070		tx_buffer++;
3071	}
3072
3073	adapter->next_avail_tx_desc = 0;
3074	adapter->next_tx_to_clean = 0;
3075
3076	/* Set number of descriptors available */
3077	adapter->num_tx_desc_avail = adapter->num_tx_desc;
3078
3079	bus_dmamap_sync(adapter->txdma.dma_tag, adapter->txdma.dma_map,
3080	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3081
3082	return (0);
3083
3084fail:
3085	em_free_transmit_structures(adapter);
3086	return (error);
3087}
3088
3089/*********************************************************************
3090 *
3091 *  Enable transmit unit.
3092 *
3093 **********************************************************************/
3094static void
3095em_initialize_transmit_unit(struct adapter *adapter)
3096{
3097	uint32_t	tctl, tarc, tipg = 0;
3098	uint64_t	bus_addr;
3099
3100	INIT_DEBUGOUT("em_initialize_transmit_unit: begin");
3101	/* Setup the Base and Length of the Tx Descriptor Ring */
3102	bus_addr = adapter->txdma.dma_paddr;
3103	E1000_WRITE_REG(&adapter->hw, E1000_TDLEN,
3104	    adapter->num_tx_desc * sizeof(struct e1000_tx_desc));
3105	E1000_WRITE_REG(&adapter->hw, E1000_TDBAH, (uint32_t)(bus_addr >> 32));
3106	E1000_WRITE_REG(&adapter->hw, E1000_TDBAL, (uint32_t)bus_addr);
3107
3108	/* Setup the HW Tx Head and Tail descriptor pointers */
3109	E1000_WRITE_REG(&adapter->hw, E1000_TDT, 0);
3110	E1000_WRITE_REG(&adapter->hw, E1000_TDH, 0);
3111
3112	HW_DEBUGOUT2("Base = %x, Length = %x\n",
3113	    E1000_READ_REG(&adapter->hw, E1000_TDBAL),
3114	    E1000_READ_REG(&adapter->hw, E1000_TDLEN));
3115
3116	/* Set the default values for the Tx Inter Packet Gap timer */
3117	switch (adapter->hw.mac.type) {
3118	case e1000_82542:
3119		tipg = DEFAULT_82542_TIPG_IPGT;
3120		tipg |= DEFAULT_82542_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT;
3121		tipg |= DEFAULT_82542_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT;
3122		break;
3123	case e1000_80003es2lan:
3124		tipg = DEFAULT_82543_TIPG_IPGR1;
3125		tipg |= DEFAULT_80003ES2LAN_TIPG_IPGR2 <<
3126		    E1000_TIPG_IPGR2_SHIFT;
3127		break;
3128	default:
3129		if ((adapter->hw.media_type == e1000_media_type_fiber) ||
3130		    (adapter->hw.media_type ==
3131		    e1000_media_type_internal_serdes))
3132			tipg = DEFAULT_82543_TIPG_IPGT_FIBER;
3133		else
3134			tipg = DEFAULT_82543_TIPG_IPGT_COPPER;
3135		tipg |= DEFAULT_82543_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT;
3136		tipg |= DEFAULT_82543_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT;
3137	}
3138
3139	E1000_WRITE_REG(&adapter->hw, E1000_TIPG, tipg);
3140	E1000_WRITE_REG(&adapter->hw, E1000_TIDV, adapter->tx_int_delay.value);
3141	if (adapter->hw.mac.type >= e1000_82540)
3142		E1000_WRITE_REG(&adapter->hw, E1000_TADV,
3143		    adapter->tx_abs_int_delay.value);
3144
3145	if ((adapter->hw.mac.type == e1000_82571) ||
3146	    (adapter->hw.mac.type == e1000_82572)) {
3147		tarc = E1000_READ_REG(&adapter->hw, E1000_TARC0);
3148		tarc |= SPEED_MODE_BIT;
3149		E1000_WRITE_REG(&adapter->hw, E1000_TARC0, tarc);
3150	} else if (adapter->hw.mac.type == e1000_80003es2lan) {
3151		tarc = E1000_READ_REG(&adapter->hw, E1000_TARC0);
3152		tarc |= 1;
3153		E1000_WRITE_REG(&adapter->hw, E1000_TARC0, tarc);
3154		tarc = E1000_READ_REG(&adapter->hw, E1000_TARC1);
3155		tarc |= 1;
3156		E1000_WRITE_REG(&adapter->hw, E1000_TARC1, tarc);
3157	}
3158
3159	/* Program the Transmit Control Register */
3160	tctl = E1000_READ_REG(&adapter->hw, E1000_TCTL);
3161	tctl &= ~E1000_TCTL_CT;
3162	tctl |= (E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN |
3163		   (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT));
3164
3165	if (adapter->hw.mac.type >= e1000_82571)
3166		tctl |= E1000_TCTL_MULR;
3167
3168	/* This write will effectively turn on the transmit unit. */
3169	E1000_WRITE_REG(&adapter->hw, E1000_TCTL, tctl);
3170
3171	/* Setup Transmit Descriptor Base Settings */
3172	adapter->txd_cmd = E1000_TXD_CMD_IFCS;
3173
3174	if ((adapter->tx_int_delay.value > 0) &&
3175	    (adapter->hw.mac.type != e1000_82575))
3176		adapter->txd_cmd |= E1000_TXD_CMD_IDE;
3177
3178	/* Set the function pointer for the transmit routine */
3179	if (adapter->hw.mac.type >= e1000_82575)
3180		adapter->em_xmit = em_adv_encap;
3181	else
3182		adapter->em_xmit = em_encap;
3183}
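/*
 * TIPG layout note (illustrative, assuming the standard 8254x TIPG
 * register layout): IPGT occupies bits 9:0, IPGR1 bits 19:10, and
 * IPGR2 bits 29:20, which is why the defaults above are shifted by
 * E1000_TIPG_IPGR1_SHIFT and E1000_TIPG_IPGR2_SHIFT before being
 * OR'd together.
 */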
3184
3185/*********************************************************************
3186 *
3187 *  Free all transmit related data structures.
3188 *
3189 **********************************************************************/
3190static void
3191em_free_transmit_structures(struct adapter *adapter)
3192{
3193	struct em_buffer *tx_buffer;
3194	int i;
3195
3196	INIT_DEBUGOUT("free_transmit_structures: begin");
3197
3198	if (adapter->tx_buffer_area != NULL) {
3199		tx_buffer = adapter->tx_buffer_area;
3200		for (i = 0; i < adapter->num_tx_desc; i++, tx_buffer++) {
3201			if (tx_buffer->m_head != NULL) {
3202				bus_dmamap_sync(adapter->txtag, tx_buffer->map,
3203				    BUS_DMASYNC_POSTWRITE);
3204				bus_dmamap_unload(adapter->txtag,
3205				    tx_buffer->map);
3206				m_freem(tx_buffer->m_head);
3207				tx_buffer->m_head = NULL;
3208			} else if (tx_buffer->map != NULL)
3209				bus_dmamap_unload(adapter->txtag,
3210				    tx_buffer->map);
3211			if (tx_buffer->map != NULL) {
3212				bus_dmamap_destroy(adapter->txtag,
3213				    tx_buffer->map);
3214				tx_buffer->map = NULL;
3215			}
3216		}
3217	}
3218	if (adapter->tx_buffer_area != NULL) {
3219		free(adapter->tx_buffer_area, M_DEVBUF);
3220		adapter->tx_buffer_area = NULL;
3221	}
3222	if (adapter->txtag != NULL) {
3223		bus_dma_tag_destroy(adapter->txtag);
3224		adapter->txtag = NULL;
3225	}
3226}
3227
3228/*********************************************************************
3229 *
3230 *  The offload context needs to be set when we transfer the first
3231 *  packet of a particular protocol (TCP/UDP). This routine has been
3232 *  enhanced to deal with inserted VLAN headers and IPv6 (not complete).
3233 *
3234 **********************************************************************/
3235static void
3236em_transmit_checksum_setup(struct adapter *adapter, struct mbuf *mp,
3237    uint32_t *txd_upper, uint32_t *txd_lower)
3238{
3239	struct e1000_context_desc *TXD;
3240	struct em_buffer *tx_buffer;
3241	struct ether_vlan_header *eh;
3242	struct ip *ip;
3243	struct ip6_hdr *ip6;
3244	struct tcp_hdr *th;
3245	int curr_txd, ehdrlen, hdr_len, ip_hlen;
3246	uint32_t cmd = 0;
3247	uint16_t etype;
3248	uint8_t ipproto;
3249
3250	/* Setup checksum offload context. */
3251	curr_txd = adapter->next_avail_tx_desc;
3252	tx_buffer = &adapter->tx_buffer_area[curr_txd];
3253	TXD = (struct e1000_context_desc *) &adapter->tx_desc_base[curr_txd];
3254
3255	*txd_lower = E1000_TXD_CMD_DEXT |	/* Extended descr type */
3256		     E1000_TXD_DTYP_D;		/* Data descr */
3257
3258	/*
3259	 * Determine where frame payload starts.
3260	 * Jump over vlan headers if already present,
3261	 * helpful for QinQ too.
3262	 */
3263	eh = mtod(mp, struct ether_vlan_header *);
3264	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
3265		etype = ntohs(eh->evl_proto);
3266		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3267	} else {
3268		etype = ntohs(eh->evl_encap_proto);
3269		ehdrlen = ETHER_HDR_LEN;
3270	}
3271
3272	/*
3273	 * We only support TCP/UDP for IPv4 and IPv6 for the moment.
3274	 * TODO: Support SCTP too when it hits the tree.
3275	 */
3276	switch (etype) {
3277	case ETHERTYPE_IP:
3278		ip = (struct ip *)(mp->m_data + ehdrlen);
3279		ip_hlen = ip->ip_hl << 2;
3280
3281		/* Setup of IP header checksum. */
3282		if (mp->m_pkthdr.csum_flags & CSUM_IP) {
3283			/*
3284			 * Start offset for header checksum calculation.
3285			 * End offset for header checksum calculation.
3286			 * Offset of place to put the checksum.
3287			 */
3288			TXD->lower_setup.ip_fields.ipcss = ehdrlen;
3289			TXD->lower_setup.ip_fields.ipcse =
3290			    htole16(ehdrlen + ip_hlen);
3291			TXD->lower_setup.ip_fields.ipcso =
3292			    ehdrlen + offsetof(struct ip, ip_sum);
3293			cmd |= E1000_TXD_CMD_IP;
3294			*txd_upper |= E1000_TXD_POPTS_IXSM << 8;
3295		}
3296
3297		if (mp->m_len < ehdrlen + ip_hlen)
3298			return;	/* failure */
3299
3300		hdr_len = ehdrlen + ip_hlen;
3301		ipproto = ip->ip_p;
3302
3303		break;
3304	case ETHERTYPE_IPV6:
3305		ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
3306		ip_hlen = sizeof(struct ip6_hdr); /* XXX: No header stacking. */
3307
3308		if (mp->m_len < ehdrlen + ip_hlen)
3309			return;	/* failure */
3310
3311		/* IPv6 doesn't have a header checksum. */
3312
3313		hdr_len = ehdrlen + ip_hlen;
3314		ipproto = ip6->ip6_nxt;
3315
3316		break;
3317	default:
3318		*txd_upper = 0;
3319		*txd_lower = 0;
3320		return;
3321	}
3322
3323	switch (ipproto) {
3324	case IPPROTO_TCP:
3325		if (mp->m_pkthdr.csum_flags & CSUM_TCP) {
3326			/*
3327			 * Start offset for payload checksum calculation.
3328			 * End offset for payload checksum calculation.
3329			 * Offset of place to put the checksum.
3330			 */
3331			th = (struct tcp_hdr *)(mp->m_data + hdr_len);
3332			TXD->upper_setup.tcp_fields.tucss = hdr_len;
3333			TXD->upper_setup.tcp_fields.tucse = htole16(0);
3334			TXD->upper_setup.tcp_fields.tucso =
3335			    hdr_len + offsetof(struct tcphdr, th_sum);
3336			cmd |= E1000_TXD_CMD_TCP;
3337			*txd_upper |= E1000_TXD_POPTS_TXSM << 8;
3338		}
3339		break;
3340	case IPPROTO_UDP:
3341		if (mp->m_pkthdr.csum_flags & CSUM_UDP) {
3342			/*
3343			 * Start offset for header checksum calculation.
3344			 * End offset for header checksum calculation.
3345			 * Offset of place to put the checksum.
3346			 */
3347			TXD->upper_setup.tcp_fields.tucss = hdr_len;
3348			TXD->upper_setup.tcp_fields.tucse = htole16(0);
3349			TXD->upper_setup.tcp_fields.tucso =
3350			    hdr_len + offsetof(struct udphdr, uh_sum);
3351			*txd_upper |= E1000_TXD_POPTS_TXSM << 8;
3352		}
3353		break;
3354	default:
3355		break;
3356	}
3357
3358	TXD->tcp_seg_setup.data = htole32(0);
3359	TXD->cmd_and_length =
3360	    htole32(adapter->txd_cmd | E1000_TXD_CMD_DEXT | cmd);
3361	tx_buffer->m_head = NULL;
3362	tx_buffer->next_eop = -1;
3363
3364	if (++curr_txd == adapter->num_tx_desc)
3365		curr_txd = 0;
3366
3367	adapter->num_tx_desc_avail--;
3368	adapter->next_avail_tx_desc = curr_txd;
3369}
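/*
 * Worked offsets for the common untagged IPv4/TCP case above
 * (illustrative): ehdrlen = 14 and ip_hlen = 20, so ipcss = 14,
 * ipcso = 14 + offsetof(struct ip, ip_sum) = 24, tucss = 34, and
 * tucso = 34 + offsetof(struct tcphdr, th_sum) = 50, with tucse = 0
 * meaning "checksum through the end of the packet".
 */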
3370
3371/**********************************************************************
3372 *
3373 *  Setup work for hardware segmentation offload (TSO)
3374 *
3375 **********************************************************************/
3376static boolean_t
3377em_tso_setup(struct adapter *adapter, struct mbuf *mp, uint32_t *txd_upper,
3378   uint32_t *txd_lower)
3379{
3380	struct e1000_context_desc *TXD;
3381	struct em_buffer *tx_buffer;
3382	struct ether_vlan_header *eh;
3383	struct ip *ip;
3384	struct ip6_hdr *ip6;
3385	struct tcphdr *th;
3386	int curr_txd, ehdrlen, hdr_len, ip_hlen, isip6;
3387	uint16_t etype;
3388
3389	/*
3390	 * XXX: This is not really correct as the stack would not have
3391	 * set up all checksums.
3392	 * XXX: Returning FALSE is not sufficient as we may have to return
3393	 * in true failure cases as well.  Should do -1 (failure), 0 (no)
3394	 * and 1 (success).
3395	 */
3396	if (((mp->m_pkthdr.csum_flags & CSUM_TSO) == 0) ||
3397	     (mp->m_pkthdr.len <= EM_TX_BUFFER_SIZE))
3398		return FALSE;
3399
3400	/*
3401	 * This function could/should be extended to support IP/IPv6
3402	 * fragmentation as well.  But as they say, one step at a time.
3403	 */
3404
3405	/*
3406	 * Determine where frame payload starts.
3407	 * Jump over vlan headers if already present,
3408	 * helpful for QinQ too.
3409	 */
3410	eh = mtod(mp, struct ether_vlan_header *);
3411	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
3412		etype = ntohs(eh->evl_proto);
3413		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3414	} else {
3415		etype = ntohs(eh->evl_encap_proto);
3416		ehdrlen = ETHER_HDR_LEN;
3417	}
3418
3419	/* Ensure we have at least the IP+TCP header in the first mbuf. */
3420	if (mp->m_len < ehdrlen + sizeof(struct ip) + sizeof(struct tcphdr))
3421		return FALSE;	/* -1 */
3422
3423	/*
3424	 * We only support TCP over IPv4 for the moment; IPv6 is not yet done.
3425	 * TODO: Support SCTP too when it hits the tree.
3426	 */
3427	switch (etype) {
3428	case ETHERTYPE_IP:
3429		isip6 = 0;
3430		ip = (struct ip *)(mp->m_data + ehdrlen);
3431		if (ip->ip_p != IPPROTO_TCP)
3432			return FALSE;	/* 0 */
3433		ip->ip_len = 0;
3434		ip->ip_sum = 0;
3435		ip_hlen = ip->ip_hl << 2;
3436		if (mp->m_len < ehdrlen + ip_hlen + sizeof(struct tcphdr))
3437			return FALSE;	/* -1 */
3438		th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
3439#if 1
3440		th->th_sum = in_pseudo(ip->ip_src.s_addr,
3441		    ip->ip_dst.s_addr, htons(IPPROTO_TCP));
3442#else
3443		th->th_sum = mp->m_pkthdr.csum_data;
3444#endif
3445		break;
3446	case ETHERTYPE_IPV6:
3447		isip6 = 1;
3448		return FALSE;			/* Not supported yet. */
3449		ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
3450		if (ip6->ip6_nxt != IPPROTO_TCP)
3451			return FALSE;	/* 0 */
3452		ip6->ip6_plen = 0;
3453		ip_hlen = sizeof(struct ip6_hdr); /* XXX: no header stacking. */
3454		if (mp->m_len < ehdrlen + ip_hlen + sizeof(struct tcphdr))
3455			return FALSE;	/* -1 */
3456		th = (struct tcphdr *)((caddr_t)ip6 + ip_hlen);
3457#if 0
3458		th->th_sum = in6_pseudo(ip6->ip6_src, ip6->ip6_dst,
3459		    htons(IPPROTO_TCP));	/* XXX: function not yet available. */
3460#else
3461		th->th_sum = mp->m_pkthdr.csum_data;
3462#endif
3463		break;
3464	default:
3465		return FALSE;
3466	}
3467	hdr_len = ehdrlen + ip_hlen + (th->th_off << 2);
3468
3469	*txd_lower = (E1000_TXD_CMD_DEXT |	/* Extended descr type */
3470		      E1000_TXD_DTYP_D |	/* Data descr type */
3471		      E1000_TXD_CMD_TSE);	/* Do TSE on this packet */
3472
3473	/* IP and/or TCP header checksum calculation and insertion. */
3474	*txd_upper = ((isip6 ? 0 : E1000_TXD_POPTS_IXSM) |
3475		      E1000_TXD_POPTS_TXSM) << 8;
3476
3477	curr_txd = adapter->next_avail_tx_desc;
3478	tx_buffer = &adapter->tx_buffer_area[curr_txd];
3479	TXD = (struct e1000_context_desc *) &adapter->tx_desc_base[curr_txd];
3480
3481	/* IPv6 doesn't have a header checksum. */
3482	if (!isip6) {
3483		/*
3484		 * Start offset for header checksum calculation.
3485		 * End offset for header checksum calculation.
3486		 * Offset of place to put the checksum.
3487		 */
3488		TXD->lower_setup.ip_fields.ipcss = ehdrlen;
3489		TXD->lower_setup.ip_fields.ipcse =
3490		    htole16(ehdrlen + ip_hlen - 1);
3491		TXD->lower_setup.ip_fields.ipcso =
3492		    ehdrlen + offsetof(struct ip, ip_sum);
3493	}
3494	/*
3495	 * Start offset for payload checksum calculation.
3496	 * End offset for payload checksum calculation.
3497	 * Offset of place to put the checksum.
3498	 */
3499	TXD->upper_setup.tcp_fields.tucss =
3500	    ehdrlen + ip_hlen;
3501	TXD->upper_setup.tcp_fields.tucse = 0;
3502	TXD->upper_setup.tcp_fields.tucso =
3503	    ehdrlen + ip_hlen + offsetof(struct tcphdr, th_sum);
3504	/*
3505	 * Payload size per packet w/o any headers.
3506	 * Length of all headers up to payload.
3507	 */
3508	TXD->tcp_seg_setup.fields.mss = htole16(mp->m_pkthdr.tso_segsz);
3509	TXD->tcp_seg_setup.fields.hdr_len = hdr_len;
3510
3511	TXD->cmd_and_length = htole32(adapter->txd_cmd |
3512				E1000_TXD_CMD_DEXT |	/* Extended descr */
3513				E1000_TXD_CMD_TSE |	/* TSE context */
3514				(isip6 ? 0 : E1000_TXD_CMD_IP) | /* Do IP csum */
3515				E1000_TXD_CMD_TCP |	/* Do TCP checksum */
3516				(mp->m_pkthdr.len - (hdr_len))); /* Total len */
3517
3518	tx_buffer->m_head = NULL;
3519	tx_buffer->next_eop = -1;
3520
3521	if (++curr_txd == adapter->num_tx_desc)
3522		curr_txd = 0;
3523
3524	adapter->num_tx_desc_avail--;
3525	adapter->next_avail_tx_desc = curr_txd;
3526	adapter->tx_tso = TRUE;
3527
3528	return TRUE;
3529}
3530
3531
3532/**********************************************************************
3533 *
3534 *  Setup work for hardware segmentation offload (TSO) on
3535 *  adapters using advanced tx descriptors
3536 *
3537 **********************************************************************/
3538static boolean_t
3539em_tso_adv_setup(struct adapter *adapter, struct mbuf *mp, u32 *paylen)
3540{
3541	struct e1000_adv_tx_context_desc *TXD;
3542	struct em_buffer        *tx_buffer;
3543	u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0;
3544	u32 mss_l4len_idx = 0;
3545	u16 vtag = 0;
3546	int ctxd, ehdrlen, hdrlen, ip_hlen, tcp_hlen;
3547	struct ether_vlan_header *eh;
3548	struct ip *ip;
3549	struct tcphdr *th;
3550
3551	if (((mp->m_pkthdr.csum_flags & CSUM_TSO) == 0) ||
3552	     (mp->m_pkthdr.len <= EM_TX_BUFFER_SIZE))
3553		return FALSE;
3554
3555	/*
3556	 * Determine where frame payload starts.
3557	 * Jump over vlan headers if already present
3558	 */
3559	eh = mtod(mp, struct ether_vlan_header *);
3560	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN))
3561		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3562	else
3563		ehdrlen = ETHER_HDR_LEN;
3564
3565	/* Ensure we have at least the IP+TCP header in the first mbuf. */
3566	if (mp->m_len < ehdrlen + sizeof(struct ip) + sizeof(struct tcphdr))
3567		return FALSE;
3568
3569	/* Only supports IPv4 for now */
3570	ctxd = adapter->next_avail_tx_desc;
3571	tx_buffer = &adapter->tx_buffer_area[ctxd];
3572	TXD = (struct e1000_adv_tx_context_desc *) &adapter->tx_desc_base[ctxd];
3573
3574	ip = (struct ip *)(mp->m_data + ehdrlen);
3575	if (ip->ip_p != IPPROTO_TCP)
3576		return FALSE;	/* 0 */
3577	ip->ip_len = 0;
3578	ip->ip_sum = 0;
3579	ip_hlen = ip->ip_hl << 2;
3580	th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
3581	th->th_sum = in_pseudo(ip->ip_src.s_addr,
3582	    ip->ip_dst.s_addr, htons(IPPROTO_TCP));
3583	tcp_hlen = th->th_off << 2;
3584	hdrlen = ehdrlen + ip_hlen + tcp_hlen;
3585	/* Calculate payload; this is used in the transmit desc in encap */
3586	*paylen = mp->m_pkthdr.len - hdrlen;
3587
3588	/* VLAN MACLEN IPLEN */
3589	if (mp->m_flags & M_VLANTAG) {
3590		vtag = htole16(mp->m_pkthdr.ether_vtag);
3591		vlan_macip_lens |= (vtag << E1000_ADVTXD_VLAN_SHIFT);
3592	}
3593	vlan_macip_lens |= (ehdrlen << E1000_ADVTXD_MACLEN_SHIFT);
3594	vlan_macip_lens |= ip_hlen;
3595	TXD->vlan_macip_lens |= htole32(vlan_macip_lens);
3596
3597	/* ADV DTYPE TUCMD */
3598	type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
3599	type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP;
3600	type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4;
3601	TXD->type_tucmd_mlhl |= htole32(type_tucmd_mlhl);
3602
3603	/* MSS L4LEN IDX */
3604	mss_l4len_idx |= (mp->m_pkthdr.tso_segsz << E1000_ADVTXD_MSS_SHIFT);
3605	mss_l4len_idx |= (tcp_hlen << E1000_ADVTXD_L4LEN_SHIFT);
3606	TXD->mss_l4len_idx = htole32(mss_l4len_idx);
3607
3608	TXD->seqnum_seed = htole32(0);
3609	tx_buffer->m_head = NULL;
3610	tx_buffer->next_eop = -1;
3611
3612	if (++ctxd == adapter->num_tx_desc)
3613		ctxd = 0;
3614
3615	adapter->num_tx_desc_avail--;
3616	adapter->next_avail_tx_desc = ctxd;
3617	return TRUE;
3618}
3619
3620
3621/*********************************************************************
3622 *
3623 *  Advanced Context Descriptor setup for VLAN or CSUM
3624 *
3625 **********************************************************************/
3626
3627static boolean_t
3628em_tx_adv_ctx_setup(struct adapter *adapter, struct mbuf *mp)
3629{
3630	struct e1000_adv_tx_context_desc *TXD;
3631	struct em_buffer        *tx_buffer;
3632	uint32_t vlan_macip_lens = 0, type_tucmd_mlhl = 0;
3633	struct ether_vlan_header *eh;
3634	struct ip *ip;
3635	struct ip6_hdr *ip6;
3636	int  ehdrlen, ip_hlen;
3637	u16	etype;
3638	u8	ipproto;
3639
3640	int ctxd = adapter->next_avail_tx_desc;
3641	u16 vtag = 0;
3642
3643	tx_buffer = &adapter->tx_buffer_area[ctxd];
3644	TXD = (struct e1000_adv_tx_context_desc *) &adapter->tx_desc_base[ctxd];
3645
3646	/*
3647	** In advanced descriptors the vlan tag must
3648	** be placed into the descriptor itself.
3649	*/
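	/*
	 * Layout sketch of vlan_macip_lens, per the ADVTXD shift macros:
	 * bits 31:16 hold the VLAN tag, bits 15:9 the MAC header length
	 * and bits 8:0 the IP header length.
	 */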
3650	if (mp->m_flags & M_VLANTAG) {
3651		vtag = htole16(mp->m_pkthdr.ether_vtag);
3652		vlan_macip_lens |= (vtag << E1000_ADVTXD_VLAN_SHIFT);
3653	}
3654
3655	/*
3656	 * Determine where frame payload starts.
3657	 * Jump over vlan headers if already present,
3658	 * helpful for QinQ too.
3659	 */
3660	eh = mtod(mp, struct ether_vlan_header *);
3661	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
3662		etype = ntohs(eh->evl_proto);
3663		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3664	} else {
3665		etype = ntohs(eh->evl_encap_proto);
3666		ehdrlen = ETHER_HDR_LEN;
3667	}
3668
3669	/* Set the ether header length */
3670	vlan_macip_lens |= ehdrlen << E1000_ADVTXD_MACLEN_SHIFT;
3671
3672	switch (etype) {
3673		case ETHERTYPE_IP:
3674			ip = (struct ip *)(mp->m_data + ehdrlen);
3675			ip_hlen = ip->ip_hl << 2;
3676			if (mp->m_len < ehdrlen + ip_hlen)
3677				return FALSE; /* failure */
3678			ipproto = ip->ip_p;
3679			type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4;
3680			break;
3681		case ETHERTYPE_IPV6:
3682			ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
3683			ip_hlen = sizeof(struct ip6_hdr);
3684			if (mp->m_len < ehdrlen + ip_hlen)
3685				return FALSE; /* failure */
3686			ipproto = ip6->ip6_nxt;
3687			type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV6;
3688			break;
3689		default:
3690			return FALSE;
3691	}
3692
3693	vlan_macip_lens |= ip_hlen;
3694	type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
3695
3696	switch (ipproto) {
3697		case IPPROTO_TCP:
3698			if (mp->m_pkthdr.csum_flags & CSUM_TCP)
3699				type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP;
3700			break;
3701		case IPPROTO_UDP:
3702			if (mp->m_pkthdr.csum_flags & CSUM_UDP)
3703				type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_UDP;
3704			break;
3705	}
3706
3707	/* Now copy bits into descriptor */
3708	TXD->vlan_macip_lens |= htole32(vlan_macip_lens);
3709	TXD->type_tucmd_mlhl |= htole32(type_tucmd_mlhl);
3710	TXD->seqnum_seed = htole32(0);
3711	TXD->mss_l4len_idx = htole32(0);
3712
3713	tx_buffer->m_head = NULL;
3714	tx_buffer->next_eop = -1;
3715
3716	/* We've consumed the first desc, adjust counters */
3717	if (++ctxd == adapter->num_tx_desc)
3718		ctxd = 0;
3719	adapter->next_avail_tx_desc = ctxd;
3720	--adapter->num_tx_desc_avail;
3721
3722	return TRUE;
3723}
3724
3725
3726/**********************************************************************
3727 *
3728 *  Examine each tx_buffer in the used queue. If the hardware is done
3729 *  processing the packet then free associated resources. The
3730 *  tx_buffer is put back on the free queue.
3731 *
3732 **********************************************************************/
3733static void
3734em_txeof(struct adapter *adapter)
3735{
3736	int first, last, done, num_avail;
3737	struct em_buffer *tx_buffer;
3738	struct e1000_tx_desc *tx_desc, *eop_desc;
3739	struct ifnet *ifp = adapter->ifp;
3740
3741	EM_LOCK_ASSERT(adapter);
3742
3743	if (adapter->num_tx_desc_avail == adapter->num_tx_desc)
3744		return;
3745
3746	num_avail = adapter->num_tx_desc_avail;
3747	first = adapter->next_tx_to_clean;
3748	tx_desc = &adapter->tx_desc_base[first];
3749	tx_buffer = &adapter->tx_buffer_area[first];
3750	last = tx_buffer->next_eop;
3751	eop_desc = &adapter->tx_desc_base[last];
3752
3753	/*
3754	 * What this does is get the index of the
3755	 * first descriptor AFTER the EOP of the
3756	 * first packet, that way we can do the
3757	 * simple comparison on the inner while loop.
3758	 */
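	/*
	 * Worked example (say a 256-descriptor ring): if a packet occupies
	 * descriptors 10..13, next_eop is 13 and "done" becomes 14, so the
	 * inner cleanup loop below runs exactly while first != 14, i.e.
	 * over descriptors 10 through 13.
	 */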
3759	if (++last == adapter->num_tx_desc)
3760		last = 0;
3761	done = last;
3762
3763	bus_dmamap_sync(adapter->txdma.dma_tag, adapter->txdma.dma_map,
3764	    BUS_DMASYNC_POSTREAD);
3765
3766	while (eop_desc->upper.fields.status & E1000_TXD_STAT_DD) {
3767		/* We clean the range of the packet */
3768		while (first != done) {
3769			tx_desc->upper.data = 0;
3770			tx_desc->lower.data = 0;
3771			tx_desc->buffer_addr = 0;
3772			num_avail++;
3773
3774			if (tx_buffer->m_head) {
3775				ifp->if_opackets++;
3776				bus_dmamap_sync(adapter->txtag,
3777				    tx_buffer->map,
3778				    BUS_DMASYNC_POSTWRITE);
3779				bus_dmamap_unload(adapter->txtag,
3780				    tx_buffer->map);
3781
3782				m_freem(tx_buffer->m_head);
3783				tx_buffer->m_head = NULL;
3784			}
3785			tx_buffer->next_eop = -1;
3786
3787			if (++first == adapter->num_tx_desc)
3788				first = 0;
3789
3790			tx_buffer = &adapter->tx_buffer_area[first];
3791			tx_desc = &adapter->tx_desc_base[first];
3792		}
3793		/* See if we can continue to the next packet */
3794		last = tx_buffer->next_eop;
3795		if (last != -1) {
3796			eop_desc = &adapter->tx_desc_base[last];
3797			/* Get new done point */
3798			if (++last == adapter->num_tx_desc) last = 0;
3799			done = last;
3800		} else
3801			break;
3802	}
3803	bus_dmamap_sync(adapter->txdma.dma_tag, adapter->txdma.dma_map,
3804	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3805
3806	adapter->next_tx_to_clean = first;
3807
3808	/*
3809	 * If we have enough room, clear IFF_DRV_OACTIVE to tell the stack
3810	 * that it is OK to send packets.
3811	 * If there are no pending descriptors, clear the timeout. Otherwise,
3812	 * if some descriptors have been freed, restart the timeout.
3813	 */
3814	if (num_avail > EM_TX_CLEANUP_THRESHOLD) {
3815		ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
3816		/* All clean, turn off the timer */
3817		if (num_avail == adapter->num_tx_desc)
3818			adapter->watchdog_timer = 0;
3819		/* Some cleaned, reset the timer */
3820		else if (num_avail != adapter->num_tx_desc_avail)
3821			adapter->watchdog_timer = EM_TX_TIMEOUT;
3822	}
3823	adapter->num_tx_desc_avail = num_avail;
3824	return;
3825}
3826
3827/*********************************************************************
3828 *
3829 *  Get a buffer from system mbuf buffer pool.
3830 *
3831 **********************************************************************/
3832static int
3833em_get_buf(struct adapter *adapter, int i)
3834{
3835	struct mbuf		*m;
3836	bus_dma_segment_t	segs[1];
3837	bus_dmamap_t		map;
3838	struct em_buffer	*rx_buffer;
3839	int			error, nsegs;
3840
3841	m = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR);
3842	if (m == NULL) {
3843		adapter->mbuf_cluster_failed++;
3844		return (ENOBUFS);
3845	}
3846	m->m_len = m->m_pkthdr.len = MCLBYTES;
3847
3848	if (adapter->hw.mac.max_frame_size <= (MCLBYTES - ETHER_ALIGN))
3849		m_adj(m, ETHER_ALIGN);
3850
3851	/*
3852	 * Using memory from the mbuf cluster pool, invoke the
3853	 * bus_dma machinery to arrange the memory mapping.
3854	 */
3855	error = bus_dmamap_load_mbuf_sg(adapter->rxtag,
3856	    adapter->rx_sparemap, m, segs, &nsegs, BUS_DMA_NOWAIT);
3857	if (error != 0) {
3858		m_free(m);
3859		return (error);
3860	}
3861
3862	/* If nsegs is wrong then the stack is corrupt. */
3863	KASSERT(nsegs == 1, ("Too many segments returned!"));
3864
3865	rx_buffer = &adapter->rx_buffer_area[i];
3866	if (rx_buffer->m_head != NULL)
3867		bus_dmamap_unload(adapter->rxtag, rx_buffer->map);
3868
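	/*
	 * Swap the just-loaded spare map into this ring slot and recycle
	 * the slot's old map as the new spare; this way a failed
	 * bus_dmamap_load_mbuf_sg() above never disturbs the mapping of
	 * the buffer already posted at this index.
	 */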
3869	map = rx_buffer->map;
3870	rx_buffer->map = adapter->rx_sparemap;
3871	adapter->rx_sparemap = map;
3872	bus_dmamap_sync(adapter->rxtag, rx_buffer->map, BUS_DMASYNC_PREREAD);
3873	rx_buffer->m_head = m;
3874
3875	adapter->rx_desc_base[i].buffer_addr = htole64(segs[0].ds_addr);
3876	return (0);
3877}
3878
3879/*********************************************************************
3880 *
3881 *  Allocate memory for rx_buffer structures. Since we use one
3882 *  rx_buffer per received packet, the maximum number of rx_buffer's
3883 *  that we'll need is equal to the number of receive descriptors
3884 *  that we've allocated.
3885 *
3886 **********************************************************************/
3887static int
3888em_allocate_receive_structures(struct adapter *adapter)
3889{
3890	device_t dev = adapter->dev;
3891	struct em_buffer *rx_buffer;
3892	int i, error;
3893
3894	adapter->rx_buffer_area = malloc(sizeof(struct em_buffer) *
3895	    adapter->num_rx_desc, M_DEVBUF, M_NOWAIT);
3896	if (adapter->rx_buffer_area == NULL) {
3897		device_printf(dev, "Unable to allocate rx_buffer memory\n");
3898		return (ENOMEM);
3899	}
3900
3901	bzero(adapter->rx_buffer_area,
3902	    sizeof(struct em_buffer) * adapter->num_rx_desc);
3903
3904	error = bus_dma_tag_create(bus_get_dma_tag(dev),        /* parent */
3905				1, 0,			/* alignment, bounds */
3906				BUS_SPACE_MAXADDR,	/* lowaddr */
3907				BUS_SPACE_MAXADDR,	/* highaddr */
3908				NULL, NULL,		/* filter, filterarg */
3909				MCLBYTES,		/* maxsize */
3910				1,			/* nsegments */
3911				MCLBYTES,		/* maxsegsize */
3912				0,			/* flags */
3913				NULL,			/* lockfunc */
3914				NULL,			/* lockarg */
3915				&adapter->rxtag);
3916	if (error) {
3917		device_printf(dev, "%s: bus_dma_tag_create failed %d\n",
3918		    __func__, error);
3919		goto fail;
3920	}
3921
3922	/* Create the spare map (used by getbuf) */
3923	error = bus_dmamap_create(adapter->rxtag, BUS_DMA_NOWAIT,
3924	     &adapter->rx_sparemap);
3925	if (error) {
3926		device_printf(dev, "%s: bus_dmamap_create failed: %d\n",
3927		    __func__, error);
3928		goto fail;
3929	}
3930
3931	rx_buffer = adapter->rx_buffer_area;
3932	for (i = 0; i < adapter->num_rx_desc; i++, rx_buffer++) {
3933		error = bus_dmamap_create(adapter->rxtag, BUS_DMA_NOWAIT,
3934		    &rx_buffer->map);
3935		if (error) {
3936			device_printf(dev, "%s: bus_dmamap_create failed: %d\n",
3937			    __func__, error);
3938			goto fail;
3939		}
3940	}
3941
3942	/* Setup the initial buffers */
3943	for (i = 0; i < adapter->num_rx_desc; i++) {
3944		error = em_get_buf(adapter, i);
3945		if (error)
3946			goto fail;
3947	}
3948	bus_dmamap_sync(adapter->rxdma.dma_tag, adapter->rxdma.dma_map,
3949	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3950
3951	return (0);
3952
3953fail:
3954	em_free_receive_structures(adapter);
3955	return (error);
3956}
3957
3958/*********************************************************************
3959 *
3960 *  Allocate and initialize receive structures.
3961 *
3962 **********************************************************************/
3963static int
3964em_setup_receive_structures(struct adapter *adapter)
3965{
3966	int error;
3967
3968	bzero(adapter->rx_desc_base,
3969	    (sizeof(struct e1000_rx_desc)) * adapter->num_rx_desc);
3970
3971	if ((error = em_allocate_receive_structures(adapter)) !=0)
3972		return (error);
3973
3974	/* Setup our descriptor pointers */
3975	adapter->next_rx_desc_to_check = 0;
3976
3977	return (0);
3978}
3979
3980/*********************************************************************
3981 *
3982 *  Enable receive unit.
3983 *
3984 **********************************************************************/
3985static void
3986em_initialize_receive_unit(struct adapter *adapter)
3987{
3988	struct ifnet	*ifp = adapter->ifp;
3989	uint64_t	bus_addr;
3990	uint32_t	reg_rctl;
3991	uint32_t	reg_rxcsum;
3992
3993	INIT_DEBUGOUT("em_initialize_receive_unit: begin");
3994
3995	/*
3996	 * Make sure receives are disabled while setting
3997	 * up the descriptor ring
3998	 */
3999	reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
4000	E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl & ~E1000_RCTL_EN);
4001
4002	if (adapter->hw.mac.type >= e1000_82540) {
4003		E1000_WRITE_REG(&adapter->hw, E1000_RADV,
4004		    adapter->rx_abs_int_delay.value);
4005		/*
4006		 * Set the interrupt throttling rate. Value is calculated
4007		 * as DEFAULT_ITR = 1/(MAX_INTS_PER_SEC * 256ns)
4008		 */
4009#define MAX_INTS_PER_SEC	8000
4010#define DEFAULT_ITR	     1000000000/(MAX_INTS_PER_SEC * 256)
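		/*
		 * Worked example: 1000000000 / (8000 * 256) = 488; the ITR
		 * register counts in 256 ns units, so this programs an
		 * interrupt interval of 488 * 256 ns ~ 125 us, i.e. at most
		 * about 8000 interrupts per second.
		 */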
4011		E1000_WRITE_REG(&adapter->hw, E1000_ITR, DEFAULT_ITR);
4012	}
4013
4014	/* Setup the Base and Length of the Rx Descriptor Ring */
4015	bus_addr = adapter->rxdma.dma_paddr;
4016	E1000_WRITE_REG(&adapter->hw, E1000_RDLEN, adapter->num_rx_desc *
4017			sizeof(struct e1000_rx_desc));
4018	E1000_WRITE_REG(&adapter->hw, E1000_RDBAH, (uint32_t)(bus_addr >> 32));
4019	E1000_WRITE_REG(&adapter->hw, E1000_RDBAL, (uint32_t)bus_addr);
4020
4021	/* Setup the Receive Control Register */
4022	reg_rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
4023	reg_rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_LBM_NO |
4024		   E1000_RCTL_RDMTS_HALF |
4025		   (adapter->hw.mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
4026
4027	if (e1000_tbi_sbp_enabled_82543(&adapter->hw))
4028		reg_rctl |= E1000_RCTL_SBP;
4029	else
4030		reg_rctl &= ~E1000_RCTL_SBP;
4031
4032	switch (adapter->rx_buffer_len) {
4033	default:
4034	case 2048:
4035		reg_rctl |= E1000_RCTL_SZ_2048;
4036		break;
4037	case 4096:
4038		reg_rctl |= E1000_RCTL_SZ_4096 |
4039		    E1000_RCTL_BSEX | E1000_RCTL_LPE;
4040		break;
4041	case 8192:
4042		reg_rctl |= E1000_RCTL_SZ_8192 |
4043		    E1000_RCTL_BSEX | E1000_RCTL_LPE;
4044		break;
4045	case 16384:
4046		reg_rctl |= E1000_RCTL_SZ_16384 |
4047		    E1000_RCTL_BSEX | E1000_RCTL_LPE;
4048		break;
4049	}
4050
4051	if (ifp->if_mtu > ETHERMTU)
4052		reg_rctl |= E1000_RCTL_LPE;
4053	else
4054		reg_rctl &= ~E1000_RCTL_LPE;
4055
4056	/* Enable 82543 Receive Checksum Offload for TCP and UDP */
4057	if ((adapter->hw.mac.type >= e1000_82543) &&
4058	    (ifp->if_capenable & IFCAP_RXCSUM)) {
4059		reg_rxcsum = E1000_READ_REG(&adapter->hw, E1000_RXCSUM);
4060		reg_rxcsum |= (E1000_RXCSUM_IPOFL | E1000_RXCSUM_TUOFL);
4061		E1000_WRITE_REG(&adapter->hw, E1000_RXCSUM, reg_rxcsum);
4062	}
4063
4064	/*
4065	** XXX TEMPORARY WORKAROUND: on some systems with 82573
4066	** long latencies are observed, like Lenovo X60. This
4067	** change eliminates the problem, but since having positive
4068	** values in RDTR is a known source of problems on other
4069	** platforms another solution is being sought.
4070	*/
4071	if (adapter->hw.mac.type == e1000_82573)
4072		E1000_WRITE_REG(&adapter->hw, E1000_RDTR, 0x20);
4073
4074	/* Enable Receives */
4075	E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
4076
4077	/*
4078	 * Setup the HW Rx Head and
4079	 * Tail Descriptor Pointers
4080	 */
4081	E1000_WRITE_REG(&adapter->hw, E1000_RDH, 0);
4082	E1000_WRITE_REG(&adapter->hw, E1000_RDT, adapter->num_rx_desc - 1);
4083
4084	return;
4085}
4086
4087/*********************************************************************
4088 *
4089 *  Free receive related data structures.
4090 *
4091 **********************************************************************/
4092static void
4093em_free_receive_structures(struct adapter *adapter)
4094{
4095	struct em_buffer *rx_buffer;
4096	int i;
4097
4098	INIT_DEBUGOUT("free_receive_structures: begin");
4099
4100	if (adapter->rx_sparemap) {
4101		bus_dmamap_destroy(adapter->rxtag, adapter->rx_sparemap);
4102		adapter->rx_sparemap = NULL;
4103	}
4104
4105	/* Cleanup any existing buffers */
4106	if (adapter->rx_buffer_area != NULL) {
4107		rx_buffer = adapter->rx_buffer_area;
4108		for (i = 0; i < adapter->num_rx_desc; i++, rx_buffer++) {
4109			if (rx_buffer->m_head != NULL) {
4110				bus_dmamap_sync(adapter->rxtag, rx_buffer->map,
4111				    BUS_DMASYNC_POSTREAD);
4112				bus_dmamap_unload(adapter->rxtag,
4113				    rx_buffer->map);
4114				m_freem(rx_buffer->m_head);
4115				rx_buffer->m_head = NULL;
4116			} else if (rx_buffer->map != NULL)
4117				bus_dmamap_unload(adapter->rxtag,
4118				    rx_buffer->map);
4119			if (rx_buffer->map != NULL) {
4120				bus_dmamap_destroy(adapter->rxtag,
4121				    rx_buffer->map);
4122				rx_buffer->map = NULL;
4123			}
4124		}
4125	}
4126
4127	if (adapter->rx_buffer_area != NULL) {
4128		free(adapter->rx_buffer_area, M_DEVBUF);
4129		adapter->rx_buffer_area = NULL;
4130	}
4131
4132	if (adapter->rxtag != NULL) {
4133		bus_dma_tag_destroy(adapter->rxtag);
4134		adapter->rxtag = NULL;
4135	}
4136}
4137
4138/*********************************************************************
4139 *
4140 *  This routine executes in interrupt context. It replenishes
4141	 *  the mbufs in the descriptor ring and passes data which has been
4142	 *  dma'ed into host memory up to the upper layer.
4143 *
4144 *  We loop at most count times if count is > 0, or until done if
4145 *  count < 0.
4146 *
4147 *********************************************************************/
4148static int
4149em_rxeof(struct adapter *adapter, int count)
4150{
4151	struct ifnet	*ifp;
4152	struct mbuf	*mp;
4153	uint8_t		accept_frame = 0;
4154	uint8_t		eop = 0;
4155	uint16_t 	len, desc_len, prev_len_adj;
4156	int		i;
4157
4158	/* Pointer to the receive descriptor being examined. */
4159	struct e1000_rx_desc   *current_desc;
4160	uint8_t		status;
4161
4162	ifp = adapter->ifp;
4163	i = adapter->next_rx_desc_to_check;
4164	current_desc = &adapter->rx_desc_base[i];
4165	bus_dmamap_sync(adapter->rxdma.dma_tag, adapter->rxdma.dma_map,
4166	    BUS_DMASYNC_POSTREAD);
4167
4168	if (!((current_desc->status) & E1000_RXD_STAT_DD))
4169		return (0);
4170
4171	while ((current_desc->status & E1000_RXD_STAT_DD) &&
4172	    (count != 0) &&
4173	    (ifp->if_drv_flags & IFF_DRV_RUNNING)) {
4174		struct mbuf *m = NULL;
4175
4176		mp = adapter->rx_buffer_area[i].m_head;
4177		/*
4178		 * Can't defer bus_dmamap_sync(9) because TBI_ACCEPT
4179		 * needs to access the last received byte in the mbuf.
4180		 */
4181		bus_dmamap_sync(adapter->rxtag, adapter->rx_buffer_area[i].map,
4182		    BUS_DMASYNC_POSTREAD);
4183
4184		accept_frame = 1;
4185		prev_len_adj = 0;
4186		desc_len = le16toh(current_desc->length);
4187		status = current_desc->status;
4188		if (status & E1000_RXD_STAT_EOP) {
4189			count--;
4190			eop = 1;
4191			if (desc_len < ETHER_CRC_LEN) {
4192				len = 0;
4193				prev_len_adj = ETHER_CRC_LEN - desc_len;
4194			} else
4195				len = desc_len - ETHER_CRC_LEN;
4196		} else {
4197			eop = 0;
4198			len = desc_len;
4199		}
4200
4201		if (current_desc->errors & E1000_RXD_ERR_FRAME_ERR_MASK) {
4202			uint8_t		last_byte;
4203			uint32_t	pkt_len = desc_len;
4204
4205			if (adapter->fmp != NULL)
4206				pkt_len += adapter->fmp->m_pkthdr.len;
4207
4208			last_byte = *(mtod(mp, caddr_t) + desc_len - 1);
4209			if (TBI_ACCEPT(&adapter->hw, status,
4210			    current_desc->errors, pkt_len, last_byte)) {
4211				e1000_tbi_adjust_stats_82543(&adapter->hw,
4212				    &adapter->stats, pkt_len,
4213				    adapter->hw.mac.addr);
4214				if (len > 0)
4215					len--;
4216			} else
4217				accept_frame = 0;
4218		}
4219
4220		if (accept_frame) {
4221			if (em_get_buf(adapter, i) != 0) {
4222				ifp->if_iqdrops++;
4223				goto discard;
4224			}
4225
4226			/* Assign correct length to the current fragment */
4227			mp->m_len = len;
4228
4229			if (adapter->fmp == NULL) {
4230				mp->m_pkthdr.len = len;
4231				adapter->fmp = mp; /* Store the first mbuf */
4232				adapter->lmp = mp;
4233			} else {
4234				/* Chain mbuf's together */
4235				mp->m_flags &= ~M_PKTHDR;
4236				/*
4237				 * Adjust length of previous mbuf in chain if
4238				 * we received less than 4 bytes in the last
4239				 * descriptor.
4240				 */
4241				if (prev_len_adj > 0) {
4242					adapter->lmp->m_len -= prev_len_adj;
4243					adapter->fmp->m_pkthdr.len -=
4244					    prev_len_adj;
4245				}
4246				adapter->lmp->m_next = mp;
4247				adapter->lmp = adapter->lmp->m_next;
4248				adapter->fmp->m_pkthdr.len += len;
4249			}
4250
4251			if (eop) {
4252				adapter->fmp->m_pkthdr.rcvif = ifp;
4253				ifp->if_ipackets++;
4254				em_receive_checksum(adapter, current_desc,
4255				    adapter->fmp);
4256#ifndef __NO_STRICT_ALIGNMENT
4257				if (adapter->hw.mac.max_frame_size >
4258				    (MCLBYTES - ETHER_ALIGN) &&
4259				    em_fixup_rx(adapter) != 0)
4260					goto skip;
4261#endif
4262				if (status & E1000_RXD_STAT_VP) {
4263					adapter->fmp->m_pkthdr.ether_vtag =
4264					    (le16toh(current_desc->special) &
4265					    E1000_RXD_SPC_VLAN_MASK);
4266					adapter->fmp->m_flags |= M_VLANTAG;
4267				}
4268#ifndef __NO_STRICT_ALIGNMENT
4269skip:
4270#endif
4271				m = adapter->fmp;
4272				adapter->fmp = NULL;
4273				adapter->lmp = NULL;
4274			}
4275		} else {
4276			ifp->if_ierrors++;
4277discard:
4278			/* Reuse loaded DMA map and just update mbuf chain */
4279			mp = adapter->rx_buffer_area[i].m_head;
4280			mp->m_len = mp->m_pkthdr.len = MCLBYTES;
4281			mp->m_data = mp->m_ext.ext_buf;
4282			mp->m_next = NULL;
4283			if (adapter->hw.mac.max_frame_size <=
4284			    (MCLBYTES - ETHER_ALIGN))
4285				m_adj(mp, ETHER_ALIGN);
4286			if (adapter->fmp != NULL) {
4287				m_freem(adapter->fmp);
4288				adapter->fmp = NULL;
4289				adapter->lmp = NULL;
4290			}
4291			m = NULL;
4292		}
4293
4294		/* Zero out the receive descriptor's status. */
4295		current_desc->status = 0;
4296		bus_dmamap_sync(adapter->rxdma.dma_tag, adapter->rxdma.dma_map,
4297		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4298
4299		/* Advance our pointers to the next descriptor. */
4300		if (++i == adapter->num_rx_desc)
4301			i = 0;
4302		if (m != NULL) {
4303			adapter->next_rx_desc_to_check = i;
4304#ifdef DEVICE_POLLING
4305			EM_UNLOCK(adapter);
4306			(*ifp->if_input)(ifp, m);
4307			EM_LOCK(adapter);
4308#else
4309			/* Already running unlocked */
4310			(*ifp->if_input)(ifp, m);
4311#endif
4312			i = adapter->next_rx_desc_to_check;
4313		}
4314		current_desc = &adapter->rx_desc_base[i];
4315	}
4316	adapter->next_rx_desc_to_check = i;
4317
4318	/* Advance the E1000's Receive Queue #0  "Tail Pointer". */
4319	if (--i < 0)
4320		i = adapter->num_rx_desc - 1;
4321	E1000_WRITE_REG(&adapter->hw, E1000_RDT, i);
4322	if (!((current_desc->status) & E1000_RXD_STAT_DD))
4323		return (0);
4324
4325	return (1);
4326}
4327
4328#ifndef __NO_STRICT_ALIGNMENT
4329/*
4330	 * When jumbo frames are enabled we should realign the entire payload on
4331	 * architectures with strict alignment. This is a serious design mistake
4332	 * of the 8254x as it nullifies DMA operations: the 8254x only allows the
4333	 * RX buffer size to be 2048/4096/8192/16384, while what we really want
4334	 * is 2048 - ETHER_ALIGN so that the payload comes out aligned. On
4335	 * architectures without strict alignment restrictions the 8254x still
4336	 * performs unaligned memory accesses, which reduces performance too.
4337	 * To avoid copying over an entire frame to align it, we allocate a new
4338	 * mbuf and copy the ethernet header into it; the new mbuf is prepended
4339	 * to the existing mbuf chain.
4340	 *
4341	 * Be aware that the best performance of the 8254x is achieved only when
4342	 * jumbo frames are not used at all on architectures with strict alignment.
4342 */
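/*
 * For example: a received frame normally starts at offset 0 of the
 * cluster, putting the IP header at offset 14 with only 2-byte
 * alignment; shifting the frame up by ETHER_HDR_LEN (or prepending a
 * separate header mbuf) moves the IP header to a 4-byte boundary.
 */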
4343static int
4344em_fixup_rx(struct adapter *adapter)
4345{
4346	struct mbuf *m, *n;
4347	int error;
4348
4349	error = 0;
4350	m = adapter->fmp;
4351	if (m->m_len <= (MCLBYTES - ETHER_HDR_LEN)) {
4352		bcopy(m->m_data, m->m_data + ETHER_HDR_LEN, m->m_len);
4353		m->m_data += ETHER_HDR_LEN;
4354	} else {
4355		MGETHDR(n, M_DONTWAIT, MT_DATA);
4356		if (n != NULL) {
4357			bcopy(m->m_data, n->m_data, ETHER_HDR_LEN);
4358			m->m_data += ETHER_HDR_LEN;
4359			m->m_len -= ETHER_HDR_LEN;
4360			n->m_len = ETHER_HDR_LEN;
4361			M_MOVE_PKTHDR(n, m);
4362			n->m_next = m;
4363			adapter->fmp = n;
4364		} else {
4365			adapter->dropped_pkts++;
4366			m_freem(adapter->fmp);
4367			adapter->fmp = NULL;
4368			error = ENOMEM;
4369		}
4370	}
4371
4372	return (error);
4373}
4374#endif
4375
4376/*********************************************************************
4377 *
4378 *  Verify that the hardware indicated that the checksum is valid.
4379 *  Inform the stack about the status of checksum so that stack
4380 *  doesn't spend time verifying the checksum.
4381 *
4382 *********************************************************************/
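/*
 * Note: setting CSUM_DATA_VALID | CSUM_PSEUDO_HDR together with a
 * csum_data of 0xffff tells the TCP/UDP input paths that the payload
 * checksum has already been verified, so they skip recomputing it.
 */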
4383static void
4384em_receive_checksum(struct adapter *adapter,
4385	    struct e1000_rx_desc *rx_desc, struct mbuf *mp)
4386{
4387	/* 82543 or newer only */
4388	if ((adapter->hw.mac.type < e1000_82543) ||
4389	    /* Ignore Checksum bit is set */
4390	    (rx_desc->status & E1000_RXD_STAT_IXSM)) {
4391		mp->m_pkthdr.csum_flags = 0;
4392		return;
4393	}
4394
4395	if (rx_desc->status & E1000_RXD_STAT_IPCS) {
4396		/* Did it pass? */
4397		if (!(rx_desc->errors & E1000_RXD_ERR_IPE)) {
4398			/* IP Checksum Good */
4399			mp->m_pkthdr.csum_flags = CSUM_IP_CHECKED;
4400			mp->m_pkthdr.csum_flags |= CSUM_IP_VALID;
4401
4402		} else {
4403			mp->m_pkthdr.csum_flags = 0;
4404		}
4405	}
4406
4407	if (rx_desc->status & E1000_RXD_STAT_TCPCS) {
4408		/* Did it pass? */
4409		if (!(rx_desc->errors & E1000_RXD_ERR_TCPE)) {
4410			mp->m_pkthdr.csum_flags |=
4411			(CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
4412			mp->m_pkthdr.csum_data = htons(0xffff);
4413		}
4414	}
4415}
4416
4417
4418static void
4419em_enable_vlans(struct adapter *adapter)
4420{
4421	uint32_t ctrl;
4422
4423	E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);
4424
4425	ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
4426	ctrl |= E1000_CTRL_VME;
4427	E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
4428}
4429
4430static void
4431em_enable_intr(struct adapter *adapter)
4432{
4433	E1000_WRITE_REG(&adapter->hw, E1000_IMS,
4434	    (IMS_ENABLE_MASK));
4435}
4436
4437static void
4438em_disable_intr(struct adapter *adapter)
4439{
4440	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
4441}
4442
4443/*
4444 * Bit of a misnomer, what this really means is
4445 * to enable OS management of the system... aka
4446 * to disable special hardware management features
4447 */
4448static void
4449em_init_manageability(struct adapter *adapter)
4450{
4451	/* A shared code workaround */
4452#define E1000_82542_MANC2H E1000_MANC2H
4453	if (adapter->has_manage) {
4454		int manc2h = E1000_READ_REG(&adapter->hw, E1000_MANC2H);
4455		int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
4456
4457		/* disable hardware interception of ARP */
4458		manc &= ~(E1000_MANC_ARP_EN);
4459
4460		/* enable receiving management packets to the host */
4461		if (adapter->hw.mac.type >= e1000_82571) {
4462			manc |= E1000_MANC_EN_MNG2HOST;
4463#define E1000_MNG2HOST_PORT_623 (1 << 5)
4464#define E1000_MNG2HOST_PORT_664 (1 << 6)
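			/* UDP ports 623 and 664 are the ASF/RMCP management ports. */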
4465			manc2h |= E1000_MNG2HOST_PORT_623;
4466			manc2h |= E1000_MNG2HOST_PORT_664;
4467			E1000_WRITE_REG(&adapter->hw, E1000_MANC2H, manc2h);
4468		}
4469
4470		E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
4471	}
4472}
4473
4474/*
4475 * Give control back to hardware management
4476 * controller if there is one.
4477 */
4478static void
4479em_release_manageability(struct adapter *adapter)
4480{
4481	if (adapter->has_manage) {
4482		int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
4483
4484		/* re-enable hardware interception of ARP */
4485		manc |= E1000_MANC_ARP_EN;
4486
4487		if (adapter->hw.mac.type >= e1000_82571)
4488			manc &= ~E1000_MANC_EN_MNG2HOST;
4489
4490		E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
4491	}
4492}
4493
4494/*
4495 * em_get_hw_control sets the {CTRL_EXT|SWSM}:DRV_LOAD bit.
4496 * For ASF and Pass Through versions of f/w this means that
4497 * the driver is loaded. For AMT version (only with 82573)
4498 * of the f/w this means that the network i/f is open.
4499 *
4500 */
4501static void
4502em_get_hw_control(struct adapter *adapter)
4503{
4504	u32 ctrl_ext, swsm;
4505
4506	/* Let firmware know the driver has taken over */
4507	switch (adapter->hw.mac.type) {
4508	case e1000_82573:
4509		swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM);
4510		E1000_WRITE_REG(&adapter->hw, E1000_SWSM,
4511		    swsm | E1000_SWSM_DRV_LOAD);
4512		break;
4513	case e1000_82571:
4514	case e1000_82572:
4515	case e1000_80003es2lan:
4516	case e1000_ich8lan:
4517	case e1000_ich9lan:
4518		ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4519		E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
4520		    ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
4521		break;
4522	default:
4523		break;
4524	}
4525}
4526
4527/*
4528 * em_release_hw_control resets the {CTRL_EXT|SWSM}:DRV_LOAD bit.
4529 * For ASF and Pass Through versions of f/w this means that the
4530 * driver is no longer loaded. For AMT version (only with 82573)
4531 * of the f/w this means that the network i/f is closed.
4532 *
4533 */
4534static void
4535em_release_hw_control(struct adapter *adapter)
4536{
4537	u32 ctrl_ext, swsm;
4538
4539	/* Let firmware take over control of h/w */
4540	switch (adapter->hw.mac.type) {
4541	case e1000_82573:
4542		swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM);
4543		E1000_WRITE_REG(&adapter->hw, E1000_SWSM,
4544		    swsm & ~E1000_SWSM_DRV_LOAD);
4545		break;
4546	case e1000_82571:
4547	case e1000_82572:
4548	case e1000_80003es2lan:
4549	case e1000_ich8lan:
4550	case e1000_ich9lan:
4551		ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4552		E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
4553		    ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
4554		break;
4555	default:
4556		break;
4557
4558	}
4559}
4560
4561static int
4562em_is_valid_ether_addr(uint8_t *addr)
4563{
4564	char zero_addr[6] = { 0, 0, 0, 0, 0, 0 };
4565
4566	if ((addr[0] & 1) || (!bcmp(addr, zero_addr, ETHER_ADDR_LEN))) {
4567		return (FALSE);
4568	}
4569
4570	return (TRUE);
4571}
4572
4573/*
4574 * NOTE: the following routines using the e1000
4575 * 	naming style are provided to the shared
4576 *	code which expects that rather than 'em'
4577 */
4578
4579void
4580e1000_write_pci_cfg(struct e1000_hw *hw, uint32_t reg, uint16_t *value)
4581{
4582	pci_write_config(((struct e1000_osdep *)hw->back)->dev, reg, *value, 2);
4583}
4584
4585void
4586e1000_read_pci_cfg(struct e1000_hw *hw, uint32_t reg, uint16_t *value)
4587{
4588	*value = pci_read_config(((struct e1000_osdep *)hw->back)->dev, reg, 2);
4589}
4590
4591void
4592e1000_pci_set_mwi(struct e1000_hw *hw)
4593{
4594	pci_write_config(((struct e1000_osdep *)hw->back)->dev, PCIR_COMMAND,
4595	    (hw->bus.pci_cmd_word | CMD_MEM_WRT_INVALIDATE), 2);
4596}
4597
4598void
4599e1000_pci_clear_mwi(struct e1000_hw *hw)
4600{
4601	pci_write_config(((struct e1000_osdep *)hw->back)->dev, PCIR_COMMAND,
4602	    (hw->bus.pci_cmd_word & ~CMD_MEM_WRT_INVALIDATE), 2);
4603}
4604
4605/*
4606 * Read the PCI Express capabilities
4607 */
4608int32_t
4609e1000_read_pcie_cap_reg(struct e1000_hw *hw, uint32_t reg, uint16_t *value)
4610{
4611	int32_t		error = E1000_SUCCESS;
4612	uint16_t	cap_off;
4613
4614	switch (hw->mac.type) {
4615
4616		case e1000_82571:
4617		case e1000_82572:
4618		case e1000_82573:
4619		case e1000_80003es2lan:
4620			cap_off = 0xE0;
4621			e1000_read_pci_cfg(hw, cap_off + reg, value);
4622			break;
4623		default:
4624			error = E1000_NOT_IMPLEMENTED;
4625			break;
4626	}
4627
4628	return (error);
4629}
4630
4631int32_t
4632e1000_alloc_zeroed_dev_spec_struct(struct e1000_hw *hw, uint32_t size)
4633{
4634	int32_t error = 0;
4635
4636	hw->dev_spec = malloc(size, M_DEVBUF, M_NOWAIT | M_ZERO);
4637	if (hw->dev_spec == NULL)
4638		error = ENOMEM;
4639
4640	return (error);
4641}
4642
4643void
4644e1000_free_dev_spec_struct(struct e1000_hw *hw)
4645{
4646	if (hw->dev_spec != NULL)
4647		free(hw->dev_spec, M_DEVBUF);
4648	return;
4649}
4650
4651/*
4652 * Enable PCI Wake On Lan capability
4653 */
4654void
4655em_enable_wakeup(device_t dev)
4656{
4657	u16     cap, status;
4658	u8      id;
4659
4660	/* First find the capabilities pointer */
4661	cap = pci_read_config(dev, PCIR_CAP_PTR, 2);
4662	/* Read the PM Capabilities */
4663	id = pci_read_config(dev, cap, 1);
4664	if (id != PCIY_PMG)     /* Something wrong */
4665		return;
4666	/* OK, we have the power capabilities, so
4667	 * now get the status register */
4668	cap += PCIR_POWER_STATUS;
4669	status = pci_read_config(dev, cap, 2);
4670	status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE;
4671	pci_write_config(dev, cap, status, 2);
4672	return;
4673}
4674
4675
4676/*********************************************************************
4677* 82544 Coexistence issue workaround.
4678*    There are 2 issues.
4679*       1. Transmit Hang issue.
4680*    To detect this issue, the following equation can be used:
4681*	  SIZE[3:0] + ADDR[2:0] = SUM[3:0].
4682*	  If SUM[3:0] is between 1 and 4, we will have this issue.
4683*
4684*       2. DAC issue.
4685*    To detect this issue, the following equation can be used:
4686*	  SIZE[3:0] + ADDR[2:0] = SUM[3:0].
4687*	  If SUM[3:0] is between 9 and 0xC, we will have this issue.
4688*
4689*
4690*    WORKAROUND:
4691*	  Make sure we do not have an ending address
4692*	  of 1, 2, 3, 4 (Hang) or 9, a, b, c (DAC).
4693*
4694*************************************************************************/
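/*
 * Worked example: address = 0x...05 and length = 0x1F give
 * SUM[3:0] = ((0x05 & 0x7) + (0x1F & 0xF)) & 0xF = 0x4, which is in the
 * Hang range, so the buffer is split below into a (length - 4)-byte
 * descriptor plus a trailing 4-byte descriptor with a safe terminator.
 */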
4695static uint32_t
4696em_fill_descriptors (bus_addr_t address, uint32_t length,
4697		PDESC_ARRAY desc_array)
4698{
4699	/* Since issue is sensitive to length and address.*/
4700	/* Since the issue is sensitive to both length and address, */
4701	/* let us check the address first... */
4702	if (length <= 4) {
4703		desc_array->descriptor[0].address = address;
4704		desc_array->descriptor[0].length = length;
4705		desc_array->elements = 1;
4706		return (desc_array->elements);
4707	}
4708	safe_terminator = (uint32_t)((((uint32_t)address & 0x7) +
4709	    (length & 0xF)) & 0xF);
4710	/* if it does not fall between 0x1 to 0x4 and 0x9 to 0xC then return */
4711	/* If it does not fall within 0x1 to 0x4 or 0x9 to 0xC, use one descriptor. */
4712	if (safe_terminator == 0 ||
4713	    (safe_terminator > 4 &&
4714	    safe_terminator < 9) ||
4715	    (safe_terminator > 0xC &&
4716	    safe_terminator <= 0xF)) {
4717		desc_array->descriptor[0].length = length;
4718		desc_array->elements = 1;
4719		return (desc_array->elements);
4720	}
4721
4722	desc_array->descriptor[0].address = address;
4723	desc_array->descriptor[0].length = length - 4;
4724	desc_array->descriptor[1].address = address + (length - 4);
4725	desc_array->descriptor[1].length = 4;
4726	desc_array->elements = 2;
4727	return (desc_array->elements);
4728}
4729
4730/**********************************************************************
4731 *
4732 *  Update the board statistics counters.
4733 *
4734 **********************************************************************/
4735static void
4736em_update_stats_counters(struct adapter *adapter)
4737{
4738	struct ifnet   *ifp;
4739
4740	if (adapter->hw.media_type == e1000_media_type_copper ||
4741	   (E1000_READ_REG(&adapter->hw, E1000_STATUS) & E1000_STATUS_LU)) {
4742		adapter->stats.symerrs += E1000_READ_REG(&adapter->hw, E1000_SYMERRS);
4743		adapter->stats.sec += E1000_READ_REG(&adapter->hw, E1000_SEC);
4744	}
4745	adapter->stats.crcerrs += E1000_READ_REG(&adapter->hw, E1000_CRCERRS);
4746	adapter->stats.mpc += E1000_READ_REG(&adapter->hw, E1000_MPC);
4747	adapter->stats.scc += E1000_READ_REG(&adapter->hw, E1000_SCC);
4748	adapter->stats.ecol += E1000_READ_REG(&adapter->hw, E1000_ECOL);
4749
4750	adapter->stats.mcc += E1000_READ_REG(&adapter->hw, E1000_MCC);
4751	adapter->stats.latecol += E1000_READ_REG(&adapter->hw, E1000_LATECOL);
4752	adapter->stats.colc += E1000_READ_REG(&adapter->hw, E1000_COLC);
4753	adapter->stats.dc += E1000_READ_REG(&adapter->hw, E1000_DC);
4754	adapter->stats.rlec += E1000_READ_REG(&adapter->hw, E1000_RLEC);
4755	adapter->stats.xonrxc += E1000_READ_REG(&adapter->hw, E1000_XONRXC);
4756	adapter->stats.xontxc += E1000_READ_REG(&adapter->hw, E1000_XONTXC);
4757	adapter->stats.xoffrxc += E1000_READ_REG(&adapter->hw, E1000_XOFFRXC);
4758	adapter->stats.xofftxc += E1000_READ_REG(&adapter->hw, E1000_XOFFTXC);
4759	adapter->stats.fcruc += E1000_READ_REG(&adapter->hw, E1000_FCRUC);
4760	adapter->stats.prc64 += E1000_READ_REG(&adapter->hw, E1000_PRC64);
4761	adapter->stats.prc127 += E1000_READ_REG(&adapter->hw, E1000_PRC127);
4762	adapter->stats.prc255 += E1000_READ_REG(&adapter->hw, E1000_PRC255);
4763	adapter->stats.prc511 += E1000_READ_REG(&adapter->hw, E1000_PRC511);
4764	adapter->stats.prc1023 += E1000_READ_REG(&adapter->hw, E1000_PRC1023);
4765	adapter->stats.prc1522 += E1000_READ_REG(&adapter->hw, E1000_PRC1522);
4766	adapter->stats.gprc += E1000_READ_REG(&adapter->hw, E1000_GPRC);
4767	adapter->stats.bprc += E1000_READ_REG(&adapter->hw, E1000_BPRC);
4768	adapter->stats.mprc += E1000_READ_REG(&adapter->hw, E1000_MPRC);
4769	adapter->stats.gptc += E1000_READ_REG(&adapter->hw, E1000_GPTC);
4770
4771	/* For the 64-bit byte counters the low dword must be read first. */
4772	/* Both registers clear on the read of the high dword */
4773
4774	adapter->stats.gorcl += E1000_READ_REG(&adapter->hw, E1000_GORCL);
4775	adapter->stats.gorch += E1000_READ_REG(&adapter->hw, E1000_GORCH);
4776	adapter->stats.gotcl += E1000_READ_REG(&adapter->hw, E1000_GOTCL);
4777	adapter->stats.gotch += E1000_READ_REG(&adapter->hw, E1000_GOTCH);
4778
4779	adapter->stats.rnbc += E1000_READ_REG(&adapter->hw, E1000_RNBC);
4780	adapter->stats.ruc += E1000_READ_REG(&adapter->hw, E1000_RUC);
4781	adapter->stats.rfc += E1000_READ_REG(&adapter->hw, E1000_RFC);
4782	adapter->stats.roc += E1000_READ_REG(&adapter->hw, E1000_ROC);
4783	adapter->stats.rjc += E1000_READ_REG(&adapter->hw, E1000_RJC);
4784
4785	adapter->stats.torl += E1000_READ_REG(&adapter->hw, E1000_TORL);
4786	adapter->stats.torh += E1000_READ_REG(&adapter->hw, E1000_TORH);
4787	adapter->stats.totl += E1000_READ_REG(&adapter->hw, E1000_TOTL);
4788	adapter->stats.toth += E1000_READ_REG(&adapter->hw, E1000_TOTH);
4789
4790	adapter->stats.tpr += E1000_READ_REG(&adapter->hw, E1000_TPR);
4791	adapter->stats.tpt += E1000_READ_REG(&adapter->hw, E1000_TPT);
4792	adapter->stats.ptc64 += E1000_READ_REG(&adapter->hw, E1000_PTC64);
4793	adapter->stats.ptc127 += E1000_READ_REG(&adapter->hw, E1000_PTC127);
4794	adapter->stats.ptc255 += E1000_READ_REG(&adapter->hw, E1000_PTC255);
4795	adapter->stats.ptc511 += E1000_READ_REG(&adapter->hw, E1000_PTC511);
4796	adapter->stats.ptc1023 += E1000_READ_REG(&adapter->hw, E1000_PTC1023);
4797	adapter->stats.ptc1522 += E1000_READ_REG(&adapter->hw, E1000_PTC1522);
4798	adapter->stats.mptc += E1000_READ_REG(&adapter->hw, E1000_MPTC);
4799	adapter->stats.bptc += E1000_READ_REG(&adapter->hw, E1000_BPTC);
4800
4801	if (adapter->hw.mac.type >= e1000_82543) {
4802		adapter->stats.algnerrc +=
4803		E1000_READ_REG(&adapter->hw, E1000_ALGNERRC);
4804		adapter->stats.rxerrc +=
4805		E1000_READ_REG(&adapter->hw, E1000_RXERRC);
4806		adapter->stats.tncrs +=
4807		E1000_READ_REG(&adapter->hw, E1000_TNCRS);
4808		adapter->stats.cexterr +=
4809		E1000_READ_REG(&adapter->hw, E1000_CEXTERR);
4810		adapter->stats.tsctc +=
4811		E1000_READ_REG(&adapter->hw, E1000_TSCTC);
4812		adapter->stats.tsctfc +=
4813		E1000_READ_REG(&adapter->hw, E1000_TSCTFC);
4814	}
4815	ifp = adapter->ifp;
4816
4817	ifp->if_collisions = adapter->stats.colc;
4818
4819	/* Rx Errors */
4820	ifp->if_ierrors = adapter->dropped_pkts + adapter->stats.rxerrc +
4821	    adapter->stats.crcerrs + adapter->stats.algnerrc +
4822	    adapter->stats.ruc + adapter->stats.roc +
4823	    adapter->stats.mpc + adapter->stats.cexterr;
4824
4825	/* Tx Errors */
4826	ifp->if_oerrors = adapter->stats.ecol +
4827	    adapter->stats.latecol + adapter->watchdog_events;
4828}
4829
4830
4831/**********************************************************************
4832 *
4833 *  This routine is called only when em_display_debug_stats is enabled.
4834 *  This routine provides a way to take a look at important statistics
4835 *  maintained by the driver and hardware.
4836 *
4837 **********************************************************************/
4838static void
4839em_print_debug_info(struct adapter *adapter)
4840{
4841	device_t dev = adapter->dev;
4842	uint8_t *hw_addr = adapter->hw.hw_addr;
4843
4844	device_printf(dev, "Adapter hardware address = %p \n", hw_addr);
4845	device_printf(dev, "CTRL = 0x%x RCTL = 0x%x \n",
4846	    E1000_READ_REG(&adapter->hw, E1000_CTRL),
4847	    E1000_READ_REG(&adapter->hw, E1000_RCTL));
4848	device_printf(dev, "Packet buffer = Tx=%dk Rx=%dk \n",
4849	    ((E1000_READ_REG(&adapter->hw, E1000_PBA) & 0xffff0000) >> 16),
4850	    (E1000_READ_REG(&adapter->hw, E1000_PBA) & 0xffff));
4851	device_printf(dev, "Flow control watermarks high = %d low = %d\n",
4852	    adapter->hw.mac.fc_high_water,
4853	    adapter->hw.mac.fc_low_water);
4854	device_printf(dev, "tx_int_delay = %d, tx_abs_int_delay = %d\n",
4855	    E1000_READ_REG(&adapter->hw, E1000_TIDV),
4856	    E1000_READ_REG(&adapter->hw, E1000_TADV));
4857	device_printf(dev, "rx_int_delay = %d, rx_abs_int_delay = %d\n",
4858	    E1000_READ_REG(&adapter->hw, E1000_RDTR),
4859	    E1000_READ_REG(&adapter->hw, E1000_RADV));
4860	device_printf(dev, "fifo workaround = %lld, fifo_reset_count = %lld\n",
4861	    (long long)adapter->tx_fifo_wrk_cnt,
4862	    (long long)adapter->tx_fifo_reset_cnt);
4863	device_printf(dev, "hw tdh = %d, hw tdt = %d\n",
4864	    E1000_READ_REG(&adapter->hw, E1000_TDH),
4865	    E1000_READ_REG(&adapter->hw, E1000_TDT));
4866	device_printf(dev, "hw rdh = %d, hw rdt = %d\n",
4867	    E1000_READ_REG(&adapter->hw, E1000_RDH),
4868	    E1000_READ_REG(&adapter->hw, E1000_RDT));
4869	device_printf(dev, "Num Tx descriptors avail = %d\n",
4870	    adapter->num_tx_desc_avail);
4871	device_printf(dev, "Tx Descriptors not avail1 = %ld\n",
4872	    adapter->no_tx_desc_avail1);
4873	device_printf(dev, "Tx Descriptors not avail2 = %ld\n",
4874	    adapter->no_tx_desc_avail2);
4875	device_printf(dev, "Std mbuf failed = %ld\n",
4876	    adapter->mbuf_alloc_failed);
4877	device_printf(dev, "Std mbuf cluster failed = %ld\n",
4878	    adapter->mbuf_cluster_failed);
4879	device_printf(dev, "Driver dropped packets = %ld\n",
4880	    adapter->dropped_pkts);
4881	device_printf(dev, "Driver tx dma failure in encap = %ld\n",
4882		adapter->no_tx_dma_setup);
4883}
4884
4885static void
4886em_print_hw_stats(struct adapter *adapter)
4887{
4888	device_t dev = adapter->dev;
4889
4890	device_printf(dev, "Excessive collisions = %lld\n",
4891	    (long long)adapter->stats.ecol);
4892#if	(DEBUG_HW > 0)  /* Don't output these errors normally */
4893	device_printf(dev, "Symbol errors = %lld\n",
4894	    (long long)adapter->stats.symerrs);
4895#endif
4896	device_printf(dev, "Sequence errors = %lld\n",
4897	    (long long)adapter->stats.sec);
4898	device_printf(dev, "Defer count = %lld\n",
4899	    (long long)adapter->stats.dc);
4900	device_printf(dev, "Missed Packets = %lld\n",
4901	    (long long)adapter->stats.mpc);
4902	device_printf(dev, "Receive No Buffers = %lld\n",
4903	    (long long)adapter->stats.rnbc);
4904	/* RLEC is inaccurate on some hardware, so calculate our own. */
4905	device_printf(dev, "Receive Length Errors = %lld\n",
4906	    ((long long)adapter->stats.roc + (long long)adapter->stats.ruc));
4907	device_printf(dev, "Receive errors = %lld\n",
4908	    (long long)adapter->stats.rxerrc);
4909	device_printf(dev, "Crc errors = %lld\n",
4910	    (long long)adapter->stats.crcerrs);
4911	device_printf(dev, "Alignment errors = %lld\n",
4912	    (long long)adapter->stats.algnerrc);
4913	device_printf(dev, "Carrier extension errors = %lld\n",
4914	    (long long)adapter->stats.cexterr);
4915	device_printf(dev, "RX overruns = %ld\n", adapter->rx_overruns);
4916	device_printf(dev, "watchdog timeouts = %ld\n",
4917	    adapter->watchdog_events);
4918	device_printf(dev, "XON Rcvd = %lld\n",
4919	    (long long)adapter->stats.xonrxc);
4920	device_printf(dev, "XON Xmtd = %lld\n",
4921	    (long long)adapter->stats.xontxc);
4922	device_printf(dev, "XOFF Rcvd = %lld\n",
4923	    (long long)adapter->stats.xoffrxc);
4924	device_printf(dev, "XOFF Xmtd = %lld\n",
4925	    (long long)adapter->stats.xofftxc);
4926	device_printf(dev, "Good Packets Rcvd = %lld\n",
4927	    (long long)adapter->stats.gprc);
4928	device_printf(dev, "Good Packets Xmtd = %lld\n",
4929	    (long long)adapter->stats.gptc);
4930	device_printf(dev, "TSO Contexts Xmtd = %lld\n",
4931	    (long long)adapter->stats.tsctc);
4932	device_printf(dev, "TSO Contexts Failed = %lld\n",
4933	    (long long)adapter->stats.tsctfc);
4934}
4935
4936static int
4937em_sysctl_debug_info(SYSCTL_HANDLER_ARGS)
4938{
4939	struct adapter *adapter;
4940	int error;
4941	int result;
4942
4943	result = -1;
4944	error = sysctl_handle_int(oidp, &result, 0, req);
4945
4946	if (error || !req->newptr)
4947		return (error);
4948
4949	if (result == 1) {
4950		adapter = (struct adapter *)arg1;
4951		em_print_debug_info(adapter);
4952	}
4953
4954	return (error);
4955}
4956
4957
4958static int
4959em_sysctl_stats(SYSCTL_HANDLER_ARGS)
4960{
4961	struct adapter *adapter;
4962	int error;
4963	int result;
4964
4965	result = -1;
4966	error = sysctl_handle_int(oidp, &result, 0, req);
4967
4968	if (error || !req->newptr)
4969		return (error);
4970
4971	if (result == 1) {
4972		adapter = (struct adapter *)arg1;
4973		em_print_hw_stats(adapter);
4974	}
4975
4976	return (error);
4977}
4978
4979static int
4980em_sysctl_int_delay(SYSCTL_HANDLER_ARGS)
4981{
4982	struct em_int_delay_info *info;
4983	struct adapter *adapter;
4984	uint32_t regval;
4985	int error;
4986	int usecs;
4987	int ticks;
4988
4989	info = (struct em_int_delay_info *)arg1;
4990	usecs = info->value;
4991	error = sysctl_handle_int(oidp, &usecs, 0, req);
4992	if (error != 0 || req->newptr == NULL)
4993		return (error);
4994	if (usecs < 0 || usecs > EM_TICKS_TO_USECS(65535))
4995		return (EINVAL);
4996	info->value = usecs;
4997	ticks = EM_USECS_TO_TICKS(usecs);
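	/*
	 * A sketch of the conversion, assuming the EM_USECS_TO_TICKS()
	 * macro from if_em.h and its 1.024 us timer granularity: for
	 * usecs = 32 this yields (1000 * 32 + 512) / 1024 = 31 ticks.
	 */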
4998
4999	adapter = info->adapter;
5000
5001	EM_LOCK(adapter);
5002	regval = E1000_READ_OFFSET(&adapter->hw, info->offset);
5003	regval = (regval & ~0xffff) | (ticks & 0xffff);
5004	/* Handle a few special cases. */
5005	switch (info->offset) {
5006	case E1000_RDTR:
5007		break;
5008	case E1000_TIDV:
5009		if (ticks == 0) {
5010			adapter->txd_cmd &= ~E1000_TXD_CMD_IDE;
5011			/* Don't write 0 into the TIDV register. */
5012			regval++;
5013		} else
5014			if (adapter->hw.mac.type != e1000_82575)
5015				adapter->txd_cmd |= E1000_TXD_CMD_IDE;
5016		break;
5017	}
5018	E1000_WRITE_OFFSET(&adapter->hw, info->offset, regval);
5019	EM_UNLOCK(adapter);
5020	return (0);
5021}
5022
5023static void
5024em_add_int_delay_sysctl(struct adapter *adapter, const char *name,
5025	const char *description, struct em_int_delay_info *info,
5026	int offset, int value)
5027{
5028	info->adapter = adapter;
5029	info->offset = offset;
5030	info->value = value;
5031	SYSCTL_ADD_PROC(device_get_sysctl_ctx(adapter->dev),
5032	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
5033	    OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW,
5034	    info, 0, em_sysctl_int_delay, "I", description);
5035}
5036
5037#ifndef DEVICE_POLLING
5038static void
5039em_add_rx_process_limit(struct adapter *adapter, const char *name,
5040	const char *description, int *limit, int value)
5041{
5042	*limit = value;
5043	SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
5044	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
5045	    OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW, limit, value, description);
5046}
5047#endif
5048