if_em.c revision 170171
1/**************************************************************************
2
3Copyright (c) 2001-2007, Intel Corporation
4All rights reserved.
5
6Redistribution and use in source and binary forms, with or without
7modification, are permitted provided that the following conditions are met:
8
9 1. Redistributions of source code must retain the above copyright notice,
10    this list of conditions and the following disclaimer.
11
12 2. Redistributions in binary form must reproduce the above copyright
13    notice, this list of conditions and the following disclaimer in the
14    documentation and/or other materials provided with the distribution.
15
16 3. Neither the name of the Intel Corporation nor the names of its
17    contributors may be used to endorse or promote products derived from
18    this software without specific prior written permission.
19
20THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
24LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30POSSIBILITY OF SUCH DAMAGE.
31
32***************************************************************************/
33
34/*$FreeBSD: head/sys/dev/em/if_em.c 170171 2007-05-31 23:36:21Z jfv $*/
35
36#ifdef HAVE_KERNEL_OPTION_HEADERS
37#include "opt_device_polling.h"
38#endif
39
40#include <sys/param.h>
41#include <sys/systm.h>
42#include <sys/bus.h>
43#include <sys/endian.h>
44#include <sys/kernel.h>
45#include <sys/kthread.h>
46#include <sys/malloc.h>
47#include <sys/mbuf.h>
48#include <sys/module.h>
49#include <sys/rman.h>
50#include <sys/socket.h>
51#include <sys/sockio.h>
52#include <sys/sysctl.h>
53#include <sys/taskqueue.h>
54
55#include <machine/bus.h>
56#include <machine/resource.h>
57
58#include <net/bpf.h>
59#include <net/ethernet.h>
60#include <net/if.h>
61#include <net/if_arp.h>
62#include <net/if_dl.h>
63#include <net/if_media.h>
64
65#include <net/if_types.h>
66#include <net/if_vlan_var.h>
67
68#include <netinet/in_systm.h>
69#include <netinet/in.h>
70#include <netinet/if_ether.h>
71#include <netinet/ip.h>
72#include <netinet/ip6.h>
73#include <netinet/tcp.h>
74#include <netinet/udp.h>
75
76#include <machine/in_cksum.h>
77#include <dev/pci/pcivar.h>
78#include <dev/pci/pcireg.h>
79
80#include "e1000_api.h"
81#include "e1000_82575.h"
82#include "if_em.h"
83
84/*********************************************************************
85 *  Set this to one to display debug statistics
86 *********************************************************************/
/* Set to non-zero to dump per-adapter debug statistics (see em_print_hw_stats). */
int	em_display_debug_stats = 0;

/*********************************************************************
 *  Driver version:
 *********************************************************************/
/* Appended to the probe string reported via device_set_desc_copy(). */
char em_driver_version[] = "Version - 6.5.3";
93
94
95/*********************************************************************
96 *  PCI Device ID Table
97 *
98 *  Used by probe to select devices to load on
99 *  Last field stores an index into e1000_strings
100 *  Last entry must be all 0s
101 *
102 *  { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
103 *********************************************************************/
104
/*
 * PCI match table consumed by em_probe(): each row is
 * { vendor, device, subvendor, subdevice, em_strings index }.
 * PCI_ANY_ID wildcards the subsystem IDs; the all-zero row terminates.
 */
static em_vendor_info_t em_vendor_info_array[] =
{
	/* Intel(R) PRO/1000 Network Connection */
	{ 0x8086, E1000_DEV_ID_82540EM,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82540EM_LOM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82540EP,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82540EP_LOM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82540EP_LP,	PCI_ANY_ID, PCI_ANY_ID, 0},

	{ 0x8086, E1000_DEV_ID_82541EI,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82541ER,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82541ER_LOM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82541EI_MOBILE,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82541GI,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82541GI_LF,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82541GI_MOBILE,	PCI_ANY_ID, PCI_ANY_ID, 0},

	{ 0x8086, E1000_DEV_ID_82542,		PCI_ANY_ID, PCI_ANY_ID, 0},

	{ 0x8086, E1000_DEV_ID_82543GC_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82543GC_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},

	{ 0x8086, E1000_DEV_ID_82544EI_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82544EI_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82544GC_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82544GC_LOM,	PCI_ANY_ID, PCI_ANY_ID, 0},

	{ 0x8086, E1000_DEV_ID_82545EM_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82545EM_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82545GM_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82545GM_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82545GM_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},

	{ 0x8086, E1000_DEV_ID_82546EB_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82546EB_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82546EB_QUAD_COPPER, PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82546GB_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82546GB_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82546GB_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82546GB_PCIE,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82546GB_QUAD_COPPER, PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82546GB_QUAD_COPPER_KSP3,
						PCI_ANY_ID, PCI_ANY_ID, 0},

	{ 0x8086, E1000_DEV_ID_82547EI,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82547EI_MOBILE,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82547GI,		PCI_ANY_ID, PCI_ANY_ID, 0},

	{ 0x8086, E1000_DEV_ID_82571EB_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER_LP,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_FIBER,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82572EI_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82572EI_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82572EI_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82572EI,		PCI_ANY_ID, PCI_ANY_ID, 0},

	{ 0x8086, E1000_DEV_ID_82573E,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82573E_IAMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82573L,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_SPT,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_SPT,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_DPT,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_DPT,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IGP_M_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IGP_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IGP_C,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IFE,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IFE_GT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IFE_G,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IGP_M,	PCI_ANY_ID, PCI_ANY_ID, 0},

	{ 0x8086, E1000_DEV_ID_ICH9_IGP_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IGP_C,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IFE,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IFE_GT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IFE_G,	PCI_ANY_ID, PCI_ANY_ID, 0},

	{ 0x8086, E1000_DEV_ID_82575EB_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82575EB_FIBER_SERDES,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82575EM_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82575EM_FIBER_SERDES,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82575GB_QUAD_COPPER,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	/* required last entry */
	{ 0, 0, 0, 0, 0}
};
203
204/*********************************************************************
205 *  Table of branding strings for all supported NICs.
206 *********************************************************************/
207
/* Branding strings; em_vendor_info_t.index selects an entry at probe time. */
static char *em_strings[] = {
	"Intel(R) PRO/1000 Network Connection"
};

/*********************************************************************
 *  Function prototypes
 *********************************************************************/
/* newbus device interface entry points */
static int	em_probe(device_t);
static int	em_attach(device_t);
static int	em_detach(device_t);
static int	em_shutdown(device_t);
static int	em_suspend(device_t);
static int	em_resume(device_t);
/* ifnet entry points and locked worker variants */
static void	em_start(struct ifnet *);
static void	em_start_locked(struct ifnet *ifp);
static int	em_ioctl(struct ifnet *, u_long, caddr_t);
static void	em_watchdog(struct adapter *);
static void	em_init(void *);
static void	em_init_locked(struct adapter *);
static void	em_stop(void *);
static void	em_media_status(struct ifnet *, struct ifmediareq *);
static int	em_media_change(struct ifnet *);
/* resource setup/teardown */
static void	em_identify_hardware(struct adapter *);
static int	em_allocate_pci_resources(struct adapter *);
static int	em_allocate_intr(struct adapter *);
static void	em_free_intr(struct adapter *);
static void	em_free_pci_resources(struct adapter *);
static void	em_local_timer(void *);
static int	em_hardware_init(struct adapter *);
static void	em_setup_interface(device_t, struct adapter *);
/* descriptor ring management */
static int	em_setup_transmit_structures(struct adapter *);
static void	em_initialize_transmit_unit(struct adapter *);
static int	em_setup_receive_structures(struct adapter *);
static void	em_initialize_receive_unit(struct adapter *);
static void	em_enable_intr(struct adapter *);
static void	em_disable_intr(struct adapter *);
static void	em_free_transmit_structures(struct adapter *);
static void	em_free_receive_structures(struct adapter *);
static void	em_update_stats_counters(struct adapter *);
static void	em_txeof(struct adapter *);
static int	em_allocate_receive_structures(struct adapter *);
static int	em_allocate_transmit_structures(struct adapter *);
static int	em_rxeof(struct adapter *, int);
#ifndef __NO_STRICT_ALIGNMENT
static int	em_fixup_rx(struct adapter *);
#endif
/* offload (checksum/TSO) context setup, legacy and advanced (82575) */
static void	em_receive_checksum(struct adapter *, struct e1000_rx_desc *,
		    struct mbuf *);
static void	em_transmit_checksum_setup(struct adapter *, struct mbuf *,
		    uint32_t *, uint32_t *);
static boolean_t em_tx_adv_ctx_setup(struct adapter *, struct mbuf *);
static boolean_t em_tso_setup(struct adapter *, struct mbuf *, uint32_t *,
		    uint32_t *);
static boolean_t em_tso_adv_setup(struct adapter *, struct mbuf *, uint32_t *);
static void	em_set_promisc(struct adapter *);
static void	em_disable_promisc(struct adapter *);
static void	em_set_multi(struct adapter *);
static void	em_print_hw_stats(struct adapter *);
static void	em_update_link_status(struct adapter *);
static int	em_get_buf(struct adapter *, int);
static void	em_enable_vlans(struct adapter *);
static int	em_encap(struct adapter *, struct mbuf **);
static int	em_adv_encap(struct adapter *, struct mbuf **);
static void	em_smartspeed(struct adapter *);
/* 82547 TX FIFO workaround helpers */
static int	em_82547_fifo_workaround(struct adapter *, int);
static void	em_82547_update_fifo_head(struct adapter *, int);
static int	em_82547_tx_fifo_reset(struct adapter *);
static void	em_82547_move_tail(void *);
static int	em_dma_malloc(struct adapter *, bus_size_t,
		    struct em_dma_alloc *, int);
static void	em_dma_free(struct adapter *, struct em_dma_alloc *);
static void	em_print_debug_info(struct adapter *);
static int 	em_is_valid_ether_addr(uint8_t *);
static int	em_sysctl_stats(SYSCTL_HANDLER_ARGS);
static int	em_sysctl_debug_info(SYSCTL_HANDLER_ARGS);
static uint32_t	em_fill_descriptors (bus_addr_t address, uint32_t length,
		    PDESC_ARRAY desc_array);
static int	em_sysctl_int_delay(SYSCTL_HANDLER_ARGS);
static void	em_add_int_delay_sysctl(struct adapter *, const char *,
		    const char *, struct em_int_delay_info *, int, int);
/* Management and WOL Support */
static void	em_init_manageability(struct adapter *);
static void	em_release_manageability(struct adapter *);
static void     em_get_hw_control(struct adapter *);
static void     em_release_hw_control(struct adapter *);
static void     em_enable_wakeup(device_t);

#ifdef DEVICE_POLLING
static poll_handler_t em_poll;
static void	em_intr(void *);
#else
/* fast-interrupt filter + taskqueue deferred handlers */
static int	em_intr_fast(void *);
static void	em_add_rx_process_limit(struct adapter *, const char *,
		    const char *, int *, int);
static void	em_handle_rxtx(void *context, int pending);
static void	em_handle_link(void *context, int pending);
#endif
305
306/*********************************************************************
307 *  FreeBSD Device Interface Entry Points
308 *********************************************************************/
309
/* newbus method dispatch table; terminated by the {0, 0} sentinel. */
static device_method_t em_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe, em_probe),
	DEVMETHOD(device_attach, em_attach),
	DEVMETHOD(device_detach, em_detach),
	DEVMETHOD(device_shutdown, em_shutdown),
	DEVMETHOD(device_suspend, em_suspend),
	DEVMETHOD(device_resume, em_resume),
	{0, 0}
};

/* Driver declaration: softc is one struct adapter per device instance. */
static driver_t em_driver = {
	"em", em_methods, sizeof(struct adapter),
};

static devclass_t em_devclass;
DRIVER_MODULE(em, pci, em_driver, em_devclass, 0, 0);
MODULE_DEPEND(em, pci, 1, 1, 1);
MODULE_DEPEND(em, ether, 1, 1, 1);
329
330/*********************************************************************
331 *  Tunable default values.
332 *********************************************************************/
333
/* Interrupt-delay registers tick in 1.024 usec units; convert both ways. */
#define EM_TICKS_TO_USECS(ticks)	((1024 * (ticks) + 500) / 1000)
#define EM_USECS_TO_TICKS(usecs)	((1000 * (usecs) + 512) / 1024)
#define M_TSO_LEN			66

/* Defaults, overridable at boot via the hw.em.* tunables below. */
static int em_tx_int_delay_dflt = EM_TICKS_TO_USECS(EM_TIDV);
static int em_rx_int_delay_dflt = EM_TICKS_TO_USECS(EM_RDTR);
static int em_tx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_TADV);
static int em_rx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_RADV);
static int em_rxd = EM_DEFAULT_RXD;
static int em_txd = EM_DEFAULT_TXD;
static int em_smart_pwr_down = FALSE;

TUNABLE_INT("hw.em.tx_int_delay", &em_tx_int_delay_dflt);
TUNABLE_INT("hw.em.rx_int_delay", &em_rx_int_delay_dflt);
TUNABLE_INT("hw.em.tx_abs_int_delay", &em_tx_abs_int_delay_dflt);
TUNABLE_INT("hw.em.rx_abs_int_delay", &em_rx_abs_int_delay_dflt);
TUNABLE_INT("hw.em.rxd", &em_rxd);
TUNABLE_INT("hw.em.txd", &em_txd);
TUNABLE_INT("hw.em.smart_pwr_down", &em_smart_pwr_down);
#ifndef DEVICE_POLLING
/* How many packets rxeof tries to clean at a time */
static int em_rx_process_limit = 100;
TUNABLE_INT("hw.em.rx_process_limit", &em_rx_process_limit);
#endif
/* Global used in WOL setup with multiport cards */
static int global_quad_port_a = 0;
360
361/*********************************************************************
362 *  Device identification routine
363 *
364 *  em_probe determines if the driver should be loaded on
365 *  adapter based on PCI vendor/device id of the adapter.
366 *
367 *  return BUS_PROBE_DEFAULT on success, positive on failure
368 *********************************************************************/
369
370static int
371em_probe(device_t dev)
372{
373	char		adapter_name[60];
374	uint16_t	pci_vendor_id = 0;
375	uint16_t	pci_device_id = 0;
376	uint16_t	pci_subvendor_id = 0;
377	uint16_t	pci_subdevice_id = 0;
378	em_vendor_info_t *ent;
379
380	INIT_DEBUGOUT("em_probe: begin");
381
382	pci_vendor_id = pci_get_vendor(dev);
383	if (pci_vendor_id != EM_VENDOR_ID)
384		return (ENXIO);
385
386	pci_device_id = pci_get_device(dev);
387	pci_subvendor_id = pci_get_subvendor(dev);
388	pci_subdevice_id = pci_get_subdevice(dev);
389
390	ent = em_vendor_info_array;
391	while (ent->vendor_id != 0) {
392		if ((pci_vendor_id == ent->vendor_id) &&
393		    (pci_device_id == ent->device_id) &&
394
395		    ((pci_subvendor_id == ent->subvendor_id) ||
396		    (ent->subvendor_id == PCI_ANY_ID)) &&
397
398		    ((pci_subdevice_id == ent->subdevice_id) ||
399		    (ent->subdevice_id == PCI_ANY_ID))) {
400			sprintf(adapter_name, "%s %s",
401				em_strings[ent->index],
402				em_driver_version);
403			device_set_desc_copy(dev, adapter_name);
404			return (BUS_PROBE_DEFAULT);
405		}
406		ent++;
407	}
408
409	return (ENXIO);
410}
411
412/*********************************************************************
413 *  Device initialization routine
414 *
415 *  The attach entry point is called when the driver is being loaded.
416 *  This routine identifies the type of hardware, allocates all resources
417 *  and initializes the hardware.
418 *
419 *  return 0 on success, positive on failure
420 *********************************************************************/
421
422static int
423em_attach(device_t dev)
424{
425	struct adapter	*adapter;
426	int		tsize, rsize;
427	int		error = 0;
428	u16		eeprom_data, device_id;
429
430	INIT_DEBUGOUT("em_attach: begin");
431
432	adapter = device_get_softc(dev);
433	adapter->dev = adapter->osdep.dev = dev;
434	EM_LOCK_INIT(adapter, device_get_nameunit(dev));
435
436	/* SYSCTL stuff */
437	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
438	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
439	    OID_AUTO, "debug_info", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
440	    em_sysctl_debug_info, "I", "Debug Information");
441
442	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
443	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
444	    OID_AUTO, "stats", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
445	    em_sysctl_stats, "I", "Statistics");
446
447	callout_init_mtx(&adapter->timer, &adapter->mtx, 0);
448	callout_init_mtx(&adapter->tx_fifo_timer, &adapter->mtx, 0);
449
450	/* Determine hardware and mac info */
451	em_identify_hardware(adapter);
452
453	/* Setup PCI resources */
454	if (em_allocate_pci_resources(adapter)) {
455		device_printf(dev, "Allocation of PCI resources failed\n");
456		error = ENXIO;
457		goto err_pci;
458	}
459
460	/*
461	** For ICH8 and family we need to
462	** map the flash memory, and this
463	** must happen after the MAC is
464	** identified
465	*/
466	if ((adapter->hw.mac.type == e1000_ich8lan) ||
467	    (adapter->hw.mac.type == e1000_ich9lan)) {
468		int rid = EM_BAR_TYPE_FLASH;
469		adapter->flash_mem = bus_alloc_resource_any(dev,
470		    SYS_RES_MEMORY, &rid, RF_ACTIVE);
471		/* This is used in the shared code */
472		adapter->hw.flash_address = (u8 *)adapter->flash_mem;
473		adapter->osdep.flash_bus_space_tag =
474		    rman_get_bustag(adapter->flash_mem);
475		adapter->osdep.flash_bus_space_handle =
476		    rman_get_bushandle(adapter->flash_mem);
477	}
478
479	/* Do Shared Code initialization */
480	if (e1000_setup_init_funcs(&adapter->hw, TRUE)) {
481		device_printf(dev, "Setup of Shared code failed\n");
482		error = ENXIO;
483		goto err_pci;
484	}
485
486	e1000_get_bus_info(&adapter->hw);
487
488	/* Set up some sysctls for the tunable interrupt delays */
489	em_add_int_delay_sysctl(adapter, "rx_int_delay",
490	    "receive interrupt delay in usecs", &adapter->rx_int_delay,
491	    E1000_REGISTER(&adapter->hw, E1000_RDTR), em_rx_int_delay_dflt);
492	em_add_int_delay_sysctl(adapter, "tx_int_delay",
493	    "transmit interrupt delay in usecs", &adapter->tx_int_delay,
494	    E1000_REGISTER(&adapter->hw, E1000_TIDV), em_tx_int_delay_dflt);
495	if (adapter->hw.mac.type >= e1000_82540) {
496		em_add_int_delay_sysctl(adapter, "rx_abs_int_delay",
497		    "receive interrupt delay limit in usecs",
498		    &adapter->rx_abs_int_delay,
499		    E1000_REGISTER(&adapter->hw, E1000_RADV),
500		    em_rx_abs_int_delay_dflt);
501		em_add_int_delay_sysctl(adapter, "tx_abs_int_delay",
502		    "transmit interrupt delay limit in usecs",
503		    &adapter->tx_abs_int_delay,
504		    E1000_REGISTER(&adapter->hw, E1000_TADV),
505		    em_tx_abs_int_delay_dflt);
506	}
507
508#ifndef DEVICE_POLLING
509	/* Sysctls for limiting the amount of work done in the taskqueue */
510	em_add_rx_process_limit(adapter, "rx_processing_limit",
511	    "max number of rx packets to process", &adapter->rx_process_limit,
512	    em_rx_process_limit);
513#endif
514
515	/*
516	 * Validate number of transmit and receive descriptors. It
517	 * must not exceed hardware maximum, and must be multiple
518	 * of E1000_DBA_ALIGN.
519	 */
520	if (((em_txd * sizeof(struct e1000_tx_desc)) % EM_DBA_ALIGN) != 0 ||
521	    (adapter->hw.mac.type >= e1000_82544 && em_txd > EM_MAX_TXD) ||
522	    (adapter->hw.mac.type < e1000_82544 && em_txd > EM_MAX_TXD_82543) ||
523	    (em_txd < EM_MIN_TXD)) {
524		device_printf(dev, "Using %d TX descriptors instead of %d!\n",
525		    EM_DEFAULT_TXD, em_txd);
526		adapter->num_tx_desc = EM_DEFAULT_TXD;
527	} else
528		adapter->num_tx_desc = em_txd;
529	if (((em_rxd * sizeof(struct e1000_rx_desc)) % EM_DBA_ALIGN) != 0 ||
530	    (adapter->hw.mac.type >= e1000_82544 && em_rxd > EM_MAX_RXD) ||
531	    (adapter->hw.mac.type < e1000_82544 && em_rxd > EM_MAX_RXD_82543) ||
532	    (em_rxd < EM_MIN_RXD)) {
533		device_printf(dev, "Using %d RX descriptors instead of %d!\n",
534		    EM_DEFAULT_RXD, em_rxd);
535		adapter->num_rx_desc = EM_DEFAULT_RXD;
536	} else
537		adapter->num_rx_desc = em_rxd;
538
539	adapter->hw.mac.autoneg = DO_AUTO_NEG;
540	adapter->hw.phy.wait_for_link = FALSE;
541	adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
542	adapter->rx_buffer_len = 2048;
543
544	e1000_init_script_state_82541(&adapter->hw, TRUE);
545	e1000_set_tbi_compatibility_82543(&adapter->hw, TRUE);
546
547	/* Copper options */
548	if (adapter->hw.media_type == e1000_media_type_copper) {
549		adapter->hw.phy.mdix = AUTO_ALL_MODES;
550		adapter->hw.phy.disable_polarity_correction = FALSE;
551		adapter->hw.phy.ms_type = EM_MASTER_SLAVE;
552	}
553
554	/*
555	 * Set the max frame size assuming standard ethernet
556	 * sized frames.
557	 */
558	adapter->hw.mac.max_frame_size =
559	    ETHERMTU + ETHER_HDR_LEN + ETHERNET_FCS_SIZE;
560
561	adapter->hw.mac.min_frame_size = ETH_ZLEN + ETHERNET_FCS_SIZE;
562
563	/*
564	 * This controls when hardware reports transmit completion
565	 * status.
566	 */
567	adapter->hw.mac.report_tx_early = 1;
568
569	tsize = roundup2(adapter->num_tx_desc * sizeof(struct e1000_tx_desc),
570	    EM_DBA_ALIGN);
571
572	/* Allocate Transmit Descriptor ring */
573	if (em_dma_malloc(adapter, tsize, &adapter->txdma, BUS_DMA_NOWAIT)) {
574		device_printf(dev, "Unable to allocate tx_desc memory\n");
575		error = ENOMEM;
576		goto err_tx_desc;
577	}
578	adapter->tx_desc_base =
579	    (struct e1000_tx_desc *)adapter->txdma.dma_vaddr;
580
581	rsize = roundup2(adapter->num_rx_desc * sizeof(struct e1000_rx_desc),
582	    EM_DBA_ALIGN);
583
584	/* Allocate Receive Descriptor ring */
585	if (em_dma_malloc(adapter, rsize, &adapter->rxdma, BUS_DMA_NOWAIT)) {
586		device_printf(dev, "Unable to allocate rx_desc memory\n");
587		error = ENOMEM;
588		goto err_rx_desc;
589	}
590	adapter->rx_desc_base =
591	    (struct e1000_rx_desc *)adapter->rxdma.dma_vaddr;
592
593	/* Make sure we have a good EEPROM before we read from it */
594	if (e1000_validate_nvm_checksum(&adapter->hw) < 0) {
595		/*
596		** Some PCI-E parts fail the first check due to
597		** the link being in sleep state, call it again,
598		** if it fails a second time its a real issue.
599		*/
600		if (e1000_validate_nvm_checksum(&adapter->hw) < 0) {
601			device_printf(dev,
602			    "The EEPROM Checksum Is Not Valid\n");
603			error = EIO;
604			goto err_hw_init;
605		}
606	}
607
608	if (e1000_read_part_num(&adapter->hw, &(adapter->part_num)) < 0) {
609		device_printf(dev, "EEPROM read error "
610		    "reading part number\n");
611		error = EIO;
612		goto err_hw_init;
613	}
614
615	/* Initialize the hardware */
616	if (em_hardware_init(adapter)) {
617		device_printf(dev, "Unable to initialize the hardware\n");
618		error = EIO;
619		goto err_hw_init;
620	}
621
622	/* Copy the permanent MAC address out of the EEPROM */
623	if (e1000_read_mac_addr(&adapter->hw) < 0) {
624		device_printf(dev, "EEPROM read error while reading MAC"
625		    " address\n");
626		error = EIO;
627		goto err_hw_init;
628	}
629
630	if (!em_is_valid_ether_addr(adapter->hw.mac.addr)) {
631		device_printf(dev, "Invalid MAC address\n");
632		error = EIO;
633		goto err_hw_init;
634	}
635
636	/* Setup OS specific network interface */
637	em_setup_interface(dev, adapter);
638
639	em_allocate_intr(adapter);
640
641	/* Initialize statistics */
642	em_update_stats_counters(adapter);
643
644	adapter->hw.mac.get_link_status = 1;
645	em_update_link_status(adapter);
646
647	/* Indicate SOL/IDER usage */
648	if (e1000_check_reset_block(&adapter->hw))
649		device_printf(dev,
650		    "PHY reset is blocked due to SOL/IDER session.\n");
651
652	/* Determine if we have to control management hardware */
653	adapter->has_manage = e1000_enable_mng_pass_thru(&adapter->hw);
654
655	/*
656	 * Setup Wake-on-Lan
657	 */
658	switch (adapter->hw.mac.type) {
659
660	case e1000_82542:
661	case e1000_82543:
662		break;
663	case e1000_82546:
664	case e1000_82546_rev_3:
665	case e1000_82571:
666	case e1000_80003es2lan:
667		if (adapter->hw.bus.func == 1)
668			e1000_read_nvm(&adapter->hw,
669			    NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data);
670		else
671			e1000_read_nvm(&adapter->hw,
672			    NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
673		eeprom_data &= EM_EEPROM_APME;
674		break;
675	default:
676		/* APME bit in EEPROM is mapped to WUC.APME */
677		eeprom_data = E1000_READ_REG(&adapter->hw, E1000_WUC) &
678		    E1000_WUC_APME;
679		break;
680	}
681	if (eeprom_data)
682		adapter->wol = E1000_WUFC_MAG;
683	/*
684         * We have the eeprom settings, now apply the special cases
685         * where the eeprom may be wrong or the board won't support
686         * wake on lan on a particular port
687	 */
688	device_id = pci_get_device(dev);
689        switch (device_id) {
690	case E1000_DEV_ID_82546GB_PCIE:
691		adapter->wol = 0;
692		break;
693	case E1000_DEV_ID_82546EB_FIBER:
694	case E1000_DEV_ID_82546GB_FIBER:
695	case E1000_DEV_ID_82571EB_FIBER:
696		/* Wake events only supported on port A for dual fiber
697		 * regardless of eeprom setting */
698		if (E1000_READ_REG(&adapter->hw, E1000_STATUS) &
699		    E1000_STATUS_FUNC_1)
700			adapter->wol = 0;
701		break;
702	case E1000_DEV_ID_82546GB_QUAD_COPPER_KSP3:
703	case E1000_DEV_ID_82571EB_QUAD_COPPER:
704	case E1000_DEV_ID_82571EB_QUAD_FIBER:
705	case E1000_DEV_ID_82571EB_QUAD_COPPER_LP:
706                /* if quad port adapter, disable WoL on all but port A */
707		if (global_quad_port_a != 0)
708			adapter->wol = 0;
709		/* Reset for multiple quad port adapters */
710		if (++global_quad_port_a == 4)
711			global_quad_port_a = 0;
712                break;
713	}
714
715	/* Do we need workaround for 82544 PCI-X adapter? */
716	if (adapter->hw.bus.type == e1000_bus_type_pcix &&
717	    adapter->hw.mac.type == e1000_82544)
718		adapter->pcix_82544 = TRUE;
719	else
720		adapter->pcix_82544 = FALSE;
721
722	/* Tell the stack that the interface is not active */
723	adapter->ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
724
725	INIT_DEBUGOUT("em_attach: end");
726
727	return (0);
728
729err_hw_init:
730	em_release_hw_control(adapter);
731	e1000_remove_device(&adapter->hw);
732	em_dma_free(adapter, &adapter->rxdma);
733err_rx_desc:
734	em_dma_free(adapter, &adapter->txdma);
735err_tx_desc:
736err_pci:
737	em_free_intr(adapter);
738	em_free_pci_resources(adapter);
739	EM_LOCK_DESTROY(adapter);
740
741	return (error);
742}
743
744/*********************************************************************
745 *  Device removal routine
746 *
747 *  The detach entry point is called when the driver is being removed.
748 *  This routine stops the adapter and deallocates all the resources
749 *  that were allocated for driver operation.
750 *
751 *  return 0 on success, positive on failure
752 *********************************************************************/
753
/*
 * Device detach: tear down in the reverse order of em_attach.
 * Interrupts are freed before taking the lock; the ifnet is detached
 * after dropping it.  Returns 0 (detach here cannot fail).
 */
static int
em_detach(device_t dev)
{
	struct adapter	*adapter = device_get_softc(dev);
	struct ifnet	*ifp = adapter->ifp;

	INIT_DEBUGOUT("em_detach: begin");

#ifdef DEVICE_POLLING
	if (ifp->if_capenable & IFCAP_POLLING)
		ether_poll_deregister(ifp);
#endif

	/* Quiesce and release the interrupt before touching shared state */
	em_disable_intr(adapter);
	em_free_intr(adapter);
	EM_LOCK(adapter);
	adapter->in_detach = 1;		/* blocks ioctl paths during teardown */
	em_stop(adapter);
	e1000_phy_hw_reset(&adapter->hw);

	em_release_manageability(adapter);

	/* Return control to firmware on manageability-capable MACs */
	if (((adapter->hw.mac.type == e1000_82573) ||
	    (adapter->hw.mac.type == e1000_ich8lan) ||
	    (adapter->hw.mac.type == e1000_ich9lan)) &&
	    e1000_check_mng_mode(&adapter->hw))
		em_release_hw_control(adapter);

	/* Arm wake-on-LAN if any wake filters were configured */
	if (adapter->wol) {
		E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
		E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
		em_enable_wakeup(dev);
	}

	/* ether_ifdetach may sleep, so the lock must be dropped first */
	EM_UNLOCK(adapter);
	ether_ifdetach(adapter->ifp);

	/* Wait for any in-flight callouts to finish */
	callout_drain(&adapter->timer);
	callout_drain(&adapter->tx_fifo_timer);

	em_free_pci_resources(adapter);
	bus_generic_detach(dev);
	if_free(ifp);

	e1000_remove_device(&adapter->hw);
	em_free_transmit_structures(adapter);
	em_free_receive_structures(adapter);

	/* Free Transmit Descriptor ring */
	if (adapter->tx_desc_base) {
		em_dma_free(adapter, &adapter->txdma);
		adapter->tx_desc_base = NULL;
	}

	/* Free Receive Descriptor ring */
	if (adapter->rx_desc_base) {
		em_dma_free(adapter, &adapter->rxdma);
		adapter->rx_desc_base = NULL;
	}

	EM_LOCK_DESTROY(adapter);

	return (0);
}
818
819/*********************************************************************
820 *
821 *  Shutdown entry point
822 *
823 **********************************************************************/
824
825static int
826em_shutdown(device_t dev)
827{
828	return em_suspend(dev);
829}
830
831/*
832 * Suspend/resume device methods.
833 */
834static int
835em_suspend(device_t dev)
836{
837	struct adapter *adapter = device_get_softc(dev);
838
839	EM_LOCK(adapter);
840	em_stop(adapter);
841
842        em_release_manageability(adapter);
843
844        if (((adapter->hw.mac.type == e1000_82573) ||
845            (adapter->hw.mac.type == e1000_ich8lan) ||
846            (adapter->hw.mac.type == e1000_ich9lan)) &&
847            e1000_check_mng_mode(&adapter->hw))
848                em_release_hw_control(adapter);
849
850        if (adapter->wol) {
851                E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
852                E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
853                em_enable_wakeup(dev);
854        }
855
856	EM_UNLOCK(adapter);
857
858	return bus_generic_suspend(dev);
859}
860
861static int
862em_resume(device_t dev)
863{
864	struct adapter *adapter = device_get_softc(dev);
865	struct ifnet *ifp = adapter->ifp;
866
867	EM_LOCK(adapter);
868	em_init_locked(adapter);
869	em_init_manageability(adapter);
870
871	if ((ifp->if_flags & IFF_UP) &&
872	    (ifp->if_drv_flags & IFF_DRV_RUNNING))
873		em_start_locked(ifp);
874
875	EM_UNLOCK(adapter);
876
877	return bus_generic_resume(dev);
878}
879
880
881/*********************************************************************
882 *  Transmit entry point
883 *
884 *  em_start is called by the stack to initiate a transmit.
885 *  The driver will remain in this routine as long as there are
886 *  packets to transmit and transmit resources are available.
887 *  In case resources are not available stack is notified and
888 *  the packet is requeued.
889 **********************************************************************/
890
891static void
892em_start_locked(struct ifnet *ifp)
893{
894	struct adapter	*adapter = ifp->if_softc;
895	struct mbuf	*m_head;
896
897	EM_LOCK_ASSERT(adapter);
898
899	if ((ifp->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
900	    IFF_DRV_RUNNING)
901		return;
902	if (!adapter->link_active)
903		return;
904
905	while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
906
907		IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
908		if (m_head == NULL)
909			break;
910		/*
911		 *  Encapsulation can modify our pointer, and or make it
912		 *  NULL on failure.  In that event, we can't requeue.
913		 *
914		 *  We now use a pointer to accomodate legacy and
915		 *  advanced transmit functions.
916		 */
917		if (adapter->em_xmit(adapter, &m_head)) {
918			if (m_head == NULL)
919				break;
920			ifp->if_drv_flags |= IFF_DRV_OACTIVE;
921			IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
922			break;
923		}
924
925		/* Send a copy of the frame to the BPF listener */
926		ETHER_BPF_MTAP(ifp, m_head);
927
928		/* Set timeout in case hardware has problems transmitting. */
929		adapter->watchdog_timer = EM_TX_TIMEOUT;
930	}
931}
932
933static void
934em_start(struct ifnet *ifp)
935{
936	struct adapter *adapter = ifp->if_softc;
937
938	EM_LOCK(adapter);
939	if (ifp->if_drv_flags & IFF_DRV_RUNNING)
940		em_start_locked(ifp);
941	EM_UNLOCK(adapter);
942}
943
944/*********************************************************************
945 *  Ioctl entry point
946 *
947 *  em_ioctl is called when the user wants to configure the
948 *  interface.
949 *
950 *  return 0 on success, positive on failure
951 **********************************************************************/
952
static int
em_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
{
	struct adapter	*adapter = ifp->if_softc;
	struct ifreq *ifr = (struct ifreq *)data;
	struct ifaddr *ifa = (struct ifaddr *)data;
	int error = 0;

	/* Ignore requests that race with device detach. */
	if (adapter->in_detach)
		return (error);

	switch (command) {
	case SIOCSIFADDR:
		if (ifa->ifa_addr->sa_family == AF_INET) {
			/*
			 * XXX
			 * Since resetting hardware takes a very long time
			 * and results in link renegotiation we only
			 * initialize the hardware only when it is absolutely
			 * required.
			 */
			ifp->if_flags |= IFF_UP;
			if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) {
				EM_LOCK(adapter);
				em_init_locked(adapter);
				EM_UNLOCK(adapter);
			}
			arp_ifinit(ifp, ifa);
		} else
			error = ether_ioctl(ifp, command, data);
		break;
	case SIOCSIFMTU:
	    {
		int max_frame_size;
		uint16_t eeprom_data = 0;

		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)");

		/* Determine the largest frame each MAC type supports. */
		EM_LOCK(adapter);
		switch (adapter->hw.mac.type) {
		case e1000_82573:
			/*
			 * 82573 only supports jumbo frames
			 * if ASPM is disabled.
			 */
			e1000_read_nvm(&adapter->hw,
			    NVM_INIT_3GIO_3, 1, &eeprom_data);
			if (eeprom_data & NVM_WORD1A_ASPM_MASK) {
				max_frame_size = ETHER_MAX_LEN;
				break;
			}
			/* Allow Jumbo frames - fall thru */
		case e1000_82571:
		case e1000_82572:
		case e1000_ich9lan:
		case e1000_82575:
		case e1000_80003es2lan:	/* Limit Jumbo Frame size */
			max_frame_size = 9234;
			break;
			/* Adapters that do not support jumbo frames */
		case e1000_82542:
		case e1000_ich8lan:
			max_frame_size = ETHER_MAX_LEN;
			break;
		default:
			max_frame_size = MAX_JUMBO_FRAME_SIZE;
		}
		if (ifr->ifr_mtu > max_frame_size - ETHER_HDR_LEN -
		    ETHER_CRC_LEN) {
			EM_UNLOCK(adapter);
			error = EINVAL;
			break;
		}

		/* Accept the new MTU and reinitialize the hardware. */
		ifp->if_mtu = ifr->ifr_mtu;
		adapter->hw.mac.max_frame_size =
		ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
		em_init_locked(adapter);
		EM_UNLOCK(adapter);
		break;
	    }
	case SIOCSIFFLAGS:
		IOCTL_DEBUGOUT("ioctl rcv'd:\
		    SIOCSIFFLAGS (Set Interface Flags)");
		EM_LOCK(adapter);
		if (ifp->if_flags & IFF_UP) {
			if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) {
				/*
				 * Already running: only reprogram the
				 * filters if PROMISC was toggled, to
				 * avoid a full (slow) reinit.
				 */
				if ((ifp->if_flags ^ adapter->if_flags) &
				    IFF_PROMISC) {
					em_disable_promisc(adapter);
					em_set_promisc(adapter);
				}
			} else
				em_init_locked(adapter);
		} else
			if (ifp->if_drv_flags & IFF_DRV_RUNNING)
				em_stop(adapter);
		/* Remember the flags so the next toggle can be detected. */
		adapter->if_flags = ifp->if_flags;
		EM_UNLOCK(adapter);
		break;
	case SIOCADDMULTI:
	case SIOCDELMULTI:
		IOCTL_DEBUGOUT("ioctl rcv'd: SIOC(ADD|DEL)MULTI");
		if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
			EM_LOCK(adapter);
			em_disable_intr(adapter);
			em_set_multi(adapter);
			/* 82542 rev 2 needs the receive unit reprogrammed. */
			if (adapter->hw.mac.type == e1000_82542 &&
	    		    adapter->hw.revision_id == E1000_REVISION_2) {
				em_initialize_receive_unit(adapter);
			}
#ifdef DEVICE_POLLING
			if (!(ifp->if_capenable & IFCAP_POLLING))
#endif
				em_enable_intr(adapter);
			EM_UNLOCK(adapter);
		}
		break;
	case SIOCSIFMEDIA:
		/* Check SOL/IDER usage */
		EM_LOCK(adapter);
		if (e1000_check_reset_block(&adapter->hw)) {
			EM_UNLOCK(adapter);
			device_printf(adapter->dev, "Media change is"
			    " blocked due to SOL/IDER session.\n");
			break;
		}
		EM_UNLOCK(adapter);
		/* FALLTHROUGH -- set requests share the get path below */
	case SIOCGIFMEDIA:
		IOCTL_DEBUGOUT("ioctl rcv'd: \
		    SIOCxIFMEDIA (Get/Set Interface Media)");
		error = ifmedia_ioctl(ifp, ifr, &adapter->media, command);
		break;
	case SIOCSIFCAP:
	    {
		int mask, reinit;

		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFCAP (Set Capabilities)");
		reinit = 0;
		/* mask holds only the capability bits being changed. */
		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
#ifdef DEVICE_POLLING
		if (mask & IFCAP_POLLING) {
			if (ifr->ifr_reqcap & IFCAP_POLLING) {
				error = ether_poll_register(em_poll, ifp);
				if (error)
					return (error);
				EM_LOCK(adapter);
				em_disable_intr(adapter);
				ifp->if_capenable |= IFCAP_POLLING;
				EM_UNLOCK(adapter);
			} else {
				error = ether_poll_deregister(ifp);
				/* Enable interrupt even in error case */
				EM_LOCK(adapter);
				em_enable_intr(adapter);
				ifp->if_capenable &= ~IFCAP_POLLING;
				EM_UNLOCK(adapter);
			}
		}
#endif
		if (mask & IFCAP_HWCSUM) {
			ifp->if_capenable ^= IFCAP_HWCSUM;
			reinit = 1;
		}
		if (mask & IFCAP_TSO4) {
			ifp->if_capenable ^= IFCAP_TSO4;
			reinit = 1;
		}
		if (mask & IFCAP_VLAN_HWTAGGING) {
			ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
			reinit = 1;
		}
		/* Offload changes require a hardware reinit to take effect. */
		if (reinit && (ifp->if_drv_flags & IFF_DRV_RUNNING))
			em_init(adapter);
		VLAN_CAPABILITIES(ifp);
		break;
	    }
	default:
		error = ether_ioctl(ifp, command, data);
		break;
	}

	return (error);
}
1137
1138/*********************************************************************
1139 *  Watchdog timer:
1140 *
1141 *  This routine is called from the local timer every second.
1142 *  As long as transmit descriptors are being cleaned the value
1143 *  is non-zero and we do nothing. Reaching 0 indicates a tx hang
1144 *  and we then reset the device.
1145 *
1146 **********************************************************************/
1147
static void
em_watchdog(struct adapter *adapter)
{

	EM_LOCK_ASSERT(adapter);

	/*
	** The timer is set to 5 every time start queues a packet.
	** Then txeof keeps resetting to 5 as long as it cleans at
	** least one descriptor.
	** Finally, anytime all descriptors are clean the timer is
	** set to 0.
	**
	** So: do nothing when the timer is idle (0), and do nothing
	** while decrementing it is still non-zero; only fall through
	** once it has just expired.
	*/
	if (adapter->watchdog_timer == 0 || --adapter->watchdog_timer)
		return;

	/* If we are in this routine because of pause frames, then
	 * don't reset the hardware.
	 */
	if (E1000_READ_REG(&adapter->hw, E1000_STATUS) &
	    E1000_STATUS_TXOFF) {
		adapter->watchdog_timer = EM_TX_TIMEOUT;
		return;
	}

	/*
	 * NOTE(review): the message is printed only when
	 * e1000_check_for_link() returns 0, yet the reset below happens
	 * unconditionally -- silent resets occur when the link check
	 * fails; confirm that is intended.
	 */
	if (e1000_check_for_link(&adapter->hw) == 0)
		device_printf(adapter->dev, "watchdog timeout -- resetting\n");
	adapter->ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
	adapter->watchdog_events++;

	/* Recover from the hang by reinitializing the adapter. */
	em_init_locked(adapter);
}
1180
1181/*********************************************************************
1182 *  Init entry point
1183 *
1184 *  This routine is used in two ways. It is used by the stack as
1185 *  init entry point in network interface structure. It is also used
1186 *  by the driver as a hw/sw initialization routine to get to a
1187 *  consistent state.
1188 *
1189 *  return 0 on success, positive on failure
1190 **********************************************************************/
1191
static void
em_init_locked(struct adapter *adapter)
{
	struct ifnet	*ifp = adapter->ifp;
	device_t	dev = adapter->dev;
	uint32_t	pba;

	INIT_DEBUGOUT("em_init: begin");

	EM_LOCK_ASSERT(adapter);

	/* Quiesce the hardware before reprogramming it. */
	em_stop(adapter);

	/*
	 * Packet Buffer Allocation (PBA)
	 * Writing PBA sets the receive portion of the buffer
	 * the remainder is used for the transmit buffer.
	 *
	 * Devices before the 82547 had a Packet Buffer of 64K.
	 *   Default allocation: PBA=48K for Rx, leaving 16K for Tx.
	 * After the 82547 the buffer was reduced to 40K.
	 *   Default allocation: PBA=30K for Rx, leaving 10K for Tx.
	 *   Note: default does not leave enough room for Jumbo Frame >10k.
	 */
	switch (adapter->hw.mac.type) {
	case e1000_82547:
	case e1000_82547_rev_2: /* 82547: Total Packet Buffer is 40K */
		if (adapter->hw.mac.max_frame_size > 8192)
			pba = E1000_PBA_22K; /* 22K for Rx, 18K for Tx */
		else
			pba = E1000_PBA_30K; /* 30K for Rx, 10K for Tx */
		/* 82547 also needs the tx FIFO workaround state reset. */
		adapter->tx_fifo_head = 0;
		adapter->tx_head_addr = pba << EM_TX_HEAD_ADDR_SHIFT;
		adapter->tx_fifo_size =
		    (E1000_PBA_40K - pba) << EM_PBA_BYTES_SHIFT;
		break;
	/* Total Packet Buffer on these is 48K */
	case e1000_82571:
	case e1000_82572:
	case e1000_82575:
	case e1000_80003es2lan:
			pba = E1000_PBA_32K; /* 32K for Rx, 16K for Tx */
		break;
	case e1000_82573: /* 82573: Total Packet Buffer is 32K */
			pba = E1000_PBA_12K; /* 12K for Rx, 20K for Tx */
		break;
	case e1000_ich9lan:
#define E1000_PBA_10K	0x000A
		pba = E1000_PBA_10K;
		break;
	case e1000_ich8lan:
		pba = E1000_PBA_8K;
		break;
	default:
		/* Devices before 82547 had a Packet Buffer of 64K.   */
		if (adapter->hw.mac.max_frame_size > 8192)
			pba = E1000_PBA_40K; /* 40K for Rx, 24K for Tx */
		else
			pba = E1000_PBA_48K; /* 48K for Rx, 16K for Tx */
	}

	INIT_DEBUGOUT1("em_init: pba=%dK",pba);
	E1000_WRITE_REG(&adapter->hw, E1000_PBA, pba);

	/* Get the latest mac address, User can use a LAA */
        bcopy(IF_LLADDR(adapter->ifp), adapter->hw.mac.addr,
              ETHER_ADDR_LEN);

	/* Put the address into the Receive Address Array */
	e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);

	/*
	 * With 82571 controllers, LAA may be overwritten
	 * due to controller reset from the other port.
	 */
	if (adapter->hw.mac.type == e1000_82571)
                e1000_set_laa_state_82571(&adapter->hw, TRUE);

	/* Initialize the hardware */
	if (em_hardware_init(adapter)) {
		device_printf(dev, "Unable to initialize the hardware\n");
		return;
	}
	em_update_link_status(adapter);

	if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING)
		em_enable_vlans(adapter);

	/* Set hardware offload abilities */
	ifp->if_hwassist = 0;
	if (adapter->hw.mac.type >= e1000_82543) {
		if (ifp->if_capenable & IFCAP_TXCSUM)
			ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
		if (ifp->if_capenable & IFCAP_TSO4)
			ifp->if_hwassist |= CSUM_TSO;
	}

	/* Configure for OS presence */
	em_init_manageability(adapter);

	/* Prepare transmit descriptors and buffers */
	if (em_setup_transmit_structures(adapter)) {
		device_printf(dev, "Could not setup transmit structures\n");
		em_stop(adapter);
		return;
	}
	em_initialize_transmit_unit(adapter);

	/* Setup Multicast table */
	em_set_multi(adapter);

	/* Prepare receive descriptors and buffers */
	if (em_setup_receive_structures(adapter)) {
		device_printf(dev, "Could not setup receive structures\n");
		em_stop(adapter);
		return;
	}
	em_initialize_receive_unit(adapter);

	/* Don't lose promiscuous settings */
	em_set_promisc(adapter);

	/* Mark the interface up and clear any transmit stall. */
	ifp->if_drv_flags |= IFF_DRV_RUNNING;
	ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;

	/* Start the watchdog/stats timer and reset the stat counters. */
	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
	e1000_clear_hw_cntrs_base_generic(&adapter->hw);

#ifdef DEVICE_POLLING
	/*
	 * Only enable interrupts if we are not polling, make sure
	 * they are off otherwise.
	 */
	if (ifp->if_capenable & IFCAP_POLLING)
		em_disable_intr(adapter);
	else
#endif /* DEVICE_POLLING */
		em_enable_intr(adapter);

	/* Don't reset the phy next time init gets called */
	adapter->hw.phy.reset_disable = TRUE;
}
1334
static void
em_init(void *arg)
{
	struct adapter *adapter = arg;

	/* Locked wrapper so the stack's if_init callback is serialized. */
	EM_LOCK(adapter);
	em_init_locked(adapter);
	EM_UNLOCK(adapter);
}
1344
1345
1346#ifdef DEVICE_POLLING
1347/*********************************************************************
1348 *
1349 *  Legacy polling routine
1350 *
1351 *********************************************************************/
1352static void
1353em_poll(struct ifnet *ifp, enum poll_cmd cmd, int count)
1354{
1355	struct adapter *adapter = ifp->if_softc;
1356	uint32_t reg_icr;
1357
1358	EM_LOCK(adapter);
1359	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
1360		EM_UNLOCK(adapter);
1361		return;
1362	}
1363
1364	if (cmd == POLL_AND_CHECK_STATUS) {
1365		reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1366		if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
1367			callout_stop(&adapter->timer);
1368			adapter->hw.mac.get_link_status = 1;
1369			e1000_check_for_link(&adapter->hw);
1370			em_update_link_status(adapter);
1371			callout_reset(&adapter->timer, hz,
1372			    em_local_timer, adapter);
1373		}
1374	}
1375	em_rxeof(adapter, count);
1376	em_txeof(adapter);
1377
1378	if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1379		em_start_locked(ifp);
1380	EM_UNLOCK(adapter);
1381}
1382
1383/*********************************************************************
1384 *
1385 *  Legacy Interrupt Service routine
1386 *
1387 *********************************************************************/
1388
static void
em_intr(void *arg)
{
	struct adapter	*adapter = arg;
	struct ifnet	*ifp;
	uint32_t	reg_icr;

	EM_LOCK(adapter);
	ifp = adapter->ifp;

	/* In polling mode the poll routine services the device. */
	if (ifp->if_capenable & IFCAP_POLLING) {
		EM_UNLOCK(adapter);
		return;
	}

	/* Loop until the read-to-clear ICR register drains. */
	for (;;) {
		reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);

		/*
		 * On 82571 and newer, INT_ASSERTED tells us whether the
		 * interrupt is really ours; on older parts a zero ICR
		 * means nothing is pending.
		 */
		if (adapter->hw.mac.type >= e1000_82571 &&
	    	    (reg_icr & E1000_ICR_INT_ASSERTED) == 0)
			break;
		else if (reg_icr == 0)
			break;

		/*
		 * XXX: some laptops trigger several spurious interrupts
		 * on em(4) when in the resume cycle. The ICR register
		 * reports all-ones value in this case. Processing such
		 * interrupts would lead to a freeze. I don't know why.
		 */
		if (reg_icr == 0xffffffff)
			break;

		/* Service the rings only while the interface is up. */
		if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
			em_rxeof(adapter, -1);
			em_txeof(adapter);
		}

		/* Link status change */
		if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
			callout_stop(&adapter->timer);
			adapter->hw.mac.get_link_status = 1;
			e1000_check_for_link(&adapter->hw);
			em_update_link_status(adapter);
			callout_reset(&adapter->timer, hz,
			    em_local_timer, adapter);
		}

		/* Count receive overruns for statistics. */
		if (reg_icr & E1000_ICR_RXO)
			adapter->rx_overruns++;
	}

	/* Restart transmission if the tx ring drained and frames wait. */
	if (ifp->if_drv_flags & IFF_DRV_RUNNING &&
	    !IFQ_DRV_IS_EMPTY(&ifp->if_snd))
		em_start_locked(ifp);
	EM_UNLOCK(adapter);
}
1446
1447#else /* if not DEVICE_POLLING, then fast interrupt routines only */
1448
1449static void
1450em_handle_link(void *context, int pending)
1451{
1452	struct adapter	*adapter = context;
1453	struct ifnet *ifp;
1454
1455	ifp = adapter->ifp;
1456
1457	EM_LOCK(adapter);
1458	if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) {
1459		EM_UNLOCK(adapter);
1460		return;
1461	}
1462
1463	callout_stop(&adapter->timer);
1464	adapter->hw.mac.get_link_status = 1;
1465	e1000_check_for_link(&adapter->hw);
1466	em_update_link_status(adapter);
1467	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
1468	EM_UNLOCK(adapter);
1469}
1470
static void
em_handle_rxtx(void *context, int pending)
{
	struct adapter	*adapter = context;
	struct ifnet	*ifp;

	NET_LOCK_GIANT();
	ifp = adapter->ifp;

	/*
	 * TODO:
	 * It should be possible to run the tx clean loop without the lock.
	 */
	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
		/*
		 * Receive cleanup runs without the adapter lock; if it
		 * hit the process limit (non-zero return), requeue this
		 * task to finish the remainder later.
		 */
		if (em_rxeof(adapter, adapter->rx_process_limit) != 0)
			taskqueue_enqueue(adapter->tq, &adapter->rxtx_task);
		EM_LOCK(adapter);
		em_txeof(adapter);

		if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
			em_start_locked(ifp);
		EM_UNLOCK(adapter);
	}

	/* Re-enable the interrupts masked off by em_intr_fast(). */
	em_enable_intr(adapter);
	NET_UNLOCK_GIANT();
}
1498
1499/*********************************************************************
1500 *
1501 *  Fast Interrupt Service routine
1502 *
1503 *********************************************************************/
1504static int
1505em_intr_fast(void *arg)
1506{
1507	struct adapter	*adapter = arg;
1508	struct ifnet	*ifp;
1509	uint32_t	reg_icr;
1510
1511	ifp = adapter->ifp;
1512
1513	reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1514
1515	/* Hot eject?  */
1516	if (reg_icr == 0xffffffff)
1517		return (FILTER_STRAY);
1518
1519	/* Definitely not our interrupt.  */
1520	if (reg_icr == 0x0)
1521		return (FILTER_STRAY);
1522
1523	/*
1524	 * Starting with the 82571 chip, bit 31 should be used to
1525	 * determine whether the interrupt belongs to us.
1526	 */
1527	if (adapter->hw.mac.type >= e1000_82571 &&
1528	    (reg_icr & E1000_ICR_INT_ASSERTED) == 0)
1529		return (FILTER_STRAY);
1530
1531	/*
1532	 * Mask interrupts until the taskqueue is finished running.  This is
1533	 * cheap, just assume that it is needed.  This also works around the
1534	 * MSI message reordering errata on certain systems.
1535	 */
1536	em_disable_intr(adapter);
1537	taskqueue_enqueue(adapter->tq, &adapter->rxtx_task);
1538
1539	/* Link status change */
1540	if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC))
1541		taskqueue_enqueue(taskqueue_fast, &adapter->link_task);
1542
1543	if (reg_icr & E1000_ICR_RXO)
1544		adapter->rx_overruns++;
1545	return (FILTER_HANDLED);
1546}
1547#endif /* ! DEVICE_POLLING */
1548
1549/*********************************************************************
1550 *
1551 *  Media Ioctl callback
1552 *
1553 *  This routine is called whenever the user queries the status of
1554 *  the interface using ifconfig.
1555 *
1556 **********************************************************************/
1557static void
1558em_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
1559{
1560	struct adapter *adapter = ifp->if_softc;
1561	u_char fiber_type = IFM_1000_SX;
1562
1563	INIT_DEBUGOUT("em_media_status: begin");
1564
1565	EM_LOCK(adapter);
1566	e1000_check_for_link(&adapter->hw);
1567	em_update_link_status(adapter);
1568
1569	ifmr->ifm_status = IFM_AVALID;
1570	ifmr->ifm_active = IFM_ETHER;
1571
1572	if (!adapter->link_active) {
1573		EM_UNLOCK(adapter);
1574		return;
1575	}
1576
1577	ifmr->ifm_status |= IFM_ACTIVE;
1578
1579	if ((adapter->hw.media_type == e1000_media_type_fiber) ||
1580	    (adapter->hw.media_type == e1000_media_type_internal_serdes)) {
1581		if (adapter->hw.mac.type == e1000_82545)
1582			fiber_type = IFM_1000_LX;
1583		ifmr->ifm_active |= fiber_type | IFM_FDX;
1584	} else {
1585		switch (adapter->link_speed) {
1586		case 10:
1587			ifmr->ifm_active |= IFM_10_T;
1588			break;
1589		case 100:
1590			ifmr->ifm_active |= IFM_100_TX;
1591			break;
1592		case 1000:
1593			ifmr->ifm_active |= IFM_1000_T;
1594			break;
1595		}
1596		if (adapter->link_duplex == FULL_DUPLEX)
1597			ifmr->ifm_active |= IFM_FDX;
1598		else
1599			ifmr->ifm_active |= IFM_HDX;
1600	}
1601	EM_UNLOCK(adapter);
1602}
1603
1604/*********************************************************************
1605 *
1606 *  Media Ioctl callback
1607 *
1608 *  This routine is called when the user changes speed/duplex using
1609 *  media/mediopt option with ifconfig.
1610 *
1611 **********************************************************************/
static int
em_media_change(struct ifnet *ifp)
{
	struct adapter *adapter = ifp->if_softc;
	struct ifmedia  *ifm = &adapter->media;

	INIT_DEBUGOUT("em_media_change: begin");

	if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
		return (EINVAL);

	EM_LOCK(adapter);
	switch (IFM_SUBTYPE(ifm->ifm_media)) {
	case IFM_AUTO:
		adapter->hw.mac.autoneg = DO_AUTO_NEG;
		adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
		break;
	case IFM_1000_LX:
	case IFM_1000_SX:
	case IFM_1000_T:
		/* Gigabit always autonegotiates, advertising 1000/FDX only. */
		adapter->hw.mac.autoneg = DO_AUTO_NEG;
		adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
		break;
	case IFM_100_TX:
		/* Forced 100Mb: disable autoneg, honor the duplex option. */
		adapter->hw.mac.autoneg = FALSE;
		adapter->hw.phy.autoneg_advertised = 0;
		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
			adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_FULL;
		else
			adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_HALF;
		break;
	case IFM_10_T:
		/* Forced 10Mb: disable autoneg, honor the duplex option. */
		adapter->hw.mac.autoneg = FALSE;
		adapter->hw.phy.autoneg_advertised = 0;
		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
			adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_FULL;
		else
			adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_HALF;
		break;
	default:
		/*
		 * NOTE(review): an unsupported subtype still falls through
		 * to the PHY reset and reinit below; consider returning
		 * EINVAL here instead -- confirm intended behavior.
		 */
		device_printf(adapter->dev, "Unsupported media type\n");
	}

	/* As the speed/duplex settings may have changed we need to
	 * reset the PHY.
	 */
	adapter->hw.phy.reset_disable = FALSE;

	em_init_locked(adapter);
	EM_UNLOCK(adapter);

	return (0);
}
1665
1666/*********************************************************************
1667 *
1668 *  This routine maps the mbufs to tx descriptors.
1669 *
1670 *  return 0 on success, positive on failure
1671 **********************************************************************/
1672
static int
em_encap(struct adapter *adapter, struct mbuf **m_headp)
{
	bus_dma_segment_t	segs[EM_MAX_SCATTER];
	bus_dmamap_t		map;
	struct em_buffer	*tx_buffer, *tx_buffer_mapped;
	struct e1000_tx_desc	*ctxd = NULL;
	struct mbuf		*m_head;
	uint32_t		txd_upper, txd_lower, txd_used, txd_saved;
	int			nsegs, i, j, first, last = 0;
	int			error, do_tso, tso_desc = 0;

	m_head = *m_headp;
	txd_upper = txd_lower = txd_used = txd_saved = 0;

	do_tso = ((m_head->m_pkthdr.csum_flags & CSUM_TSO) != 0);

	/*
	 * Force a cleanup if number of TX descriptors
	 * available hits the threshold
	 */
	if (adapter->num_tx_desc_avail <= EM_TX_CLEANUP_THRESHOLD) {
		em_txeof(adapter);
		/* Now do we at least have a minimal? */
		if (adapter->num_tx_desc_avail <= EM_TX_OP_THRESHOLD) {
			adapter->no_tx_desc_avail1++;
			return (ENOBUFS);
		}
	}


	/*
	 * TSO workaround:
	 *  If an mbuf is only header we need
	 *     to pull 4 bytes of data into it.
	 */
	if (do_tso && (m_head->m_len <= M_TSO_LEN)) {
		m_head = m_pullup(m_head, M_TSO_LEN + 4);
		*m_headp = m_head;
		if (m_head == NULL)
			return (ENOBUFS);
	}

	/*
	 * Map the packet for DMA
	 *
	 * Capture the first descriptor index,
	 * this descriptor will have the index
	 * of the EOP which is the only one that
	 * now gets a DONE bit writeback.
	 */
	first = adapter->next_avail_tx_desc;
	tx_buffer = &adapter->tx_buffer_area[first];
	tx_buffer_mapped = tx_buffer;
	map = tx_buffer->map;

	error = bus_dmamap_load_mbuf_sg(adapter->txtag, map,
	    *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);

	/*
	 * There are two types of errors we can (try) to handle:
	 * - EFBIG means the mbuf chain was too long and bus_dma ran
	 *   out of segments.  Defragment the mbuf chain and try again.
	 * - ENOMEM means bus_dma could not obtain enough bounce buffers
	 *   at this point in time.  Defer sending and try again later.
	 * All other errors, in particular EINVAL, are fatal and prevent the
	 * mbuf chain from ever going through.  Drop it and report error.
	 */
	if (error == EFBIG) {
		struct mbuf *m;

		m = m_defrag(*m_headp, M_DONTWAIT);
		if (m == NULL) {
			adapter->mbuf_alloc_failed++;
			m_freem(*m_headp);
			*m_headp = NULL;
			return (ENOBUFS);
		}
		*m_headp = m;

		/* Try it again */
		error = bus_dmamap_load_mbuf_sg(adapter->txtag, map,
		    *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);

		if (error == ENOMEM) {
			adapter->no_tx_dma_setup++;
			return (error);
		} else if (error != 0) {
			adapter->no_tx_dma_setup++;
			m_freem(*m_headp);
			*m_headp = NULL;
			return (error);
		}
	} else if (error == ENOMEM) {
		adapter->no_tx_dma_setup++;
		return (error);
	} else if (error != 0) {
		adapter->no_tx_dma_setup++;
		m_freem(*m_headp);
		*m_headp = NULL;
		return (error);
	}

	/*
	 * TSO Hardware workaround, if this packet is not
	 * TSO, and is only a single descriptor long, and
	 * it follows a TSO burst, then we need to add a
	 * sentinel descriptor to prevent premature writeback.
	 */
	if ((do_tso == 0) && (adapter->tx_tso == TRUE)) {
		if (nsegs == 1)
			tso_desc = TRUE;
		adapter->tx_tso = FALSE;
	}

	/* Keep two descriptors spare (EOP split / sentinel). */
        if (nsegs > (adapter->num_tx_desc_avail - 2)) {
                adapter->no_tx_desc_avail2++;
		bus_dmamap_unload(adapter->txtag, map);
		return (ENOBUFS);
        }
	m_head = *m_headp;

	/* Do hardware assists */
	if (em_tso_setup(adapter, m_head, &txd_upper, &txd_lower))
		/* we need to make a final sentinel transmit desc */
		tso_desc = TRUE;
	else if (m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD)
		em_transmit_checksum_setup(adapter,  m_head,
		    &txd_upper, &txd_lower);

	i = adapter->next_avail_tx_desc;
	/* Remember the starting index so a PCIX 82544 failure can rewind. */
	if (adapter->pcix_82544)
		txd_saved = i;

	/* Set up our transmit descriptors */
	for (j = 0; j < nsegs; j++) {
		bus_size_t seg_len;
		bus_addr_t seg_addr;
		/* If adapter is 82544 and on PCIX bus */
		if(adapter->pcix_82544) {
			DESC_ARRAY	desc_array;
			uint32_t	array_elements, counter;
			/*
			 * Check the Address and Length combination and
			 * split the data accordingly
			 */
			array_elements = em_fill_descriptors(segs[j].ds_addr,
			    segs[j].ds_len, &desc_array);
			for (counter = 0; counter < array_elements; counter++) {
				if (txd_used == adapter->num_tx_desc_avail) {
					adapter->next_avail_tx_desc = txd_saved;
					adapter->no_tx_desc_avail2++;
					bus_dmamap_unload(adapter->txtag, map);
					return (ENOBUFS);
				}
				tx_buffer = &adapter->tx_buffer_area[i];
				ctxd = &adapter->tx_desc_base[i];
				ctxd->buffer_addr = htole64(
				    desc_array.descriptor[counter].address);
				ctxd->lower.data = htole32(
				    (adapter->txd_cmd | txd_lower | (uint16_t)
				    desc_array.descriptor[counter].length));
				ctxd->upper.data =
				    htole32((txd_upper));
				last = i;
				if (++i == adapter->num_tx_desc)
                                         i = 0;
				tx_buffer->m_head = NULL;
				tx_buffer->next_eop = -1;
				txd_used++;
                        }
		} else {
			tx_buffer = &adapter->tx_buffer_area[i];
			ctxd = &adapter->tx_desc_base[i];
			seg_addr = segs[j].ds_addr;
			seg_len  = segs[j].ds_len;
			/*
			** TSO Workaround:
			** If this is the last descriptor, we want to
			** split it so we have a small final sentinel
			*/
			if (tso_desc && (j == (nsegs -1)) && (seg_len > 8)) {
				/* Main descriptor holds all but the last 4 bytes. */
				seg_len -= 4;
				ctxd->buffer_addr = htole64(seg_addr);
				ctxd->lower.data = htole32(
				adapter->txd_cmd | txd_lower | seg_len);
				ctxd->upper.data =
				    htole32(txd_upper);
				if (++i == adapter->num_tx_desc)
					i = 0;
				/* Now make the sentinel */
				++txd_used; /* using an extra txd */
				ctxd = &adapter->tx_desc_base[i];
				tx_buffer = &adapter->tx_buffer_area[i];
				ctxd->buffer_addr =
				    htole64(seg_addr + seg_len);
				ctxd->lower.data = htole32(
				adapter->txd_cmd | txd_lower | 4);
				ctxd->upper.data =
				    htole32(txd_upper);
				last = i;
				if (++i == adapter->num_tx_desc)
					i = 0;
			} else {
				ctxd->buffer_addr = htole64(seg_addr);
				ctxd->lower.data = htole32(
				adapter->txd_cmd | txd_lower | seg_len);
				ctxd->upper.data =
				    htole32(txd_upper);
				last = i;
				if (++i == adapter->num_tx_desc)
					i = 0;
			}
			tx_buffer->m_head = NULL;
			tx_buffer->next_eop = -1;
		}
	}

	/* Account for the descriptors just consumed. */
	adapter->next_avail_tx_desc = i;
	if (adapter->pcix_82544)
		adapter->num_tx_desc_avail -= txd_used;
	else {
		adapter->num_tx_desc_avail -= nsegs;
		if (tso_desc) /* TSO used an extra for sentinel */
			adapter->num_tx_desc_avail -= txd_used;
	}

	if (m_head->m_flags & M_VLANTAG) {
		/* Set the vlan id. */
		ctxd->upper.fields.special =
		    htole16(m_head->m_pkthdr.ether_vtag);
                /* Tell hardware to add tag */
                ctxd->lower.data |= htole32(E1000_TXD_CMD_VLE);
        }

	/*
	 * Record the mbuf on the last descriptor's buffer and swap
	 * maps so the loaded map travels with that buffer.
	 */
        tx_buffer->m_head = m_head;
	tx_buffer_mapped->map = tx_buffer->map;
	tx_buffer->map = map;
        bus_dmamap_sync(adapter->txtag, map, BUS_DMASYNC_PREWRITE);

        /*
         * Last Descriptor of Packet
	 * needs End Of Packet (EOP)
	 * and Report Status (RS)
         */
        ctxd->lower.data |=
	    htole32(E1000_TXD_CMD_EOP | E1000_TXD_CMD_RS);
	/*
	 * Keep track in the first buffer which
	 * descriptor will be written back
	 */
	tx_buffer = &adapter->tx_buffer_area[first];
	tx_buffer->next_eop = last;

	/*
	 * Advance the Transmit Descriptor Tail (TDT), this tells the E1000
	 * that this frame is available to transmit.
	 */
	bus_dmamap_sync(adapter->txdma.dma_tag, adapter->txdma.dma_map,
	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
	/* 82547 at half duplex needs the FIFO bug workaround path. */
	if (adapter->hw.mac.type == e1000_82547 &&
	    adapter->link_duplex == HALF_DUPLEX)
		em_82547_move_tail(adapter);
	else {
		E1000_WRITE_REG(&adapter->hw, E1000_TDT, i);
		if (adapter->hw.mac.type == e1000_82547)
			em_82547_update_fifo_head(adapter,
			    m_head->m_pkthdr.len);
	}

	return (0);
}
1945
1946/*********************************************************************
1947 *
1948 *  This routine maps the mbufs to Advanced TX descriptors.
1949 *  used by the 82575 adapter. It also needs no workarounds.
1950 *
1951 **********************************************************************/
1952
static int
em_adv_encap(struct adapter *adapter, struct mbuf **m_headp)
{
	bus_dma_segment_t	segs[EM_MAX_SCATTER];
	bus_dmamap_t		map;
	struct em_buffer	*tx_buffer, *tx_buffer_mapped;
	union e1000_adv_tx_desc	*txd = NULL;
	struct mbuf		*m_head;
	u32			olinfo_status = 0, cmd_type_len = 0;
	u32			paylen = 0;
	int			nsegs, i, j, error, first, last = 0;

	m_head = *m_headp;


	/* Set basic descriptor constants */
	cmd_type_len |= E1000_ADVTXD_DTYP_DATA;
	cmd_type_len |= E1000_ADVTXD_DCMD_IFCS | E1000_ADVTXD_DCMD_DEXT;

	/*
	 * Force a cleanup if number of TX descriptors
	 * available hits the threshold
	 */
	if (adapter->num_tx_desc_avail <= EM_TX_CLEANUP_THRESHOLD) {
		em_txeof(adapter);
		/* Now do we at least have a minimal? */
		if (adapter->num_tx_desc_avail <= EM_TX_OP_THRESHOLD) {
			adapter->no_tx_desc_avail1++;
			return (ENOBUFS);
		}
	}

	/*
	 * Map the packet for DMA.
	 *
	 * Capture the first descriptor index,
	 * this descriptor will have the index
	 * of the EOP which is the only one that
	 * now gets a DONE bit writeback.
	 */
	first = adapter->next_avail_tx_desc;
	tx_buffer = &adapter->tx_buffer_area[first];
	tx_buffer_mapped = tx_buffer;
	map = tx_buffer->map;

	error = bus_dmamap_load_mbuf_sg(adapter->txtag, map,
	    *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);

	/*
	 * EFBIG means the chain has more segments than the DMA tag
	 * allows: compact the mbuf chain with m_defrag() and retry
	 * the load exactly once.
	 */
	if (error == EFBIG) {
		struct mbuf *m;

		m = m_defrag(*m_headp, M_DONTWAIT);
		if (m == NULL) {
			adapter->mbuf_alloc_failed++;
			m_freem(*m_headp);
			*m_headp = NULL;
			return (ENOBUFS);
		}
		*m_headp = m;

		/* Try it again */
		error = bus_dmamap_load_mbuf_sg(adapter->txtag, map,
		    *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);

		/*
		 * NOTE(review): on ENOMEM the mbuf is deliberately NOT
		 * freed -- the caller retains ownership and may retry;
		 * any other failure consumes the chain.
		 */
		if (error == ENOMEM) {
			adapter->no_tx_dma_setup++;
			return (error);
		} else if (error != 0) {
			adapter->no_tx_dma_setup++;
			m_freem(*m_headp);
			*m_headp = NULL;
			return (error);
		}
	} else if (error == ENOMEM) {
		adapter->no_tx_dma_setup++;
		return (error);
	} else if (error != 0) {
		adapter->no_tx_dma_setup++;
		m_freem(*m_headp);
		*m_headp = NULL;
		return (error);
	}

	/* Check again to be sure we have enough descriptors */
	if (nsegs > (adapter->num_tx_desc_avail - 2)) {
		adapter->no_tx_desc_avail2++;
		bus_dmamap_unload(adapter->txtag, map);
		return (ENOBUFS);
	}
	m_head = *m_headp;

	/*
	 * Set up the context descriptor:
	 * used when any hardware offload is done.
	 * This includes CSUM, VLAN, and TSO. It
	 * will use the first descriptor.
	 */
	/* First try TSO */
	if (em_tso_adv_setup(adapter, m_head, &paylen)) {
		cmd_type_len |= E1000_ADVTXD_DCMD_TSE;
		olinfo_status |= E1000_TXD_POPTS_IXSM << 8;
		olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
		olinfo_status |= paylen << E1000_ADVTXD_PAYLEN_SHIFT;
	} else if (m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD) {
		if (em_tx_adv_ctx_setup(adapter, m_head))
			olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
	}

	/* Set up our transmit descriptors */
	i = adapter->next_avail_tx_desc;
	for (j = 0; j < nsegs; j++) {
		bus_size_t seg_len;
		bus_addr_t seg_addr;

		tx_buffer = &adapter->tx_buffer_area[i];
		txd = (union e1000_adv_tx_desc *)&adapter->tx_desc_base[i];
		seg_addr = segs[j].ds_addr;
		seg_len  = segs[j].ds_len;

		txd->read.buffer_addr = htole64(seg_addr);
		txd->read.cmd_type_len = htole32(
		    adapter->txd_cmd | cmd_type_len | seg_len);
		txd->read.olinfo_status = htole32(olinfo_status);
		/* Remember the final descriptor; it gets EOP/RS below. */
		last = i;
		if (++i == adapter->num_tx_desc)
			i = 0;
		tx_buffer->m_head = NULL;
		tx_buffer->next_eop = -1;
	}

	adapter->next_avail_tx_desc = i;
	adapter->num_tx_desc_avail -= nsegs;

	/*
	 * The mbuf is attached to the LAST buffer, but the dmamap was
	 * loaded on the FIRST buffer's map; swap the maps so unload at
	 * completion time uses the map that was actually loaded.
	 */
	tx_buffer->m_head = m_head;
	tx_buffer_mapped->map = tx_buffer->map;
	tx_buffer->map = map;
	bus_dmamap_sync(adapter->txtag, map, BUS_DMASYNC_PREWRITE);

	/*
	 * Last Descriptor of Packet
	 * needs End Of Packet (EOP)
	 * and Report Status (RS)
	 */
	txd->read.cmd_type_len |=
	    htole32(E1000_TXD_CMD_EOP | E1000_TXD_CMD_RS);
	/*
	 * Keep track in the first buffer which
	 * descriptor will be written back
	 */
	tx_buffer = &adapter->tx_buffer_area[first];
	tx_buffer->next_eop = last;

	/*
	 * Advance the Transmit Descriptor Tail (TDT), this tells the E1000
	 * that this frame is available to transmit.
	 */
	bus_dmamap_sync(adapter->txdma.dma_tag, adapter->txdma.dma_map,
	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
	E1000_WRITE_REG(&adapter->hw, E1000_TDT, i);

	return (0);

}
2116
2117/*********************************************************************
2118 *
2119 * 82547 workaround to avoid controller hang in half-duplex environment.
2120 * The workaround is to avoid queuing a large packet that would span
2121 * the internal Tx FIFO ring boundary. We need to reset the FIFO pointers
2122 * in this case. We do that only when FIFO is quiescent.
2123 *
2124 **********************************************************************/
static void
em_82547_move_tail(void *arg)
{
	struct adapter *adapter = arg;
	uint16_t hw_tdt;
	uint16_t sw_tdt;
	struct e1000_tx_desc *tx_desc;
	uint16_t length = 0;
	boolean_t eop = 0;

	EM_LOCK_ASSERT(adapter);

	/*
	 * Walk from the hardware tail (TDT) toward the software tail,
	 * releasing descriptors to the hardware one whole packet at a
	 * time so each packet can be screened by the FIFO workaround
	 * before the controller sees it.
	 */
	hw_tdt = E1000_READ_REG(&adapter->hw, E1000_TDT);
	sw_tdt = adapter->next_avail_tx_desc;

	while (hw_tdt != sw_tdt) {
		tx_desc = &adapter->tx_desc_base[hw_tdt];
		/* Accumulate the packet's length across its descriptors. */
		length += tx_desc->lower.flags.length;
		eop = tx_desc->lower.data & E1000_TXD_CMD_EOP;
		if (++hw_tdt == adapter->num_tx_desc)
			hw_tdt = 0;

		if (eop) {
			/*
			 * If the packet would straddle the internal FIFO
			 * wrap point and the FIFO could not be reset,
			 * retry from a 1-tick callout instead of handing
			 * the packet to the hardware now.
			 */
			if (em_82547_fifo_workaround(adapter, length)) {
				adapter->tx_fifo_wrk_cnt++;
				callout_reset(&adapter->tx_fifo_timer, 1,
					em_82547_move_tail, adapter);
				break;
			}
			E1000_WRITE_REG(&adapter->hw, E1000_TDT, hw_tdt);
			em_82547_update_fifo_head(adapter, length);
			length = 0;
		}
	}
}
2160
2161static int
2162em_82547_fifo_workaround(struct adapter *adapter, int len)
2163{
2164	int fifo_space, fifo_pkt_len;
2165
2166	fifo_pkt_len = roundup2(len + EM_FIFO_HDR, EM_FIFO_HDR);
2167
2168	if (adapter->link_duplex == HALF_DUPLEX) {
2169		fifo_space = adapter->tx_fifo_size - adapter->tx_fifo_head;
2170
2171		if (fifo_pkt_len >= (EM_82547_PKT_THRESH + fifo_space)) {
2172			if (em_82547_tx_fifo_reset(adapter))
2173				return (0);
2174			else
2175				return (1);
2176		}
2177	}
2178
2179	return (0);
2180}
2181
2182static void
2183em_82547_update_fifo_head(struct adapter *adapter, int len)
2184{
2185	int fifo_pkt_len = roundup2(len + EM_FIFO_HDR, EM_FIFO_HDR);
2186
2187	/* tx_fifo_head is always 16 byte aligned */
2188	adapter->tx_fifo_head += fifo_pkt_len;
2189	if (adapter->tx_fifo_head >= adapter->tx_fifo_size) {
2190		adapter->tx_fifo_head -= adapter->tx_fifo_size;
2191	}
2192}
2193
2194
/*
 * Reset the 82547 internal TX FIFO pointers, but only when the FIFO is
 * completely quiescent (ring head == tail, FIFO head == tail for both
 * pointer pairs, and zero packets buffered). Returns TRUE on success,
 * FALSE if the FIFO was still busy.
 */
static int
em_82547_tx_fifo_reset(struct adapter *adapter)
{
	uint32_t tctl;

	if ((E1000_READ_REG(&adapter->hw, E1000_TDT) ==
	    E1000_READ_REG(&adapter->hw, E1000_TDH)) &&
	    (E1000_READ_REG(&adapter->hw, E1000_TDFT) ==
	    E1000_READ_REG(&adapter->hw, E1000_TDFH)) &&
	    (E1000_READ_REG(&adapter->hw, E1000_TDFTS) ==
	    E1000_READ_REG(&adapter->hw, E1000_TDFHS)) &&
	    (E1000_READ_REG(&adapter->hw, E1000_TDFPC) == 0)) {
		/* Disable TX unit */
		tctl = E1000_READ_REG(&adapter->hw, E1000_TCTL);
		E1000_WRITE_REG(&adapter->hw, E1000_TCTL,
		    tctl & ~E1000_TCTL_EN);

		/* Reset FIFO pointers */
		E1000_WRITE_REG(&adapter->hw, E1000_TDFT,
		    adapter->tx_head_addr);
		E1000_WRITE_REG(&adapter->hw, E1000_TDFH,
		    adapter->tx_head_addr);
		E1000_WRITE_REG(&adapter->hw, E1000_TDFTS,
		    adapter->tx_head_addr);
		E1000_WRITE_REG(&adapter->hw, E1000_TDFHS,
		    adapter->tx_head_addr);

		/* Re-enable TX unit */
		E1000_WRITE_REG(&adapter->hw, E1000_TCTL, tctl);
		E1000_WRITE_FLUSH(&adapter->hw);

		/* Keep the software shadow in step with the hardware. */
		adapter->tx_fifo_head = 0;
		adapter->tx_fifo_reset_cnt++;

		return (TRUE);
	}
	else {
		return (FALSE);
	}
}
2235
2236static void
2237em_set_promisc(struct adapter *adapter)
2238{
2239	struct ifnet	*ifp = adapter->ifp;
2240	uint32_t	reg_rctl;
2241
2242	reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2243
2244	if (ifp->if_flags & IFF_PROMISC) {
2245		reg_rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
2246		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2247	} else if (ifp->if_flags & IFF_ALLMULTI) {
2248		reg_rctl |= E1000_RCTL_MPE;
2249		reg_rctl &= ~E1000_RCTL_UPE;
2250		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2251	}
2252}
2253
2254static void
2255em_disable_promisc(struct adapter *adapter)
2256{
2257	uint32_t	reg_rctl;
2258
2259	reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2260
2261	reg_rctl &=  (~E1000_RCTL_UPE);
2262	reg_rctl &=  (~E1000_RCTL_MPE);
2263	E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2264}
2265
2266
2267/*********************************************************************
2268 *  Multicast Update
2269 *
2270 *  This routine is called whenever multicast address list is updated.
2271 *
2272 **********************************************************************/
2273
static void
em_set_multi(struct adapter *adapter)
{
	struct ifnet	*ifp = adapter->ifp;
	struct ifmultiaddr *ifma;
	uint32_t reg_rctl = 0;
	uint8_t  mta[512]; /* Largest MTS is 4096 bits */
	int mcnt = 0;

	IOCTL_DEBUGOUT("em_set_multi: begin");

	/*
	 * On 82542 rev 2 the receiver is held in reset (and MWI is
	 * disabled) while the multicast table is rewritten; undone at
	 * the bottom of this function.
	 */
	if (adapter->hw.mac.type == e1000_82542 &&
	    adapter->hw.revision_id == E1000_REVISION_2) {
		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
		if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
			e1000_pci_clear_mwi(&adapter->hw);
		reg_rctl |= E1000_RCTL_RST;
		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
		msec_delay(5);
	}

	/* Collect up to MAX_NUM_MULTICAST_ADDRESSES link-layer addresses. */
	IF_ADDR_LOCK(ifp);
	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
		if (ifma->ifma_addr->sa_family != AF_LINK)
			continue;

		if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
			break;

		bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
		    &mta[mcnt * ETH_ADDR_LEN], ETH_ADDR_LEN);
		mcnt++;
	}
	IF_ADDR_UNLOCK(ifp);

	/*
	 * More groups than the hardware filter can hold: fall back to
	 * multicast-promiscuous mode instead of programming the table.
	 */
	if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) {
		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
		reg_rctl |= E1000_RCTL_MPE;
		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
	} else
		e1000_mc_addr_list_update(&adapter->hw, mta,
		    mcnt, 1, adapter->hw.mac.rar_entry_count);

	/* Take the 82542 rev 2 receiver back out of reset. */
	if (adapter->hw.mac.type == e1000_82542 &&
	    adapter->hw.revision_id == E1000_REVISION_2) {
		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
		reg_rctl &= ~E1000_RCTL_RST;
		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
		msec_delay(5);
		if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
			e1000_pci_set_mwi(&adapter->hw);
	}
}
2327
2328
2329/*********************************************************************
2330 *  Timer routine
2331 *
2332 *  This routine checks for link status and updates statistics.
2333 *
2334 **********************************************************************/
2335
static void
em_local_timer(void *arg)
{
	struct adapter	*adapter = arg;
	struct ifnet	*ifp = adapter->ifp;

	EM_LOCK_ASSERT(adapter);

	/* Refresh link state and the statistics counters. */
	e1000_check_for_link(&adapter->hw);
	em_update_link_status(adapter);
	em_update_stats_counters(adapter);

	/* Check for 82571 LAA reset by other port */
	if (e1000_get_laa_state_82571(&adapter->hw) == TRUE)
		e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);

	if (em_display_debug_stats && ifp->if_drv_flags & IFF_DRV_RUNNING)
		em_print_hw_stats(adapter);

	/* Run the SmartSpeed link-negotiation workaround. */
	em_smartspeed(adapter);

	/*
	 * Each second we check the watchdog to
	 * protect against hardware hangs.
	 */
	em_watchdog(adapter);

	/* Reschedule ourselves for one second from now. */
	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
}
2365
/*
 * Translate the hardware link state into ifnet state, announcing
 * up/down transitions to the stack exactly once per change.
 */
static void
em_update_link_status(struct adapter *adapter)
{
	struct ifnet *ifp = adapter->ifp;
	device_t dev = adapter->dev;

	if (E1000_READ_REG(&adapter->hw, E1000_STATUS) &
	    E1000_STATUS_LU) {
		/* Link is up; act only on the down->up transition. */
		if (adapter->link_active == 0) {
			e1000_get_speed_and_duplex(&adapter->hw,
			    &adapter->link_speed, &adapter->link_duplex);
			/* Check if we must disable SPEED_MODE bit on PCI-E */
			if ((adapter->link_speed != SPEED_1000) &&
			    ((adapter->hw.mac.type == e1000_82571) ||
			    (adapter->hw.mac.type == e1000_82572))) {
				int tarc0;

				tarc0 = E1000_READ_REG(&adapter->hw,
				    E1000_TARC0);
				tarc0 &= ~SPEED_MODE_BIT;
				E1000_WRITE_REG(&adapter->hw,
				    E1000_TARC0, tarc0);
			}
			if (bootverbose)
				device_printf(dev, "Link is up %d Mbps %s\n",
				    adapter->link_speed,
				    ((adapter->link_duplex == FULL_DUPLEX) ?
				    "Full Duplex" : "Half Duplex"));
			adapter->link_active = 1;
			adapter->smartspeed = 0;
			/* link_speed is in Mbps; if_baudrate is bits/sec. */
			ifp->if_baudrate = adapter->link_speed * 1000000;
			if_link_state_change(ifp, LINK_STATE_UP);
		}
	} else {
		/* Link is down; act only on the up->down transition. */
		if (adapter->link_active == 1) {
			ifp->if_baudrate = adapter->link_speed = 0;
			adapter->link_duplex = 0;
			if (bootverbose)
				device_printf(dev, "Link is Down\n");
			adapter->link_active = 0;
			if_link_state_change(ifp, LINK_STATE_DOWN);
		}
	}
}
2410
2411/*********************************************************************
2412 *
2413 *  This routine disables all traffic on the adapter by issuing a
2414 *  global reset on the MAC and deallocates TX/RX buffers.
2415 *
2416 **********************************************************************/
2417
static void
em_stop(void *arg)
{
	struct adapter	*adapter = arg;
	struct ifnet	*ifp = adapter->ifp;

	EM_LOCK_ASSERT(adapter);

	INIT_DEBUGOUT("em_stop: begin");

	/*
	 * Quiesce in order: mask interrupts, stop the periodic timers,
	 * then release the TX/RX ring buffers.
	 */
	em_disable_intr(adapter);
	callout_stop(&adapter->timer);
	callout_stop(&adapter->tx_fifo_timer);
	em_free_transmit_structures(adapter);
	em_free_receive_structures(adapter);

	/* Tell the stack that the interface is no longer active */
	ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);

	/* Global reset; also clear wake-up control on 82544 and later. */
	e1000_reset_hw(&adapter->hw);
	if (adapter->hw.mac.type >= e1000_82544)
		E1000_WRITE_REG(&adapter->hw, E1000_WUC, 0);
}
2441
2442
2443/*********************************************************************
2444 *
2445 *  Determine hardware revision.
2446 *
2447 **********************************************************************/
static void
em_identify_hardware(struct adapter *adapter)
{
	device_t dev = adapter->dev;

	/* Make sure our PCI config space has the necessary stuff set */
	adapter->hw.bus.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2);
	if ((adapter->hw.bus.pci_cmd_word & PCIM_CMD_BUSMASTEREN) == 0 &&
	    (adapter->hw.bus.pci_cmd_word & PCIM_CMD_MEMEN)) {
		device_printf(dev, "Memory Access and/or Bus Master bits "
		    "were not set!\n");
		/* Re-enable bus mastering and memory access ourselves. */
		adapter->hw.bus.pci_cmd_word |=
		(PCIM_CMD_BUSMASTEREN | PCIM_CMD_MEMEN);
		pci_write_config(dev, PCIR_COMMAND,
		    adapter->hw.bus.pci_cmd_word, 2);
	}

	/* Save off the information about this board */
	adapter->hw.vendor_id = pci_get_vendor(dev);
	adapter->hw.device_id = pci_get_device(dev);
	adapter->hw.revision_id = pci_read_config(dev, PCIR_REVID, 1);
	adapter->hw.subsystem_vendor_id =
	    pci_read_config(dev, PCIR_SUBVEND_0, 2);
	adapter->hw.subsystem_device_id = pci_read_config(dev, PCIR_SUBDEV_0, 2);

	/* Do Shared Code Init and Setup */
	if (e1000_set_mac_type(&adapter->hw)) {
		device_printf(dev, "Setup init failure\n");
		return;
	}
}
2479
/*
 * Allocate the memory BAR, an optional IO BAR (older parts), and the
 * interrupt resource (MSI-X, MSI, or legacy INTx depending on the MAC
 * type). Returns 0 on success or ENXIO. Resources acquired before a
 * failure are released later by em_free_pci_resources().
 */
static int
em_allocate_pci_resources(struct adapter *adapter)
{
	device_t	dev = adapter->dev;
	int		val, rid;

	/* Register (memory) BAR is always BAR 0. */
	rid = PCIR_BAR(0);
	adapter->res_memory = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
	    &rid, RF_ACTIVE);
	if (adapter->res_memory == NULL) {
		device_printf(dev, "Unable to allocate bus resource: memory\n");
		return (ENXIO);
	}
	adapter->osdep.mem_bus_space_tag =
	    rman_get_bustag(adapter->res_memory);
	adapter->osdep.mem_bus_space_handle =
	    rman_get_bushandle(adapter->res_memory);
	adapter->hw.hw_addr = (uint8_t *)&adapter->osdep.mem_bus_space_handle;

	/* Only older adapters use IO mapping */
	if ((adapter->hw.mac.type > e1000_82542) &&
	    (adapter->hw.mac.type < e1000_82571)) {
		/*
		 * The IO BAR's position varies by board: scan the BAR
		 * registers, skipping the extra dword of 64-bit memory
		 * BARs, until one decodes as an IO range.
		 */
		for (rid = PCIR_BAR(0); rid < PCIR_CIS;) {
			val = pci_read_config(dev, rid, 4);
			if (EM_BAR_TYPE(val) == EM_BAR_TYPE_IO) {
				adapter->io_rid = rid;
				break;
			}
			rid += 4;
			/* check for 64bit BAR */
			if (EM_BAR_MEM_TYPE(val) == EM_BAR_MEM_TYPE_64BIT)
				rid += 4;
		}
		if (rid >= PCIR_CIS) {
			device_printf(dev, "Unable to locate IO BAR\n");
			return (ENXIO);
		}
		adapter->res_ioport = bus_alloc_resource_any(dev,
		    SYS_RES_IOPORT, &adapter->io_rid, RF_ACTIVE);
		if (adapter->res_ioport == NULL) {
			device_printf(dev, "Unable to allocate bus resource: "
			    "ioport\n");
			return (ENXIO);
		}
		adapter->hw.io_base = 0;
		adapter->osdep.io_bus_space_tag =
		    rman_get_bustag(adapter->res_ioport);
		adapter->osdep.io_bus_space_handle =
		    rman_get_bushandle(adapter->res_ioport);
	}

	/*
	 * Setup MSI/X or MSI if PCI Express
	 * only the latest can use MSI/X and
	 * real support for it is forthcoming
	 */
	adapter->msi = 0; /* Set defaults */
	rid = 0x0;
	if (adapter->hw.mac.type >= e1000_82575) {
		/*
		 * 82575 and later: map the MSI-X table BAR and request a
		 * single vector (multi-queue support is not here yet).
		 */
		rid = PCIR_BAR(EM_MSIX_BAR);
		adapter->msix_mem = bus_alloc_resource_any(dev,
		    SYS_RES_MEMORY, &rid, RF_ACTIVE);
		if (!adapter->msix_mem) {
			device_printf(dev,"Unable to map MSIX table \n");
			return (ENXIO);
		}
		/*
		 * Eventually this may be used
		 * for Multiqueue, for now we will
		 * just use one vector.
		 *
		 * val = pci_msix_count(dev);
		 */
		val = 1;
		if ((val) && pci_alloc_msix(dev, &val) == 0) {
			rid = 1;
			adapter->msi = 1;
		}
	} else if (adapter->hw.mac.type > e1000_82571) {
		/* PCI Express parts newer than 82571 may use plain MSI. */
		val = pci_msi_count(dev);
		if (val == 1 && pci_alloc_msi(dev, &val) == 0) {
			rid = 1;
			adapter->msi = 1;
		}
	}
	/* rid 0 = legacy INTx; rid 1 = the MSI/MSI-X vector allocated above. */
	adapter->res_interrupt = bus_alloc_resource_any(dev,
	    SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
	if (adapter->res_interrupt == NULL) {
		device_printf(dev, "Unable to allocate bus resource: "
		    "interrupt\n");
		return (ENXIO);
	}

	/* Back-pointer used by the shared code's register accessors. */
	adapter->hw.back = &adapter->osdep;

	return (0);
}
2581
2582/*********************************************************************
2583 *
2584 *  Setup the appropriate Interrupt handlers.
2585 *
2586 **********************************************************************/
int
em_allocate_intr(struct adapter *adapter)
{
	device_t dev = adapter->dev;
	int error;

	/* Manually turn off all interrupts */
	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);

#ifdef DEVICE_POLLING
	/*
	 * With DEVICE_POLLING a plain (non-fast) legacy handler is
	 * installed; polling takes over when enabled on the interface.
	 */
	if (adapter->int_handler_tag == NULL &&
	    (error = bus_setup_intr(dev, adapter->res_interrupt,
	    INTR_TYPE_NET | INTR_MPSAFE, NULL, em_intr, adapter,
	    &adapter->int_handler_tag)) != 0) {
		device_printf(dev, "Failed to register interrupt handler");
		return (error);
	}

#else
	/*
	 * Try allocating a fast interrupt and the associated deferred
	 * processing contexts.
	 */
	TASK_INIT(&adapter->rxtx_task, 0, em_handle_rxtx, adapter);
	TASK_INIT(&adapter->link_task, 0, em_handle_link, adapter);
	adapter->tq = taskqueue_create_fast("em_taskq", M_NOWAIT,
	    taskqueue_thread_enqueue, &adapter->tq);
	taskqueue_start_threads(&adapter->tq, 1, PI_NET, "%s taskq",
	    device_get_nameunit(adapter->dev));
	if ((error = bus_setup_intr(dev, adapter->res_interrupt,
	    INTR_TYPE_NET, em_intr_fast, NULL, adapter,
	    &adapter->int_handler_tag)) != 0) {
		device_printf(dev, "Failed to register fast interrupt "
			    "handler: %d\n", error);
		/* Undo the taskqueue created above before bailing out. */
		taskqueue_free(adapter->tq);
		adapter->tq = NULL;
		return (error);
	}
#endif

	em_enable_intr(adapter);
	return (0);
}
2631
/*
 * Tear down the interrupt handler and the deferred-processing task
 * queue set up by em_allocate_intr(). Safe to call when neither was
 * established (both branches are guarded).
 */
static void
em_free_intr(struct adapter *adapter)
{
	device_t dev = adapter->dev;

	if (adapter->res_interrupt != NULL) {
		bus_teardown_intr(dev, adapter->res_interrupt,
			adapter->int_handler_tag);
		adapter->int_handler_tag = NULL;
	}
	if (adapter->tq != NULL) {
		/*
		 * NOTE(review): rxtx_task is drained on the private queue
		 * but link_task on taskqueue_fast -- presumably link_task
		 * is enqueued there by the fast handler; confirm against
		 * em_intr_fast before changing.
		 */
		taskqueue_drain(adapter->tq, &adapter->rxtx_task);
		taskqueue_drain(taskqueue_fast, &adapter->link_task);
		taskqueue_free(adapter->tq);
		adapter->tq = NULL;
	}
}
2649
/*
 * Release every PCI resource em_allocate_pci_resources() may have
 * acquired. Each release is guarded, so this is safe to call from any
 * partially-completed attach path.
 */
static void
em_free_pci_resources(struct adapter *adapter)
{
	device_t dev = adapter->dev;

	/* rid 1 when MSI/MSI-X was allocated, 0 for legacy INTx. */
	if (adapter->res_interrupt != NULL)
		bus_release_resource(dev, SYS_RES_IRQ,
		    adapter->msi ? 1 : 0, adapter->res_interrupt);

	if (adapter->msix_mem != NULL)
		bus_release_resource(dev, SYS_RES_MEMORY,
		    PCIR_BAR(EM_MSIX_BAR), adapter->msix_mem);

	/* Release the MSI vectors after the IRQ resource that used them. */
	if (adapter->msi)
		pci_release_msi(dev);

	if (adapter->res_memory != NULL)
		bus_release_resource(dev, SYS_RES_MEMORY,
		    PCIR_BAR(0), adapter->res_memory);

	if (adapter->flash_mem != NULL)
		bus_release_resource(dev, SYS_RES_MEMORY,
		    EM_FLASH, adapter->flash_mem);

	if (adapter->res_ioport != NULL)
		bus_release_resource(dev, SYS_RES_IOPORT,
		    adapter->io_rid, adapter->res_ioport);
}
2678
2679/*********************************************************************
2680 *
2681 *  Initialize the hardware to a configuration
2682 *  as specified by the adapter structure.
2683 *
2684 **********************************************************************/
static int
em_hardware_init(struct adapter *adapter)
{
	device_t dev = adapter->dev;
	uint16_t rx_buffer_size;

	INIT_DEBUGOUT("em_hardware_init: begin");

	/* Issue a global reset */
	e1000_reset_hw(&adapter->hw);

	/* Get control from any management/hw control */
	if (((adapter->hw.mac.type == e1000_82573) ||
	    (adapter->hw.mac.type == e1000_ich8lan) ||
	    (adapter->hw.mac.type == e1000_ich9lan)) &&
	    e1000_check_mng_mode(&adapter->hw))
		em_get_hw_control(adapter);

	/* When hardware is reset, fifo_head is also reset */
	adapter->tx_fifo_head = 0;

	/* Set up smart power down as default off on newer adapters. */
	if (!em_smart_pwr_down && (adapter->hw.mac.type == e1000_82571 ||
	    adapter->hw.mac.type == e1000_82572)) {
		uint16_t phy_tmp = 0;

		/* Speed up time to link by disabling smart power down. */
		e1000_read_phy_reg(&adapter->hw,
		    IGP02E1000_PHY_POWER_MGMT, &phy_tmp);
		phy_tmp &= ~IGP02E1000_PM_SPD;
		e1000_write_phy_reg(&adapter->hw,
		    IGP02E1000_PHY_POWER_MGMT, phy_tmp);
	}

	/*
	 * These parameters control the automatic generation (Tx) and
	 * response (Rx) to Ethernet PAUSE frames.
	 * - High water mark should allow for at least two frames to be
	 *   received after sending an XOFF.
	 * - Low water mark works best when it is very near the high water mark.
	 *   This allows the receiver to restart by sending XON when it has
	 *   drained a bit. Here we use an arbitary value of 1500 which will
	 *   restart after one full frame is pulled from the buffer. There
	 *   could be several smaller frames in the buffer and if so they will
	 *   not trigger the XON until their total number reduces the buffer
	 *   by 1500.
	 * - The pause time is fairly large at 1000 x 512ns = 512 usec.
	 */
	/* PBA low word is in KB units; shift by 10 converts to bytes. */
	rx_buffer_size = ((E1000_READ_REG(&adapter->hw, E1000_PBA) &
	    0xffff) << 10 );

	adapter->hw.mac.fc_high_water = rx_buffer_size -
	    roundup2(adapter->hw.mac.max_frame_size, 1024);
	adapter->hw.mac.fc_low_water = adapter->hw.mac.fc_high_water - 1500;
	if (adapter->hw.mac.type == e1000_80003es2lan)
		adapter->hw.mac.fc_pause_time = 0xFFFF;
	else
		adapter->hw.mac.fc_pause_time = EM_FC_PAUSE_TIME;
	adapter->hw.mac.fc_send_xon = TRUE;
	adapter->hw.mac.fc = e1000_fc_full;

	if (e1000_init_hw(&adapter->hw) < 0) {
		device_printf(dev, "Hardware Initialization Failed\n");
		return (EIO);
	}

	e1000_check_for_link(&adapter->hw);

	return (0);
}
2755
2756/*********************************************************************
2757 *
2758 *  Setup networking device structure and register an interface.
2759 *
2760 **********************************************************************/
static void
em_setup_interface(device_t dev, struct adapter *adapter)
{
	struct ifnet   *ifp;

	INIT_DEBUGOUT("em_setup_interface: begin");

	ifp = adapter->ifp = if_alloc(IFT_ETHER);
	if (ifp == NULL)
		panic("%s: can not if_alloc()", device_get_nameunit(dev));
	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
	ifp->if_mtu = ETHERMTU;
	ifp->if_init =  em_init;
	ifp->if_softc = adapter;
	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
	ifp->if_ioctl = em_ioctl;
	ifp->if_start = em_start;
	/* Size the send queue to the TX ring (one descriptor reserved). */
	IFQ_SET_MAXLEN(&ifp->if_snd, adapter->num_tx_desc - 1);
	ifp->if_snd.ifq_drv_maxlen = adapter->num_tx_desc - 1;
	IFQ_SET_READY(&ifp->if_snd);

	ether_ifattach(ifp, adapter->hw.mac.addr);

	/* Build the capability set according to what the MAC supports. */
	ifp->if_capabilities = ifp->if_capenable = 0;

	if (adapter->hw.mac.type >= e1000_82543) {
		ifp->if_capabilities |= IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM;
		ifp->if_capenable |= IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM;
	}

	/* Identify TSO capable adapters */
	if ((adapter->hw.mac.type > e1000_82544) &&
	    (adapter->hw.mac.type != e1000_82547))
		ifp->if_capabilities |= IFCAP_TSO4;
	/*
	 * By default only enable on PCI-E, this
	 * can be overriden by ifconfig.
	 */
	if (adapter->hw.mac.type >= e1000_82571)
		ifp->if_capenable |= IFCAP_TSO4;

	/*
	 * Tell the upper layer(s) we support long frames.
	 */
	ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);
	ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU;
	ifp->if_capenable |= IFCAP_VLAN_MTU;

#ifdef DEVICE_POLLING
	ifp->if_capabilities |= IFCAP_POLLING;
#endif

	/*
	 * Specify the media types supported by this adapter and register
	 * callbacks to update media and link information
	 */
	ifmedia_init(&adapter->media, IFM_IMASK,
	    em_media_change, em_media_status);
	if ((adapter->hw.media_type == e1000_media_type_fiber) ||
	    (adapter->hw.media_type == e1000_media_type_internal_serdes)) {
		u_char fiber_type = IFM_1000_SX;	/* default type */

		/* 82545 fiber boards use LX optics. */
		if (adapter->hw.mac.type == e1000_82545)
			fiber_type = IFM_1000_LX;
		ifmedia_add(&adapter->media, IFM_ETHER | fiber_type | IFM_FDX,
			    0, NULL);
		ifmedia_add(&adapter->media, IFM_ETHER | fiber_type, 0, NULL);
	} else {
		/* Copper: advertise 10/100 always, gigabit unless IFE PHY. */
		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T, 0, NULL);
		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T | IFM_FDX,
			    0, NULL);
		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX,
			    0, NULL);
		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
			    0, NULL);
		if (adapter->hw.phy.type != e1000_phy_ife) {
			ifmedia_add(&adapter->media,
				IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
			ifmedia_add(&adapter->media,
				IFM_ETHER | IFM_1000_T, 0, NULL);
		}
	}
	ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
	ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
}
2846
2847
2848/*********************************************************************
2849 *
2850 *  Workaround for SmartSpeed on 82541 and 82547 controllers
2851 *
2852 **********************************************************************/
static void
em_smartspeed(struct adapter *adapter)
{
	uint16_t phy_tmp;

	/*
	 * Only relevant while the link is down, on IGP PHYs, with
	 * autonegotiation advertising 1000FDX.
	 */
	if (adapter->link_active || (adapter->hw.phy.type != e1000_phy_igp) ||
	    adapter->hw.mac.autoneg == 0 ||
	    (adapter->hw.phy.autoneg_advertised & ADVERTISE_1000_FULL) == 0)
		return;

	if (adapter->smartspeed == 0) {
		/* If Master/Slave config fault is asserted twice,
		 * we assume back-to-back */
		e1000_read_phy_reg(&adapter->hw, PHY_1000T_STATUS, &phy_tmp);
		if (!(phy_tmp & SR_1000T_MS_CONFIG_FAULT))
			return;
		e1000_read_phy_reg(&adapter->hw, PHY_1000T_STATUS, &phy_tmp);
		if (phy_tmp & SR_1000T_MS_CONFIG_FAULT) {
			e1000_read_phy_reg(&adapter->hw, PHY_1000T_CTRL, &phy_tmp);
			if(phy_tmp & CR_1000T_MS_ENABLE) {
				/*
				 * Drop manual master/slave selection and
				 * restart autonegotiation to recover.
				 */
				phy_tmp &= ~CR_1000T_MS_ENABLE;
				e1000_write_phy_reg(&adapter->hw, PHY_1000T_CTRL,
				    phy_tmp);
				adapter->smartspeed++;
				if(adapter->hw.mac.autoneg &&
				   !e1000_phy_setup_autoneg(&adapter->hw) &&
				   !e1000_read_phy_reg(&adapter->hw, PHY_CONTROL,
				    &phy_tmp)) {
					phy_tmp |= (MII_CR_AUTO_NEG_EN |
						    MII_CR_RESTART_AUTO_NEG);
					e1000_write_phy_reg(&adapter->hw, PHY_CONTROL,
					    phy_tmp);
				}
			}
		}
		return;
	} else if(adapter->smartspeed == EM_SMARTSPEED_DOWNSHIFT) {
		/* If still no link, perhaps using 2/3 pair cable */
		e1000_read_phy_reg(&adapter->hw, PHY_1000T_CTRL, &phy_tmp);
		phy_tmp |= CR_1000T_MS_ENABLE;
		e1000_write_phy_reg(&adapter->hw, PHY_1000T_CTRL, phy_tmp);
		if(adapter->hw.mac.autoneg &&
		   !e1000_phy_setup_autoneg(&adapter->hw) &&
		   !e1000_read_phy_reg(&adapter->hw, PHY_CONTROL, &phy_tmp)) {
			phy_tmp |= (MII_CR_AUTO_NEG_EN |
				    MII_CR_RESTART_AUTO_NEG);
			e1000_write_phy_reg(&adapter->hw, PHY_CONTROL, phy_tmp);
		}
	}
	/* Restart process after EM_SMARTSPEED_MAX iterations */
	if(adapter->smartspeed++ == EM_SMARTSPEED_MAX)
		adapter->smartspeed = 0;
}
2906
2907
2908/*
2909 * Manage DMA'able memory.
2910 */
2911static void
2912em_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
2913{
2914	if (error)
2915		return;
2916	*(bus_addr_t *) arg = segs[0].ds_addr;
2917}
2918
2919static int
2920em_dma_malloc(struct adapter *adapter, bus_size_t size,
2921        struct em_dma_alloc *dma, int mapflags)
2922{
2923	int error;
2924
2925	error = bus_dma_tag_create(bus_get_dma_tag(adapter->dev), /* parent */
2926				EM_DBA_ALIGN, 0,	/* alignment, bounds */
2927				BUS_SPACE_MAXADDR,	/* lowaddr */
2928				BUS_SPACE_MAXADDR,	/* highaddr */
2929				NULL, NULL,		/* filter, filterarg */
2930				size,			/* maxsize */
2931				1,			/* nsegments */
2932				size,			/* maxsegsize */
2933				0,			/* flags */
2934				NULL,			/* lockfunc */
2935				NULL,			/* lockarg */
2936				&dma->dma_tag);
2937	if (error) {
2938		device_printf(adapter->dev,
2939		    "%s: bus_dma_tag_create failed: %d\n",
2940		    __func__, error);
2941		goto fail_0;
2942	}
2943
2944	error = bus_dmamem_alloc(dma->dma_tag, (void**) &dma->dma_vaddr,
2945	    BUS_DMA_NOWAIT, &dma->dma_map);
2946	if (error) {
2947		device_printf(adapter->dev,
2948		    "%s: bus_dmamem_alloc(%ju) failed: %d\n",
2949		    __func__, (uintmax_t)size, error);
2950		goto fail_2;
2951	}
2952
2953	dma->dma_paddr = 0;
2954	error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
2955	    size, em_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT);
2956	if (error || dma->dma_paddr == 0) {
2957		device_printf(adapter->dev,
2958		    "%s: bus_dmamap_load failed: %d\n",
2959		    __func__, error);
2960		goto fail_3;
2961	}
2962
2963	return (0);
2964
2965fail_3:
2966	bus_dmamap_unload(dma->dma_tag, dma->dma_map);
2967fail_2:
2968	bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
2969	bus_dma_tag_destroy(dma->dma_tag);
2970fail_0:
2971	dma->dma_map = NULL;
2972	dma->dma_tag = NULL;
2973
2974	return (error);
2975}
2976
/*
 * em_dma_free - release a DMA area set up by em_dma_malloc().
 * Syncs and unloads the map, frees the memory, then destroys the tag;
 * the struct fields are reset so a repeated call is a harmless no-op.
 */
static void
em_dma_free(struct adapter *adapter, struct em_dma_alloc *dma)
{
	/* Nothing was ever allocated (or it was already freed). */
	if (dma->dma_tag == NULL)
		return;
	if (dma->dma_map != NULL) {
		/* Make sure any pending DMA has completed before freeing. */
		bus_dmamap_sync(dma->dma_tag, dma->dma_map,
		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
		bus_dmamap_unload(dma->dma_tag, dma->dma_map);
		bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
		dma->dma_map = NULL;
	}
	bus_dma_tag_destroy(dma->dma_tag);
	dma->dma_tag = NULL;
}
2992
2993
2994/*********************************************************************
2995 *
2996 *  Allocate memory for tx_buffer structures. The tx_buffer stores all
2997 *  the information needed to transmit a packet on the wire.
2998 *
2999 **********************************************************************/
3000static int
3001em_allocate_transmit_structures(struct adapter *adapter)
3002{
3003	device_t dev = adapter->dev;
3004
3005	adapter->tx_buffer_area = malloc(sizeof(struct em_buffer) *
3006	    adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO);
3007	if (adapter->tx_buffer_area == NULL) {
3008		device_printf(dev, "Unable to allocate tx_buffer memory\n");
3009		return (ENOMEM);
3010	}
3011
3012	bzero(adapter->tx_buffer_area,
3013	    (sizeof(struct em_buffer)) * adapter->num_tx_desc);
3014
3015	return (0);
3016}
3017
3018/*********************************************************************
3019 *
3020 *  Initialize transmit structures.
3021 *
3022 **********************************************************************/
static int
em_setup_transmit_structures(struct adapter *adapter)
{
	device_t dev = adapter->dev;
	struct em_buffer *tx_buffer;
	int error, i;

	/*
	 * Create DMA tags for tx descriptors
	 */
	if ((error = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */
				1, 0,			/* alignment, bounds */
				BUS_SPACE_MAXADDR,	/* lowaddr */
				BUS_SPACE_MAXADDR,	/* highaddr */
				NULL, NULL,		/* filter, filterarg */
				EM_TSO_SIZE,		/* maxsize */
				EM_MAX_SCATTER,		/* nsegments */
				EM_TSO_SEG_SIZE,	/* maxsegsize */
				0,			/* flags */
				NULL,		/* lockfunc */
				NULL,		/* lockarg */
				&adapter->txtag)) != 0) {
		device_printf(dev, "Unable to allocate TX DMA tag\n");
		goto fail;
	}

	/* Allocate the per-descriptor em_buffer array (zero filled). */
	if ((error = em_allocate_transmit_structures(adapter)) != 0)
		goto fail;

	/* Clear the old ring contents */
	bzero(adapter->tx_desc_base,
	    (sizeof(struct e1000_tx_desc)) * adapter->num_tx_desc);

	/* Create the descriptor buffer dma maps */
	tx_buffer = adapter->tx_buffer_area;
	for (i = 0; i < adapter->num_tx_desc; i++) {
		error = bus_dmamap_create(adapter->txtag, 0, &tx_buffer->map);
		if (error != 0) {
			device_printf(dev, "Unable to create TX DMA map\n");
			goto fail;
		}
		/* -1 means "no packet ends in this slot" for em_txeof(). */
		tx_buffer->next_eop = -1;
		tx_buffer++;
	}

	/* Reset ring indices: everything is free, nothing to clean. */
	adapter->next_avail_tx_desc = 0;
	adapter->next_tx_to_clean = 0;

	/* Set number of descriptors available */
	adapter->num_tx_desc_avail = adapter->num_tx_desc;

	/* Hand the (zeroed) ring to the hardware's view of memory. */
	bus_dmamap_sync(adapter->txdma.dma_tag, adapter->txdma.dma_map,
	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);

	return (0);

fail:
	/* Safe with partial state: it checks each resource for NULL. */
	em_free_transmit_structures(adapter);
	return (error);
}
3083
3084/*********************************************************************
3085 *
3086 *  Enable transmit unit.
3087 *
3088 **********************************************************************/
static void
em_initialize_transmit_unit(struct adapter *adapter)
{
	uint32_t	tctl, tarc, tipg = 0;
	uint64_t	bus_addr;

	 INIT_DEBUGOUT("em_initialize_transmit_unit: begin");
	/* Setup the Base and Length of the Tx Descriptor Ring */
	bus_addr = adapter->txdma.dma_paddr;
	E1000_WRITE_REG(&adapter->hw, E1000_TDLEN,
	    adapter->num_tx_desc * sizeof(struct e1000_tx_desc));
	E1000_WRITE_REG(&adapter->hw, E1000_TDBAH, (uint32_t)(bus_addr >> 32));
	E1000_WRITE_REG(&adapter->hw, E1000_TDBAL, (uint32_t)bus_addr);

	/* Setup the HW Tx Head and Tail descriptor pointers */
	E1000_WRITE_REG(&adapter->hw, E1000_TDT, 0);
	E1000_WRITE_REG(&adapter->hw, E1000_TDH, 0);

	HW_DEBUGOUT2("Base = %x, Length = %x\n",
	    E1000_READ_REG(&adapter->hw, E1000_TDBAL),
	    E1000_READ_REG(&adapter->hw, E1000_TDLEN));

	/* Set the default values for the Tx Inter Packet Gap timer */
	switch (adapter->hw.mac.type) {
	case e1000_82542:
		tipg = DEFAULT_82542_TIPG_IPGT;
		tipg |= DEFAULT_82542_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT;
		tipg |= DEFAULT_82542_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT;
		break;
	case e1000_80003es2lan:
		tipg = DEFAULT_82543_TIPG_IPGR1;
		tipg |= DEFAULT_80003ES2LAN_TIPG_IPGR2 <<
		    E1000_TIPG_IPGR2_SHIFT;
		break;
	default:
		/* IPGT differs for fiber/serdes vs. copper media. */
		if ((adapter->hw.media_type == e1000_media_type_fiber) ||
		    (adapter->hw.media_type ==
		    e1000_media_type_internal_serdes))
			tipg = DEFAULT_82543_TIPG_IPGT_FIBER;
		else
			tipg = DEFAULT_82543_TIPG_IPGT_COPPER;
		tipg |= DEFAULT_82543_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT;
		tipg |= DEFAULT_82543_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT;
	}

	E1000_WRITE_REG(&adapter->hw, E1000_TIPG, tipg);
	/* TX interrupt delay; absolute delay only exists on 82540+. */
	E1000_WRITE_REG(&adapter->hw, E1000_TIDV, adapter->tx_int_delay.value);
	if(adapter->hw.mac.type >= e1000_82540)
		E1000_WRITE_REG(&adapter->hw, E1000_TADV,
		    adapter->tx_abs_int_delay.value);

	/* Per-MAC TARC tweaks (speed mode / errata bits). */
	if ((adapter->hw.mac.type == e1000_82571) ||
	    (adapter->hw.mac.type == e1000_82572)) {
		tarc = E1000_READ_REG(&adapter->hw, E1000_TARC0);
		tarc |= SPEED_MODE_BIT;
		E1000_WRITE_REG(&adapter->hw, E1000_TARC0, tarc);
	} else if (adapter->hw.mac.type == e1000_80003es2lan) {
		tarc = E1000_READ_REG(&adapter->hw, E1000_TARC0);
		tarc |= 1;
		E1000_WRITE_REG(&adapter->hw, E1000_TARC0, tarc);
		tarc = E1000_READ_REG(&adapter->hw, E1000_TARC1);
		tarc |= 1;
		E1000_WRITE_REG(&adapter->hw, E1000_TARC1, tarc);
	}

	/* Program the Transmit Control Register */
	tctl = E1000_READ_REG(&adapter->hw, E1000_TCTL);
	tctl &= ~E1000_TCTL_CT;
	tctl |= (E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN |
		   (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT));

	if (adapter->hw.mac.type >= e1000_82571)
		tctl |= E1000_TCTL_MULR;

	/* This write will effectively turn on the transmit unit. */
	E1000_WRITE_REG(&adapter->hw, E1000_TCTL, tctl);

	/* Setup Transmit Descriptor Base Settings */
	adapter->txd_cmd = E1000_TXD_CMD_IFCS;

	/* Interrupt-delay on TX descriptors, except on 82575. */
	if ((adapter->tx_int_delay.value > 0) &&
	    (adapter->hw.mac.type != e1000_82575))
		adapter->txd_cmd |= E1000_TXD_CMD_IDE;

        /* Set the function pointer for the transmit routine */
        if (adapter->hw.mac.type >= e1000_82575)
                adapter->em_xmit = em_adv_encap;
        else
                adapter->em_xmit = em_encap;
}
3179
3180/*********************************************************************
3181 *
3182 *  Free all transmit related data structures.
3183 *
3184 **********************************************************************/
3185static void
3186em_free_transmit_structures(struct adapter *adapter)
3187{
3188	struct em_buffer *tx_buffer;
3189	int i;
3190
3191	INIT_DEBUGOUT("free_transmit_structures: begin");
3192
3193	if (adapter->tx_buffer_area != NULL) {
3194		tx_buffer = adapter->tx_buffer_area;
3195		for (i = 0; i < adapter->num_tx_desc; i++, tx_buffer++) {
3196			if (tx_buffer->m_head != NULL) {
3197				bus_dmamap_sync(adapter->txtag, tx_buffer->map,
3198				    BUS_DMASYNC_POSTWRITE);
3199				bus_dmamap_unload(adapter->txtag,
3200				    tx_buffer->map);
3201				m_freem(tx_buffer->m_head);
3202				tx_buffer->m_head = NULL;
3203			} else if (tx_buffer->map != NULL)
3204				bus_dmamap_unload(adapter->txtag,
3205				    tx_buffer->map);
3206			if (tx_buffer->map != NULL) {
3207				bus_dmamap_destroy(adapter->txtag,
3208				    tx_buffer->map);
3209				tx_buffer->map = NULL;
3210			}
3211		}
3212	}
3213	if (adapter->tx_buffer_area != NULL) {
3214		free(adapter->tx_buffer_area, M_DEVBUF);
3215		adapter->tx_buffer_area = NULL;
3216	}
3217	if (adapter->txtag != NULL) {
3218		bus_dma_tag_destroy(adapter->txtag);
3219		adapter->txtag = NULL;
3220	}
3221}
3222
3223/*********************************************************************
3224 *
3225 *  The offload context needs to be set when we transfer the first
3226 *  packet of a particular protocol (TCP/UDP). This routine has been
3227 *  enhanced to deal with inserted VLAN headers, and IPV6 (not complete)
3228 *
3229 **********************************************************************/
3230static void
3231em_transmit_checksum_setup(struct adapter *adapter, struct mbuf *mp,
3232    uint32_t *txd_upper, uint32_t *txd_lower)
3233{
3234	struct e1000_context_desc *TXD;
3235	struct em_buffer *tx_buffer;
3236	struct ether_vlan_header *eh;
3237	struct ip *ip;
3238	struct ip6_hdr *ip6;
3239	struct tcp_hdr *th;
3240	int curr_txd, ehdrlen, hdr_len, ip_hlen;
3241	uint32_t cmd = 0;
3242	uint16_t etype;
3243	uint8_t ipproto;
3244
3245	/* Setup checksum offload context. */
3246	curr_txd = adapter->next_avail_tx_desc;
3247	tx_buffer = &adapter->tx_buffer_area[curr_txd];
3248	TXD = (struct e1000_context_desc *) &adapter->tx_desc_base[curr_txd];
3249
3250	*txd_lower = E1000_TXD_CMD_DEXT |	/* Extended descr type */
3251		     E1000_TXD_DTYP_D;		/* Data descr */
3252
3253	/*
3254	 * Determine where frame payload starts.
3255	 * Jump over vlan headers if already present,
3256	 * helpful for QinQ too.
3257	 */
3258	eh = mtod(mp, struct ether_vlan_header *);
3259	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
3260		etype = ntohs(eh->evl_proto);
3261		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3262	} else {
3263		etype = ntohs(eh->evl_encap_proto);
3264		ehdrlen = ETHER_HDR_LEN;
3265	}
3266
3267	/*
3268	 * We only support TCP/UDP for IPv4 and IPv6 for the moment.
3269	 * TODO: Support SCTP too when it hits the tree.
3270	 */
3271	switch (etype) {
3272	case ETHERTYPE_IP:
3273		ip = (struct ip *)(mp->m_data + ehdrlen);
3274		ip_hlen = ip->ip_hl << 2;
3275
3276		/* Setup of IP header checksum. */
3277		if (mp->m_pkthdr.csum_flags & CSUM_IP) {
3278			/*
3279			 * Start offset for header checksum calculation.
3280			 * End offset for header checksum calculation.
3281			 * Offset of place to put the checksum.
3282			 */
3283			TXD->lower_setup.ip_fields.ipcss = ehdrlen;
3284			TXD->lower_setup.ip_fields.ipcse =
3285			    htole16(ehdrlen + ip_hlen);
3286			TXD->lower_setup.ip_fields.ipcso =
3287			    ehdrlen + offsetof(struct ip, ip_sum);
3288			cmd |= E1000_TXD_CMD_IP;
3289			*txd_upper |= E1000_TXD_POPTS_IXSM << 8;
3290		}
3291
3292		if (mp->m_len < ehdrlen + ip_hlen)
3293			return;	/* failure */
3294
3295		hdr_len = ehdrlen + ip_hlen;
3296		ipproto = ip->ip_p;
3297
3298		break;
3299	case ETHERTYPE_IPV6:
3300		ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
3301		ip_hlen = sizeof(struct ip6_hdr); /* XXX: No header stacking. */
3302
3303		if (mp->m_len < ehdrlen + ip_hlen)
3304			return;	/* failure */
3305
3306		/* IPv6 doesn't have a header checksum. */
3307
3308		hdr_len = ehdrlen + ip_hlen;
3309		ipproto = ip6->ip6_nxt;
3310
3311		break;
3312	default:
3313		*txd_upper = 0;
3314		*txd_lower = 0;
3315		return;
3316	}
3317
3318	switch (ipproto) {
3319	case IPPROTO_TCP:
3320		if (mp->m_pkthdr.csum_flags & CSUM_TCP) {
3321			/*
3322			 * Start offset for payload checksum calculation.
3323			 * End offset for payload checksum calculation.
3324			 * Offset of place to put the checksum.
3325			 */
3326			th = (struct tcp_hdr *)(mp->m_data + hdr_len);
3327			TXD->upper_setup.tcp_fields.tucss = hdr_len;
3328			TXD->upper_setup.tcp_fields.tucse = htole16(0);
3329			TXD->upper_setup.tcp_fields.tucso =
3330			    hdr_len + offsetof(struct tcphdr, th_sum);
3331			cmd |= E1000_TXD_CMD_TCP;
3332			*txd_upper |= E1000_TXD_POPTS_TXSM << 8;
3333		}
3334		break;
3335	case IPPROTO_UDP:
3336		if (mp->m_pkthdr.csum_flags & CSUM_UDP) {
3337			/*
3338			 * Start offset for header checksum calculation.
3339			 * End offset for header checksum calculation.
3340			 * Offset of place to put the checksum.
3341			 */
3342			TXD->upper_setup.tcp_fields.tucss = hdr_len;
3343			TXD->upper_setup.tcp_fields.tucse = htole16(0);
3344			TXD->upper_setup.tcp_fields.tucso =
3345			    hdr_len + offsetof(struct udphdr, uh_sum);
3346			*txd_upper |= E1000_TXD_POPTS_TXSM << 8;
3347		}
3348		break;
3349	default:
3350		break;
3351	}
3352
3353	TXD->tcp_seg_setup.data = htole32(0);
3354	TXD->cmd_and_length =
3355	    htole32(adapter->txd_cmd | E1000_TXD_CMD_DEXT | cmd);
3356	tx_buffer->m_head = NULL;
3357	tx_buffer->next_eop = -1;
3358
3359	if (++curr_txd == adapter->num_tx_desc)
3360		curr_txd = 0;
3361
3362	adapter->num_tx_desc_avail--;
3363	adapter->next_avail_tx_desc = curr_txd;
3364}
3365
3366/**********************************************************************
3367 *
3368 *  Setup work for hardware segmentation offload (TSO)
3369 *
3370 **********************************************************************/
static boolean_t
em_tso_setup(struct adapter *adapter, struct mbuf *mp, uint32_t *txd_upper,
   uint32_t *txd_lower)
{
	struct e1000_context_desc *TXD;
	struct em_buffer *tx_buffer;
	struct ether_vlan_header *eh;
	struct ip *ip;
	struct ip6_hdr *ip6;
	struct tcphdr *th;
	int curr_txd, ehdrlen, hdr_len, ip_hlen, isip6;
	uint16_t etype;

	/*
	 * XXX: This is not really correct as the stack would not have
	 * set up all checksums.
	 * XXX: Return FALSE is not sufficient as we may have to return
	 * in true failure cases as well.  Should do -1 (failure), 0 (no)
	 * and 1 (success).
	 */
	if (((mp->m_pkthdr.csum_flags & CSUM_TSO) == 0) ||
	     (mp->m_pkthdr.len <= EM_TX_BUFFER_SIZE))
		return FALSE;

	/*
	 * This function could/should be extended to support IP/IPv6
	 * fragmentation as well.  But as they say, one step at a time.
	 */

	/*
	 * Determine where frame payload starts.
	 * Jump over vlan headers if already present,
	 * helpful for QinQ too.
	 */
	eh = mtod(mp, struct ether_vlan_header *);
	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
		etype = ntohs(eh->evl_proto);
		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
	} else {
		etype = ntohs(eh->evl_encap_proto);
		ehdrlen = ETHER_HDR_LEN;
	}

	/* Ensure we have at least the IP+TCP header in the first mbuf. */
	if (mp->m_len < ehdrlen + sizeof(struct ip) + sizeof(struct tcphdr))
		return FALSE;	/* -1 */

	/*
	 * We only support TCP for IPv4 and IPv6 (notyet) for the moment.
	 * TODO: Support SCTP too when it hits the tree.
	 */
	switch (etype) {
	case ETHERTYPE_IP:
		isip6 = 0;
		ip = (struct ip *)(mp->m_data + ehdrlen);
		if (ip->ip_p != IPPROTO_TCP)
			return FALSE;	/* 0 */
		/*
		 * Zero ip_len/ip_sum: the hardware fills these in on
		 * each segment it generates.
		 */
		ip->ip_len = 0;
		ip->ip_sum = 0;
		ip_hlen = ip->ip_hl << 2;
		if (mp->m_len < ehdrlen + ip_hlen + sizeof(struct tcphdr))
			return FALSE;	/* -1 */
		th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
#if 1
		/* Seed TCP checksum with the pseudo-header sum. */
		th->th_sum = in_pseudo(ip->ip_src.s_addr,
		    ip->ip_dst.s_addr, htons(IPPROTO_TCP));
#else
		th->th_sum = mp->m_pkthdr.csum_data;
#endif
		break;
	case ETHERTYPE_IPV6:
		isip6 = 1;
		/*
		 * NOTE(review): everything below the return is dead code
		 * kept for when IPv6 TSO is enabled (including the
		 * ip->ip6_dst typo inside the #if 0 block).
		 */
		return FALSE;			/* Not supported yet. */
		ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
		if (ip6->ip6_nxt != IPPROTO_TCP)
			return FALSE;	/* 0 */
		ip6->ip6_plen = 0;
		ip_hlen = sizeof(struct ip6_hdr); /* XXX: no header stacking. */
		if (mp->m_len < ehdrlen + ip_hlen + sizeof(struct tcphdr))
			return FALSE;	/* -1 */
		th = (struct tcphdr *)((caddr_t)ip6 + ip_hlen);
#if 0
		th->th_sum = in6_pseudo(ip6->ip6_src, ip->ip6_dst,
		    htons(IPPROTO_TCP));	/* XXX: function notyet. */
#else
		th->th_sum = mp->m_pkthdr.csum_data;
#endif
		break;
	default:
		return FALSE;
	}
	hdr_len = ehdrlen + ip_hlen + (th->th_off << 2);

	*txd_lower = (E1000_TXD_CMD_DEXT |	/* Extended descr type */
		      E1000_TXD_DTYP_D |	/* Data descr type */
		      E1000_TXD_CMD_TSE);	/* Do TSE on this packet */

	/* IP and/or TCP header checksum calculation and insertion. */
	*txd_upper = ((isip6 ? 0 : E1000_TXD_POPTS_IXSM) |
		      E1000_TXD_POPTS_TXSM) << 8;

	curr_txd = adapter->next_avail_tx_desc;
	tx_buffer = &adapter->tx_buffer_area[curr_txd];
	TXD = (struct e1000_context_desc *) &adapter->tx_desc_base[curr_txd];

	/* IPv6 doesn't have a header checksum. */
	if (!isip6) {
		/*
		 * Start offset for header checksum calculation.
		 * End offset for header checksum calculation.
		 * Offset of place put the checksum.
		 */
		TXD->lower_setup.ip_fields.ipcss = ehdrlen;
		TXD->lower_setup.ip_fields.ipcse =
		    htole16(ehdrlen + ip_hlen - 1);
		TXD->lower_setup.ip_fields.ipcso =
		    ehdrlen + offsetof(struct ip, ip_sum);
	}
	/*
	 * Start offset for payload checksum calculation.
	 * End offset for payload checksum calculation.
	 * Offset of place to put the checksum.
	 */
	TXD->upper_setup.tcp_fields.tucss =
	    ehdrlen + ip_hlen;
	TXD->upper_setup.tcp_fields.tucse = 0;
	TXD->upper_setup.tcp_fields.tucso =
	    ehdrlen + ip_hlen + offsetof(struct tcphdr, th_sum);
	/*
	 * Payload size per packet w/o any headers.
	 * Length of all headers up to payload.
	 */
	TXD->tcp_seg_setup.fields.mss = htole16(mp->m_pkthdr.tso_segsz);
	TXD->tcp_seg_setup.fields.hdr_len = hdr_len;

	TXD->cmd_and_length = htole32(adapter->txd_cmd |
				E1000_TXD_CMD_DEXT |	/* Extended descr */
				E1000_TXD_CMD_TSE |	/* TSE context */
				(isip6 ? 0 : E1000_TXD_CMD_IP) | /* Do IP csum */
				E1000_TXD_CMD_TCP |	/* Do TCP checksum */
				(mp->m_pkthdr.len - (hdr_len))); /* Total len */

	/* The context descriptor occupies a slot with no mbuf attached. */
	tx_buffer->m_head = NULL;
	tx_buffer->next_eop = -1;

	if (++curr_txd == adapter->num_tx_desc)
		curr_txd = 0;

	adapter->num_tx_desc_avail--;
	adapter->next_avail_tx_desc = curr_txd;
	adapter->tx_tso = TRUE;

	return TRUE;
}
3525
3526
3527/**********************************************************************
3528 *
3529 *  Setup work for hardware segmentation offload (TSO) on
3530 *  adapters using advanced tx descriptors
3531 *
3532 **********************************************************************/
static boolean_t
em_tso_adv_setup(struct adapter *adapter, struct mbuf *mp, u32 *paylen)
{
	struct e1000_adv_tx_context_desc *TXD;
	struct em_buffer        *tx_buffer;
	u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0;
	u32 mss_l4len_idx = 0;
	u16 vtag = 0;
	int ctxd, ehdrlen, hdrlen, ip_hlen, tcp_hlen;
	struct ether_vlan_header *eh;
	struct ip *ip;
	struct tcphdr *th;

	/* Only TSO-flagged packets larger than one buffer qualify. */
	if (((mp->m_pkthdr.csum_flags & CSUM_TSO) == 0) ||
	     (mp->m_pkthdr.len <= EM_TX_BUFFER_SIZE))
		return FALSE;

	/*
	 * Determine where frame payload starts.
	 * Jump over vlan headers if already present
	 */
	eh = mtod(mp, struct ether_vlan_header *);
	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN))
		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
	else
		ehdrlen = ETHER_HDR_LEN;

	/* Ensure we have at least the IP+TCP header in the first mbuf. */
	if (mp->m_len < ehdrlen + sizeof(struct ip) + sizeof(struct tcphdr))
		return FALSE;

	/* Only supports IPV4 for now */
	ctxd = adapter->next_avail_tx_desc;
	tx_buffer = &adapter->tx_buffer_area[ctxd];
	TXD = (struct e1000_adv_tx_context_desc *) &adapter->tx_desc_base[ctxd];

	ip = (struct ip *)(mp->m_data + ehdrlen);
	if (ip->ip_p != IPPROTO_TCP)
                return FALSE;   /* 0 */
	/* Hardware recomputes these per generated segment. */
	ip->ip_len = 0;
	ip->ip_sum = 0;
	ip_hlen = ip->ip_hl << 2;
	th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
	/* Seed the TCP checksum with the pseudo-header sum. */
	th->th_sum = in_pseudo(ip->ip_src.s_addr,
	    ip->ip_dst.s_addr, htons(IPPROTO_TCP));
	tcp_hlen = th->th_off << 2;
	hdrlen = ehdrlen + ip_hlen + tcp_hlen;
	/* Calculate payload, this is used in the transmit desc in encap */
	*paylen = mp->m_pkthdr.len - hdrlen;

	/* VLAN MACLEN IPLEN */
	if (mp->m_flags & M_VLANTAG) {
		vtag = htole16(mp->m_pkthdr.ether_vtag);
		vlan_macip_lens |= (vtag << E1000_ADVTXD_VLAN_SHIFT);
	}
	vlan_macip_lens |= (ehdrlen << E1000_ADVTXD_MACLEN_SHIFT);
	vlan_macip_lens |= ip_hlen;
	/* NOTE(review): |= into ring memory relies on the slot having
	 * been zeroed at init/cleanup time — confirm before reuse. */
	TXD->vlan_macip_lens |= htole32(vlan_macip_lens);

	/* ADV DTYPE TUCMD */
	type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
	type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP;
	type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4;
	TXD->type_tucmd_mlhl |= htole32(type_tucmd_mlhl);

	/* MSS L4LEN IDX */
	mss_l4len_idx |= (mp->m_pkthdr.tso_segsz << E1000_ADVTXD_MSS_SHIFT);
	mss_l4len_idx |= (tcp_hlen << E1000_ADVTXD_L4LEN_SHIFT);
	TXD->mss_l4len_idx = htole32(mss_l4len_idx);

	TXD->seqnum_seed = htole32(0);
	/* Context descriptor slot carries no mbuf. */
	tx_buffer->m_head = NULL;
	tx_buffer->next_eop = -1;

	if (++ctxd == adapter->num_tx_desc)
		ctxd = 0;

	adapter->num_tx_desc_avail--;
	adapter->next_avail_tx_desc = ctxd;
	return TRUE;
}
3614
3615
3616/*********************************************************************
3617 *
3618 *  Advanced Context Descriptor setup for VLAN or CSUM
3619 *
3620 **********************************************************************/
3621
3622static boolean_t
3623em_tx_adv_ctx_setup(struct adapter *adapter, struct mbuf *mp)
3624{
3625	struct e1000_adv_tx_context_desc *TXD;
3626	struct em_buffer        *tx_buffer;
3627	uint32_t vlan_macip_lens = 0, type_tucmd_mlhl = 0;
3628	struct ether_vlan_header *eh;
3629	struct ip *ip;
3630	struct ip6_hdr *ip6;
3631	int  ehdrlen, ip_hlen;
3632	u16	etype;
3633	u8	ipproto;
3634
3635	int ctxd = adapter->next_avail_tx_desc;
3636	u16 vtag = 0;
3637
3638	tx_buffer = &adapter->tx_buffer_area[ctxd];
3639	TXD = (struct e1000_adv_tx_context_desc *) &adapter->tx_desc_base[ctxd];
3640
3641	/*
3642	** In advanced descriptors the vlan tag must
3643	** be placed into the descriptor itself.
3644	*/
3645	if (mp->m_flags & M_VLANTAG) {
3646		vtag = htole16(mp->m_pkthdr.ether_vtag);
3647		vlan_macip_lens |= (vtag << E1000_ADVTXD_VLAN_SHIFT);
3648	}
3649
3650	/*
3651	 * Determine where frame payload starts.
3652	 * Jump over vlan headers if already present,
3653	 * helpful for QinQ too.
3654	 */
3655	eh = mtod(mp, struct ether_vlan_header *);
3656	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
3657		etype = ntohs(eh->evl_proto);
3658		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3659	} else {
3660		etype = ntohs(eh->evl_encap_proto);
3661		ehdrlen = ETHER_HDR_LEN;
3662	}
3663
3664	/* Set the ether header length */
3665	vlan_macip_lens |= ehdrlen << E1000_ADVTXD_MACLEN_SHIFT;
3666
3667	switch (etype) {
3668		case ETHERTYPE_IP:
3669			ip = (struct ip *)(mp->m_data + ehdrlen);
3670			ip_hlen = ip->ip_hl << 2;
3671			if (mp->m_len < ehdrlen + ip_hlen)
3672				return FALSE; /* failure */
3673			ipproto = ip->ip_p;
3674			type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4;
3675			break;
3676		case ETHERTYPE_IPV6:
3677			ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
3678			ip_hlen = sizeof(struct ip6_hdr);
3679			if (mp->m_len < ehdrlen + ip_hlen)
3680				return FALSE; /* failure */
3681			ipproto = ip6->ip6_nxt;
3682			type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV6;
3683			break;
3684		default:
3685			return FALSE;
3686	}
3687
3688	vlan_macip_lens |= ip_hlen;
3689	type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
3690
3691	switch (ipproto) {
3692		case IPPROTO_TCP:
3693			if (mp->m_pkthdr.csum_flags & CSUM_TCP)
3694				type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP;
3695			break;
3696		case IPPROTO_UDP:
3697			if (mp->m_pkthdr.csum_flags & CSUM_UDP)
3698				type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP;
3699			break;
3700	}
3701
3702	/* Now copy bits into descriptor */
3703	TXD->vlan_macip_lens |= htole32(vlan_macip_lens);
3704	TXD->type_tucmd_mlhl |= htole32(type_tucmd_mlhl);
3705	TXD->seqnum_seed = htole32(0);
3706	TXD->mss_l4len_idx = htole32(0);
3707
3708	tx_buffer->m_head = NULL;
3709	tx_buffer->next_eop = -1;
3710
3711	/* We've consumed the first desc, adjust counters */
3712	if (++ctxd == adapter->num_tx_desc)
3713		ctxd = 0;
3714	adapter->next_avail_tx_desc = ctxd;
3715	--adapter->num_tx_desc_avail;
3716
3717        return TRUE;
3718}
3719
3720
3721/**********************************************************************
3722 *
3723 *  Examine each tx_buffer in the used queue. If the hardware is done
3724 *  processing the packet then free associated resources. The
3725 *  tx_buffer is put back on the free queue.
3726 *
3727 **********************************************************************/
static void
em_txeof(struct adapter *adapter)
{
	int first, last, done, num_avail;
	struct em_buffer *tx_buffer;
	struct e1000_tx_desc   *tx_desc, *eop_desc;
	struct ifnet   *ifp = adapter->ifp;

	EM_LOCK_ASSERT(adapter);

	/* Nothing outstanding: the whole ring is already free. */
	if (adapter->num_tx_desc_avail == adapter->num_tx_desc)
		return;

	num_avail = adapter->num_tx_desc_avail;
	first = adapter->next_tx_to_clean;
	tx_desc = &adapter->tx_desc_base[first];
	tx_buffer = &adapter->tx_buffer_area[first];
	/*
	 * NOTE(review): if next_eop is still -1 here, eop_desc points
	 * one element before the ring and its status read below is out
	 * of bounds — confirm callers guarantee a valid EOP slot.
	 */
	last = tx_buffer->next_eop;
	eop_desc = &adapter->tx_desc_base[last];

	/*
	 * What this does is get the index of the
	 * first descriptor AFTER the EOP of the
	 * first packet, that way we can do the
	 * simple comparison on the inner while loop.
	 */
	if (++last == adapter->num_tx_desc)
		last = 0;
	done = last;

	/* Pull the DMA-written status bits into CPU view. */
	bus_dmamap_sync(adapter->txdma.dma_tag, adapter->txdma.dma_map,
	    BUS_DMASYNC_POSTREAD);

	while (eop_desc->upper.fields.status & E1000_TXD_STAT_DD) {
		/* We clean the range of the packet */
		while (first != done) {
			/* Zero the descriptor so stale bits can't be
			 * mistaken for completion state on reuse. */
			tx_desc->upper.data = 0;
			tx_desc->lower.data = 0;
			tx_desc->buffer_addr = 0;
			num_avail++;

			if (tx_buffer->m_head) {
				ifp->if_opackets++;
				bus_dmamap_sync(adapter->txtag,
				    tx_buffer->map,
				    BUS_DMASYNC_POSTWRITE);
				bus_dmamap_unload(adapter->txtag,
				    tx_buffer->map);

				m_freem(tx_buffer->m_head);
				tx_buffer->m_head = NULL;
			}
			tx_buffer->next_eop = -1;

			if (++first == adapter->num_tx_desc)
				first = 0;

			tx_buffer = &adapter->tx_buffer_area[first];
			tx_desc = &adapter->tx_desc_base[first];
		}
		/* See if we can continue to the next packet */
		last = tx_buffer->next_eop;
		if (last != -1) {
			eop_desc = &adapter->tx_desc_base[last];
			/* Get new done point */
			if (++last == adapter->num_tx_desc) last = 0;
			done = last;
		} else
			break;
	}
	bus_dmamap_sync(adapter->txdma.dma_tag, adapter->txdma.dma_map,
	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);

	adapter->next_tx_to_clean = first;

	/*
	 * If we have enough room, clear IFF_DRV_OACTIVE to tell the stack
	 * that it is OK to send packets.
	 * If there are no pending descriptors, clear the timeout. Otherwise,
	 * if some descriptors have been freed, restart the timeout.
	 */
	if (num_avail > EM_TX_CLEANUP_THRESHOLD) {
		ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
		/* All clean, turn off the timer */
		if (num_avail == adapter->num_tx_desc)
			adapter->watchdog_timer = 0;
		/* Some cleaned, reset the timer */
		else if (num_avail != adapter->num_tx_desc_avail)
			adapter->watchdog_timer = EM_TX_TIMEOUT;
	}
	adapter->num_tx_desc_avail = num_avail;
	return;
}
3821
3822/*********************************************************************
3823 *
3824 *  Get a buffer from system mbuf buffer pool.
3825 *
3826 **********************************************************************/
static int
em_get_buf(struct adapter *adapter, int i)
{
	struct mbuf		*m;
	bus_dma_segment_t	segs[1];
	bus_dmamap_t		map;
	struct em_buffer	*rx_buffer;
	int			error, nsegs;

	/* Allocate a packet-header mbuf with an attached 2K cluster */
	m = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR);
	if (m == NULL) {
		adapter->mbuf_cluster_failed++;
		return (ENOBUFS);
	}
	m->m_len = m->m_pkthdr.len = MCLBYTES;

	/*
	 * For standard-size frames, shift the payload by ETHER_ALIGN so
	 * the IP header lands 32-bit aligned (the 14-byte ethernet
	 * header would otherwise misalign it).
	 */
	if (adapter->hw.mac.max_frame_size <= (MCLBYTES - ETHER_ALIGN))
		m_adj(m, ETHER_ALIGN);

	/*
	 * Using memory from the mbuf cluster pool, invoke the
	 * bus_dma machinery to arrange the memory mapping.
	 * The load goes into the spare map first, so the descriptor's
	 * existing mapping stays intact if this load fails.
	 */
	error = bus_dmamap_load_mbuf_sg(adapter->rxtag,
	    adapter->rx_sparemap, m, segs, &nsegs, BUS_DMA_NOWAIT);
	if (error != 0) {
		m_free(m);
		return (error);
	}

	/* If nsegs is wrong then the stack is corrupt. */
	KASSERT(nsegs == 1, ("Too many segments returned!"));

	/* Unload the old mapping (if any) before swapping maps */
	rx_buffer = &adapter->rx_buffer_area[i];
	if (rx_buffer->m_head != NULL)
		bus_dmamap_unload(adapter->rxtag, rx_buffer->map);

	/* Swap: loaded spare map into the slot, old map becomes the spare */
	map = rx_buffer->map;
	rx_buffer->map = adapter->rx_sparemap;
	adapter->rx_sparemap = map;
	bus_dmamap_sync(adapter->rxtag, rx_buffer->map, BUS_DMASYNC_PREREAD);
	rx_buffer->m_head = m;

	/* Point the hardware descriptor at the cluster's bus address */
	adapter->rx_desc_base[i].buffer_addr = htole64(segs[0].ds_addr);
	return (0);
}
3873
3874/*********************************************************************
3875 *
3876 *  Allocate memory for rx_buffer structures. Since we use one
3877 *  rx_buffer per received packet, the maximum number of rx_buffer's
3878 *  that we'll need is equal to the number of receive descriptors
3879 *  that we've allocated.
3880 *
3881 **********************************************************************/
3882static int
3883em_allocate_receive_structures(struct adapter *adapter)
3884{
3885	device_t dev = adapter->dev;
3886	struct em_buffer *rx_buffer;
3887	int i, error;
3888
3889	adapter->rx_buffer_area = malloc(sizeof(struct em_buffer) *
3890	    adapter->num_rx_desc, M_DEVBUF, M_NOWAIT);
3891	if (adapter->rx_buffer_area == NULL) {
3892		device_printf(dev, "Unable to allocate rx_buffer memory\n");
3893		return (ENOMEM);
3894	}
3895
3896	bzero(adapter->rx_buffer_area,
3897	    sizeof(struct em_buffer) * adapter->num_rx_desc);
3898
3899	error = bus_dma_tag_create(bus_get_dma_tag(dev),        /* parent */
3900				1, 0,			/* alignment, bounds */
3901				BUS_SPACE_MAXADDR,	/* lowaddr */
3902				BUS_SPACE_MAXADDR,	/* highaddr */
3903				NULL, NULL,		/* filter, filterarg */
3904				MCLBYTES,		/* maxsize */
3905				1,			/* nsegments */
3906				MCLBYTES,		/* maxsegsize */
3907				0,			/* flags */
3908				NULL,			/* lockfunc */
3909				NULL,			/* lockarg */
3910				&adapter->rxtag);
3911	if (error) {
3912		device_printf(dev, "%s: bus_dma_tag_create failed %d\n",
3913		    __func__, error);
3914		goto fail;
3915	}
3916
3917	/* Create the spare map (used by getbuf) */
3918	error = bus_dmamap_create(adapter->rxtag, BUS_DMA_NOWAIT,
3919	     &adapter->rx_sparemap);
3920	if (error) {
3921		device_printf(dev, "%s: bus_dmamap_create failed: %d\n",
3922		    __func__, error);
3923		goto fail;
3924	}
3925
3926	rx_buffer = adapter->rx_buffer_area;
3927	for (i = 0; i < adapter->num_rx_desc; i++, rx_buffer++) {
3928		error = bus_dmamap_create(adapter->rxtag, BUS_DMA_NOWAIT,
3929		    &rx_buffer->map);
3930		if (error) {
3931			device_printf(dev, "%s: bus_dmamap_create failed: %d\n",
3932			    __func__, error);
3933			goto fail;
3934		}
3935	}
3936
3937	/* Setup the initial buffers */
3938	for (i = 0; i < adapter->num_rx_desc; i++) {
3939		error = em_get_buf(adapter, i);
3940		if (error)
3941			goto fail;
3942	}
3943	bus_dmamap_sync(adapter->rxdma.dma_tag, adapter->rxdma.dma_map,
3944	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3945
3946	return (0);
3947
3948fail:
3949	em_free_receive_structures(adapter);
3950	return (error);
3951}
3952
3953/*********************************************************************
3954 *
3955 *  Allocate and initialize receive structures.
3956 *
3957 **********************************************************************/
3958static int
3959em_setup_receive_structures(struct adapter *adapter)
3960{
3961	int error;
3962
3963	bzero(adapter->rx_desc_base,
3964	    (sizeof(struct e1000_rx_desc)) * adapter->num_rx_desc);
3965
3966	if ((error = em_allocate_receive_structures(adapter)) !=0)
3967		return (error);
3968
3969	/* Setup our descriptor pointers */
3970	adapter->next_rx_desc_to_check = 0;
3971
3972	return (0);
3973}
3974
3975/*********************************************************************
3976 *
3977 *  Enable receive unit.
3978 *
3979 **********************************************************************/
static void
em_initialize_receive_unit(struct adapter *adapter)
{
	struct ifnet	*ifp = adapter->ifp;
	uint64_t	bus_addr;
	uint32_t	reg_rctl;
	uint32_t	reg_rxcsum;

	INIT_DEBUGOUT("em_initialize_receive_unit: begin");

	/*
	 * Make sure receives are disabled while setting
	 * up the descriptor ring
	 */
	reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
	E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl & ~E1000_RCTL_EN);

	/* 82540 and later support the absolute RX delay timer and ITR */
	if(adapter->hw.mac.type >= e1000_82540) {
		E1000_WRITE_REG(&adapter->hw, E1000_RADV,
		    adapter->rx_abs_int_delay.value);
		/*
		 * Set the interrupt throttling rate. Value is calculated
		 * as DEFAULT_ITR = 1/(MAX_INTS_PER_SEC * 256ns)
		 */
#define MAX_INTS_PER_SEC	8000
#define DEFAULT_ITR	     1000000000/(MAX_INTS_PER_SEC * 256)
		E1000_WRITE_REG(&adapter->hw, E1000_ITR, DEFAULT_ITR);
	}

	/* Setup the Base and Length of the Rx Descriptor Ring */
	bus_addr = adapter->rxdma.dma_paddr;
	E1000_WRITE_REG(&adapter->hw, E1000_RDLEN, adapter->num_rx_desc *
			sizeof(struct e1000_rx_desc));
	E1000_WRITE_REG(&adapter->hw, E1000_RDBAH, (uint32_t)(bus_addr >> 32));
	E1000_WRITE_REG(&adapter->hw, E1000_RDBAL, (uint32_t)bus_addr);

	/* Setup the Receive Control Register */
	reg_rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
	reg_rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_LBM_NO |
		   E1000_RCTL_RDMTS_HALF |
		   (adapter->hw.mac.mc_filter_type << E1000_RCTL_MO_SHIFT);

	/* Store-bad-packets follows the 82543 TBI workaround state */
	if (e1000_tbi_sbp_enabled_82543(&adapter->hw))
		reg_rctl |= E1000_RCTL_SBP;
	else
		reg_rctl &= ~E1000_RCTL_SBP;

	/* Buffer sizes above 2048 need BSEX (size extension) and LPE */
	switch (adapter->rx_buffer_len) {
	default:
	case 2048:
		reg_rctl |= E1000_RCTL_SZ_2048;
		break;
	case 4096:
		reg_rctl |= E1000_RCTL_SZ_4096 |
		    E1000_RCTL_BSEX | E1000_RCTL_LPE;
		break;
	case 8192:
		reg_rctl |= E1000_RCTL_SZ_8192 |
		    E1000_RCTL_BSEX | E1000_RCTL_LPE;
		break;
	case 16384:
		reg_rctl |= E1000_RCTL_SZ_16384 |
		    E1000_RCTL_BSEX | E1000_RCTL_LPE;
		break;
	}

	/* Long Packet Enable tracks the configured MTU */
	if (ifp->if_mtu > ETHERMTU)
		reg_rctl |= E1000_RCTL_LPE;
	else
		reg_rctl &= ~E1000_RCTL_LPE;

	/* Enable 82543 Receive Checksum Offload for TCP and UDP */
	if ((adapter->hw.mac.type >= e1000_82543) &&
	    (ifp->if_capenable & IFCAP_RXCSUM)) {
		reg_rxcsum = E1000_READ_REG(&adapter->hw, E1000_RXCSUM);
		reg_rxcsum |= (E1000_RXCSUM_IPOFL | E1000_RXCSUM_TUOFL);
		E1000_WRITE_REG(&adapter->hw, E1000_RXCSUM, reg_rxcsum);
	}

	/*
	** XXX TEMPORARY WORKAROUND: on some systems with 82573
	** long latencies are observed, like Lenovo X60. This
	** change eliminates the problem, but since having positive
	** values in RDTR is a known source of problems on other
	** platforms another solution is being sought.
	*/
	if (adapter->hw.mac.type == e1000_82573)
		E1000_WRITE_REG(&adapter->hw, E1000_RDTR, 0x20);

	/* Enable Receives */
	E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);

	/*
	 * Setup the HW Rx Head and
	 * Tail Descriptor Pointers
	 */
	E1000_WRITE_REG(&adapter->hw, E1000_RDH, 0);
	E1000_WRITE_REG(&adapter->hw, E1000_RDT, adapter->num_rx_desc - 1);

	return;
}
4081
4082/*********************************************************************
4083 *
4084 *  Free receive related data structures.
4085 *
4086 **********************************************************************/
4087static void
4088em_free_receive_structures(struct adapter *adapter)
4089{
4090	struct em_buffer *rx_buffer;
4091	int i;
4092
4093	INIT_DEBUGOUT("free_receive_structures: begin");
4094
4095	if (adapter->rx_sparemap) {
4096		bus_dmamap_destroy(adapter->rxtag, adapter->rx_sparemap);
4097		adapter->rx_sparemap = NULL;
4098	}
4099
4100	/* Cleanup any existing buffers */
4101	if (adapter->rx_buffer_area != NULL) {
4102		rx_buffer = adapter->rx_buffer_area;
4103		for (i = 0; i < adapter->num_rx_desc; i++, rx_buffer++) {
4104			if (rx_buffer->m_head != NULL) {
4105				bus_dmamap_sync(adapter->rxtag, rx_buffer->map,
4106				    BUS_DMASYNC_POSTREAD);
4107				bus_dmamap_unload(adapter->rxtag,
4108				    rx_buffer->map);
4109				m_freem(rx_buffer->m_head);
4110				rx_buffer->m_head = NULL;
4111			} else if (rx_buffer->map != NULL)
4112				bus_dmamap_unload(adapter->rxtag,
4113				    rx_buffer->map);
4114			if (rx_buffer->map != NULL) {
4115				bus_dmamap_destroy(adapter->rxtag,
4116				    rx_buffer->map);
4117				rx_buffer->map = NULL;
4118			}
4119		}
4120	}
4121
4122	if (adapter->rx_buffer_area != NULL) {
4123		free(adapter->rx_buffer_area, M_DEVBUF);
4124		adapter->rx_buffer_area = NULL;
4125	}
4126
4127	if (adapter->rxtag != NULL) {
4128		bus_dma_tag_destroy(adapter->rxtag);
4129		adapter->rxtag = NULL;
4130	}
4131}
4132
4133/*********************************************************************
4134 *
4135 *  This routine executes in interrupt context. It replenishes
4136 *  the mbufs in the descriptor and sends data which has been
4137 *  dma'ed into host memory to upper layer.
4138 *
4139 *  We loop at most count times if count is > 0, or until done if
4140 *  count < 0.
4141 *
4142 *********************************************************************/
static int
em_rxeof(struct adapter *adapter, int count)
{
	struct ifnet	*ifp;
	struct mbuf	*mp;
	uint8_t		accept_frame = 0;
	uint8_t		eop = 0;
	uint16_t 	len, desc_len, prev_len_adj;
	int		i;

	/* Pointer to the receive descriptor being examined. */
	struct e1000_rx_desc   *current_desc;
	uint8_t		status;

	ifp = adapter->ifp;
	i = adapter->next_rx_desc_to_check;
	current_desc = &adapter->rx_desc_base[i];
	bus_dmamap_sync(adapter->rxdma.dma_tag, adapter->rxdma.dma_map,
	    BUS_DMASYNC_POSTREAD);

	/* Nothing completed by the hardware yet */
	if (!((current_desc->status) & E1000_RXD_STAT_DD))
		return (0);

	/*
	 * Process completed descriptors until we hit one still owned
	 * by hardware, exhaust the budget (count counts whole packets,
	 * decremented only at EOP), or the interface stops running.
	 */
	while ((current_desc->status & E1000_RXD_STAT_DD) &&
	    (count != 0) &&
	    (ifp->if_drv_flags & IFF_DRV_RUNNING)) {
		struct mbuf *m = NULL;

		mp = adapter->rx_buffer_area[i].m_head;
		/*
		 * Can't defer bus_dmamap_sync(9) because TBI_ACCEPT
		 * needs to access the last received byte in the mbuf.
		 */
		bus_dmamap_sync(adapter->rxtag, adapter->rx_buffer_area[i].map,
		    BUS_DMASYNC_POSTREAD);

		accept_frame = 1;
		prev_len_adj = 0;
		desc_len = le16toh(current_desc->length);
		status = current_desc->status;
		if (status & E1000_RXD_STAT_EOP) {
			count--;
			eop = 1;
			/*
			 * Strip the CRC; if this fragment holds fewer than
			 * 4 bytes, the remainder of the CRC must be trimmed
			 * from the previous fragment (prev_len_adj).
			 */
			if (desc_len < ETHER_CRC_LEN) {
				len = 0;
				prev_len_adj = ETHER_CRC_LEN - desc_len;
			} else
				len = desc_len - ETHER_CRC_LEN;
		} else {
			eop = 0;
			len = desc_len;
		}

		/* Errored frame: apply the 82543 TBI acceptance workaround */
		if (current_desc->errors & E1000_RXD_ERR_FRAME_ERR_MASK) {
			uint8_t		last_byte;
			uint32_t	pkt_len = desc_len;

			if (adapter->fmp != NULL)
				pkt_len += adapter->fmp->m_pkthdr.len;

			last_byte = *(mtod(mp, caddr_t) + desc_len - 1);
			if (TBI_ACCEPT(&adapter->hw, status,
			    current_desc->errors, pkt_len, last_byte)) {
				e1000_tbi_adjust_stats_82543(&adapter->hw,
				    &adapter->stats, pkt_len,
				    adapter->hw.mac.addr);
				if (len > 0)
					len--;
			} else
				accept_frame = 0;
		}

		if (accept_frame) {
			/* Replenish the slot before consuming its mbuf */
			if (em_get_buf(adapter, i) != 0) {
				ifp->if_iqdrops++;
				goto discard;
			}

			/* Assign correct length to the current fragment */
			mp->m_len = len;

			if (adapter->fmp == NULL) {
				mp->m_pkthdr.len = len;
				adapter->fmp = mp; /* Store the first mbuf */
				adapter->lmp = mp;
			} else {
				/* Chain mbuf's together */
				mp->m_flags &= ~M_PKTHDR;
				/*
				 * Adjust length of previous mbuf in chain if
				 * we received less than 4 bytes in the last
				 * descriptor.
				 */
				if (prev_len_adj > 0) {
					adapter->lmp->m_len -= prev_len_adj;
					adapter->fmp->m_pkthdr.len -=
					    prev_len_adj;
				}
				adapter->lmp->m_next = mp;
				adapter->lmp = adapter->lmp->m_next;
				adapter->fmp->m_pkthdr.len += len;
			}

			/* Complete packet: finish metadata and hand it up */
			if (eop) {
				adapter->fmp->m_pkthdr.rcvif = ifp;
				ifp->if_ipackets++;
				em_receive_checksum(adapter, current_desc,
				    adapter->fmp);
#ifndef __NO_STRICT_ALIGNMENT
				if (adapter->hw.mac.max_frame_size >
				    (MCLBYTES - ETHER_ALIGN) &&
				    em_fixup_rx(adapter) != 0)
					goto skip;
#endif
				if (status & E1000_RXD_STAT_VP) {
					adapter->fmp->m_pkthdr.ether_vtag =
					    (le16toh(current_desc->special) &
					    E1000_RXD_SPC_VLAN_MASK);
					adapter->fmp->m_flags |= M_VLANTAG;
				}
#ifndef __NO_STRICT_ALIGNMENT
skip:
#endif
				m = adapter->fmp;
				adapter->fmp = NULL;
				adapter->lmp = NULL;
			}
		} else {
			ifp->if_ierrors++;
discard:
			/* Reuse loaded DMA map and just update mbuf chain */
			mp = adapter->rx_buffer_area[i].m_head;
			mp->m_len = mp->m_pkthdr.len = MCLBYTES;
			mp->m_data = mp->m_ext.ext_buf;
			mp->m_next = NULL;
			if (adapter->hw.mac.max_frame_size <=
			    (MCLBYTES - ETHER_ALIGN))
				m_adj(mp, ETHER_ALIGN);
			/* Drop any partially assembled chain as well */
			if (adapter->fmp != NULL) {
				m_freem(adapter->fmp);
				adapter->fmp = NULL;
				adapter->lmp = NULL;
			}
			m = NULL;
		}

		/* Zero out the receive descriptors status. */
		current_desc->status = 0;
		bus_dmamap_sync(adapter->rxdma.dma_tag, adapter->rxdma.dma_map,
		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);

		/* Advance our pointers to the next descriptor. */
		if (++i == adapter->num_rx_desc)
			i = 0;
		if (m != NULL) {
			/*
			 * Save/restore the scan index around if_input():
			 * the stack may re-enter the driver (or, with
			 * DEVICE_POLLING, the lock is dropped), so the
			 * position can change under us.
			 */
			adapter->next_rx_desc_to_check = i;
#ifdef DEVICE_POLLING
			EM_UNLOCK(adapter);
			(*ifp->if_input)(ifp, m);
			EM_LOCK(adapter);
#else
			/* Already running unlocked */
			(*ifp->if_input)(ifp, m);
#endif
			i = adapter->next_rx_desc_to_check;
		}
		current_desc = &adapter->rx_desc_base[i];
	}
	adapter->next_rx_desc_to_check = i;

	/* Advance the E1000's Receive Queue #0  "Tail Pointer". */
	if (--i < 0)
		i = adapter->num_rx_desc - 1;
	E1000_WRITE_REG(&adapter->hw, E1000_RDT, i);
	/* Return whether more completed descriptors remain */
	if (!((current_desc->status) & E1000_RXD_STAT_DD))
		return (0);

	return (1);
}
4322
4323#ifndef __NO_STRICT_ALIGNMENT
/*
 * When jumbo frames are enabled we should realign the entire payload on
 * architectures with strict alignment. This is a serious design mistake of
 * the 8254x as it nullifies DMA operations. The 8254x only allows the RX
 * buffer size to be 2048/4096/8192/16384. What we really want is
 * 2048 - ETHER_ALIGN to align the payload. On architectures without strict
 * alignment restrictions the 8254x still performs unaligned memory accesses,
 * which reduce performance too. To avoid copying an entire frame to realign
 * it, we allocate a new mbuf and copy the ethernet header into the new mbuf.
 * The new mbuf is prepended to the existing mbuf chain.
 *
 * Be aware, the best performance of the 8254x is achieved only when jumbo
 * frames are not used at all on architectures with strict alignment.
 */
4338static int
4339em_fixup_rx(struct adapter *adapter)
4340{
4341	struct mbuf *m, *n;
4342	int error;
4343
4344	error = 0;
4345	m = adapter->fmp;
4346	if (m->m_len <= (MCLBYTES - ETHER_HDR_LEN)) {
4347		bcopy(m->m_data, m->m_data + ETHER_HDR_LEN, m->m_len);
4348		m->m_data += ETHER_HDR_LEN;
4349	} else {
4350		MGETHDR(n, M_DONTWAIT, MT_DATA);
4351		if (n != NULL) {
4352			bcopy(m->m_data, n->m_data, ETHER_HDR_LEN);
4353			m->m_data += ETHER_HDR_LEN;
4354			m->m_len -= ETHER_HDR_LEN;
4355			n->m_len = ETHER_HDR_LEN;
4356			M_MOVE_PKTHDR(n, m);
4357			n->m_next = m;
4358			adapter->fmp = n;
4359		} else {
4360			adapter->dropped_pkts++;
4361			m_freem(adapter->fmp);
4362			adapter->fmp = NULL;
4363			error = ENOMEM;
4364		}
4365	}
4366
4367	return (error);
4368}
4369#endif
4370
4371/*********************************************************************
4372 *
4373 *  Verify that the hardware indicated that the checksum is valid.
4374 *  Inform the stack about the status of checksum so that stack
4375 *  doesn't spend time verifying the checksum.
4376 *
4377 *********************************************************************/
4378static void
4379em_receive_checksum(struct adapter *adapter,
4380	    struct e1000_rx_desc *rx_desc, struct mbuf *mp)
4381{
4382	/* 82543 or newer only */
4383	if ((adapter->hw.mac.type < e1000_82543) ||
4384	    /* Ignore Checksum bit is set */
4385	    (rx_desc->status & E1000_RXD_STAT_IXSM)) {
4386		mp->m_pkthdr.csum_flags = 0;
4387		return;
4388	}
4389
4390	if (rx_desc->status & E1000_RXD_STAT_IPCS) {
4391		/* Did it pass? */
4392		if (!(rx_desc->errors & E1000_RXD_ERR_IPE)) {
4393			/* IP Checksum Good */
4394			mp->m_pkthdr.csum_flags = CSUM_IP_CHECKED;
4395			mp->m_pkthdr.csum_flags |= CSUM_IP_VALID;
4396
4397		} else {
4398			mp->m_pkthdr.csum_flags = 0;
4399		}
4400	}
4401
4402	if (rx_desc->status & E1000_RXD_STAT_TCPCS) {
4403		/* Did it pass? */
4404		if (!(rx_desc->errors & E1000_RXD_ERR_TCPE)) {
4405			mp->m_pkthdr.csum_flags |=
4406			(CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
4407			mp->m_pkthdr.csum_data = htons(0xffff);
4408		}
4409	}
4410}
4411
4412
4413static void
4414em_enable_vlans(struct adapter *adapter)
4415{
4416	uint32_t ctrl;
4417
4418	E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);
4419
4420	ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
4421	ctrl |= E1000_CTRL_VME;
4422	E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
4423}
4424
4425static void
4426em_enable_intr(struct adapter *adapter)
4427{
4428	E1000_WRITE_REG(&adapter->hw, E1000_IMS,
4429	    (IMS_ENABLE_MASK));
4430}
4431
4432static void
4433em_disable_intr(struct adapter *adapter)
4434{
4435	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
4436}
4437
4438/*
4439 * Bit of a misnomer, what this really means is
4440 * to enable OS management of the system... aka
4441 * to disable special hardware management features
4442 */
4443static void
4444em_init_manageability(struct adapter *adapter)
4445{
4446	/* A shared code workaround */
4447#define E1000_82542_MANC2H E1000_MANC2H
4448	if (adapter->has_manage) {
4449		int manc2h = E1000_READ_REG(&adapter->hw, E1000_MANC2H);
4450		int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
4451
4452		/* disable hardware interception of ARP */
4453		manc &= ~(E1000_MANC_ARP_EN);
4454
4455                /* enable receiving management packets to the host */
4456                if (adapter->hw.mac.type >= e1000_82571) {
4457			manc |= E1000_MANC_EN_MNG2HOST;
4458#define E1000_MNG2HOST_PORT_623 (1 << 5)
4459#define E1000_MNG2HOST_PORT_664 (1 << 6)
4460			manc2h |= E1000_MNG2HOST_PORT_623;
4461			manc2h |= E1000_MNG2HOST_PORT_664;
4462			E1000_WRITE_REG(&adapter->hw, E1000_MANC2H, manc2h);
4463		}
4464
4465		E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
4466	}
4467}
4468
4469/*
4470 * Give control back to hardware management
4471 * controller if there is one.
4472 */
4473static void
4474em_release_manageability(struct adapter *adapter)
4475{
4476	if (adapter->has_manage) {
4477		int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
4478
4479		/* re-enable hardware interception of ARP */
4480		manc |= E1000_MANC_ARP_EN;
4481
4482		if (adapter->hw.mac.type >= e1000_82571)
4483			manc &= ~E1000_MANC_EN_MNG2HOST;
4484
4485		E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
4486	}
4487}
4488
4489/*
4490 * em_get_hw_control sets {CTRL_EXT|FWSM}:DRV_LOAD bit.
4491 * For ASF and Pass Through versions of f/w this means that
4492 * the driver is loaded. For AMT version (only with 82573)
4493 * of the f/w this means that the network i/f is open.
4494 *
4495 */
4496static void
4497em_get_hw_control(struct adapter *adapter)
4498{
4499	u32 ctrl_ext, swsm;
4500
4501	/* Let firmware know the driver has taken over */
4502	switch (adapter->hw.mac.type) {
4503	case e1000_82573:
4504		swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM);
4505		E1000_WRITE_REG(&adapter->hw, E1000_SWSM,
4506		    swsm | E1000_SWSM_DRV_LOAD);
4507		break;
4508	case e1000_82571:
4509	case e1000_82572:
4510	case e1000_80003es2lan:
4511	case e1000_ich8lan:
4512	case e1000_ich9lan:
4513		ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4514		E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
4515		    ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
4516		break;
4517	default:
4518		break;
4519	}
4520}
4521
/*
 * em_release_hw_control resets the {CTRL_EXT|FWSM}:DRV_LOAD bit.
 * For ASF and Pass Through versions of f/w this means that the
 * driver is no longer loaded. For AMT versions (only with 82573)
 * of the f/w this means that the network i/f is closed.
 *
 */
4529static void
4530em_release_hw_control(struct adapter *adapter)
4531{
4532	u32 ctrl_ext, swsm;
4533
4534	/* Let firmware taken over control of h/w */
4535	switch (adapter->hw.mac.type) {
4536	case e1000_82573:
4537		swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM);
4538		E1000_WRITE_REG(&adapter->hw, E1000_SWSM,
4539		    swsm & ~E1000_SWSM_DRV_LOAD);
4540		break;
4541	case e1000_82571:
4542	case e1000_82572:
4543	case e1000_80003es2lan:
4544	case e1000_ich8lan:
4545	case e1000_ich9lan:
4546		ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4547		E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
4548		    ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
4549		break;
4550	default:
4551		break;
4552
4553	}
4554}
4555
4556static int
4557em_is_valid_ether_addr(uint8_t *addr)
4558{
4559	char zero_addr[6] = { 0, 0, 0, 0, 0, 0 };
4560
4561	if ((addr[0] & 1) || (!bcmp(addr, zero_addr, ETHER_ADDR_LEN))) {
4562		return (FALSE);
4563	}
4564
4565	return (TRUE);
4566}
4567
4568/*
4569 * NOTE: the following routines using the e1000
4570 * 	naming style are provided to the shared
4571 *	code which expects that rather than 'em'
4572 */
4573
4574void
4575e1000_write_pci_cfg(struct e1000_hw *hw, uint32_t reg, uint16_t *value)
4576{
4577	pci_write_config(((struct e1000_osdep *)hw->back)->dev, reg, *value, 2);
4578}
4579
4580void
4581e1000_read_pci_cfg(struct e1000_hw *hw, uint32_t reg, uint16_t *value)
4582{
4583	*value = pci_read_config(((struct e1000_osdep *)hw->back)->dev, reg, 2);
4584}
4585
4586void
4587e1000_pci_set_mwi(struct e1000_hw *hw)
4588{
4589	pci_write_config(((struct e1000_osdep *)hw->back)->dev, PCIR_COMMAND,
4590	    (hw->bus.pci_cmd_word | CMD_MEM_WRT_INVALIDATE), 2);
4591}
4592
4593void
4594e1000_pci_clear_mwi(struct e1000_hw *hw)
4595{
4596	pci_write_config(((struct e1000_osdep *)hw->back)->dev, PCIR_COMMAND,
4597	    (hw->bus.pci_cmd_word & ~CMD_MEM_WRT_INVALIDATE), 2);
4598}
4599
4600/*
4601 * Read the PCI Express capabilities
4602 */
4603int32_t
4604e1000_read_pcie_cap_reg(struct e1000_hw *hw, uint32_t reg, uint16_t *value)
4605{
4606	int32_t		error = E1000_SUCCESS;
4607	uint16_t	cap_off;
4608
4609	switch (hw->mac.type) {
4610
4611		case e1000_82571:
4612		case e1000_82572:
4613		case e1000_82573:
4614		case e1000_80003es2lan:
4615			cap_off = 0xE0;
4616			e1000_read_pci_cfg(hw, cap_off + reg, value);
4617			break;
4618		default:
4619			error = ~E1000_NOT_IMPLEMENTED;
4620			break;
4621	}
4622
4623	return (error);
4624}
4625
4626int32_t
4627e1000_alloc_zeroed_dev_spec_struct(struct e1000_hw *hw, uint32_t size)
4628{
4629	int32_t error = 0;
4630
4631	hw->dev_spec = malloc(size, M_DEVBUF, M_NOWAIT | M_ZERO);
4632	if (hw->dev_spec == NULL)
4633		error = ENOMEM;
4634
4635	return (error);
4636}
4637
4638void
4639e1000_free_dev_spec_struct(struct e1000_hw *hw)
4640{
4641	if (hw->dev_spec != NULL)
4642		free(hw->dev_spec, M_DEVBUF);
4643	return;
4644}
4645
4646/*
4647 * Enable PCI Wake On Lan capability
4648 */
4649void
4650em_enable_wakeup(device_t dev)
4651{
4652	u16     cap, status;
4653	u8      id;
4654
4655	/* First find the capabilities pointer*/
4656	cap = pci_read_config(dev, PCIR_CAP_PTR, 2);
4657	/* Read the PM Capabilities */
4658	id = pci_read_config(dev, cap, 1);
4659	if (id != PCIY_PMG)     /* Something wrong */
4660		return;
4661	/* OK, we have the power capabilities, so
4662	   now get the status register */
4663	cap += PCIR_POWER_STATUS;
4664	status = pci_read_config(dev, cap, 2);
4665	status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE;
4666	pci_write_config(dev, cap, status, 2);
4667	return;
4668}
4669
4670
4671/*********************************************************************
4672* 82544 Coexistence issue workaround.
4673*    There are 2 issues.
4674*       1. Transmit Hang issue.
4675*    To detect this issue, following equation can be used...
4676*	  SIZE[3:0] + ADDR[2:0] = SUM[3:0].
4677*	  If SUM[3:0] is in between 1 to 4, we will have this issue.
4678*
4679*       2. DAC issue.
4680*    To detect this issue, following equation can be used...
4681*	  SIZE[3:0] + ADDR[2:0] = SUM[3:0].
4682*	  If SUM[3:0] is in between 9 to c, we will have this issue.
4683*
4684*
4685*    WORKAROUND:
4686*	  Make sure we do not have ending address
4687*	  as 1,2,3,4(Hang) or 9,a,b,c (DAC)
4688*
4689*************************************************************************/
static uint32_t
em_fill_descriptors (bus_addr_t address, uint32_t length,
		PDESC_ARRAY desc_array)
{
	/* Since issue is sensitive to length and address.*/
	/* Let us first check the address...*/
	uint32_t safe_terminator;
	/* Buffers of 4 bytes or less can never hit the errata window */
	if (length <= 4) {
		desc_array->descriptor[0].address = address;
		desc_array->descriptor[0].length = length;
		desc_array->elements = 1;
		return (desc_array->elements);
	}
	/*
	 * SUM[3:0] = ADDR[2:0] + SIZE[3:0]; the hang/DAC errata fire
	 * when the buffer's ending nibble lands in 1-4 or 9-C.
	 */
	safe_terminator = (uint32_t)((((uint32_t)address & 0x7) +
	    (length & 0xF)) & 0xF);
	/* if it does not fall between 0x1 to 0x4 and 0x9 to 0xC then return */
	if (safe_terminator == 0   ||
	(safe_terminator > 4   &&
	safe_terminator < 9)   ||
	(safe_terminator > 0xC &&
	safe_terminator <= 0xF)) {
		desc_array->descriptor[0].address = address;
		desc_array->descriptor[0].length = length;
		desc_array->elements = 1;
		return (desc_array->elements);
	}

	/*
	 * Unsafe ending address: split the buffer so the final
	 * descriptor is a 4-byte tail, which moves the ending address
	 * out of the problematic range.
	 */
	desc_array->descriptor[0].address = address;
	desc_array->descriptor[0].length = length - 4;
	desc_array->descriptor[1].address = address + (length - 4);
	desc_array->descriptor[1].length = 4;
	desc_array->elements = 2;
	return (desc_array->elements);
}
4724
4725/**********************************************************************
4726 *
4727 *  Update the board statistics counters.
4728 *
4729 **********************************************************************/
static void
em_update_stats_counters(struct adapter *adapter)
{
	struct ifnet   *ifp;

	/*
	 * Symbol/sequence error counters are only meaningful on copper
	 * links or when a fiber/SerDes link is actually up.
	 */
	if(adapter->hw.media_type == e1000_media_type_copper ||
	   (E1000_READ_REG(&adapter->hw, E1000_STATUS) & E1000_STATUS_LU)) {
		adapter->stats.symerrs += E1000_READ_REG(&adapter->hw, E1000_SYMERRS);
		adapter->stats.sec += E1000_READ_REG(&adapter->hw, E1000_SEC);
	}
	/* Hardware statistics registers are clear-on-read; accumulate */
	adapter->stats.crcerrs += E1000_READ_REG(&adapter->hw, E1000_CRCERRS);
	adapter->stats.mpc += E1000_READ_REG(&adapter->hw, E1000_MPC);
	adapter->stats.scc += E1000_READ_REG(&adapter->hw, E1000_SCC);
	adapter->stats.ecol += E1000_READ_REG(&adapter->hw, E1000_ECOL);

	adapter->stats.mcc += E1000_READ_REG(&adapter->hw, E1000_MCC);
	adapter->stats.latecol += E1000_READ_REG(&adapter->hw, E1000_LATECOL);
	adapter->stats.colc += E1000_READ_REG(&adapter->hw, E1000_COLC);
	adapter->stats.dc += E1000_READ_REG(&adapter->hw, E1000_DC);
	adapter->stats.rlec += E1000_READ_REG(&adapter->hw, E1000_RLEC);
	adapter->stats.xonrxc += E1000_READ_REG(&adapter->hw, E1000_XONRXC);
	adapter->stats.xontxc += E1000_READ_REG(&adapter->hw, E1000_XONTXC);
	adapter->stats.xoffrxc += E1000_READ_REG(&adapter->hw, E1000_XOFFRXC);
	adapter->stats.xofftxc += E1000_READ_REG(&adapter->hw, E1000_XOFFTXC);
	adapter->stats.fcruc += E1000_READ_REG(&adapter->hw, E1000_FCRUC);
	adapter->stats.prc64 += E1000_READ_REG(&adapter->hw, E1000_PRC64);
	adapter->stats.prc127 += E1000_READ_REG(&adapter->hw, E1000_PRC127);
	adapter->stats.prc255 += E1000_READ_REG(&adapter->hw, E1000_PRC255);
	adapter->stats.prc511 += E1000_READ_REG(&adapter->hw, E1000_PRC511);
	adapter->stats.prc1023 += E1000_READ_REG(&adapter->hw, E1000_PRC1023);
	adapter->stats.prc1522 += E1000_READ_REG(&adapter->hw, E1000_PRC1522);
	adapter->stats.gprc += E1000_READ_REG(&adapter->hw, E1000_GPRC);
	adapter->stats.bprc += E1000_READ_REG(&adapter->hw, E1000_BPRC);
	adapter->stats.mprc += E1000_READ_REG(&adapter->hw, E1000_MPRC);
	adapter->stats.gptc += E1000_READ_REG(&adapter->hw, E1000_GPTC);

	/* For the 64-bit byte counters the low dword must be read first. */
	/* Both registers clear on the read of the high dword */

	adapter->stats.gorcl += E1000_READ_REG(&adapter->hw, E1000_GORCL);
	adapter->stats.gorch += E1000_READ_REG(&adapter->hw, E1000_GORCH);
	adapter->stats.gotcl += E1000_READ_REG(&adapter->hw, E1000_GOTCL);
	adapter->stats.gotch += E1000_READ_REG(&adapter->hw, E1000_GOTCH);

	adapter->stats.rnbc += E1000_READ_REG(&adapter->hw, E1000_RNBC);
	adapter->stats.ruc += E1000_READ_REG(&adapter->hw, E1000_RUC);
	adapter->stats.rfc += E1000_READ_REG(&adapter->hw, E1000_RFC);
	adapter->stats.roc += E1000_READ_REG(&adapter->hw, E1000_ROC);
	adapter->stats.rjc += E1000_READ_REG(&adapter->hw, E1000_RJC);

	adapter->stats.torl += E1000_READ_REG(&adapter->hw, E1000_TORL);
	adapter->stats.torh += E1000_READ_REG(&adapter->hw, E1000_TORH);
	adapter->stats.totl += E1000_READ_REG(&adapter->hw, E1000_TOTL);
	adapter->stats.toth += E1000_READ_REG(&adapter->hw, E1000_TOTH);

	adapter->stats.tpr += E1000_READ_REG(&adapter->hw, E1000_TPR);
	adapter->stats.tpt += E1000_READ_REG(&adapter->hw, E1000_TPT);
	adapter->stats.ptc64 += E1000_READ_REG(&adapter->hw, E1000_PTC64);
	adapter->stats.ptc127 += E1000_READ_REG(&adapter->hw, E1000_PTC127);
	adapter->stats.ptc255 += E1000_READ_REG(&adapter->hw, E1000_PTC255);
	adapter->stats.ptc511 += E1000_READ_REG(&adapter->hw, E1000_PTC511);
	adapter->stats.ptc1023 += E1000_READ_REG(&adapter->hw, E1000_PTC1023);
	adapter->stats.ptc1522 += E1000_READ_REG(&adapter->hw, E1000_PTC1522);
	adapter->stats.mptc += E1000_READ_REG(&adapter->hw, E1000_MPTC);
	adapter->stats.bptc += E1000_READ_REG(&adapter->hw, E1000_BPTC);

	/* Counters present only on 82543 and newer parts */
	if (adapter->hw.mac.type >= e1000_82543) {
		adapter->stats.algnerrc +=
		E1000_READ_REG(&adapter->hw, E1000_ALGNERRC);
		adapter->stats.rxerrc +=
		E1000_READ_REG(&adapter->hw, E1000_RXERRC);
		adapter->stats.tncrs +=
		E1000_READ_REG(&adapter->hw, E1000_TNCRS);
		adapter->stats.cexterr +=
		E1000_READ_REG(&adapter->hw, E1000_CEXTERR);
		adapter->stats.tsctc +=
		E1000_READ_REG(&adapter->hw, E1000_TSCTC);
		adapter->stats.tsctfc +=
		E1000_READ_REG(&adapter->hw, E1000_TSCTFC);
	}
	ifp = adapter->ifp;

	/* Export aggregate counters through the ifnet statistics */
	ifp->if_collisions = adapter->stats.colc;

	/* Rx Errors */
	ifp->if_ierrors = adapter->dropped_pkts + adapter->stats.rxerrc +
	    adapter->stats.crcerrs + adapter->stats.algnerrc +
	    adapter->stats.ruc + adapter->stats.roc +
	    adapter->stats.mpc + adapter->stats.cexterr;

	/* Tx Errors */
	ifp->if_oerrors = adapter->stats.ecol +
	    adapter->stats.latecol + adapter->watchdog_events;
}
4824
4825
4826/**********************************************************************
4827 *
4828 *  This routine is called only when em_display_debug_stats is enabled.
4829 *  This routine provides a way to take a look at important statistics
4830 *  maintained by the driver and hardware.
4831 *
4832 **********************************************************************/
4833static void
4834em_print_debug_info(struct adapter *adapter)
4835{
4836	device_t dev = adapter->dev;
4837	uint8_t *hw_addr = adapter->hw.hw_addr;
4838
4839	device_printf(dev, "Adapter hardware address = %p \n", hw_addr);
4840	device_printf(dev, "CTRL = 0x%x RCTL = 0x%x \n",
4841	    E1000_READ_REG(&adapter->hw, E1000_CTRL),
4842	    E1000_READ_REG(&adapter->hw, E1000_RCTL));
4843	device_printf(dev, "Packet buffer = Tx=%dk Rx=%dk \n",
4844	    ((E1000_READ_REG(&adapter->hw, E1000_PBA) & 0xffff0000) >> 16),\
4845	    (E1000_READ_REG(&adapter->hw, E1000_PBA) & 0xffff) );
4846	device_printf(dev, "Flow control watermarks high = %d low = %d\n",
4847	    adapter->hw.mac.fc_high_water,
4848	    adapter->hw.mac.fc_low_water);
4849	device_printf(dev, "tx_int_delay = %d, tx_abs_int_delay = %d\n",
4850	    E1000_READ_REG(&adapter->hw, E1000_TIDV),
4851	    E1000_READ_REG(&adapter->hw, E1000_TADV));
4852	device_printf(dev, "rx_int_delay = %d, rx_abs_int_delay = %d\n",
4853	    E1000_READ_REG(&adapter->hw, E1000_RDTR),
4854	    E1000_READ_REG(&adapter->hw, E1000_RADV));
4855	device_printf(dev, "fifo workaround = %lld, fifo_reset_count = %lld\n",
4856	    (long long)adapter->tx_fifo_wrk_cnt,
4857	    (long long)adapter->tx_fifo_reset_cnt);
4858	device_printf(dev, "hw tdh = %d, hw tdt = %d\n",
4859	    E1000_READ_REG(&adapter->hw, E1000_TDH),
4860	    E1000_READ_REG(&adapter->hw, E1000_TDT));
4861	device_printf(dev, "hw rdh = %d, hw rdt = %d\n",
4862	    E1000_READ_REG(&adapter->hw, E1000_RDH),
4863	    E1000_READ_REG(&adapter->hw, E1000_RDT));
4864	device_printf(dev, "Num Tx descriptors avail = %d\n",
4865	    adapter->num_tx_desc_avail);
4866	device_printf(dev, "Tx Descriptors not avail1 = %ld\n",
4867	    adapter->no_tx_desc_avail1);
4868	device_printf(dev, "Tx Descriptors not avail2 = %ld\n",
4869	    adapter->no_tx_desc_avail2);
4870	device_printf(dev, "Std mbuf failed = %ld\n",
4871	    adapter->mbuf_alloc_failed);
4872	device_printf(dev, "Std mbuf cluster failed = %ld\n",
4873	    adapter->mbuf_cluster_failed);
4874	device_printf(dev, "Driver dropped packets = %ld\n",
4875	    adapter->dropped_pkts);
4876	device_printf(dev, "Driver tx dma failure in encap = %ld\n",
4877		adapter->no_tx_dma_setup);
4878}
4879
4880static void
4881em_print_hw_stats(struct adapter *adapter)
4882{
4883	device_t dev = adapter->dev;
4884
4885	device_printf(dev, "Excessive collisions = %lld\n",
4886	    (long long)adapter->stats.ecol);
4887#if	(DEBUG_HW > 0)  /* Dont output these errors normally */
4888	device_printf(dev, "Symbol errors = %lld\n",
4889	    (long long)adapter->stats.symerrs);
4890#endif
4891	device_printf(dev, "Sequence errors = %lld\n",
4892	    (long long)adapter->stats.sec);
4893	device_printf(dev, "Defer count = %lld\n",
4894	    (long long)adapter->stats.dc);
4895	device_printf(dev, "Missed Packets = %lld\n",
4896	    (long long)adapter->stats.mpc);
4897	device_printf(dev, "Receive No Buffers = %lld\n",
4898	    (long long)adapter->stats.rnbc);
4899	/* RLEC is inaccurate on some hardware, calculate our own. */
4900	device_printf(dev, "Receive Length Errors = %lld\n",
4901	    ((long long)adapter->stats.roc + (long long)adapter->stats.ruc));
4902	device_printf(dev, "Receive errors = %lld\n",
4903	    (long long)adapter->stats.rxerrc);
4904	device_printf(dev, "Crc errors = %lld\n",
4905	    (long long)adapter->stats.crcerrs);
4906	device_printf(dev, "Alignment errors = %lld\n",
4907	    (long long)adapter->stats.algnerrc);
4908	device_printf(dev, "Carrier extension errors = %lld\n",
4909	    (long long)adapter->stats.cexterr);
4910	device_printf(dev, "RX overruns = %ld\n", adapter->rx_overruns);
4911	device_printf(dev, "watchdog timeouts = %ld\n",
4912	    adapter->watchdog_events);
4913	device_printf(dev, "XON Rcvd = %lld\n",
4914	    (long long)adapter->stats.xonrxc);
4915	device_printf(dev, "XON Xmtd = %lld\n",
4916	    (long long)adapter->stats.xontxc);
4917	device_printf(dev, "XOFF Rcvd = %lld\n",
4918	    (long long)adapter->stats.xoffrxc);
4919	device_printf(dev, "XOFF Xmtd = %lld\n",
4920	    (long long)adapter->stats.xofftxc);
4921	device_printf(dev, "Good Packets Rcvd = %lld\n",
4922	    (long long)adapter->stats.gprc);
4923	device_printf(dev, "Good Packets Xmtd = %lld\n",
4924	    (long long)adapter->stats.gptc);
4925	device_printf(dev, "TSO Contexts Xmtd = %lld\n",
4926	    (long long)adapter->stats.tsctc);
4927	device_printf(dev, "TSO Contexts Failed = %lld\n",
4928	    (long long)adapter->stats.tsctfc);
4929}
4930
4931static int
4932em_sysctl_debug_info(SYSCTL_HANDLER_ARGS)
4933{
4934	struct adapter *adapter;
4935	int error;
4936	int result;
4937
4938	result = -1;
4939	error = sysctl_handle_int(oidp, &result, 0, req);
4940
4941	if (error || !req->newptr)
4942		return (error);
4943
4944	if (result == 1) {
4945		adapter = (struct adapter *)arg1;
4946		em_print_debug_info(adapter);
4947	}
4948
4949	return (error);
4950}
4951
4952
4953static int
4954em_sysctl_stats(SYSCTL_HANDLER_ARGS)
4955{
4956	struct adapter *adapter;
4957	int error;
4958	int result;
4959
4960	result = -1;
4961	error = sysctl_handle_int(oidp, &result, 0, req);
4962
4963	if (error || !req->newptr)
4964		return (error);
4965
4966	if (result == 1) {
4967		adapter = (struct adapter *)arg1;
4968		em_print_hw_stats(adapter);
4969	}
4970
4971	return (error);
4972}
4973
4974static int
4975em_sysctl_int_delay(SYSCTL_HANDLER_ARGS)
4976{
4977	struct em_int_delay_info *info;
4978	struct adapter *adapter;
4979	uint32_t regval;
4980	int error;
4981	int usecs;
4982	int ticks;
4983
4984	info = (struct em_int_delay_info *)arg1;
4985	usecs = info->value;
4986	error = sysctl_handle_int(oidp, &usecs, 0, req);
4987	if (error != 0 || req->newptr == NULL)
4988		return (error);
4989	if (usecs < 0 || usecs > EM_TICKS_TO_USECS(65535))
4990		return (EINVAL);
4991	info->value = usecs;
4992	ticks = EM_USECS_TO_TICKS(usecs);
4993
4994	adapter = info->adapter;
4995
4996	EM_LOCK(adapter);
4997	regval = E1000_READ_OFFSET(&adapter->hw, info->offset);
4998	regval = (regval & ~0xffff) | (ticks & 0xffff);
4999	/* Handle a few special cases. */
5000	switch (info->offset) {
5001	case E1000_RDTR:
5002		break;
5003	case E1000_TIDV:
5004		if (ticks == 0) {
5005			adapter->txd_cmd &= ~E1000_TXD_CMD_IDE;
5006			/* Don't write 0 into the TIDV register. */
5007			regval++;
5008		} else
5009			if (adapter->hw.mac.type != e1000_82575)
5010				adapter->txd_cmd |= E1000_TXD_CMD_IDE;
5011		break;
5012	}
5013	E1000_WRITE_OFFSET(&adapter->hw, info->offset, regval);
5014	EM_UNLOCK(adapter);
5015	return (0);
5016}
5017
5018static void
5019em_add_int_delay_sysctl(struct adapter *adapter, const char *name,
5020	const char *description, struct em_int_delay_info *info,
5021	int offset, int value)
5022{
5023	info->adapter = adapter;
5024	info->offset = offset;
5025	info->value = value;
5026	SYSCTL_ADD_PROC(device_get_sysctl_ctx(adapter->dev),
5027	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
5028	    OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW,
5029	    info, 0, em_sysctl_int_delay, "I", description);
5030}
5031
5032#ifndef DEVICE_POLLING
5033static void
5034em_add_rx_process_limit(struct adapter *adapter, const char *name,
5035	const char *description, int *limit, int value)
5036{
5037	*limit = value;
5038	SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
5039	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
5040	    OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW, limit, value, description);
5041}
5042#endif
5043