/**************************************************************************

Copyright (c) 2001-2007, Intel Corporation
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

 1. Redistributions of source code must retain the above copyright notice,
    this list of conditions and the following disclaimer.

 2. Redistributions in binary form must reproduce the above copyright
    notice, this list of conditions and the following disclaimer in the
    documentation and/or other materials provided with the distribution.

 3. Neither the name of the Intel Corporation nor the names of its
    contributors may be used to endorse or promote products derived from
    this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.

***************************************************************************/

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: head/sys/dev/em/if_em.c 169248 2007-05-04 13:30:44Z rwatson $");

#ifdef HAVE_KERNEL_OPTION_HEADERS
#include "opt_device_polling.h"
#endif

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/bus.h>
#include <sys/endian.h>
#include <sys/kernel.h>
#include <sys/kthread.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/module.h>
#include <sys/rman.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/sysctl.h>
#include <sys/taskqueue.h>

#include <machine/bus.h>
#include <machine/resource.h>

#include <net/bpf.h>
#include <net/ethernet.h>
#include <net/if.h>
#include <net/if_arp.h>
#include <net/if_dl.h>
#include <net/if_media.h>

#include <net/if_types.h>
#include <net/if_vlan_var.h>

#include <netinet/in_systm.h>
#include <netinet/in.h>
#include <netinet/if_ether.h>
#include <netinet/ip.h>
#include <netinet/ip6.h>
#include <netinet/tcp.h>
#include <netinet/udp.h>

#include <machine/in_cksum.h>
#include <dev/pci/pcivar.h>
#include <dev/pci/pcireg.h>

#include "e1000_api.h"
#include "e1000_82575.h"
#include "if_em.h"

/*********************************************************************
 *  Set this to one to display debug statistics
 *********************************************************************/
int	em_display_debug_stats = 0;

/*********************************************************************
 *  Driver version:
 *********************************************************************/
char em_driver_version[] = "Version - 6.5.0";


/*********************************************************************
 *  PCI Device ID Table
 *
 *  Used by probe to select devices to load on
 *  Last field stores an index into em_strings
 *  Last entry must be all 0s
 *
 *  { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
 *********************************************************************/

static em_vendor_info_t em_vendor_info_array[] =
{
	/* Intel(R) PRO/1000 Network Connection */
	{ 0x8086, E1000_DEV_ID_82540EM,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82540EM_LOM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82540EP,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82540EP_LOM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82540EP_LP,	PCI_ANY_ID, PCI_ANY_ID, 0},

	{ 0x8086, E1000_DEV_ID_82541EI,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82541ER,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82541ER_LOM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82541EI_MOBILE,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82541GI,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82541GI_LF,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82541GI_MOBILE,	PCI_ANY_ID, PCI_ANY_ID, 0},

	{ 0x8086, E1000_DEV_ID_82542,		PCI_ANY_ID, PCI_ANY_ID, 0},

	{ 0x8086, E1000_DEV_ID_82543GC_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82543GC_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},

	{ 0x8086, E1000_DEV_ID_82544EI_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82544EI_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82544GC_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82544GC_LOM,	PCI_ANY_ID, PCI_ANY_ID, 0},

	{ 0x8086, E1000_DEV_ID_82545EM_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82545EM_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82545GM_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82545GM_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82545GM_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},

	{ 0x8086, E1000_DEV_ID_82546EB_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82546EB_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82546EB_QUAD_COPPER, PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82546GB_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82546GB_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82546GB_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82546GB_PCIE,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82546GB_QUAD_COPPER, PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82546GB_QUAD_COPPER_KSP3,
						PCI_ANY_ID, PCI_ANY_ID, 0},

	{ 0x8086, E1000_DEV_ID_82547EI,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82547EI_MOBILE,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82547GI,		PCI_ANY_ID, PCI_ANY_ID, 0},

	{ 0x8086, E1000_DEV_ID_82571EB_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER_LP,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_FIBER,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82572EI_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82572EI_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82572EI_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82572EI,		PCI_ANY_ID, PCI_ANY_ID, 0},

	{ 0x8086, E1000_DEV_ID_82573E,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82573E_IAMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82573L,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_SPT,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_SPT,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_DPT,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_DPT,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IGP_M_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IGP_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IGP_C,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IFE,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IFE_GT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IFE_G,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IGP_M,	PCI_ANY_ID, PCI_ANY_ID, 0},

	{ 0x8086, E1000_DEV_ID_ICH9_IGP_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IGP_C,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IFE,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IFE_GT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IFE_G,	PCI_ANY_ID, PCI_ANY_ID, 0},

	{ 0x8086, E1000_DEV_ID_82575EB_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82575EB_FIBER_SERDES,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82575EM_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82575EM_FIBER_SERDES,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82575GB_QUAD_COPPER,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	/* required last entry */
	{ 0, 0, 0, 0, 0}
};

/*********************************************************************
 *  Table of branding strings for all supported NICs.
 *********************************************************************/

static char *em_strings[] = {
	"Intel(R) PRO/1000 Network Connection"
};

/*********************************************************************
 *  Function prototypes
 *********************************************************************/
static int	em_probe(device_t);
static int	em_attach(device_t);
static int	em_detach(device_t);
static int	em_shutdown(device_t);
static int	em_suspend(device_t);
static int	em_resume(device_t);
static void	em_start(struct ifnet *);
static void	em_start_locked(struct ifnet *ifp);
static int	em_ioctl(struct ifnet *, u_long, caddr_t);
static void	em_watchdog(struct adapter *);
static void	em_init(void *);
static void	em_init_locked(struct adapter *);
static void	em_stop(void *);
static void	em_media_status(struct ifnet *, struct ifmediareq *);
static int	em_media_change(struct ifnet *);
static void	em_identify_hardware(struct adapter *);
static int	em_allocate_pci_resources(struct adapter *);
static int	em_allocate_intr(struct adapter *);
static void	em_free_intr(struct adapter *);
static void	em_free_pci_resources(struct adapter *);
static void	em_local_timer(void *);
static int	em_hardware_init(struct adapter *);
static void	em_setup_interface(device_t, struct adapter *);
static int	em_setup_transmit_structures(struct adapter *);
static void	em_initialize_transmit_unit(struct adapter *);
static int	em_setup_receive_structures(struct adapter *);
static void	em_initialize_receive_unit(struct adapter *);
static void	em_enable_intr(struct adapter *);
static void	em_disable_intr(struct adapter *);
static void	em_free_transmit_structures(struct adapter *);
static void	em_free_receive_structures(struct adapter *);
static void	em_update_stats_counters(struct adapter *);
static void	em_txeof(struct adapter *);
static int	em_allocate_receive_structures(struct adapter *);
static int	em_allocate_transmit_structures(struct adapter *);
static int	em_rxeof(struct adapter *, int);
#ifndef __NO_STRICT_ALIGNMENT
static int	em_fixup_rx(struct adapter *);
#endif
static void	em_receive_checksum(struct adapter *, struct e1000_rx_desc *,
		    struct mbuf *);
static void	em_transmit_checksum_setup(struct adapter *, struct mbuf *,
		    uint32_t *, uint32_t *);
static boolean_t em_tso_setup(struct adapter *, struct mbuf *, uint32_t *,
		    uint32_t *);
static boolean_t em_tso_adv_setup(struct adapter *, struct mbuf *, uint32_t *);
static void	em_set_promisc(struct adapter *);
static void	em_disable_promisc(struct adapter *);
static void	em_set_multi(struct adapter *);
static void	em_print_hw_stats(struct adapter *);
static void	em_update_link_status(struct adapter *);
static int	em_get_buf(struct adapter *, int);
static void	em_enable_vlans(struct adapter *);
static int	em_encap(struct adapter *, struct mbuf **);
static int	em_adv_encap(struct adapter *, struct mbuf **);
static void	em_tx_adv_ctx_setup(struct adapter *, struct mbuf *);
static void	em_smartspeed(struct adapter *);
static int	em_82547_fifo_workaround(struct adapter *, int);
static void	em_82547_update_fifo_head(struct adapter *, int);
static int	em_82547_tx_fifo_reset(struct adapter *);
static void	em_82547_move_tail(void *);
static int	em_dma_malloc(struct adapter *, bus_size_t,
		    struct em_dma_alloc *, int);
static void	em_dma_free(struct adapter *, struct em_dma_alloc *);
static void	em_print_debug_info(struct adapter *);
static int	em_is_valid_ether_addr(uint8_t *);
static int	em_sysctl_stats(SYSCTL_HANDLER_ARGS);
static int	em_sysctl_debug_info(SYSCTL_HANDLER_ARGS);
static uint32_t	em_fill_descriptors(bus_addr_t address, uint32_t length,
		    PDESC_ARRAY desc_array);
static int	em_sysctl_int_delay(SYSCTL_HANDLER_ARGS);
static void	em_add_int_delay_sysctl(struct adapter *, const char *,
		    const char *, struct em_int_delay_info *, int, int);
/* Management and WOL Support */
static void	em_init_manageability(struct adapter *);
static void	em_release_manageability(struct adapter *);
static void	em_get_hw_control(struct adapter *);
static void	em_release_hw_control(struct adapter *);
static void	em_enable_wakeup(device_t);

#ifdef DEVICE_POLLING
static poll_handler_t em_poll;
static void	em_intr(void *);
#else
static int	em_intr_fast(void *);
static void	em_add_rx_process_limit(struct adapter *, const char *,
		    const char *, int *, int);
static void	em_handle_rxtx(void *context, int pending);
static void	em_handle_link(void *context, int pending);
#endif

/*********************************************************************
 *  FreeBSD Device Interface Entry Points
 *********************************************************************/

static device_method_t em_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe, em_probe),
	DEVMETHOD(device_attach, em_attach),
	DEVMETHOD(device_detach, em_detach),
	DEVMETHOD(device_shutdown, em_shutdown),
	DEVMETHOD(device_suspend, em_suspend),
	DEVMETHOD(device_resume, em_resume),
	{0, 0}
};

static driver_t em_driver = {
	"em", em_methods, sizeof(struct adapter),
};

static devclass_t em_devclass;
DRIVER_MODULE(em, pci, em_driver, em_devclass, 0, 0);
MODULE_DEPEND(em, pci, 1, 1, 1);
MODULE_DEPEND(em, ether, 1, 1, 1);

/*********************************************************************
 *  Tunable default values.
 *********************************************************************/
#define EM_TICKS_TO_USECS(ticks)	((1024 * (ticks) + 500) / 1000)
#define EM_USECS_TO_TICKS(usecs)	((1000 * (usecs) + 512) / 1024)
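/*
 * Note: the interrupt delay registers count in 1.024 usec units, which
 * is why the conversions above scale by 1024/1000 with rounding; e.g. a
 * register value of 64 ticks reads back as
 * (1024 * 64 + 500) / 1000 = 66 usec.
 */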
#define M_TSO_LEN			66

static int em_tx_int_delay_dflt = EM_TICKS_TO_USECS(EM_TIDV);
static int em_rx_int_delay_dflt = EM_TICKS_TO_USECS(EM_RDTR);
static int em_tx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_TADV);
static int em_rx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_RADV);
static int em_rxd = EM_DEFAULT_RXD;
static int em_txd = EM_DEFAULT_TXD;
static int em_smart_pwr_down = FALSE;

TUNABLE_INT("hw.em.tx_int_delay", &em_tx_int_delay_dflt);
TUNABLE_INT("hw.em.rx_int_delay", &em_rx_int_delay_dflt);
TUNABLE_INT("hw.em.tx_abs_int_delay", &em_tx_abs_int_delay_dflt);
TUNABLE_INT("hw.em.rx_abs_int_delay", &em_rx_abs_int_delay_dflt);
TUNABLE_INT("hw.em.rxd", &em_rxd);
TUNABLE_INT("hw.em.txd", &em_txd);
TUNABLE_INT("hw.em.smart_pwr_down", &em_smart_pwr_down);
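/*
 * The tunables above are read from the kernel environment at module
 * load, so they can be preset from loader.conf; for example (values
 * illustrative only):
 *   hw.em.rxd="1024"
 *   hw.em.rx_int_delay="0"
 */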
#ifndef DEVICE_POLLING
/* How many packets rxeof tries to clean at a time */
static int em_rx_process_limit = 100;
TUNABLE_INT("hw.em.rx_process_limit", &em_rx_process_limit);
#endif
/* Global used in WOL setup with multiport cards */
static int global_quad_port_a = 0;

/*********************************************************************
 *  Device identification routine
 *
 *  em_probe determines if the driver should be loaded on an adapter
 *  based on that adapter's PCI vendor/device ID.
 *
 *  return BUS_PROBE_DEFAULT on success, positive on failure
 *********************************************************************/

static int
em_probe(device_t dev)
{
	char		adapter_name[60];
	uint16_t	pci_vendor_id = 0;
	uint16_t	pci_device_id = 0;
	uint16_t	pci_subvendor_id = 0;
	uint16_t	pci_subdevice_id = 0;
	em_vendor_info_t *ent;

	INIT_DEBUGOUT("em_probe: begin");

	pci_vendor_id = pci_get_vendor(dev);
	if (pci_vendor_id != EM_VENDOR_ID)
		return (ENXIO);

	pci_device_id = pci_get_device(dev);
	pci_subvendor_id = pci_get_subvendor(dev);
	pci_subdevice_id = pci_get_subdevice(dev);

	ent = em_vendor_info_array;
	while (ent->vendor_id != 0) {
		if ((pci_vendor_id == ent->vendor_id) &&
		    (pci_device_id == ent->device_id) &&
		    ((pci_subvendor_id == ent->subvendor_id) ||
		    (ent->subvendor_id == PCI_ANY_ID)) &&
		    ((pci_subdevice_id == ent->subdevice_id) ||
		    (ent->subdevice_id == PCI_ANY_ID))) {
			sprintf(adapter_name, "%s %s",
				em_strings[ent->index],
				em_driver_version);
			device_set_desc_copy(dev, adapter_name);
			return (BUS_PROBE_DEFAULT);
		}
		ent++;
	}

	return (ENXIO);
}

/*********************************************************************
 *  Device initialization routine
 *
 *  The attach entry point is called when the driver is being loaded.
 *  This routine identifies the type of hardware, allocates all resources
 *  and initializes the hardware.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/

static int
em_attach(device_t dev)
{
	struct adapter	*adapter;
	int		tsize, rsize;
	int		error = 0;
	u16		eeprom_data, device_id;

	INIT_DEBUGOUT("em_attach: begin");

	adapter = device_get_softc(dev);
	adapter->dev = adapter->osdep.dev = dev;
	EM_LOCK_INIT(adapter, device_get_nameunit(dev));

	/* SYSCTL stuff */
	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
	    OID_AUTO, "debug_info", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
	    em_sysctl_debug_info, "I", "Debug Information");

	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
	    OID_AUTO, "stats", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
	    em_sysctl_stats, "I", "Statistics");

	callout_init_mtx(&adapter->timer, &adapter->mtx, 0);
	callout_init_mtx(&adapter->tx_fifo_timer, &adapter->mtx, 0);

	/* Determine hardware revision */
	em_identify_hardware(adapter);

	/* Setup PCI resources */
	if (em_allocate_pci_resources(adapter)) {
		device_printf(dev, "Allocation of PCI resources failed\n");
		error = ENXIO;
		goto err_pci;
	}

	/*
	** For ICH8 and family we need to
	** map the flash memory, and this
	** must happen after the MAC is
	** identified
	*/
	if ((adapter->hw.mac.type == e1000_ich8lan) ||
	    (adapter->hw.mac.type == e1000_ich9lan)) {
		int rid = EM_BAR_TYPE_FLASH;
		adapter->flash_mem = bus_alloc_resource_any(dev,
		    SYS_RES_MEMORY, &rid, RF_ACTIVE);
		/* This is used in the shared code */
		adapter->hw.flash_address = (u8 *)adapter->flash_mem;
		adapter->osdep.flash_bus_space_tag =
		    rman_get_bustag(adapter->flash_mem);
		adapter->osdep.flash_bus_space_handle =
		    rman_get_bushandle(adapter->flash_mem);
	}

	/* Do Shared Code initialization */
	if (e1000_setup_init_funcs(&adapter->hw, TRUE)) {
		device_printf(dev, "Setup of Shared code failed\n");
		error = ENXIO;
		goto err_pci;
	}

	e1000_get_bus_info(&adapter->hw);

	/* Set up some sysctls for the tunable interrupt delays */
	em_add_int_delay_sysctl(adapter, "rx_int_delay",
	    "receive interrupt delay in usecs", &adapter->rx_int_delay,
	    E1000_REGISTER(&adapter->hw, E1000_RDTR), em_rx_int_delay_dflt);
	em_add_int_delay_sysctl(adapter, "tx_int_delay",
	    "transmit interrupt delay in usecs", &adapter->tx_int_delay,
	    E1000_REGISTER(&adapter->hw, E1000_TIDV), em_tx_int_delay_dflt);
	if (adapter->hw.mac.type >= e1000_82540) {
		em_add_int_delay_sysctl(adapter, "rx_abs_int_delay",
		    "receive interrupt delay limit in usecs",
		    &adapter->rx_abs_int_delay,
		    E1000_REGISTER(&adapter->hw, E1000_RADV),
		    em_rx_abs_int_delay_dflt);
		em_add_int_delay_sysctl(adapter, "tx_abs_int_delay",
		    "transmit interrupt delay limit in usecs",
		    &adapter->tx_abs_int_delay,
		    E1000_REGISTER(&adapter->hw, E1000_TADV),
		    em_tx_abs_int_delay_dflt);
	}

#ifndef DEVICE_POLLING
	/* Sysctls for limiting the amount of work done in the taskqueue */
	em_add_rx_process_limit(adapter, "rx_processing_limit",
	    "max number of rx packets to process", &adapter->rx_process_limit,
	    em_rx_process_limit);
#endif

	/*
	 * Validate the number of transmit and receive descriptors. It
	 * must not exceed the hardware maximum and must be a multiple
	 * of EM_DBA_ALIGN.
	 */
	if (((em_txd * sizeof(struct e1000_tx_desc)) % EM_DBA_ALIGN) != 0 ||
	    (adapter->hw.mac.type >= e1000_82544 && em_txd > EM_MAX_TXD) ||
	    (adapter->hw.mac.type < e1000_82544 && em_txd > EM_MAX_TXD_82543) ||
	    (em_txd < EM_MIN_TXD)) {
		device_printf(dev, "Using %d TX descriptors instead of %d!\n",
		    EM_DEFAULT_TXD, em_txd);
		adapter->num_tx_desc = EM_DEFAULT_TXD;
	} else
		adapter->num_tx_desc = em_txd;
	if (((em_rxd * sizeof(struct e1000_rx_desc)) % EM_DBA_ALIGN) != 0 ||
	    (adapter->hw.mac.type >= e1000_82544 && em_rxd > EM_MAX_RXD) ||
	    (adapter->hw.mac.type < e1000_82544 && em_rxd > EM_MAX_RXD_82543) ||
	    (em_rxd < EM_MIN_RXD)) {
		device_printf(dev, "Using %d RX descriptors instead of %d!\n",
		    EM_DEFAULT_RXD, em_rxd);
		adapter->num_rx_desc = EM_DEFAULT_RXD;
	} else
		adapter->num_rx_desc = em_rxd;

	adapter->hw.mac.autoneg = DO_AUTO_NEG;
	adapter->hw.phy.wait_for_link = FALSE;
	adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
	adapter->rx_buffer_len = 2048;

	e1000_init_script_state_82541(&adapter->hw, TRUE);
	e1000_set_tbi_compatibility_82543(&adapter->hw, TRUE);

	/* Copper options */
	if (adapter->hw.media_type == e1000_media_type_copper) {
		adapter->hw.phy.mdix = AUTO_ALL_MODES;
		adapter->hw.phy.disable_polarity_correction = FALSE;
		adapter->hw.phy.ms_type = EM_MASTER_SLAVE;
	}

	/*
	 * Set the max frame size assuming standard Ethernet-sized
	 * frames.
	 */
	adapter->hw.mac.max_frame_size =
	    ETHERMTU + ETHER_HDR_LEN + ETHERNET_FCS_SIZE;

	adapter->hw.mac.min_frame_size = ETH_ZLEN + ETHERNET_FCS_SIZE;

	/*
	 * This controls when hardware reports transmit completion
	 * status.
	 */
	adapter->hw.mac.report_tx_early = 1;

	tsize = roundup2(adapter->num_tx_desc * sizeof(struct e1000_tx_desc),
	    EM_DBA_ALIGN);

	/* Allocate Transmit Descriptor ring */
	if (em_dma_malloc(adapter, tsize, &adapter->txdma, BUS_DMA_NOWAIT)) {
		device_printf(dev, "Unable to allocate tx_desc memory\n");
		error = ENOMEM;
		goto err_tx_desc;
	}
	adapter->tx_desc_base =
	    (struct e1000_tx_desc *)adapter->txdma.dma_vaddr;

	rsize = roundup2(adapter->num_rx_desc * sizeof(struct e1000_rx_desc),
	    EM_DBA_ALIGN);

	/* Allocate Receive Descriptor ring */
	if (em_dma_malloc(adapter, rsize, &adapter->rxdma, BUS_DMA_NOWAIT)) {
		device_printf(dev, "Unable to allocate rx_desc memory\n");
		error = ENOMEM;
		goto err_rx_desc;
	}
	adapter->rx_desc_base =
	    (struct e1000_rx_desc *)adapter->rxdma.dma_vaddr;

	/* Make sure we have a good EEPROM before we read from it */
	if (e1000_validate_nvm_checksum(&adapter->hw) < 0) {
		/*
		** Some PCI-E parts fail the first check due to
		** the link being in a sleep state; call it again,
		** and if it fails a second time it's a real issue.
		*/
		if (e1000_validate_nvm_checksum(&adapter->hw) < 0) {
			device_printf(dev,
			    "The EEPROM Checksum Is Not Valid\n");
			error = EIO;
			goto err_hw_init;
		}
	}

	if (e1000_read_part_num(&adapter->hw, &(adapter->part_num)) < 0) {
		device_printf(dev, "EEPROM read error "
		    "reading part number\n");
		error = EIO;
		goto err_hw_init;
	}

	/* Initialize the hardware */
	if (em_hardware_init(adapter)) {
		device_printf(dev, "Unable to initialize the hardware\n");
		error = EIO;
		goto err_hw_init;
	}

	/* Copy the permanent MAC address out of the EEPROM */
	if (e1000_read_mac_addr(&adapter->hw) < 0) {
		device_printf(dev, "EEPROM read error while reading MAC"
		    " address\n");
		error = EIO;
		goto err_hw_init;
	}

	if (!em_is_valid_ether_addr(adapter->hw.mac.addr)) {
		device_printf(dev, "Invalid MAC address\n");
		error = EIO;
		goto err_hw_init;
	}

	/* Setup OS specific network interface */
	em_setup_interface(dev, adapter);

	em_allocate_intr(adapter);

	/* Initialize statistics */
	em_update_stats_counters(adapter);

	adapter->hw.mac.get_link_status = 1;
	em_update_link_status(adapter);

	/* Indicate SOL/IDER usage */
	if (e1000_check_reset_block(&adapter->hw))
		device_printf(dev,
		    "PHY reset is blocked due to SOL/IDER session.\n");

	/* Determine if we have to control management hardware */
	adapter->has_manage = e1000_enable_mng_pass_thru(&adapter->hw);

	/*
	 * Setup Wake-on-Lan
	 */
	switch (adapter->hw.mac.type) {
	case e1000_82542:
	case e1000_82543:
		break;
	case e1000_82546:
	case e1000_82546_rev_3:
	case e1000_82571:
	case e1000_80003es2lan:
		if (adapter->hw.bus.func == 1)
			e1000_read_nvm(&adapter->hw,
			    NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data);
		else
			e1000_read_nvm(&adapter->hw,
			    NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
		eeprom_data &= EM_EEPROM_APME;
		break;
	default:
		/* APME bit in EEPROM is mapped to WUC.APME */
		eeprom_data = E1000_READ_REG(&adapter->hw, E1000_WUC) &
		    E1000_WUC_APME;
		break;
	}
	if (eeprom_data)
		adapter->wol = E1000_WUFC_MAG;
	/*
	 * We have the eeprom settings, now apply the special cases
	 * where the eeprom may be wrong or the board won't support
	 * wake on lan on a particular port
	 */
	device_id = pci_get_device(dev);
	switch (device_id) {
	case E1000_DEV_ID_82546GB_PCIE:
		adapter->wol = 0;
		break;
	case E1000_DEV_ID_82546EB_FIBER:
	case E1000_DEV_ID_82546GB_FIBER:
	case E1000_DEV_ID_82571EB_FIBER:
		/* Wake events only supported on port A for dual fiber
		 * regardless of eeprom setting */
		if (E1000_READ_REG(&adapter->hw, E1000_STATUS) &
		    E1000_STATUS_FUNC_1)
			adapter->wol = 0;
		break;
	case E1000_DEV_ID_82546GB_QUAD_COPPER_KSP3:
	case E1000_DEV_ID_82571EB_QUAD_COPPER:
	case E1000_DEV_ID_82571EB_QUAD_FIBER:
	case E1000_DEV_ID_82571EB_QUAD_COPPER_LP:
		/* if quad port adapter, disable WoL on all but port A */
		if (global_quad_port_a != 0)
			adapter->wol = 0;
		/* Reset for multiple quad port adapters */
		if (++global_quad_port_a == 4)
			global_quad_port_a = 0;
		break;
	}

	/* Do we need the workaround for 82544 PCI-X adapters? */
	if (adapter->hw.bus.type == e1000_bus_type_pcix &&
	    adapter->hw.mac.type == e1000_82544)
		adapter->pcix_82544 = TRUE;
	else
		adapter->pcix_82544 = FALSE;

	/* Get control from any management/hw control */
	if (((adapter->hw.mac.type != e1000_82573) &&
	    (adapter->hw.mac.type != e1000_ich8lan) &&
	    (adapter->hw.mac.type != e1000_ich9lan)) ||
	    !e1000_check_mng_mode(&adapter->hw))
		em_get_hw_control(adapter);

	/* Tell the stack that the interface is not active */
	adapter->ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);

	INIT_DEBUGOUT("em_attach: end");

	return (0);

err_hw_init:
	em_release_hw_control(adapter);
	e1000_remove_device(&adapter->hw);
	em_dma_free(adapter, &adapter->rxdma);
err_rx_desc:
	em_dma_free(adapter, &adapter->txdma);
err_tx_desc:
err_pci:
	em_free_intr(adapter);
	em_free_pci_resources(adapter);
	EM_LOCK_DESTROY(adapter);

	return (error);
}

/*********************************************************************
 *  Device removal routine
 *
 *  The detach entry point is called when the driver is being removed.
 *  This routine stops the adapter and deallocates all the resources
 *  that were allocated for driver operation.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/

static int
em_detach(device_t dev)
{
	struct adapter	*adapter = device_get_softc(dev);
	struct ifnet	*ifp = adapter->ifp;

	INIT_DEBUGOUT("em_detach: begin");

#ifdef DEVICE_POLLING
	if (ifp->if_capenable & IFCAP_POLLING)
		ether_poll_deregister(ifp);
#endif

	em_disable_intr(adapter);
	em_free_intr(adapter);
	EM_LOCK(adapter);
	adapter->in_detach = 1;
	em_stop(adapter);
	e1000_phy_hw_reset(&adapter->hw);

	em_release_manageability(adapter);
	if (((adapter->hw.mac.type != e1000_82573) &&
	    (adapter->hw.mac.type != e1000_ich8lan) &&
	    (adapter->hw.mac.type != e1000_ich9lan)) ||
	    !e1000_check_mng_mode(&adapter->hw))
		em_release_hw_control(adapter);
	if (adapter->wol) {
		E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
		E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
		em_enable_wakeup(dev);
	}

	EM_UNLOCK(adapter);
	ether_ifdetach(adapter->ifp);

	callout_drain(&adapter->timer);
	callout_drain(&adapter->tx_fifo_timer);

	em_free_pci_resources(adapter);
	bus_generic_detach(dev);
	if_free(ifp);

	e1000_remove_device(&adapter->hw);
	em_free_transmit_structures(adapter);
	em_free_receive_structures(adapter);

	/* Free Transmit Descriptor ring */
	if (adapter->tx_desc_base) {
		em_dma_free(adapter, &adapter->txdma);
		adapter->tx_desc_base = NULL;
	}

	/* Free Receive Descriptor ring */
	if (adapter->rx_desc_base) {
		em_dma_free(adapter, &adapter->rxdma);
		adapter->rx_desc_base = NULL;
	}

	EM_LOCK_DESTROY(adapter);

	return (0);
}

/*********************************************************************
 *
 *  Shutdown entry point
 *
 **********************************************************************/

static int
em_shutdown(device_t dev)
{
	return em_suspend(dev);
}

/*
 * Suspend/resume device methods.
 */
static int
em_suspend(device_t dev)
{
	struct adapter *adapter = device_get_softc(dev);

	EM_LOCK(adapter);
	em_stop(adapter);

	em_release_manageability(adapter);
	if (((adapter->hw.mac.type != e1000_82573) &&
	    (adapter->hw.mac.type != e1000_ich8lan) &&
	    (adapter->hw.mac.type != e1000_ich9lan)) ||
	    !e1000_check_mng_mode(&adapter->hw))
		em_release_hw_control(adapter);
	if (adapter->wol) {
		E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
		E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
		em_enable_wakeup(dev);
	}

	EM_UNLOCK(adapter);

	return bus_generic_suspend(dev);
}

static int
em_resume(device_t dev)
{
	struct adapter *adapter = device_get_softc(dev);
	struct ifnet *ifp = adapter->ifp;

	EM_LOCK(adapter);
	em_init_locked(adapter);

	/* Get control from any management/hw control */
	if (((adapter->hw.mac.type != e1000_82573) &&
	    (adapter->hw.mac.type != e1000_ich8lan) &&
	    (adapter->hw.mac.type != e1000_ich9lan)) ||
	    !e1000_check_mng_mode(&adapter->hw))
		em_get_hw_control(adapter);
	em_init_manageability(adapter);

	if ((ifp->if_flags & IFF_UP) &&
	    (ifp->if_drv_flags & IFF_DRV_RUNNING))
		em_start_locked(ifp);

	EM_UNLOCK(adapter);

	return bus_generic_resume(dev);
}


/*********************************************************************
 *  Transmit entry point
 *
 *  em_start is called by the stack to initiate a transmit.
 *  The driver will remain in this routine as long as there are
 *  packets to transmit and transmit resources are available.
 *  In case resources are not available, the stack is notified and
 *  the packet is requeued.
 **********************************************************************/

static void
em_start_locked(struct ifnet *ifp)
{
	struct adapter	*adapter = ifp->if_softc;
	struct mbuf	*m_head;

	EM_LOCK_ASSERT(adapter);

	if ((ifp->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
	    IFF_DRV_RUNNING)
		return;
	if (!adapter->link_active)
		return;

	while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
		IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
		if (m_head == NULL)
			break;
		/*
		 *  Encapsulation can modify our pointer, and/or make it
		 *  NULL on failure.  In that event, we can't requeue.
		 *
		 *  We now use a pointer to accommodate legacy and
		 *  advanced transmit functions.
		 */
		if (adapter->em_xmit(adapter, &m_head)) {
			if (m_head == NULL)
				break;
			ifp->if_drv_flags |= IFF_DRV_OACTIVE;
			IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
			break;
		}

		/* Send a copy of the frame to the BPF listener */
		BPF_MTAP(ifp, m_head);

		/* Set timeout in case hardware has problems transmitting. */
		adapter->watchdog_timer = EM_TX_TIMEOUT;
	}
}

static void
em_start(struct ifnet *ifp)
{
	struct adapter *adapter = ifp->if_softc;

	EM_LOCK(adapter);
	if (ifp->if_drv_flags & IFF_DRV_RUNNING)
		em_start_locked(ifp);
	EM_UNLOCK(adapter);
}

/*********************************************************************
 *  Ioctl entry point
 *
 *  em_ioctl is called when the user wants to configure the
 *  interface.
 *
 *  return 0 on success, positive on failure
 **********************************************************************/

static int
em_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
{
	struct adapter	*adapter = ifp->if_softc;
	struct ifreq *ifr = (struct ifreq *)data;
	struct ifaddr *ifa = (struct ifaddr *)data;
	int error = 0;

	if (adapter->in_detach)
		return (error);

	switch (command) {
	case SIOCSIFADDR:
	case SIOCGIFADDR:
		if (ifa->ifa_addr->sa_family == AF_INET) {
			/*
			 * XXX
			 * Since resetting the hardware takes a very long
			 * time and results in link renegotiation, we
			 * initialize the hardware only when it is
			 * absolutely required.
			 */
			ifp->if_flags |= IFF_UP;
			if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) {
				EM_LOCK(adapter);
				em_init_locked(adapter);
				EM_UNLOCK(adapter);
			}
			arp_ifinit(ifp, ifa);
		} else
			error = ether_ioctl(ifp, command, data);
		break;
	case SIOCSIFMTU:
	    {
		int max_frame_size;
		uint16_t eeprom_data = 0;

		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)");

		EM_LOCK(adapter);
		switch (adapter->hw.mac.type) {
		case e1000_82573:
			/*
			 * 82573 only supports jumbo frames
			 * if ASPM is disabled.
			 */
			e1000_read_nvm(&adapter->hw,
			    NVM_INIT_3GIO_3, 1, &eeprom_data);
			if (eeprom_data & NVM_WORD1A_ASPM_MASK) {
				max_frame_size = ETHER_MAX_LEN;
				break;
			}
			/* Allow Jumbo frames - fall thru */
		case e1000_82571:
		case e1000_82572:
		case e1000_ich9lan:
		case e1000_82575:
		case e1000_80003es2lan:	/* Limit Jumbo Frame size */
			max_frame_size = 9234;
			break;
		case e1000_ich8lan:
			/* ICH8 does not support jumbo frames */
			max_frame_size = ETHER_MAX_LEN;
			break;
		default:
			max_frame_size = MAX_JUMBO_FRAME_SIZE;
		}
		if (ifr->ifr_mtu > max_frame_size - ETHER_HDR_LEN -
		    ETHER_CRC_LEN) {
			EM_UNLOCK(adapter);
			error = EINVAL;
			break;
		}

		ifp->if_mtu = ifr->ifr_mtu;
		adapter->hw.mac.max_frame_size =
		    ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
		em_init_locked(adapter);
		EM_UNLOCK(adapter);
		break;
	    }
	case SIOCSIFFLAGS:
		IOCTL_DEBUGOUT("ioctl rcv'd: "
		    "SIOCSIFFLAGS (Set Interface Flags)");
		EM_LOCK(adapter);
		if (ifp->if_flags & IFF_UP) {
			if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) {
				if ((ifp->if_flags ^ adapter->if_flags) &
				    IFF_PROMISC) {
					em_disable_promisc(adapter);
					em_set_promisc(adapter);
				}
			} else
				em_init_locked(adapter);
		} else
			if (ifp->if_drv_flags & IFF_DRV_RUNNING)
				em_stop(adapter);
		adapter->if_flags = ifp->if_flags;
		EM_UNLOCK(adapter);
		break;
	case SIOCADDMULTI:
	case SIOCDELMULTI:
		IOCTL_DEBUGOUT("ioctl rcv'd: SIOC(ADD|DEL)MULTI");
		if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
			EM_LOCK(adapter);
			em_disable_intr(adapter);
			em_set_multi(adapter);
			if (adapter->hw.mac.type == e1000_82542 &&
			    adapter->hw.revision_id == E1000_REVISION_2) {
				em_initialize_receive_unit(adapter);
			}
#ifdef DEVICE_POLLING
			if (!(ifp->if_capenable & IFCAP_POLLING))
#endif
				em_enable_intr(adapter);
			EM_UNLOCK(adapter);
		}
		break;
	case SIOCSIFMEDIA:
		/* Check SOL/IDER usage */
		EM_LOCK(adapter);
		if (e1000_check_reset_block(&adapter->hw)) {
			EM_UNLOCK(adapter);
			device_printf(adapter->dev, "Media change is"
			    " blocked due to SOL/IDER session.\n");
			break;
		}
		EM_UNLOCK(adapter);
	case SIOCGIFMEDIA:
		IOCTL_DEBUGOUT("ioctl rcv'd: "
		    "SIOCxIFMEDIA (Get/Set Interface Media)");
		error = ifmedia_ioctl(ifp, ifr, &adapter->media, command);
		break;
	case SIOCSIFCAP:
	    {
		int mask, reinit;

		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFCAP (Set Capabilities)");
		reinit = 0;
		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
#ifdef DEVICE_POLLING
		if (mask & IFCAP_POLLING) {
			if (ifr->ifr_reqcap & IFCAP_POLLING) {
				error = ether_poll_register(em_poll, ifp);
				if (error)
					return (error);
				EM_LOCK(adapter);
				em_disable_intr(adapter);
				ifp->if_capenable |= IFCAP_POLLING;
				EM_UNLOCK(adapter);
			} else {
				error = ether_poll_deregister(ifp);
				/* Enable interrupt even in error case */
				EM_LOCK(adapter);
				em_enable_intr(adapter);
				ifp->if_capenable &= ~IFCAP_POLLING;
				EM_UNLOCK(adapter);
			}
		}
#endif
		if (mask & IFCAP_HWCSUM) {
			ifp->if_capenable ^= IFCAP_HWCSUM;
			reinit = 1;
		}
		if (mask & IFCAP_TSO4) {
			ifp->if_capenable ^= IFCAP_TSO4;
			reinit = 1;
		}
		if (mask & IFCAP_VLAN_HWTAGGING) {
			ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
			reinit = 1;
		}
		if (reinit && (ifp->if_drv_flags & IFF_DRV_RUNNING))
			em_init(adapter);
		VLAN_CAPABILITIES(ifp);
		break;
	    }
	default:
		error = ether_ioctl(ifp, command, data);
		break;
	}

	return (error);
}

/*********************************************************************
 *  Watchdog timer:
 *
 *  This routine is called from the local timer every second.
 *  As long as transmit descriptors are being cleaned the value
 *  is non-zero and we do nothing. Reaching 0 indicates a tx hang
 *  and we then reset the device.
 *
 **********************************************************************/

static void
em_watchdog(struct adapter *adapter)
{

	EM_LOCK_ASSERT(adapter);

	/*
	** The timer is set to 5 every time start queues a packet.
	** Then txeof keeps resetting it to 5 as long as it cleans at
	** least one descriptor.
	** Finally, anytime all descriptors are clean the timer is
	** set to 0.
	*/
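	/*
	 * Note the side effect below: a non-zero timer is decremented
	 * on every call, and we only fall through on the call where
	 * it reaches zero.
	 */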
	if (adapter->watchdog_timer == 0 || --adapter->watchdog_timer)
		return;

	/* If we are in this routine because of pause frames, then
	 * don't reset the hardware.
	 */
	if (E1000_READ_REG(&adapter->hw, E1000_STATUS) &
	    E1000_STATUS_TXOFF) {
		adapter->watchdog_timer = EM_TX_TIMEOUT;
		return;
	}

	if (e1000_check_for_link(&adapter->hw) == 0)
		device_printf(adapter->dev, "watchdog timeout -- resetting\n");
	adapter->ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
	adapter->watchdog_events++;

	em_init_locked(adapter);
}

/*********************************************************************
 *  Init entry point
 *
 *  This routine is used in two ways. It is used by the stack as
 *  the init entry point in the network interface structure. It is
 *  also used by the driver as a hw/sw initialization routine to
 *  get to a consistent state.
 *
 *  return 0 on success, positive on failure
 **********************************************************************/

static void
em_init_locked(struct adapter *adapter)
{
	struct ifnet	*ifp = adapter->ifp;
	device_t	dev = adapter->dev;
	uint32_t	pba;

	INIT_DEBUGOUT("em_init: begin");

	EM_LOCK_ASSERT(adapter);

	em_stop(adapter);

	/*
	 * Packet Buffer Allocation (PBA)
	 * Writing PBA sets the receive portion of the buffer
	 * the remainder is used for the transmit buffer.
	 *
	 * Devices before the 82547 had a Packet Buffer of 64K.
	 *   Default allocation: PBA=48K for Rx, leaving 16K for Tx.
	 * After the 82547 the buffer was reduced to 40K.
	 *   Default allocation: PBA=30K for Rx, leaving 10K for Tx.
	 *   Note: default does not leave enough room for Jumbo Frame >10k.
	 */
	switch (adapter->hw.mac.type) {
	case e1000_82547:
	case e1000_82547_rev_2: /* 82547: Total Packet Buffer is 40K */
		if (adapter->hw.mac.max_frame_size > 8192)
			pba = E1000_PBA_22K; /* 22K for Rx, 18K for Tx */
		else
			pba = E1000_PBA_30K; /* 30K for Rx, 10K for Tx */
		adapter->tx_fifo_head = 0;
		adapter->tx_head_addr = pba << EM_TX_HEAD_ADDR_SHIFT;
		adapter->tx_fifo_size =
		    (E1000_PBA_40K - pba) << EM_PBA_BYTES_SHIFT;
		break;
	/* Total Packet Buffer on these is 48K */
	case e1000_82571:
	case e1000_82572:
	case e1000_82575:
	case e1000_80003es2lan:
		pba = E1000_PBA_32K; /* 32K for Rx, 16K for Tx */
		break;
	case e1000_82573: /* 82573: Total Packet Buffer is 32K */
		pba = E1000_PBA_12K; /* 12K for Rx, 20K for Tx */
		break;
	case e1000_ich9lan:
#define E1000_PBA_10K	0x000A
		pba = E1000_PBA_10K;
		break;
	case e1000_ich8lan:
		pba = E1000_PBA_8K;
		break;
	default:
		/* Devices before 82547 had a Packet Buffer of 64K. */
		if (adapter->hw.mac.max_frame_size > 8192)
			pba = E1000_PBA_40K; /* 40K for Rx, 24K for Tx */
		else
			pba = E1000_PBA_48K; /* 48K for Rx, 16K for Tx */
	}

	INIT_DEBUGOUT1("em_init: pba=%dK", pba);
	E1000_WRITE_REG(&adapter->hw, E1000_PBA, pba);

	/* Get the latest mac address, User can use a LAA */
	bcopy(IF_LLADDR(adapter->ifp), adapter->hw.mac.addr,
	    ETHER_ADDR_LEN);

	/* Initialize the hardware */
	if (em_hardware_init(adapter)) {
		device_printf(dev, "Unable to initialize the hardware\n");
		return;
	}
	em_update_link_status(adapter);

	if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING)
		em_enable_vlans(adapter);

	/* Set hardware offload abilities */
	ifp->if_hwassist = 0;
	if (adapter->hw.mac.type >= e1000_82543) {
		if (ifp->if_capenable & IFCAP_TXCSUM)
			ifp->if_hwassist |= EM_CHECKSUM_FEATURES;
		if (ifp->if_capenable & IFCAP_TSO)
			ifp->if_hwassist |= EM_TCPSEG_FEATURES;
	}

	/* Configure for OS presence */
	em_init_manageability(adapter);

	/* Prepare transmit descriptors and buffers */
	if (em_setup_transmit_structures(adapter)) {
		device_printf(dev, "Could not setup transmit structures\n");
		em_stop(adapter);
		return;
	}
	em_initialize_transmit_unit(adapter);

	/* Setup Multicast table */
	em_set_multi(adapter);

	/* Prepare receive descriptors and buffers */
	if (em_setup_receive_structures(adapter)) {
		device_printf(dev, "Could not setup receive structures\n");
		em_stop(adapter);
		return;
	}
	em_initialize_receive_unit(adapter);

	/* Don't lose promiscuous settings */
	em_set_promisc(adapter);

	ifp->if_drv_flags |= IFF_DRV_RUNNING;
	ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;

	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
	e1000_clear_hw_cntrs_base_generic(&adapter->hw);

#ifdef DEVICE_POLLING
	/*
	 * Only enable interrupts if we are not polling, make sure
	 * they are off otherwise.
	 */
	if (ifp->if_capenable & IFCAP_POLLING)
		em_disable_intr(adapter);
	else
#endif /* DEVICE_POLLING */
		em_enable_intr(adapter);

	/* Don't reset the phy next time init gets called */
	adapter->hw.phy.reset_disable = TRUE;
}

static void
em_init(void *arg)
{
	struct adapter *adapter = arg;

	EM_LOCK(adapter);
	em_init_locked(adapter);
	EM_UNLOCK(adapter);
}


#ifdef DEVICE_POLLING
/*********************************************************************
 *
 *  Legacy polling routine
 *
 *********************************************************************/
static void
em_poll(struct ifnet *ifp, enum poll_cmd cmd, int count)
{
	struct adapter *adapter = ifp->if_softc;
	uint32_t reg_icr;

	EM_LOCK(adapter);
	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
		EM_UNLOCK(adapter);
		return;
	}

	if (cmd == POLL_AND_CHECK_STATUS) {
		reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
		if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
			callout_stop(&adapter->timer);
			adapter->hw.mac.get_link_status = 1;
			e1000_check_for_link(&adapter->hw);
			em_update_link_status(adapter);
			callout_reset(&adapter->timer, hz,
			    em_local_timer, adapter);
		}
	}
	em_rxeof(adapter, count);
	em_txeof(adapter);

	if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
		em_start_locked(ifp);
	EM_UNLOCK(adapter);
}

/*********************************************************************
 *
 *  Legacy Interrupt Service routine
 *
 *********************************************************************/

static void
em_intr(void *arg)
{
	struct adapter	*adapter = arg;
	struct ifnet	*ifp;
	uint32_t	reg_icr;

	EM_LOCK(adapter);
	ifp = adapter->ifp;

	if (ifp->if_capenable & IFCAP_POLLING) {
		EM_UNLOCK(adapter);
		return;
	}

	for (;;) {
		reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);

		if (adapter->hw.mac.type >= e1000_82571 &&
		    (reg_icr & E1000_ICR_INT_ASSERTED) == 0)
			break;
		else if (reg_icr == 0)
			break;

		/*
		 * XXX: some laptops trigger several spurious interrupts
		 * on em(4) when in the resume cycle. The ICR register
		 * reports an all-ones value in this case. Processing
		 * such interrupts would lead to a freeze. I don't know why.
		 */
		if (reg_icr == 0xffffffff)
			break;

		if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
			em_rxeof(adapter, -1);
			em_txeof(adapter);
		}

		/* Link status change */
		if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
			callout_stop(&adapter->timer);
			adapter->hw.mac.get_link_status = 1;
			e1000_check_for_link(&adapter->hw);
			em_update_link_status(adapter);
			callout_reset(&adapter->timer, hz,
			    em_local_timer, adapter);
		}

		if (reg_icr & E1000_ICR_RXO)
			adapter->rx_overruns++;
	}

	if (ifp->if_drv_flags & IFF_DRV_RUNNING &&
	    !IFQ_DRV_IS_EMPTY(&ifp->if_snd))
		em_start_locked(ifp);
	EM_UNLOCK(adapter);
}

#else /* if not DEVICE_POLLING, then fast interrupt routines only */

static void
em_handle_link(void *context, int pending)
{
	struct adapter	*adapter = context;
	struct ifnet *ifp;

	ifp = adapter->ifp;

	EM_LOCK(adapter);
	if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) {
		EM_UNLOCK(adapter);
		return;
	}

	callout_stop(&adapter->timer);
	adapter->hw.mac.get_link_status = 1;
	e1000_check_for_link(&adapter->hw);
	em_update_link_status(adapter);
	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
	EM_UNLOCK(adapter);
}

static void
em_handle_rxtx(void *context, int pending)
{
	struct adapter	*adapter = context;
	struct ifnet	*ifp;

	NET_LOCK_GIANT();
	ifp = adapter->ifp;

	/*
	 * TODO:
	 * It should be possible to run the tx clean loop without the lock.
	 */
	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
		if (em_rxeof(adapter, adapter->rx_process_limit) != 0)
			taskqueue_enqueue(adapter->tq, &adapter->rxtx_task);
		EM_LOCK(adapter);
		em_txeof(adapter);

		if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
			em_start_locked(ifp);
		EM_UNLOCK(adapter);
	}

	em_enable_intr(adapter);
	NET_UNLOCK_GIANT();
}

/*********************************************************************
 *
 *  Fast Interrupt Service routine
 *
 *********************************************************************/
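/*
 * This runs as an interrupt filter: it must not sleep and does as
 * little work as possible, classifying the interrupt, masking further
 * interrupts, and deferring all rx/tx cleanup to the rxtx taskqueue
 * (which re-enables interrupts in em_handle_rxtx when it is done).
 */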
static int
em_intr_fast(void *arg)
{
	struct adapter	*adapter = arg;
	struct ifnet	*ifp;
	uint32_t	reg_icr;

	ifp = adapter->ifp;

	reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);

	/* Hot eject?  */
	if (reg_icr == 0xffffffff)
		return (FILTER_STRAY);

	/* Definitely not our interrupt.  */
	if (reg_icr == 0x0)
		return (FILTER_STRAY);

	/*
	 * Starting with the 82571 chip, bit 31 should be used to
	 * determine whether the interrupt belongs to us.
	 */
	if (adapter->hw.mac.type >= e1000_82571 &&
	    (reg_icr & E1000_ICR_INT_ASSERTED) == 0)
		return (FILTER_STRAY);

	/*
	 * Mask interrupts until the taskqueue is finished running.  This is
	 * cheap, just assume that it is needed.  This also works around the
	 * MSI message reordering errata on certain systems.
	 */
	em_disable_intr(adapter);
	taskqueue_enqueue(adapter->tq, &adapter->rxtx_task);

	/* Link status change */
	if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC))
		taskqueue_enqueue(taskqueue_fast, &adapter->link_task);

	if (reg_icr & E1000_ICR_RXO)
		adapter->rx_overruns++;
	return (FILTER_HANDLED);
}
#endif /* ! DEVICE_POLLING */

/*********************************************************************
 *
 *  Media Ioctl callback
 *
 *  This routine is called whenever the user queries the status of
 *  the interface using ifconfig.
 *
 **********************************************************************/
static void
em_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
{
	struct adapter *adapter = ifp->if_softc;
	u_char fiber_type = IFM_1000_SX;

	INIT_DEBUGOUT("em_media_status: begin");

	EM_LOCK(adapter);
	e1000_check_for_link(&adapter->hw);
	em_update_link_status(adapter);

	ifmr->ifm_status = IFM_AVALID;
	ifmr->ifm_active = IFM_ETHER;

	if (!adapter->link_active) {
		EM_UNLOCK(adapter);
		return;
	}

	ifmr->ifm_status |= IFM_ACTIVE;

	if ((adapter->hw.media_type == e1000_media_type_fiber) ||
	    (adapter->hw.media_type == e1000_media_type_internal_serdes)) {
		if (adapter->hw.mac.type == e1000_82545)
			fiber_type = IFM_1000_LX;
		ifmr->ifm_active |= fiber_type | IFM_FDX;
	} else {
		switch (adapter->link_speed) {
		case 10:
			ifmr->ifm_active |= IFM_10_T;
			break;
		case 100:
			ifmr->ifm_active |= IFM_100_TX;
			break;
		case 1000:
			ifmr->ifm_active |= IFM_1000_T;
			break;
		}
		if (adapter->link_duplex == FULL_DUPLEX)
			ifmr->ifm_active |= IFM_FDX;
		else
			ifmr->ifm_active |= IFM_HDX;
	}
	EM_UNLOCK(adapter);
}

/*********************************************************************
 *
 *  Media Ioctl callback
 *
 *  This routine is called when the user changes speed/duplex using
 *  the media/mediaopt options with ifconfig.
 *
 **********************************************************************/
static int
em_media_change(struct ifnet *ifp)
{
	struct adapter *adapter = ifp->if_softc;
	struct ifmedia  *ifm = &adapter->media;

	INIT_DEBUGOUT("em_media_change: begin");

	if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
		return (EINVAL);

	EM_LOCK(adapter);
	switch (IFM_SUBTYPE(ifm->ifm_media)) {
	case IFM_AUTO:
		adapter->hw.mac.autoneg = DO_AUTO_NEG;
		adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
		break;
	case IFM_1000_LX:
	case IFM_1000_SX:
	case IFM_1000_T:
		adapter->hw.mac.autoneg = DO_AUTO_NEG;
		adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
		break;
	case IFM_100_TX:
		adapter->hw.mac.autoneg = FALSE;
		adapter->hw.phy.autoneg_advertised = 0;
		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
			adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_FULL;
		else
			adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_HALF;
		break;
	case IFM_10_T:
		adapter->hw.mac.autoneg = FALSE;
		adapter->hw.phy.autoneg_advertised = 0;
		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
			adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_FULL;
		else
			adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_HALF;
		break;
	default:
		device_printf(adapter->dev, "Unsupported media type\n");
	}

	/* As the speed/duplex settings may have changed we need to
	 * reset the PHY.
	 */
	adapter->hw.phy.reset_disable = FALSE;

	em_init_locked(adapter);
	EM_UNLOCK(adapter);

	return (0);
}

/*********************************************************************
 *
 *  This routine maps the mbufs to tx descriptors.
 *
 *  return 0 on success, positive on failure
 **********************************************************************/

static int
em_encap(struct adapter *adapter, struct mbuf **m_headp)
{
	struct ifnet		*ifp = adapter->ifp;
	bus_dma_segment_t	segs[EM_MAX_SCATTER];
	bus_dmamap_t		map;
	struct em_buffer	*tx_buffer, *tx_buffer_mapped;
	struct e1000_tx_desc	*ctxd = NULL;
	struct mbuf		*m_head;
	uint32_t		txd_upper, txd_lower, txd_used, txd_saved;
	int			nsegs, i, j, first, last = 0;
	int			error, do_tso, tso_desc = 0;

	m_head = *m_headp;
	txd_upper = txd_lower = txd_used = txd_saved = 0;

	do_tso = ((m_head->m_pkthdr.csum_flags & CSUM_TSO) != 0);
1691
1692	/*
1693	 * Force a cleanup if number of TX descriptors
1694	 * available hits the threshold
1695	 */
1696	if (adapter->num_tx_desc_avail <= EM_TX_CLEANUP_THRESHOLD) {
1697		em_txeof(adapter);
1698		/* Do we now have at least the minimal number free? */
1699		if (adapter->num_tx_desc_avail <= EM_TX_OP_THRESHOLD) {
1700			adapter->no_tx_desc_avail1++;
1701			return (ENOBUFS);
1702		}
1703	}
1704
1706	/*
1707	 * TSO workaround:
1708	 *  If an mbuf contains only the header, we need
1709	 *     to pull 4 bytes of data into it.
1710	 */
1711	if (do_tso && (m_head->m_len <= M_TSO_LEN)) {
1712		m_head = m_pullup(m_head, M_TSO_LEN + 4);
1713		*m_headp = m_head;
1714		if (m_head == NULL)
1715			return (ENOBUFS);
1716	}
1717
1718	/*
1719	 * Map the packet for DMA
1720	 *
1721	 * Capture the first descriptor index;
1722	 * this descriptor will have the index
1723	 * of the EOP, which is the only one that
1724	 * now gets a DONE bit writeback.
1725	 */
1726	first = adapter->next_avail_tx_desc;
1727	tx_buffer = &adapter->tx_buffer_area[first];
1728	tx_buffer_mapped = tx_buffer;
1729	map = tx_buffer->map;
1730
1731	error = bus_dmamap_load_mbuf_sg(adapter->txtag, map,
1732	    *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
1733
1734	/*
1735	 * There are two types of errors we can (try) to handle:
1736	 * - EFBIG means the mbuf chain was too long and bus_dma ran
1737	 *   out of segments.  Defragment the mbuf chain and try again.
1738	 * - ENOMEM means bus_dma could not obtain enough bounce buffers
1739	 *   at this point in time.  Defer sending and try again later.
1740	 * All other errors, in particular EINVAL, are fatal and prevent the
1741	 * mbuf chain from ever going through.  Drop it and report error.
1742	 */
1743	if (error == EFBIG) {
1744		struct mbuf *m;
1745
1746		m = m_defrag(*m_headp, M_DONTWAIT);
1747		if (m == NULL) {
1748			adapter->mbuf_alloc_failed++;
1749			m_freem(*m_headp);
1750			*m_headp = NULL;
1751			return (ENOBUFS);
1752		}
1753		*m_headp = m;
1754
1755		/* Try it again */
1756		error = bus_dmamap_load_mbuf_sg(adapter->txtag, map,
1757		    *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
1758
1759		if (error == ENOMEM) {
1760			adapter->no_tx_dma_setup++;
1761			return (error);
1762		} else if (error != 0) {
1763			adapter->no_tx_dma_setup++;
1764			m_freem(*m_headp);
1765			*m_headp = NULL;
1766			return (error);
1767		}
1768	} else if (error == ENOMEM) {
1769		adapter->no_tx_dma_setup++;
1770		return (error);
1771	} else if (error != 0) {
1772		adapter->no_tx_dma_setup++;
1773		m_freem(*m_headp);
1774		*m_headp = NULL;
1775		return (error);
1776	}
1777
1778	/*
1779	 * TSO Hardware workaround, if this packet is not
1780	 * TSO, and is only a single descriptor long, and
1781	 * it follows a TSO burst, then we need to add a
1782	 * sentinel descriptor to prevent premature writeback.
1783	 */
1784	if ((do_tso == 0) && (adapter->tx_tso == TRUE)) {
1785		if (nsegs == 1)
1786			tso_desc = TRUE;
1787		adapter->tx_tso = FALSE;
1788	}
1789
1790	if (nsegs > (adapter->num_tx_desc_avail - 2)) {
1791		adapter->no_tx_desc_avail2++;
1792		bus_dmamap_unload(adapter->txtag, map);
1793		return (ENOBUFS);
1794	}
1795	m_head = *m_headp;
1796
1797	/* Do hardware assists */
1798	if (ifp->if_hwassist > 0) {
1799		if (do_tso && em_tso_setup(adapter, m_head,
1800		    &txd_upper, &txd_lower)) {
1801			/* we need to make a final sentinel transmit desc */
1802			tso_desc = TRUE;
1803		} else
1804			em_transmit_checksum_setup(adapter, m_head,
1805			    &txd_upper, &txd_lower);
1806	}
1807
1808	i = adapter->next_avail_tx_desc;
1809	if (adapter->pcix_82544)
1810		txd_saved = i;
1811
1812	/* Set up our transmit descriptors */
1813	for (j = 0; j < nsegs; j++) {
1814		bus_size_t seg_len;
1815		bus_addr_t seg_addr;
1816		/* If adapter is 82544 and on PCIX bus */
1817		if(adapter->pcix_82544) {
1818			DESC_ARRAY	desc_array;
1819			uint32_t	array_elements, counter;
1820			/*
1821			 * Check the Address and Length combination and
1822			 * split the data accordingly
1823			 */
1824			array_elements = em_fill_descriptors(segs[j].ds_addr,
1825			    segs[j].ds_len, &desc_array);
1826			for (counter = 0; counter < array_elements; counter++) {
1827				if (txd_used == adapter->num_tx_desc_avail) {
1828					adapter->next_avail_tx_desc = txd_saved;
1829					adapter->no_tx_desc_avail2++;
1830					bus_dmamap_unload(adapter->txtag, map);
1831					return (ENOBUFS);
1832				}
1833				tx_buffer = &adapter->tx_buffer_area[i];
1834				ctxd = &adapter->tx_desc_base[i];
1835				ctxd->buffer_addr = htole64(
1836				    desc_array.descriptor[counter].address);
1837				ctxd->lower.data = htole32(
1838				    (adapter->txd_cmd | txd_lower | (uint16_t)
1839				    desc_array.descriptor[counter].length));
1840				ctxd->upper.data =
1841				    htole32((txd_upper));
1842				last = i;
1843				if (++i == adapter->num_tx_desc)
1844					i = 0;
1845				tx_buffer->m_head = NULL;
1846				tx_buffer->next_eop = -1;
1847				txd_used++;
1848			}
1849		} else {
1850			tx_buffer = &adapter->tx_buffer_area[i];
1851			ctxd = &adapter->tx_desc_base[i];
1852			seg_addr = segs[j].ds_addr;
1853			seg_len  = segs[j].ds_len;
1854			/*
1855			** TSO Workaround:
1856			** If this is the last descriptor, we want to
1857			** split it so we have a small final sentinel
1858			*/
1859			if (tso_desc && (j == (nsegs - 1)) && (seg_len > 8)) {
1860				seg_len -= 4;
1861				ctxd->buffer_addr = htole64(seg_addr);
1862				ctxd->lower.data = htole32(
1863				adapter->txd_cmd | txd_lower | seg_len);
1864				ctxd->upper.data =
1865				    htole32(txd_upper);
1866				if (++i == adapter->num_tx_desc)
1867					i = 0;
1868				/* Now make the sentinel */
1869				++txd_used; /* using an extra txd */
1870				ctxd = &adapter->tx_desc_base[i];
1871				tx_buffer = &adapter->tx_buffer_area[i];
1872				ctxd->buffer_addr =
1873				    htole64(seg_addr + seg_len);
1874				ctxd->lower.data = htole32(
1875				adapter->txd_cmd | txd_lower | 4);
1876				ctxd->upper.data =
1877				    htole32(txd_upper);
1878				last = i;
1879				if (++i == adapter->num_tx_desc)
1880					i = 0;
1881			} else {
1882				ctxd->buffer_addr = htole64(seg_addr);
1883				ctxd->lower.data = htole32(
1884				adapter->txd_cmd | txd_lower | seg_len);
1885				ctxd->upper.data =
1886				    htole32(txd_upper);
1887				last = i;
1888				if (++i == adapter->num_tx_desc)
1889					i = 0;
1890			}
1891			tx_buffer->m_head = NULL;
1892			tx_buffer->next_eop = -1;
1893		}
1894	}
1895
1896	adapter->next_avail_tx_desc = i;
1897	if (adapter->pcix_82544)
1898		adapter->num_tx_desc_avail -= txd_used;
1899	else {
1900		adapter->num_tx_desc_avail -= nsegs;
1901		if (tso_desc) /* TSO used an extra for sentinel */
1902			adapter->num_tx_desc_avail -= txd_used;
1903	}
1904
1905	if (m_head->m_flags & M_VLANTAG) {
1906		/* Set the vlan id. */
1907		ctxd->upper.fields.special =
1908		    htole16(m_head->m_pkthdr.ether_vtag);
1909		/* Tell hardware to add tag */
1910		ctxd->lower.data |= htole32(E1000_TXD_CMD_VLE);
1911	}
1912
1913	tx_buffer->m_head = m_head;
1914	tx_buffer_mapped->map = tx_buffer->map;
1915	tx_buffer->map = map;
1916	bus_dmamap_sync(adapter->txtag, map, BUS_DMASYNC_PREWRITE);
1917
1918	/*
1919	 * Last Descriptor of Packet
1920	 * needs End Of Packet (EOP)
1921	 * and Report Status (RS)
1922	 */
1923	ctxd->lower.data |=
1924	    htole32(E1000_TXD_CMD_EOP | E1000_TXD_CMD_RS);
1925	/*
1926	 * Keep track, in the first buffer, of which
1927	 * descriptor will be written back
1928	 */
1929	tx_buffer = &adapter->tx_buffer_area[first];
1930	tx_buffer->next_eop = last;
1931
1932	/*
1933	 * Advance the Transmit Descriptor Tail (TDT), this tells the E1000
1934	 * that this frame is available to transmit.
1935	 */
1936	bus_dmamap_sync(adapter->txdma.dma_tag, adapter->txdma.dma_map,
1937	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1938	if (adapter->hw.mac.type == e1000_82547 &&
1939	    adapter->link_duplex == HALF_DUPLEX)
1940		em_82547_move_tail(adapter);
1941	else {
1942		E1000_WRITE_REG(&adapter->hw, E1000_TDT, i);
1943		if (adapter->hw.mac.type == e1000_82547)
1944			em_82547_update_fifo_head(adapter,
1945			    m_head->m_pkthdr.len);
1946	}
1947
1948	return (0);
1949}
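
/*
 * A minimal sketch (not compiled) of how a start routine is expected to
 * drive em_encap(): dequeue a chain, encapsulate it, and requeue on
 * ENOBUFS.  The surrounding locking and ifp setup are assumed.
 */
#if 0
	struct mbuf *m_head;

	IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
	if (m_head != NULL && em_encap(adapter, &m_head) != 0) {
		/* Out of descriptors: push the chain back and mark busy */
		if (m_head != NULL)
			IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
		ifp->if_drv_flags |= IFF_DRV_OACTIVE;
	}
#endif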
1950
1951/*********************************************************************
1952 *
1953 *  This routine maps the mbufs to Advanced TX descriptors,
1954 *  as used by the 82575 adapter.  It needs no workarounds.
1955 *
1956 **********************************************************************/
1957static int
1958em_adv_encap(struct adapter *adapter, struct mbuf **m_headp)
1959{
1960	bus_dma_segment_t	segs[EM_MAX_SCATTER];
1961	bus_dmamap_t		map;
1962	struct em_buffer	*tx_buffer, *tx_buffer_mapped;
1963	union e1000_adv_tx_desc	*txd = NULL;
1964	struct mbuf		*m_head;
1965	u32			olinfo_status = 0, cmd_type_len = 0;
1966	u32			do_tso, paylen = 0;
1967	int			nsegs, i, j, error, first, last = 0;
1968
1969	m_head = *m_headp;
1970
1971	do_tso = ((m_head->m_pkthdr.csum_flags & CSUM_TSO) != 0);
1972
1973	/* Set basic descriptor constants */
1974	cmd_type_len |= E1000_ADVTXD_DTYP_DATA;
1975	cmd_type_len |= E1000_ADVTXD_DCMD_IFCS | E1000_ADVTXD_DCMD_DEXT;
1976
1977	/*
1978	 * Force a cleanup if number of TX descriptors
1979	 * available hits the threshold
1980	 */
1981	if (adapter->num_tx_desc_avail <= EM_TX_CLEANUP_THRESHOLD) {
1982		em_txeof(adapter);
1983		/* Now do we at least have a minimal? */
1984		/* Do we now have at least the minimal number free? */
1985			adapter->no_tx_desc_avail1++;
1986			return (ENOBUFS);
1987		}
1988	}
1989
1990	/*
1991	 * Map the packet for DMA.
1992	 *
1993	 * Capture the first descriptor index;
1994	 * this descriptor will have the index
1995	 * of the EOP, which is the only one that
1996	 * now gets a DONE bit writeback.
1997	 */
1998	first = adapter->next_avail_tx_desc;
1999	tx_buffer = &adapter->tx_buffer_area[first];
2000	tx_buffer_mapped = tx_buffer;
2001	map = tx_buffer->map;
2002
2003	error = bus_dmamap_load_mbuf_sg(adapter->txtag, map,
2004	    *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
2005
2006	if (error == EFBIG) {
2007		struct mbuf *m;
2008
2009		m = m_defrag(*m_headp, M_DONTWAIT);
2010		if (m == NULL) {
2011			adapter->mbuf_alloc_failed++;
2012			m_freem(*m_headp);
2013			*m_headp = NULL;
2014			return (ENOBUFS);
2015		}
2016		*m_headp = m;
2017
2018		/* Try it again */
2019		error = bus_dmamap_load_mbuf_sg(adapter->txtag, map,
2020		    *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
2021
2022		if (error == ENOMEM) {
2023			adapter->no_tx_dma_setup++;
2024			return (error);
2025		} else if (error != 0) {
2026			adapter->no_tx_dma_setup++;
2027			m_freem(*m_headp);
2028			*m_headp = NULL;
2029			return (error);
2030		}
2031	} else if (error == ENOMEM) {
2032		adapter->no_tx_dma_setup++;
2033		return (error);
2034	} else if (error != 0) {
2035		adapter->no_tx_dma_setup++;
2036		m_freem(*m_headp);
2037		*m_headp = NULL;
2038		return (error);
2039	}
2040
2041	/* Check again to be sure we have enough descriptors */
2042	if (nsegs > (adapter->num_tx_desc_avail - 2)) {
2043		adapter->no_tx_desc_avail2++;
2044		bus_dmamap_unload(adapter->txtag, map);
2045		return (ENOBUFS);
2046	}
2047	m_head = *m_headp;
2048
2049	/*
2050	 * Set up the context descriptor:
2051	 * used when any hardware offload is done.
2052	 * This includes CSUM, VLAN, and TSO. It
2053	 * will use the first descriptor.
2054	 */
2055	if (m_head->m_pkthdr.csum_flags) {
2056		/* All offloads set this */
2057		olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
2058		/* First try TSO */
2059		if ((do_tso) && em_tso_adv_setup(adapter, m_head, &paylen)) {
2060			cmd_type_len |= E1000_ADVTXD_DCMD_TSE;
2061			olinfo_status |= E1000_TXD_POPTS_IXSM << 8;
2062			olinfo_status |= paylen << E1000_ADVTXD_PAYLEN_SHIFT;
2063		} else	/* Just checksum offload */
2064			em_tx_adv_ctx_setup(adapter, m_head);
2065	}
2066
2067	/* Set up our transmit descriptors */
2068	i = adapter->next_avail_tx_desc;
2069	for (j = 0; j < nsegs; j++) {
2070		bus_size_t seg_len;
2071		bus_addr_t seg_addr;
2072
2073		tx_buffer = &adapter->tx_buffer_area[i];
2074		txd = (union e1000_adv_tx_desc *)&adapter->tx_desc_base[i];
2075		seg_addr = segs[j].ds_addr;
2076		seg_len  = segs[j].ds_len;
2077
2078		txd->read.buffer_addr = htole64(seg_addr);
2079		txd->read.cmd_type_len = htole32(
2080		    adapter->txd_cmd | cmd_type_len | seg_len);
2081		txd->read.olinfo_status = htole32(olinfo_status);
2082		last = i;
2083		if (++i == adapter->num_tx_desc)
2084			i = 0;
2085		tx_buffer->m_head = NULL;
2086		tx_buffer->next_eop = -1;
2087	}
2088
2089	adapter->next_avail_tx_desc = i;
2090	adapter->num_tx_desc_avail -= nsegs;
2091
2092	tx_buffer->m_head = m_head;
2093	tx_buffer_mapped->map = tx_buffer->map;
2094	tx_buffer->map = map;
2095	bus_dmamap_sync(adapter->txtag, map, BUS_DMASYNC_PREWRITE);
2096
2097	/*
2098	 * Last Descriptor of Packet
2099	 * needs End Of Packet (EOP)
2100	 * and Report Status (RS)
2101	 */
2102	txd->read.cmd_type_len |=
2103	    htole32(E1000_TXD_CMD_EOP | E1000_TXD_CMD_RS);
2104	/*
2105	 * Keep track, in the first buffer, of which
2106	 * descriptor will be written back
2107	 */
2108	tx_buffer = &adapter->tx_buffer_area[first];
2109	tx_buffer->next_eop = last;
2110
2111	/*
2112	 * Advance the Transmit Descriptor Tail (TDT), this tells the E1000
2113	 * that this frame is available to transmit.
2114	 */
2115	bus_dmamap_sync(adapter->txdma.dma_tag, adapter->txdma.dma_map,
2116	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
2117	E1000_WRITE_REG(&adapter->hw, E1000_TDT, i);
2118
2119	return (0);
2121}
2122
2123/*********************************************************************
2124 *
2125 * 82547 workaround to avoid controller hang in half-duplex environment.
2126 * The workaround is to avoid queuing a large packet that would span
2127 * the internal Tx FIFO ring boundary. We need to reset the FIFO pointers
2128 * in this case. We do that only when FIFO is quiescent.
2129 *
2130 **********************************************************************/
2131static void
2132em_82547_move_tail(void *arg)
2133{
2134	struct adapter *adapter = arg;
2135	uint16_t hw_tdt;
2136	uint16_t sw_tdt;
2137	struct e1000_tx_desc *tx_desc;
2138	uint16_t length = 0;
2139	boolean_t eop = 0;
2140
2141	EM_LOCK_ASSERT(adapter);
2142
2143	hw_tdt = E1000_READ_REG(&adapter->hw, E1000_TDT);
2144	sw_tdt = adapter->next_avail_tx_desc;
2145
2146	while (hw_tdt != sw_tdt) {
2147		tx_desc = &adapter->tx_desc_base[hw_tdt];
2148		length += le16toh(tx_desc->lower.flags.length);
2149		eop = (le32toh(tx_desc->lower.data) & E1000_TXD_CMD_EOP) != 0;
2150		if (++hw_tdt == adapter->num_tx_desc)
2151			hw_tdt = 0;
2152
2153		if (eop) {
2154			if (em_82547_fifo_workaround(adapter, length)) {
2155				adapter->tx_fifo_wrk_cnt++;
2156				callout_reset(&adapter->tx_fifo_timer, 1,
2157					em_82547_move_tail, adapter);
2158				break;
2159			}
2160			E1000_WRITE_REG(&adapter->hw, E1000_TDT, hw_tdt);
2161			em_82547_update_fifo_head(adapter, length);
2162			length = 0;
2163		}
2164	}
2165}
2166
2167static int
2168em_82547_fifo_workaround(struct adapter *adapter, int len)
2169{
2170	int fifo_space, fifo_pkt_len;
2171
2172	fifo_pkt_len = roundup2(len + EM_FIFO_HDR, EM_FIFO_HDR);
2173
2174	if (adapter->link_duplex == HALF_DUPLEX) {
2175		fifo_space = adapter->tx_fifo_size - adapter->tx_fifo_head;
2176
2177		if (fifo_pkt_len >= (EM_82547_PKT_THRESH + fifo_space)) {
2178			if (em_82547_tx_fifo_reset(adapter))
2179				return (0);
2180			else
2181				return (1);
2182		}
2183	}
2184
2185	return (0);
2186}
2187
2188static void
2189em_82547_update_fifo_head(struct adapter *adapter, int len)
2190{
2191	int fifo_pkt_len = roundup2(len + EM_FIFO_HDR, EM_FIFO_HDR);
2192
2193	/* tx_fifo_head is always 16 byte aligned */
2194	adapter->tx_fifo_head += fifo_pkt_len;
2195	if (adapter->tx_fifo_head >= adapter->tx_fifo_size) {
2196		adapter->tx_fifo_head -= adapter->tx_fifo_size;
2197	}
2198}
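
/*
 * Worked example of the rounding above, assuming the usual 16-byte
 * EM_FIFO_HDR: a 60-byte runt consumes
 * roundup2(60 + 16, 16) = 80 bytes of FIFO space, so tx_fifo_head
 * always stays 16-byte aligned.
 */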
2199
2200
2201static int
2202em_82547_tx_fifo_reset(struct adapter *adapter)
2203{
2204	uint32_t tctl;
2205
2206	if ((E1000_READ_REG(&adapter->hw, E1000_TDT) ==
2207	    E1000_READ_REG(&adapter->hw, E1000_TDH)) &&
2208	    (E1000_READ_REG(&adapter->hw, E1000_TDFT) ==
2209	    E1000_READ_REG(&adapter->hw, E1000_TDFH)) &&
2210	    (E1000_READ_REG(&adapter->hw, E1000_TDFTS) ==
2211	    E1000_READ_REG(&adapter->hw, E1000_TDFHS)) &&
2212	    (E1000_READ_REG(&adapter->hw, E1000_TDFPC) == 0)) {
2213		/* Disable TX unit */
2214		tctl = E1000_READ_REG(&adapter->hw, E1000_TCTL);
2215		E1000_WRITE_REG(&adapter->hw, E1000_TCTL,
2216		    tctl & ~E1000_TCTL_EN);
2217
2218		/* Reset FIFO pointers */
2219		E1000_WRITE_REG(&adapter->hw, E1000_TDFT,
2220		    adapter->tx_head_addr);
2221		E1000_WRITE_REG(&adapter->hw, E1000_TDFH,
2222		    adapter->tx_head_addr);
2223		E1000_WRITE_REG(&adapter->hw, E1000_TDFTS,
2224		    adapter->tx_head_addr);
2225		E1000_WRITE_REG(&adapter->hw, E1000_TDFHS,
2226		    adapter->tx_head_addr);
2227
2228		/* Re-enable TX unit */
2229		E1000_WRITE_REG(&adapter->hw, E1000_TCTL, tctl);
2230		E1000_WRITE_FLUSH(&adapter->hw);
2231
2232		adapter->tx_fifo_head = 0;
2233		adapter->tx_fifo_reset_cnt++;
2234
2235		return (TRUE);
2236	}
2237	else {
2238		return (FALSE);
2239	}
2240}
2241
2242static void
2243em_set_promisc(struct adapter *adapter)
2244{
2245	struct ifnet	*ifp = adapter->ifp;
2246	uint32_t	reg_rctl;
2247
2248	reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2249
2250	if (ifp->if_flags & IFF_PROMISC) {
2251		reg_rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
2252		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2253	} else if (ifp->if_flags & IFF_ALLMULTI) {
2254		reg_rctl |= E1000_RCTL_MPE;
2255		reg_rctl &= ~E1000_RCTL_UPE;
2256		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2257	}
2258}
2259
2260static void
2261em_disable_promisc(struct adapter *adapter)
2262{
2263	uint32_t	reg_rctl;
2264
2265	reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2266
2267	reg_rctl &=  (~E1000_RCTL_UPE);
2268	reg_rctl &=  (~E1000_RCTL_MPE);
2269	E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2270}
2271
2272
2273/*********************************************************************
2274 *  Multicast Update
2275 *
2276 *  This routine is called whenever multicast address list is updated.
2277 *  This routine is called whenever the multicast address list is updated.
2278 **********************************************************************/
2279
2280static void
2281em_set_multi(struct adapter *adapter)
2282{
2283	struct ifnet	*ifp = adapter->ifp;
2284	struct ifmultiaddr *ifma;
2285	uint32_t reg_rctl = 0;
2286	uint8_t  mta[512]; /* Largest MTA is 4096 bits */
2287	int mcnt = 0;
2288
2289	IOCTL_DEBUGOUT("em_set_multi: begin");
2290
2291	if (adapter->hw.mac.type == e1000_82542 &&
2292	    adapter->hw.revision_id == E1000_REVISION_2) {
2293		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2294		if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
2295			e1000_pci_clear_mwi(&adapter->hw);
2296		reg_rctl |= E1000_RCTL_RST;
2297		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2298		msec_delay(5);
2299	}
2300
2301	IF_ADDR_LOCK(ifp);
2302	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
2303		if (ifma->ifma_addr->sa_family != AF_LINK)
2304			continue;
2305
2306		if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
2307			break;
2308
2309		bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
2310		    &mta[mcnt * ETH_ADDR_LEN], ETH_ADDR_LEN);
2311		mcnt++;
2312	}
2313	IF_ADDR_UNLOCK(ifp);
2314
2315	if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) {
2316		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2317		reg_rctl |= E1000_RCTL_MPE;
2318		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2319	} else
2320		e1000_mc_addr_list_update(&adapter->hw, mta,
2321		    mcnt, 1, adapter->hw.mac.rar_entry_count);
2322
2323	if (adapter->hw.mac.type == e1000_82542 &&
2324	    adapter->hw.revision_id == E1000_REVISION_2) {
2325		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2326		reg_rctl &= ~E1000_RCTL_RST;
2327		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2328		msec_delay(5);
2329		if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
2330			e1000_pci_set_mwi(&adapter->hw);
2331	}
2332}
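
/*
 * Layout note for the mta[] table built above: it is a flat array of
 * back-to-back 6-byte (ETH_ADDR_LEN) link-level addresses, e.g.
 *
 *	mta[0..5]	first multicast address
 *	mta[6..11]	second multicast address
 *
 * up to MAX_NUM_MULTICAST_ADDRESSES entries, which the shared code then
 * hashes into the controller's multicast table array.
 */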
2333
2334
2335/*********************************************************************
2336 *  Timer routine
2337 *
2338 *  This routine checks for link status and updates statistics.
2339 *
2340 **********************************************************************/
2341
2342static void
2343em_local_timer(void *arg)
2344{
2345	struct adapter	*adapter = arg;
2346	struct ifnet	*ifp = adapter->ifp;
2347
2348	EM_LOCK_ASSERT(adapter);
2349
2350	e1000_check_for_link(&adapter->hw);
2351	em_update_link_status(adapter);
2352	em_update_stats_counters(adapter);
2353	if (em_display_debug_stats && ifp->if_drv_flags & IFF_DRV_RUNNING)
2354		em_print_hw_stats(adapter);
2355	em_smartspeed(adapter);
2356	/*
2357	 * Each second we check the watchdog to
2358	 * protect against hardware hangs.
2359	 */
2360	em_watchdog(adapter);
2361
2362	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
2364}
2365
2366static void
2367em_update_link_status(struct adapter *adapter)
2368{
2369	struct ifnet *ifp = adapter->ifp;
2370	device_t dev = adapter->dev;
2371
2372	if (E1000_READ_REG(&adapter->hw, E1000_STATUS) &
2373	    E1000_STATUS_LU) {
2374		if (adapter->link_active == 0) {
2375			e1000_get_speed_and_duplex(&adapter->hw,
2376			    &adapter->link_speed, &adapter->link_duplex);
2377			/* Check if we must disable SPEED_MODE bit on PCI-E */
2378			if ((adapter->link_speed != SPEED_1000) &&
2379			    ((adapter->hw.mac.type == e1000_82571) ||
2380			    (adapter->hw.mac.type == e1000_82572))) {
2381				int tarc0;
2382
2383				tarc0 = E1000_READ_REG(&adapter->hw,
2384				    E1000_TARC0);
2385				tarc0 &= ~SPEED_MODE_BIT;
2386				E1000_WRITE_REG(&adapter->hw,
2387				    E1000_TARC0, tarc0);
2388			}
2389			if (bootverbose)
2390				device_printf(dev, "Link is up %d Mbps %s\n",
2391				    adapter->link_speed,
2392				    ((adapter->link_duplex == FULL_DUPLEX) ?
2393				    "Full Duplex" : "Half Duplex"));
2394			adapter->link_active = 1;
2395			adapter->smartspeed = 0;
2396			ifp->if_baudrate = adapter->link_speed * 1000000;
2397			if_link_state_change(ifp, LINK_STATE_UP);
2398		}
2399	} else {
2400		if (adapter->link_active == 1) {
2401			ifp->if_baudrate = adapter->link_speed = 0;
2402			adapter->link_duplex = 0;
2403			if (bootverbose)
2404				device_printf(dev, "Link is Down\n");
2405			adapter->link_active = 0;
2406			if_link_state_change(ifp, LINK_STATE_DOWN);
2407		}
2408	}
2409}
2410
2411/*********************************************************************
2412 *
2413 *  This routine disables all traffic on the adapter by issuing a
2414 *  global reset on the MAC and deallocates TX/RX buffers.
2415 *
2416 **********************************************************************/
2417
2418static void
2419em_stop(void *arg)
2420{
2421	struct adapter	*adapter = arg;
2422	struct ifnet	*ifp = adapter->ifp;
2423
2424	EM_LOCK_ASSERT(adapter);
2425
2426	INIT_DEBUGOUT("em_stop: begin");
2427
2428	em_disable_intr(adapter);
2429	callout_stop(&adapter->timer);
2430	callout_stop(&adapter->tx_fifo_timer);
2431	em_free_transmit_structures(adapter);
2432	em_free_receive_structures(adapter);
2433
2434	/* Tell the stack that the interface is no longer active */
2435	ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
2436
2437	e1000_reset_hw(&adapter->hw);
2438	if (adapter->hw.mac.type >= e1000_82544)
2439		E1000_WRITE_REG(&adapter->hw, E1000_WUC, 0);
2440}
2441
2442
2443/*********************************************************************
2444 *
2445 *  Determine hardware revision.
2446 *
2447 **********************************************************************/
2448static void
2449em_identify_hardware(struct adapter *adapter)
2450{
2451	device_t dev = adapter->dev;
2452
2453	/* Make sure our PCI config space has the necessary stuff set */
2454	adapter->hw.bus.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2);
2455	if ((adapter->hw.bus.pci_cmd_word & PCIM_CMD_BUSMASTEREN) == 0 ||
2456	    (adapter->hw.bus.pci_cmd_word & PCIM_CMD_MEMEN) == 0) {
2457		device_printf(dev, "Memory Access and/or Bus Master bits "
2458		    "were not set!\n");
2459		adapter->hw.bus.pci_cmd_word |=
2460		(PCIM_CMD_BUSMASTEREN | PCIM_CMD_MEMEN);
2461		pci_write_config(dev, PCIR_COMMAND,
2462		    adapter->hw.bus.pci_cmd_word, 2);
2463	}
2464
2465	/* Save off the information about this board */
2466	adapter->hw.vendor_id = pci_get_vendor(dev);
2467	adapter->hw.device_id = pci_get_device(dev);
2468	adapter->hw.revision_id = pci_read_config(dev, PCIR_REVID, 1);
2469	adapter->hw.subsystem_vendor_id =
2470	    pci_read_config(dev, PCIR_SUBVEND_0, 2);
2471	adapter->hw.subsystem_device_id = pci_read_config(dev, PCIR_SUBDEV_0, 2);
2472
2473	/* Do Shared Code Init and Setup */
2474	if (e1000_set_mac_type(&adapter->hw)) {
2475		device_printf(dev, "Setup init failure\n");
2476		return;
2477	}
2478}
2479
2480static int
2481em_allocate_pci_resources(struct adapter *adapter)
2482{
2483	device_t	dev = adapter->dev;
2484	int		val, rid;
2485
2486	rid = PCIR_BAR(0);
2487	adapter->res_memory = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
2488	    &rid, RF_ACTIVE);
2489	if (adapter->res_memory == NULL) {
2490		device_printf(dev, "Unable to allocate bus resource: memory\n");
2491		return (ENXIO);
2492	}
2493	adapter->osdep.mem_bus_space_tag =
2494	    rman_get_bustag(adapter->res_memory);
2495	adapter->osdep.mem_bus_space_handle =
2496	    rman_get_bushandle(adapter->res_memory);
2497	adapter->hw.hw_addr = (uint8_t *)&adapter->osdep.mem_bus_space_handle;
2498
2499	/* Only older adapters use IO mapping */
2500	if (adapter->hw.mac.type <= e1000_82543) {
2501		/* Figure our where our IO BAR is ? */
2502		/* Figure out where our IO BAR is */
2503			val = pci_read_config(dev, rid, 4);
2504			if (EM_BAR_TYPE(val) == EM_BAR_TYPE_IO) {
2505				adapter->io_rid = rid;
2506				break;
2507			}
2508			rid += 4;
2509			/* check for 64bit BAR */
2510			if (EM_BAR_MEM_TYPE(val) == EM_BAR_MEM_TYPE_64BIT)
2511				rid += 4;
2512		}
2513		if (rid >= PCIR_CIS) {
2514			device_printf(dev, "Unable to locate IO BAR\n");
2515			return (ENXIO);
2516		}
2517		adapter->res_ioport = bus_alloc_resource_any(dev,
2518		    SYS_RES_IOPORT, &adapter->io_rid, RF_ACTIVE);
2519		if (adapter->res_ioport == NULL) {
2520			device_printf(dev, "Unable to allocate bus resource: "
2521			    "ioport\n");
2522			return (ENXIO);
2523		}
2524		adapter->hw.io_base = 0;
2525		adapter->osdep.io_bus_space_tag =
2526		    rman_get_bustag(adapter->res_ioport);
2527		adapter->osdep.io_bus_space_handle =
2528		    rman_get_bushandle(adapter->res_ioport);
2529	}
2530
2531	/*
2532	 * Set up MSI/X or MSI if PCI Express;
2533	 * only the newest adapters can use MSI/X,
2534	 * and real support for it is forthcoming
2535	 */
2536	adapter->msi = 0; /* Set defaults */
2537	rid = 0x0;
2538	if (adapter->hw.mac.type >= e1000_82575) {
2539		/*
2540		 * Eventually this will be used
2541		 * for Multiqueue; for now we will
2542		 * just use one vector.
2543		 */
2544		val = pci_msix_count(dev);
2545		if (val && pci_alloc_msix(dev, &val) == 0) {
2546			rid = 1;
2547			adapter->msi = 1;
2548		}
2549	} else if (adapter->hw.bus.type == e1000_bus_type_pci_express) {
2550		val = pci_msi_count(dev);
2551		if (val == 1 && pci_alloc_msi(dev, &val) == 0) {
2552			rid = 1;
2553			adapter->msi = 1;
2554		}
2555	}
2556	adapter->res_interrupt = bus_alloc_resource_any(dev,
2557	    SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2558	if (adapter->res_interrupt == NULL) {
2559		device_printf(dev, "Unable to allocate bus resource: "
2560		    "interrupt\n");
2561		return (ENXIO);
2562	}
2563
2564	adapter->hw.back = &adapter->osdep;
2565
2566	return (0);
2567}
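
/*
 * Worked example of the IO BAR scan above (register values are
 * illustrative): if BAR0 at 0x10 is a 64-bit memory BAR, it occupies
 * two dwords, so the loop advances 0x10 -> 0x18; if the BAR at 0x18
 * then reads back with the IO type bit set, 0x18 becomes io_rid.
 */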
2568
2569/*********************************************************************
2570 *
2571 *  Setup the appropriate Interrupt handlers.
2572 *
2573 **********************************************************************/
2574int
2575em_allocate_intr(struct adapter *adapter)
2576{
2577	device_t dev = adapter->dev;
2578	int error;
2579
2580	/* Manually turn off all interrupts */
2581	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2582
2583#ifdef DEVICE_POLLING
2584	/* We do Legacy setup */
2585	if (adapter->int_handler_tag == NULL &&
2586	    (error = bus_setup_intr(dev, adapter->res_interrupt,
2587	    INTR_TYPE_NET | INTR_MPSAFE, NULL, em_intr, adapter,
2588	    &adapter->int_handler_tag)) != 0) {
2589		device_printf(dev, "Failed to register interrupt handler");
2590		return (error);
2591	}
2592
2593#else
2594	/*
2595	 * Try allocating a fast interrupt and the associated deferred
2596	 * processing contexts.
2597	 */
2598	TASK_INIT(&adapter->rxtx_task, 0, em_handle_rxtx, adapter);
2599	TASK_INIT(&adapter->link_task, 0, em_handle_link, adapter);
2600	adapter->tq = taskqueue_create_fast("em_taskq", M_NOWAIT,
2601	    taskqueue_thread_enqueue, &adapter->tq);
2602	taskqueue_start_threads(&adapter->tq, 1, PI_NET, "%s taskq",
2603	    device_get_nameunit(adapter->dev));
2604	if ((error = bus_setup_intr(dev, adapter->res_interrupt,
2605	    INTR_TYPE_NET, em_intr_fast, NULL, adapter,
2606	    &adapter->int_handler_tag)) != 0) {
2607		device_printf(dev, "Failed to register fast interrupt "
2608			    "handler: %d\n", error);
2609		taskqueue_free(adapter->tq);
2610		adapter->tq = NULL;
2611		return (error);
2612	}
2613#endif
2614
2615	em_enable_intr(adapter);
2616	return (0);
2617}
2618
2619static void
2620em_free_intr(struct adapter *adapter)
2621{
2622	device_t dev = adapter->dev;
2623
2624	if (adapter->res_interrupt != NULL) {
2625		bus_teardown_intr(dev, adapter->res_interrupt,
2626			adapter->int_handler_tag);
2627		adapter->int_handler_tag = NULL;
2628	}
2629	if (adapter->tq != NULL) {
2630		taskqueue_drain(adapter->tq, &adapter->rxtx_task);
2631		taskqueue_drain(taskqueue_fast, &adapter->link_task);
2632		taskqueue_free(adapter->tq);
2633		adapter->tq = NULL;
2634	}
2635}
2636
2637static void
2638em_free_pci_resources(struct adapter *adapter)
2639{
2640	device_t dev = adapter->dev;
2641
2642	if (adapter->res_interrupt != NULL)
2643		bus_release_resource(dev, SYS_RES_IRQ,
2644		    rman_get_rid(adapter->res_interrupt), adapter->res_interrupt);
2645
2646	if (adapter->msi)
2647		pci_release_msi(dev);
2648
2649	if (adapter->res_memory != NULL)
2650		bus_release_resource(dev, SYS_RES_MEMORY,
2651		    PCIR_BAR(0), adapter->res_memory);
2652
2653	if (adapter->flash_mem != NULL)
2654		bus_release_resource(dev, SYS_RES_MEMORY,
2655		    EM_FLASH, adapter->flash_mem);
2656
2657	if (adapter->res_ioport != NULL)
2658		bus_release_resource(dev, SYS_RES_IOPORT,
2659		    adapter->io_rid, adapter->res_ioport);
2660}
2661
2662/*********************************************************************
2663 *
2664 *  Initialize the hardware to a configuration
2665 *  as specified by the adapter structure.
2666 *
2667 **********************************************************************/
2668static int
2669em_hardware_init(struct adapter *adapter)
2670{
2671	device_t dev = adapter->dev;
2672	uint16_t rx_buffer_size;
2673
2674	INIT_DEBUGOUT("em_hardware_init: begin");
2675
2676	/* Issue a global reset */
2677	e1000_reset_hw(&adapter->hw);
2678
2679	/* When hardware is reset, fifo_head is also reset */
2680	adapter->tx_fifo_head = 0;
2681
2682	/* Set up smart power down as default off on newer adapters. */
2683	if (!em_smart_pwr_down && (adapter->hw.mac.type == e1000_82571 ||
2684	    adapter->hw.mac.type == e1000_82572)) {
2685		uint16_t phy_tmp = 0;
2686
2687		/* Speed up time to link by disabling smart power down. */
2688		e1000_read_phy_reg(&adapter->hw,
2689		    IGP02E1000_PHY_POWER_MGMT, &phy_tmp);
2690		phy_tmp &= ~IGP02E1000_PM_SPD;
2691		e1000_write_phy_reg(&adapter->hw,
2692		    IGP02E1000_PHY_POWER_MGMT, phy_tmp);
2693	}
2694
2695	/*
2696	 * These parameters control the automatic generation (Tx) and
2697	 * response (Rx) to Ethernet PAUSE frames.
2698	 * - High water mark should allow for at least two frames to be
2699	 *   received after sending an XOFF.
2700	 * - Low water mark works best when it is very near the high water mark.
2701	 *   This allows the receiver to restart by sending XON when it has
2702	 *   drained a bit. Here we use an arbitrary value of 1500 which will
2703	 *   restart after one full frame is pulled from the buffer. There
2704	 *   could be several smaller frames in the buffer and if so they will
2705	 *   not trigger the XON until their total number reduces the buffer
2706	 *   by 1500.
2707	 * - The pause time is fairly large at 1000 x 512ns = 512 usec.
2708	 */
2709	rx_buffer_size = ((E1000_READ_REG(&adapter->hw, E1000_PBA) &
2710	    0xffff) << 10 );
2711
2712	adapter->hw.mac.fc_high_water = rx_buffer_size -
2713	    roundup2(adapter->hw.mac.max_frame_size, 1024);
2714	adapter->hw.mac.fc_low_water = adapter->hw.mac.fc_high_water - 1500;
2715	if (adapter->hw.mac.type == e1000_80003es2lan)
2716		adapter->hw.mac.fc_pause_time = 0xFFFF;
2717	else
2718		adapter->hw.mac.fc_pause_time = EM_FC_PAUSE_TIME;
2719	adapter->hw.mac.fc_send_xon = TRUE;
2720	adapter->hw.mac.fc = e1000_fc_full;
2721
2722	if (e1000_init_hw(&adapter->hw) < 0) {
2723		device_printf(dev, "Hardware Initialization Failed\n");
2724		return (EIO);
2725	}
2726
2727	e1000_check_for_link(&adapter->hw);
2728
2729	return (0);
2730}
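
/*
 * Worked example of the watermark math above, assuming a PBA read that
 * reports a 48 KB Rx allocation and a 1522-byte max frame:
 *
 *	rx_buffer_size = 48 << 10 = 49152
 *	fc_high_water  = 49152 - roundup2(1522, 1024) = 49152 - 2048 = 47104
 *	fc_low_water   = 47104 - 1500 = 45604
 */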
2731
2732/*********************************************************************
2733 *
2734 *  Setup networking device structure and register an interface.
2735 *
2736 **********************************************************************/
2737static void
2738em_setup_interface(device_t dev, struct adapter *adapter)
2739{
2740	struct ifnet   *ifp;
2741
2742	INIT_DEBUGOUT("em_setup_interface: begin");
2743
2744	ifp = adapter->ifp = if_alloc(IFT_ETHER);
2745	if (ifp == NULL)
2746		panic("%s: can not if_alloc()", device_get_nameunit(dev));
2747	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
2748	ifp->if_mtu = ETHERMTU;
2749	ifp->if_init =  em_init;
2750	ifp->if_softc = adapter;
2751	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
2752	ifp->if_ioctl = em_ioctl;
2753	ifp->if_start = em_start;
2754	IFQ_SET_MAXLEN(&ifp->if_snd, adapter->num_tx_desc - 1);
2755	ifp->if_snd.ifq_drv_maxlen = adapter->num_tx_desc - 1;
2756	IFQ_SET_READY(&ifp->if_snd);
2757
2758	ether_ifattach(ifp, adapter->hw.mac.addr);
2759
2760	ifp->if_capabilities = ifp->if_capenable = 0;
2761
2762	if (adapter->hw.mac.type >= e1000_82543) {
2763		ifp->if_capabilities |= IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM;
2764		ifp->if_capenable |= IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM;
2765	}
2766
2767	/* Enable TSO if available */
2768	if ((adapter->hw.mac.type > e1000_82544) &&
2769	    (adapter->hw.mac.type != e1000_82547)) {
2770		ifp->if_capabilities |= IFCAP_TSO4;
2771		ifp->if_capenable |= IFCAP_TSO4;
2772	}
2773
2774	/*
2775	 * Tell the upper layer(s) we support long frames.
2776	 */
2777	ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);
2778	ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU;
2779	ifp->if_capenable |= IFCAP_VLAN_MTU;
2780
2781#ifdef DEVICE_POLLING
2782	ifp->if_capabilities |= IFCAP_POLLING;
2783#endif
2784
2785	/*
2786	 * Specify the media types supported by this adapter and register
2787	 * callbacks to update media and link information
2788	 */
2789	ifmedia_init(&adapter->media, IFM_IMASK,
2790	    em_media_change, em_media_status);
2791	if ((adapter->hw.media_type == e1000_media_type_fiber) ||
2792	    (adapter->hw.media_type == e1000_media_type_internal_serdes)) {
2793		u_char fiber_type = IFM_1000_SX;	/* default type */
2794
2795		if (adapter->hw.mac.type == e1000_82545)
2796			fiber_type = IFM_1000_LX;
2797		ifmedia_add(&adapter->media, IFM_ETHER | fiber_type | IFM_FDX,
2798			    0, NULL);
2799		ifmedia_add(&adapter->media, IFM_ETHER | fiber_type, 0, NULL);
2800	} else {
2801		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T, 0, NULL);
2802		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T | IFM_FDX,
2803			    0, NULL);
2804		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX,
2805			    0, NULL);
2806		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
2807			    0, NULL);
2808		if (adapter->hw.phy.type != e1000_phy_ife) {
2809			ifmedia_add(&adapter->media,
2810				IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
2811			ifmedia_add(&adapter->media,
2812				IFM_ETHER | IFM_1000_T, 0, NULL);
2813		}
2814	}
2815	ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
2816	ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
2817}
2818
2819
2820/*********************************************************************
2821 *
2822 *  Workaround for SmartSpeed on 82541 and 82547 controllers
2823 *
2824 **********************************************************************/
2825static void
2826em_smartspeed(struct adapter *adapter)
2827{
2828	uint16_t phy_tmp;
2829
2830	if (adapter->link_active || (adapter->hw.phy.type != e1000_phy_igp) ||
2831	    adapter->hw.mac.autoneg == 0 ||
2832	    (adapter->hw.phy.autoneg_advertised & ADVERTISE_1000_FULL) == 0)
2833		return;
2834
2835	if (adapter->smartspeed == 0) {
2836		/* If Master/Slave config fault is asserted twice,
2837		 * we assume back-to-back */
2838		e1000_read_phy_reg(&adapter->hw, PHY_1000T_STATUS, &phy_tmp);
2839		if (!(phy_tmp & SR_1000T_MS_CONFIG_FAULT))
2840			return;
2841		e1000_read_phy_reg(&adapter->hw, PHY_1000T_STATUS, &phy_tmp);
2842		if (phy_tmp & SR_1000T_MS_CONFIG_FAULT) {
2843			e1000_read_phy_reg(&adapter->hw, PHY_1000T_CTRL, &phy_tmp);
2844			if (phy_tmp & CR_1000T_MS_ENABLE) {
2845				phy_tmp &= ~CR_1000T_MS_ENABLE;
2846				e1000_write_phy_reg(&adapter->hw, PHY_1000T_CTRL,
2847				    phy_tmp);
2848				adapter->smartspeed++;
2849				if (adapter->hw.mac.autoneg &&
2850				   !e1000_phy_setup_autoneg(&adapter->hw) &&
2851				   !e1000_read_phy_reg(&adapter->hw, PHY_CONTROL,
2852				    &phy_tmp)) {
2853					phy_tmp |= (MII_CR_AUTO_NEG_EN |
2854						    MII_CR_RESTART_AUTO_NEG);
2855					e1000_write_phy_reg(&adapter->hw, PHY_CONTROL,
2856					    phy_tmp);
2857				}
2858			}
2859		}
2860		return;
2861	} else if (adapter->smartspeed == EM_SMARTSPEED_DOWNSHIFT) {
2862		/* If still no link, perhaps using 2/3 pair cable */
2863		e1000_read_phy_reg(&adapter->hw, PHY_1000T_CTRL, &phy_tmp);
2864		phy_tmp |= CR_1000T_MS_ENABLE;
2865		e1000_write_phy_reg(&adapter->hw, PHY_1000T_CTRL, phy_tmp);
2866		if (adapter->hw.mac.autoneg &&
2867		   !e1000_phy_setup_autoneg(&adapter->hw) &&
2868		   !e1000_read_phy_reg(&adapter->hw, PHY_CONTROL, &phy_tmp)) {
2869			phy_tmp |= (MII_CR_AUTO_NEG_EN |
2870				    MII_CR_RESTART_AUTO_NEG);
2871			e1000_write_phy_reg(&adapter->hw, PHY_CONTROL, phy_tmp);
2872		}
2873	}
2874	/* Restart process after EM_SMARTSPEED_MAX iterations */
2875	if (adapter->smartspeed++ == EM_SMARTSPEED_MAX)
2876		adapter->smartspeed = 0;
2877}
2878
2879
2880/*
2881 * Manage DMA'able memory.
2882 */
2883static void
2884em_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
2885{
2886	if (error)
2887		return;
2888	*(bus_addr_t *) arg = segs[0].ds_addr;
2889}
2890
2891static int
2892em_dma_malloc(struct adapter *adapter, bus_size_t size,
2893        struct em_dma_alloc *dma, int mapflags)
2894{
2895	int error;
2896
2897	error = bus_dma_tag_create(bus_get_dma_tag(adapter->dev), /* parent */
2898				EM_DBA_ALIGN, 0,	/* alignment, bounds */
2899				BUS_SPACE_MAXADDR,	/* lowaddr */
2900				BUS_SPACE_MAXADDR,	/* highaddr */
2901				NULL, NULL,		/* filter, filterarg */
2902				size,			/* maxsize */
2903				1,			/* nsegments */
2904				size,			/* maxsegsize */
2905				0,			/* flags */
2906				NULL,			/* lockfunc */
2907				NULL,			/* lockarg */
2908				&dma->dma_tag);
2909	if (error) {
2910		device_printf(adapter->dev,
2911		    "%s: bus_dma_tag_create failed: %d\n",
2912		    __func__, error);
2913		goto fail_0;
2914	}
2915
2916	error = bus_dmamem_alloc(dma->dma_tag, (void**) &dma->dma_vaddr,
2917	    BUS_DMA_NOWAIT, &dma->dma_map);
2918	if (error) {
2919		device_printf(adapter->dev, "%s: bus_dmamem_alloc(%ju) "
2920		    "failed: %d\n", __func__, (uintmax_t)size, error);
2921		/* Nothing was allocated, so only the tag needs cleanup. */
2922		bus_dma_tag_destroy(dma->dma_tag);
2923		goto fail_0;
2924	}
2924
2925	dma->dma_paddr = 0;
2926	error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
2927	    size, em_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT);
2928	if (error || dma->dma_paddr == 0) {
2929		device_printf(adapter->dev,
2930		    "%s: bus_dmamap_load failed: %d\n",
2931		    __func__, error);
2932		goto fail_3;
2933	}
2934
2935	return (0);
2936
2937fail_3:
2938	bus_dmamap_unload(dma->dma_tag, dma->dma_map);
2940	bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
2941	bus_dma_tag_destroy(dma->dma_tag);
2942fail_0:
2943	dma->dma_map = NULL;
2944	dma->dma_tag = NULL;
2945
2946	return (error);
2947}
2948
2949static void
2950em_dma_free(struct adapter *adapter, struct em_dma_alloc *dma)
2951{
2952	if (dma->dma_tag == NULL)
2953		return;
2954	if (dma->dma_map != NULL) {
2955		bus_dmamap_sync(dma->dma_tag, dma->dma_map,
2956		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
2957		bus_dmamap_unload(dma->dma_tag, dma->dma_map);
2958		bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
2959		dma->dma_map = NULL;
2960	}
2961	bus_dma_tag_destroy(dma->dma_tag);
2962	dma->dma_tag = NULL;
2963}
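
/*
 * A hedged usage sketch (not compiled) of the em_dma_malloc() /
 * em_dma_free() pair above, sized for a descriptor ring; the error
 * handling shown is illustrative.
 */
#if 0
	struct em_dma_alloc ring;

	if (em_dma_malloc(adapter, adapter->num_tx_desc *
	    sizeof(struct e1000_tx_desc), &ring, BUS_DMA_NOWAIT) != 0)
		return (ENOMEM);
	/* ... program ring.dma_paddr into hardware, use ring.dma_vaddr ... */
	em_dma_free(adapter, &ring);
#endif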
2964
2965
2966/*********************************************************************
2967 *
2968 *  Allocate memory for tx_buffer structures. The tx_buffer stores all
2969 *  the information needed to transmit a packet on the wire.
2970 *
2971 **********************************************************************/
2972static int
2973em_allocate_transmit_structures(struct adapter *adapter)
2974{
2975	device_t dev = adapter->dev;
2976
2977	adapter->tx_buffer_area = malloc(sizeof(struct em_buffer) *
2978	    adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO);
2979	if (adapter->tx_buffer_area == NULL) {
2980		device_printf(dev, "Unable to allocate tx_buffer memory\n");
2981		return (ENOMEM);
2982	}
2983
2984	bzero(adapter->tx_buffer_area,
2985	    (sizeof(struct em_buffer)) * adapter->num_tx_desc);
2986
2987	return (0);
2988}
2989
2990/*********************************************************************
2991 *
2992 *  Initialize transmit structures.
2993 *
2994 **********************************************************************/
2995static int
2996em_setup_transmit_structures(struct adapter *adapter)
2997{
2998	device_t dev = adapter->dev;
2999	struct em_buffer *tx_buffer;
3000	int error, i;
3001
3002	/*
3003	 * Create the DMA tag used to map TX buffers
3004	 */
3005	if ((error = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */
3006				1, 0,			/* alignment, bounds */
3007				BUS_SPACE_MAXADDR,	/* lowaddr */
3008				BUS_SPACE_MAXADDR,	/* highaddr */
3009				NULL, NULL,		/* filter, filterarg */
3010				EM_TSO_SIZE,		/* maxsize */
3011				EM_MAX_SCATTER,		/* nsegments */
3012				EM_TSO_SEG_SIZE,	/* maxsegsize */
3013				0,			/* flags */
3014				NULL,		/* lockfunc */
3015				NULL,		/* lockarg */
3016				&adapter->txtag)) != 0) {
3017		device_printf(dev, "Unable to allocate TX DMA tag\n");
3018		goto fail;
3019	}
3020
3021	if ((error = em_allocate_transmit_structures(adapter)) != 0)
3022		goto fail;
3023
3024	/* Clear the old ring contents */
3025	bzero(adapter->tx_desc_base,
3026	    (sizeof(struct e1000_tx_desc)) * adapter->num_tx_desc);
3027
3028	/* Create the descriptor buffer dma maps */
3029	tx_buffer = adapter->tx_buffer_area;
3030	for (i = 0; i < adapter->num_tx_desc; i++) {
3031		error = bus_dmamap_create(adapter->txtag, 0, &tx_buffer->map);
3032		if (error != 0) {
3033			device_printf(dev, "Unable to create TX DMA map\n");
3034			goto fail;
3035		}
3036		tx_buffer->next_eop = -1;
3037		tx_buffer++;
3038	}
3039
3040	adapter->next_avail_tx_desc = 0;
3041	adapter->next_tx_to_clean = 0;
3042
3043	/* Set number of descriptors available */
3044	adapter->num_tx_desc_avail = adapter->num_tx_desc;
3045
3046	bus_dmamap_sync(adapter->txdma.dma_tag, adapter->txdma.dma_map,
3047	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3048
3049	return (0);
3050
3051fail:
3052	em_free_transmit_structures(adapter);
3053	return (error);
3054}
3055
3056/*********************************************************************
3057 *
3058 *  Enable transmit unit.
3059 *
3060 **********************************************************************/
3061static void
3062em_initialize_transmit_unit(struct adapter *adapter)
3063{
3064	uint32_t	tctl, tarc, tipg = 0;
3065	uint64_t	bus_addr;
3066
3067	INIT_DEBUGOUT("em_initialize_transmit_unit: begin");
3068	/* Setup the Base and Length of the Tx Descriptor Ring */
3069	bus_addr = adapter->txdma.dma_paddr;
3070	E1000_WRITE_REG(&adapter->hw, E1000_TDLEN,
3071	    adapter->num_tx_desc * sizeof(struct e1000_tx_desc));
3072	E1000_WRITE_REG(&adapter->hw, E1000_TDBAH, (uint32_t)(bus_addr >> 32));
3073	E1000_WRITE_REG(&adapter->hw, E1000_TDBAL, (uint32_t)bus_addr);
3074
3075	/* Setup the HW Tx Head and Tail descriptor pointers */
3076	E1000_WRITE_REG(&adapter->hw, E1000_TDT, 0);
3077	E1000_WRITE_REG(&adapter->hw, E1000_TDH, 0);
3078
3079	HW_DEBUGOUT2("Base = %x, Length = %x\n",
3080	    E1000_READ_REG(&adapter->hw, E1000_TDBAL),
3081	    E1000_READ_REG(&adapter->hw, E1000_TDLEN));
3082
3083	/* Set the default values for the Tx Inter Packet Gap timer */
3084	switch (adapter->hw.mac.type) {
3085	case e1000_82542:
3086		tipg = DEFAULT_82542_TIPG_IPGT;
3087		tipg |= DEFAULT_82542_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT;
3088		tipg |= DEFAULT_82542_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT;
3089		break;
3090	case e1000_80003es2lan:
3091		tipg = DEFAULT_82543_TIPG_IPGR1;
3092		tipg |= DEFAULT_80003ES2LAN_TIPG_IPGR2 <<
3093		    E1000_TIPG_IPGR2_SHIFT;
3094		break;
3095	default:
3096		if ((adapter->hw.media_type == e1000_media_type_fiber) ||
3097		    (adapter->hw.media_type ==
3098		    e1000_media_type_internal_serdes))
3099			tipg = DEFAULT_82543_TIPG_IPGT_FIBER;
3100		else
3101			tipg = DEFAULT_82543_TIPG_IPGT_COPPER;
3102		tipg |= DEFAULT_82543_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT;
3103		tipg |= DEFAULT_82543_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT;
3104	}
3105
3106	E1000_WRITE_REG(&adapter->hw, E1000_TIPG, tipg);
3107	E1000_WRITE_REG(&adapter->hw, E1000_TIDV, adapter->tx_int_delay.value);
3108	if (adapter->hw.mac.type >= e1000_82540)
3109		E1000_WRITE_REG(&adapter->hw, E1000_TADV,
3110		    adapter->tx_abs_int_delay.value);
3111
3112	if ((adapter->hw.mac.type == e1000_82571) ||
3113	    (adapter->hw.mac.type == e1000_82572)) {
3114		tarc = E1000_READ_REG(&adapter->hw, E1000_TARC0);
3115		tarc |= SPEED_MODE_BIT;
3116		E1000_WRITE_REG(&adapter->hw, E1000_TARC0, tarc);
3117	} else if (adapter->hw.mac.type == e1000_80003es2lan) {
3118		tarc = E1000_READ_REG(&adapter->hw, E1000_TARC0);
3119		tarc |= 1;
3120		E1000_WRITE_REG(&adapter->hw, E1000_TARC0, tarc);
3121		tarc = E1000_READ_REG(&adapter->hw, E1000_TARC1);
3122		tarc |= 1;
3123		E1000_WRITE_REG(&adapter->hw, E1000_TARC1, tarc);
3124	}
3125
3126	/* Program the Transmit Control Register */
3127	tctl = E1000_READ_REG(&adapter->hw, E1000_TCTL);
3128	tctl &= ~E1000_TCTL_CT;
3129	tctl = E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN |
3130		   (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT);
3131
3132	if (adapter->hw.mac.type >= e1000_82571)
3133		tctl |= E1000_TCTL_MULR;
3134
3135	/* This write will effectively turn on the transmit unit. */
3136	E1000_WRITE_REG(&adapter->hw, E1000_TCTL, tctl);
3137
3138	/* Setup Transmit Descriptor Base Settings */
3139	adapter->txd_cmd = E1000_TXD_CMD_IFCS;
3140
3141	if ((adapter->tx_int_delay.value > 0) &&
3142	    (adapter->hw.mac.type != e1000_82575))
3143		adapter->txd_cmd |= E1000_TXD_CMD_IDE;
3144
3145	/* Set the function pointer for the transmit routine */
3146	if (adapter->hw.mac.type >= e1000_82575)
3147		adapter->em_xmit = em_adv_encap;
3148	else
3149		adapter->em_xmit = em_encap;
3150}
3151
3152/*********************************************************************
3153 *
3154 *  Free all transmit related data structures.
3155 *
3156 **********************************************************************/
3157static void
3158em_free_transmit_structures(struct adapter *adapter)
3159{
3160	struct em_buffer *tx_buffer;
3161	int i;
3162
3163	INIT_DEBUGOUT("free_transmit_structures: begin");
3164
3165	if (adapter->tx_buffer_area != NULL) {
3166		tx_buffer = adapter->tx_buffer_area;
3167		for (i = 0; i < adapter->num_tx_desc; i++, tx_buffer++) {
3168			if (tx_buffer->m_head != NULL) {
3169				bus_dmamap_sync(adapter->txtag, tx_buffer->map,
3170				    BUS_DMASYNC_POSTWRITE);
3171				bus_dmamap_unload(adapter->txtag,
3172				    tx_buffer->map);
3173				m_freem(tx_buffer->m_head);
3174				tx_buffer->m_head = NULL;
3175			} else if (tx_buffer->map != NULL)
3176				bus_dmamap_unload(adapter->txtag,
3177				    tx_buffer->map);
3178			if (tx_buffer->map != NULL) {
3179				bus_dmamap_destroy(adapter->txtag,
3180				    tx_buffer->map);
3181				tx_buffer->map = NULL;
3182			}
3183		}
3184	}
3185	if (adapter->tx_buffer_area != NULL) {
3186		free(adapter->tx_buffer_area, M_DEVBUF);
3187		adapter->tx_buffer_area = NULL;
3188	}
3189	if (adapter->txtag != NULL) {
3190		bus_dma_tag_destroy(adapter->txtag);
3191		adapter->txtag = NULL;
3192	}
3193}
3194
3195/*********************************************************************
3196 *
3197 *  The offload context needs to be set when we transfer the first
3198 *  packet of a particular protocol (TCP/UDP). This routine has been
3199 *  enhanced to deal with inserted VLAN headers, and IPV6 (not complete)
3200 *
3201 **********************************************************************/
3202static void
3203em_transmit_checksum_setup(struct adapter *adapter, struct mbuf *mp,
3204    uint32_t *txd_upper, uint32_t *txd_lower)
3205{
3206	struct e1000_context_desc *TXD;
3207	struct em_buffer *tx_buffer;
3208	struct ether_vlan_header *eh;
3209	struct ip *ip;
3210	struct ip6_hdr *ip6;
3211	struct tcphdr *th;
3212	int curr_txd, ehdrlen, hdr_len, ip_hlen;
3213	uint32_t cmd = 0;
3214	uint16_t etype;
3215	uint8_t ipproto;
3216
3217	/* Setup checksum offload context. */
3218	curr_txd = adapter->next_avail_tx_desc;
3219	tx_buffer = &adapter->tx_buffer_area[curr_txd];
3220	TXD = (struct e1000_context_desc *) &adapter->tx_desc_base[curr_txd];
3221
3222	*txd_lower = E1000_TXD_CMD_DEXT |	/* Extended descr type */
3223		     E1000_TXD_DTYP_D;		/* Data descr */
3224
3225	/*
3226	 * Determine where frame payload starts.
3227	 * Jump over vlan headers if already present,
3228	 * helpful for QinQ too.
3229	 */
3230	eh = mtod(mp, struct ether_vlan_header *);
3231	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
3232		etype = ntohs(eh->evl_proto);
3233		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3234	} else {
3235		etype = ntohs(eh->evl_encap_proto);
3236		ehdrlen = ETHER_HDR_LEN;
3237	}
3238
3239	/*
3240	 * We only support TCP/UDP for IPv4 and IPv6 for the moment.
3241	 * TODO: Support SCTP too when it hits the tree.
3242	 */
3243	switch (etype) {
3244	case ETHERTYPE_IP:
3245		ip = (struct ip *)(mp->m_data + ehdrlen);
3246		ip_hlen = ip->ip_hl << 2;
3247
3248		/* Setup of IP header checksum. */
3249		if (mp->m_pkthdr.csum_flags & CSUM_IP) {
3250			/*
3251			 * Start offset for header checksum calculation.
3252			 * End offset for header checksum calculation.
3253			 * Offset of place to put the checksum.
3254			 */
3255			TXD->lower_setup.ip_fields.ipcss = ehdrlen;
3256			TXD->lower_setup.ip_fields.ipcse =
3257			    htole16(ehdrlen + ip_hlen);
3258			TXD->lower_setup.ip_fields.ipcso =
3259			    ehdrlen + offsetof(struct ip, ip_sum);
3260			cmd |= E1000_TXD_CMD_IP;
3261			*txd_upper |= E1000_TXD_POPTS_IXSM << 8;
3262		}
3263
3264		if (mp->m_len < ehdrlen + ip_hlen)
3265			return;	/* failure */
3266
3267		hdr_len = ehdrlen + ip_hlen;
3268		ipproto = ip->ip_p;
3269
3270		break;
3271	case ETHERTYPE_IPV6:
3272		ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
3273		ip_hlen = sizeof(struct ip6_hdr); /* XXX: No header stacking. */
3274
3275		if (mp->m_len < ehdrlen + ip_hlen)
3276			return;	/* failure */
3277
3278		/* IPv6 doesn't have a header checksum. */
3279
3280		hdr_len = ehdrlen + ip_hlen;
3281		ipproto = ip6->ip6_nxt;
3282
3283		break;
3284	default:
3285		*txd_upper = 0;
3286		*txd_lower = 0;
3287		return;
3288	}
3289
3290	switch (ipproto) {
3291	case IPPROTO_TCP:
3292		if (mp->m_pkthdr.csum_flags & CSUM_TCP) {
3293			/*
3294			 * Start offset for payload checksum calculation.
3295			 * End offset for payload checksum calculation.
3296			 * Offset of place to put the checksum.
3297			 */
3298			th = (struct tcphdr *)(mp->m_data + hdr_len);
3299			TXD->upper_setup.tcp_fields.tucss = hdr_len;
3300			TXD->upper_setup.tcp_fields.tucse = htole16(0);
3301			TXD->upper_setup.tcp_fields.tucso =
3302			    hdr_len + offsetof(struct tcphdr, th_sum);
3303			cmd |= E1000_TXD_CMD_TCP;
3304			*txd_upper |= E1000_TXD_POPTS_TXSM << 8;
3305		}
3306		break;
3307	case IPPROTO_UDP:
3308		if (mp->m_pkthdr.csum_flags & CSUM_UDP) {
3309			/*
3310			 * Start offset for header checksum calculation.
3311			 * End offset for header checksum calculation.
3312			 * Offset of place to put the checksum.
3313			 */
3314			TXD->upper_setup.tcp_fields.tucss = hdr_len;
3315			TXD->upper_setup.tcp_fields.tucse = htole16(0);
3316			TXD->upper_setup.tcp_fields.tucso =
3317			    hdr_len + offsetof(struct udphdr, uh_sum);
3318			*txd_upper |= E1000_TXD_POPTS_TXSM << 8;
3319		}
3320		break;
3321	default:
3322		break;
3323	}
3324
3325	TXD->tcp_seg_setup.data = htole32(0);
3326	TXD->cmd_and_length =
3327	    htole32(adapter->txd_cmd | E1000_TXD_CMD_DEXT | cmd);
3328	tx_buffer->m_head = NULL;
3329	tx_buffer->next_eop = -1;
3330
3331	if (++curr_txd == adapter->num_tx_desc)
3332		curr_txd = 0;
3333
3334	adapter->num_tx_desc_avail--;
3335	adapter->next_avail_tx_desc = curr_txd;
3336}
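
/*
 * Worked example of the offsets programmed above for an untagged
 * IPv4/TCP frame with a 20-byte IP header:
 *
 *	ehdrlen = 14, ip_hlen = 20
 *	ipcss = 14, ipcse = 34, ipcso = 14 + offsetof(struct ip, ip_sum) = 24
 *	tucss = 34, tucse = 0 (checksum runs to end of frame)
 *	tucso = 34 + offsetof(struct tcphdr, th_sum) = 50
 */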
3337
3338/**********************************************************************
3339 *
3340 *  Setup work for hardware segmentation offload (TSO)
3341 *
3342 **********************************************************************/
3343static boolean_t
3344em_tso_setup(struct adapter *adapter, struct mbuf *mp, uint32_t *txd_upper,
3345   uint32_t *txd_lower)
3346{
3347	struct e1000_context_desc *TXD;
3348	struct em_buffer *tx_buffer;
3349	struct ether_vlan_header *eh;
3350	struct ip *ip;
3351	struct ip6_hdr *ip6;
3352	struct tcphdr *th;
3353	int curr_txd, ehdrlen, hdr_len, ip_hlen, isip6;
3354	uint16_t etype;
3355
3356	/*
3357	 * XXX: This is not really correct as the stack would not have
3358	 * set up all checksums.
3359	 * XXX: Return FALSE is not sufficient as we may have to return
3360	 * XXX: Returning FALSE is not sufficient as we may have to return
3361	 * and 1 (success).
3362	 */
3363	if (mp->m_pkthdr.len <= EM_TX_BUFFER_SIZE)
3364		return FALSE;	/* 0 */
3365
3366	/*
3367	 * This function could/should be extended to support IP/IPv6
3368	 * fragmentation as well.  But as they say, one step at a time.
3369	 */
3370
3371	/*
3372	 * Determine where frame payload starts.
3373	 * Jump over vlan headers if already present,
3374	 * helpful for QinQ too.
3375	 */
3376	eh = mtod(mp, struct ether_vlan_header *);
3377	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
3378		etype = ntohs(eh->evl_proto);
3379		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3380	} else {
3381		etype = ntohs(eh->evl_encap_proto);
3382		ehdrlen = ETHER_HDR_LEN;
3383	}
3384
3385	/* Ensure we have at least the IP+TCP header in the first mbuf. */
3386	if (mp->m_len < ehdrlen + sizeof(struct ip) + sizeof(struct tcphdr))
3387		return FALSE;	/* -1 */
3388
3389	/*
3390	 * We only support TCP for IPv4 (IPv6 not yet) for the moment.
3391	 * TODO: Support SCTP too when it hits the tree.
3392	 */
3393	switch (etype) {
3394	case ETHERTYPE_IP:
3395		isip6 = 0;
3396		ip = (struct ip *)(mp->m_data + ehdrlen);
3397		if (ip->ip_p != IPPROTO_TCP)
3398			return FALSE;	/* 0 */
3399		ip->ip_len = 0;
3400		ip->ip_sum = 0;
3401		ip_hlen = ip->ip_hl << 2;
3402		if (mp->m_len < ehdrlen + ip_hlen + sizeof(struct tcphdr))
3403			return FALSE;	/* -1 */
3404		th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
3405#if 1
3406		th->th_sum = in_pseudo(ip->ip_src.s_addr,
3407		    ip->ip_dst.s_addr, htons(IPPROTO_TCP));
3408#else
3409		th->th_sum = mp->m_pkthdr.csum_data;
3410#endif
3411		break;
3412	case ETHERTYPE_IPV6:
3413		isip6 = 1;
3414		return FALSE;			/* Not supported yet. */
3415		ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
3416		if (ip6->ip6_nxt != IPPROTO_TCP)
3417			return FALSE;	/* 0 */
3418		ip6->ip6_plen = 0;
3419		ip_hlen = sizeof(struct ip6_hdr); /* XXX: no header stacking. */
3420		if (mp->m_len < ehdrlen + ip_hlen + sizeof(struct tcphdr))
3421			return FALSE;	/* -1 */
3422		th = (struct tcphdr *)((caddr_t)ip6 + ip_hlen);
3423#if 0
		th->th_sum = in6_pseudo(ip6->ip6_src, ip6->ip6_dst,
3425		    htons(IPPROTO_TCP));	/* XXX: function notyet. */
3426#else
3427		th->th_sum = mp->m_pkthdr.csum_data;
3428#endif
3429		break;
3430	default:
3431		return FALSE;
3432	}
3433	hdr_len = ehdrlen + ip_hlen + (th->th_off << 2);
3434
3435	*txd_lower = (E1000_TXD_CMD_DEXT |	/* Extended descr type */
3436		      E1000_TXD_DTYP_D |	/* Data descr type */
3437		      E1000_TXD_CMD_TSE);	/* Do TSE on this packet */
3438
3439	/* IP and/or TCP header checksum calculation and insertion. */
3440	*txd_upper = ((isip6 ? 0 : E1000_TXD_POPTS_IXSM) |
3441		      E1000_TXD_POPTS_TXSM) << 8;
3442
3443	curr_txd = adapter->next_avail_tx_desc;
3444	tx_buffer = &adapter->tx_buffer_area[curr_txd];
3445	TXD = (struct e1000_context_desc *) &adapter->tx_desc_base[curr_txd];
3446
3447	/* IPv6 doesn't have a header checksum. */
3448	if (!isip6) {
3449		/*
3450		 * Start offset for header checksum calculation.
3451		 * End offset for header checksum calculation.
		 * Offset of the place to put the checksum.
3453		 */
3454		TXD->lower_setup.ip_fields.ipcss = ehdrlen;
3455		TXD->lower_setup.ip_fields.ipcse =
3456		    htole16(ehdrlen + ip_hlen - 1);
3457		TXD->lower_setup.ip_fields.ipcso =
3458		    ehdrlen + offsetof(struct ip, ip_sum);
3459	}
3460	/*
3461	 * Start offset for payload checksum calculation.
3462	 * End offset for payload checksum calculation.
3463	 * Offset of place to put the checksum.
3464	 */
3465	TXD->upper_setup.tcp_fields.tucss =
3466	    ehdrlen + ip_hlen;
3467	TXD->upper_setup.tcp_fields.tucse = 0;
3468	TXD->upper_setup.tcp_fields.tucso =
3469	    ehdrlen + ip_hlen + offsetof(struct tcphdr, th_sum);
3470	/*
3471	 * Payload size per packet w/o any headers.
3472	 * Length of all headers up to payload.
3473	 */
3474	TXD->tcp_seg_setup.fields.mss = htole16(mp->m_pkthdr.tso_segsz);
3475	TXD->tcp_seg_setup.fields.hdr_len = hdr_len;
3476
3477	TXD->cmd_and_length = htole32(adapter->txd_cmd |
3478				E1000_TXD_CMD_DEXT |	/* Extended descr */
3479				E1000_TXD_CMD_TSE |	/* TSE context */
3480				(isip6 ? 0 : E1000_TXD_CMD_IP) | /* Do IP csum */
3481				E1000_TXD_CMD_TCP |	/* Do TCP checksum */
3482				(mp->m_pkthdr.len - (hdr_len))); /* Total len */
3483
3484	tx_buffer->m_head = NULL;
3485	tx_buffer->next_eop = -1;
3486
3487	if (++curr_txd == adapter->num_tx_desc)
3488		curr_txd = 0;
3489
3490	adapter->num_tx_desc_avail--;
3491	adapter->next_avail_tx_desc = curr_txd;
3492	adapter->tx_tso = TRUE;
3493
3494	return TRUE;
3495}
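/*
 * Worked example (illustrative only): for an untagged TCP/IPv4 frame
 * with a 20-byte IP header and a 20-byte TCP header, ehdrlen = 14 and
 * ip_hlen = 20, so the context descriptor above ends up as:
 *
 *	ipcss = 14, ipcse = 33, ipcso = 14 + offsetof(struct ip, ip_sum) = 24
 *	tucss = 34, tucse = 0 (checksum to end of packet)
 *	tucso = 34 + offsetof(struct tcphdr, th_sum) = 50
 *	hdr_len = 14 + 20 + 20 = 54
 *
 * and mss is whatever the stack stored in m_pkthdr.tso_segsz
 * (typically 1460 on standard Ethernet).
 */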
3496
3497
3498/**********************************************************************
3499 *
3500 *  Setup work for hardware segmentation offload (TSO) on
3501 *  adapters using advanced tx descriptors
3502 *
3503 **********************************************************************/
3504static boolean_t
3505em_tso_adv_setup(struct adapter *adapter, struct mbuf *mp, u32 *paylen)
3506{
3507	struct e1000_adv_tx_context_desc *TXD;
3508	struct em_buffer        *tx_buffer;
3509	u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0;
3510	u32 mss_l4len_idx = 0;
3511	u16 vtag = 0;
3512	int ctxd, ehdrlen, hdrlen, ip_hlen, tcp_hlen;
3513	struct ether_vlan_header *eh;
3514	struct ip *ip;
3515	struct tcphdr *th;
3516
3517	if (mp->m_pkthdr.len <= EM_TX_BUFFER_SIZE)
3518		return FALSE;
3519
3520	/*
3521	 * Determine where frame payload starts.
3522	 * Jump over vlan headers if already present
3523	 */
3524	eh = mtod(mp, struct ether_vlan_header *);
3525	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN))
3526		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3527	else
3528		ehdrlen = ETHER_HDR_LEN;
3529
3530	/* Ensure we have at least the IP+TCP header in the first mbuf. */
3531	if (mp->m_len < ehdrlen + sizeof(struct ip) + sizeof(struct tcphdr))
3532		return FALSE;
3533
	/* Only IPv4 is supported for now */
3535	ctxd = adapter->next_avail_tx_desc;
3536	tx_buffer = &adapter->tx_buffer_area[ctxd];
3537	TXD = (struct e1000_adv_tx_context_desc *) &adapter->tx_desc_base[ctxd];
3538
3539	ip = (struct ip *)(mp->m_data + ehdrlen);
3540	if (ip->ip_p != IPPROTO_TCP)
		return FALSE;	/* 0 */
3542	ip->ip_len = 0;
3543	ip->ip_sum = 0;
3544	ip_hlen = ip->ip_hl << 2;
3545	th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
3546	th->th_sum = in_pseudo(ip->ip_src.s_addr,
3547	    ip->ip_dst.s_addr, htons(IPPROTO_TCP));
3548	tcp_hlen = th->th_off << 2;
3549	hdrlen = ehdrlen + ip_hlen + tcp_hlen;
	/* Calculate the payload size; encap uses it in the data descriptor. */
3551	*paylen = mp->m_pkthdr.len - hdrlen;
3552
3553	/* VLAN MACLEN IPLEN */
3554	if (mp->m_flags & M_VLANTAG) {
3555		vtag = htole16(mp->m_pkthdr.ether_vtag);
3556		vlan_macip_lens |= (vtag << E1000_ADVTXD_VLAN_SHIFT);
3557	}
3558	vlan_macip_lens |= (ehdrlen << E1000_ADVTXD_MACLEN_SHIFT);
3559	vlan_macip_lens |= ip_hlen;
3560	TXD->vlan_macip_lens |= htole32(vlan_macip_lens);
3561
3562	/* ADV DTYPE TUCMD */
3563	type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
3564	type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP;
3565	type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4;
3566	TXD->type_tucmd_mlhl |= htole32(type_tucmd_mlhl);
3567
3568	/* MSS L4LEN IDX */
3569	mss_l4len_idx |= (mp->m_pkthdr.tso_segsz << E1000_ADVTXD_MSS_SHIFT);
3570	mss_l4len_idx |= (tcp_hlen << E1000_ADVTXD_L4LEN_SHIFT);
3571	TXD->mss_l4len_idx = htole32(mss_l4len_idx);
3572
3573	TXD->seqnum_seed = htole32(0);
3574	tx_buffer->m_head = NULL;
3575	tx_buffer->next_eop = -1;
3576
3577	if (++ctxd == adapter->num_tx_desc)
3578		ctxd = 0;
3579
3580	adapter->num_tx_desc_avail--;
3581	adapter->next_avail_tx_desc = ctxd;
3582	return TRUE;
3583}
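/*
 * Worked example (illustrative only): for the same untagged TCP/IPv4
 * frame as above, em_tso_adv_setup() packs ehdrlen = 14 into the MACLEN
 * field and ip_hlen = 20 into the low bits of vlan_macip_lens, while
 * mss_l4len_idx carries m_pkthdr.tso_segsz in its MSS field and
 * tcp_hlen = 20 in its L4LEN field; the data descriptors that follow
 * only need the total payload length returned in *paylen.
 */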
3584
3585
3586/*********************************************************************
3587 *
3588 *  Advanced Context Descriptor setup for VLAN or CSUM
3589 *
3590 **********************************************************************/
3591
3592static void
3593em_tx_adv_ctx_setup(struct adapter *adapter, struct mbuf *mp)
3594{
3595	struct e1000_adv_tx_context_desc *TXD;
3596	struct em_buffer        *tx_buffer;
3597	uint32_t vlan_macip_lens = 0, type_tucmd_mlhl = 0;
3598	struct ether_vlan_header *eh;
3599	struct ip *ip;
3600	struct ip6_hdr *ip6;
3601	int  ehdrlen, ip_hlen;
3602	u16	etype;
3603	u8	ipproto;
3604
3605	int ctxd = adapter->next_avail_tx_desc;
3606	u16 vtag = 0;
3607
3608	tx_buffer = &adapter->tx_buffer_area[ctxd];
3609	TXD = (struct e1000_adv_tx_context_desc *) &adapter->tx_desc_base[ctxd];
3610
3611	/*
3612	** In advanced descriptors the vlan tag must
3613	** be placed into the descriptor itself.
3614	*/
3615	if (mp->m_flags & M_VLANTAG) {
3616		vtag = htole16(mp->m_pkthdr.ether_vtag);
3617		vlan_macip_lens |= (vtag << E1000_ADVTXD_VLAN_SHIFT);
3618	}
3619
3620	/*
3621	 * Determine where frame payload starts.
3622	 * Jump over vlan headers if already present,
3623	 * helpful for QinQ too.
3624	 */
3625	eh = mtod(mp, struct ether_vlan_header *);
3626	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
3627		etype = ntohs(eh->evl_proto);
3628		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3629	} else {
3630		etype = ntohs(eh->evl_encap_proto);
3631		ehdrlen = ETHER_HDR_LEN;
3632	}
3633
3634	/* Set the ether header length */
3635	vlan_macip_lens |= ehdrlen << E1000_ADVTXD_MACLEN_SHIFT;
3636
3637	switch (etype) {
3638		case ETHERTYPE_IP:
3639			ip = (struct ip *)(mp->m_data + ehdrlen);
3640			ip_hlen = ip->ip_hl << 2;
3641			if (mp->m_len < ehdrlen + ip_hlen)
3642				return; /* failure */
3643			ipproto = ip->ip_p;
3644			type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4;
3645			break;
3646		case ETHERTYPE_IPV6:
3647			ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
3648			ip_hlen = sizeof(struct ip6_hdr);
3649			if (mp->m_len < ehdrlen + ip_hlen)
3650				return; /* failure */
3651			ipproto = ip6->ip6_nxt;
3652			type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV6;
3653			break;
3654		default:
3655			return;
3656	}
3657
3658	vlan_macip_lens |= ip_hlen;
3659	type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
3660
3661	switch (ipproto) {
3662		case IPPROTO_TCP:
3663			if (mp->m_pkthdr.csum_flags & CSUM_TCP)
3664				type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP;
3665			break;
		case IPPROTO_UDP:
			/*
			 * A zero L4T field already denotes UDP to the
			 * hardware; ORing in the TCP type bit here would
			 * mislabel the packet for the offload engine.
			 */
			if (mp->m_pkthdr.csum_flags & CSUM_UDP)
				type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_UDP;
			break;
3670	}
3671
3672	/* Now copy bits into descriptor */
3673	TXD->vlan_macip_lens |= htole32(vlan_macip_lens);
3674	TXD->type_tucmd_mlhl |= htole32(type_tucmd_mlhl);
3675	TXD->seqnum_seed = htole32(0);
3676	TXD->mss_l4len_idx = htole32(0);
3677
3678	tx_buffer->m_head = NULL;
3679	tx_buffer->next_eop = -1;
3680
3681	/* We've consumed the first desc, adjust counters */
3682	if (++ctxd == adapter->num_tx_desc)
3683		ctxd = 0;
3684	adapter->next_avail_tx_desc = ctxd;
3685	--adapter->num_tx_desc_avail;
3686
	return;
3688}
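/*
 * The CSUM_TCP/CSUM_UDP flags tested above are set by the protocol
 * stack only when the interface advertised the corresponding bits in
 * if_hwassist, so by the time a packet reaches this routine the
 * offload request has already been negotiated; all that is added here
 * is the matching L4 type in the context descriptor.
 */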
3689
3690
3691/**********************************************************************
3692 *
3693 *  Examine each tx_buffer in the used queue. If the hardware is done
3694 *  processing the packet then free associated resources. The
3695 *  tx_buffer is put back on the free queue.
3696 *
3697 **********************************************************************/
3698static void
3699em_txeof(struct adapter *adapter)
3700{
3701        int first, last, done, num_avail;
3702        struct em_buffer *tx_buffer;
3703        struct e1000_tx_desc   *tx_desc, *eop_desc;
3704	struct ifnet   *ifp = adapter->ifp;
3705
3706	EM_LOCK_ASSERT(adapter);
3707
3708        if (adapter->num_tx_desc_avail == adapter->num_tx_desc)
3709                return;
3710
3711        num_avail = adapter->num_tx_desc_avail;
3712        first = adapter->next_tx_to_clean;
3713        tx_desc = &adapter->tx_desc_base[first];
3714        tx_buffer = &adapter->tx_buffer_area[first];
3715	last = tx_buffer->next_eop;
3716        eop_desc = &adapter->tx_desc_base[last];
3717
	/*
	 * Get the index of the first descriptor AFTER the EOP of the
	 * first packet, so that the inner while loop can terminate on
	 * a simple "first != done" comparison.
	 */
3724	if (++last == adapter->num_tx_desc)
3725 		last = 0;
3726	done = last;
3727
3728        bus_dmamap_sync(adapter->txdma.dma_tag, adapter->txdma.dma_map,
3729            BUS_DMASYNC_POSTREAD);
3730
3731        while (eop_desc->upper.fields.status & E1000_TXD_STAT_DD) {
3732		/* We clean the range of the packet */
3733		while (first != done) {
3734                	tx_desc->upper.data = 0;
3735                	tx_desc->lower.data = 0;
3736                	tx_desc->buffer_addr = 0;
3737                	num_avail++;
3738
3739			if (tx_buffer->m_head) {
3740				ifp->if_opackets++;
3741				bus_dmamap_sync(adapter->txtag,
3742				    tx_buffer->map,
3743				    BUS_DMASYNC_POSTWRITE);
3744				bus_dmamap_unload(adapter->txtag,
3745				    tx_buffer->map);
3746
3747                        	m_freem(tx_buffer->m_head);
3748                        	tx_buffer->m_head = NULL;
3749                	}
3750			tx_buffer->next_eop = -1;
3751
3752	                if (++first == adapter->num_tx_desc)
3753				first = 0;
3754
3755	                tx_buffer = &adapter->tx_buffer_area[first];
3756			tx_desc = &adapter->tx_desc_base[first];
3757		}
3758		/* See if we can continue to the next packet */
3759		last = tx_buffer->next_eop;
3760		if (last != -1) {
3761        		eop_desc = &adapter->tx_desc_base[last];
3762			/* Get new done point */
			if (++last == adapter->num_tx_desc)
				last = 0;
3764			done = last;
3765		} else
3766			break;
3767        }
3768        bus_dmamap_sync(adapter->txdma.dma_tag, adapter->txdma.dma_map,
3769            BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3770
3771        adapter->next_tx_to_clean = first;
3772
3773        /*
3774         * If we have enough room, clear IFF_DRV_OACTIVE to tell the stack
3775         * that it is OK to send packets.
3776         * If there are no pending descriptors, clear the timeout. Otherwise,
3777         * if some descriptors have been freed, restart the timeout.
3778         */
3779        if (num_avail > EM_TX_CLEANUP_THRESHOLD) {
3780                ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
3781		/* All clean, turn off the timer */
3782                if (num_avail == adapter->num_tx_desc)
3783			adapter->watchdog_timer = 0;
3784		/* Some cleaned, reset the timer */
3785                else if (num_avail != adapter->num_tx_desc_avail)
3786			adapter->watchdog_timer = EM_TX_TIMEOUT;
3787        }
3788        adapter->num_tx_desc_avail = num_avail;
3789        return;
3790}
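/*
 * Worked example (illustrative only): with num_tx_desc = 256,
 * next_tx_to_clean = 250 and a packet whose EOP descriptor sits at
 * index 2, em_txeof() computes done = 3 (the slot after the EOP) and
 * the inner loop frees slots 250..255 and 0..2, wrapping naturally at
 * the end of the ring before the "first != done" test terminates it.
 */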
3791
3792/*********************************************************************
3793 *
3794 *  Get a buffer from system mbuf buffer pool.
3795 *
3796 **********************************************************************/
3797static int
3798em_get_buf(struct adapter *adapter, int i)
3799{
3800	struct mbuf		*m;
3801	bus_dma_segment_t	segs[1];
3802	bus_dmamap_t		map;
3803	struct em_buffer	*rx_buffer;
3804	int			error, nsegs;
3805
3806	m = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR);
3807	if (m == NULL) {
3808		adapter->mbuf_cluster_failed++;
3809		return (ENOBUFS);
3810	}
3811	m->m_len = m->m_pkthdr.len = MCLBYTES;
3812
3813	if (adapter->hw.mac.max_frame_size <= (MCLBYTES - ETHER_ALIGN))
3814		m_adj(m, ETHER_ALIGN);
3815
3816	/*
3817	 * Using memory from the mbuf cluster pool, invoke the
3818	 * bus_dma machinery to arrange the memory mapping.
3819	 */
3820	error = bus_dmamap_load_mbuf_sg(adapter->rxtag,
3821	    adapter->rx_sparemap, m, segs, &nsegs, BUS_DMA_NOWAIT);
3822	if (error != 0) {
3823		m_free(m);
3824		return (error);
3825	}
3826
3827	/* If nsegs is wrong then the stack is corrupt. */
3828	KASSERT(nsegs == 1, ("Too many segments returned!"));
3829
3830	rx_buffer = &adapter->rx_buffer_area[i];
3831	if (rx_buffer->m_head != NULL)
3832		bus_dmamap_unload(adapter->rxtag, rx_buffer->map);
3833
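	/*
	 * Swap maps: the new mbuf was loaded into the spare map above,
	 * so hand that map to this ring slot and recycle the slot's old
	 * map as the new spare.  On a load failure we returned before
	 * touching the ring, so the slot is never left half torn down.
	 */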
3834	map = rx_buffer->map;
3835	rx_buffer->map = adapter->rx_sparemap;
3836	adapter->rx_sparemap = map;
3837	bus_dmamap_sync(adapter->rxtag, rx_buffer->map, BUS_DMASYNC_PREREAD);
3838	rx_buffer->m_head = m;
3839
3840	adapter->rx_desc_base[i].buffer_addr = htole64(segs[0].ds_addr);
3841	return (0);
3842}
3843
3844/*********************************************************************
3845 *
3846 *  Allocate memory for rx_buffer structures. Since we use one
3847 *  rx_buffer per received packet, the maximum number of rx_buffer's
3848 *  that we'll need is equal to the number of receive descriptors
3849 *  that we've allocated.
3850 *
3851 **********************************************************************/
3852static int
3853em_allocate_receive_structures(struct adapter *adapter)
3854{
3855	device_t dev = adapter->dev;
3856	struct em_buffer *rx_buffer;
3857	int i, error;
3858
3859	adapter->rx_buffer_area = malloc(sizeof(struct em_buffer) *
3860	    adapter->num_rx_desc, M_DEVBUF, M_NOWAIT);
3861	if (adapter->rx_buffer_area == NULL) {
3862		device_printf(dev, "Unable to allocate rx_buffer memory\n");
3863		return (ENOMEM);
3864	}
3865
3866	bzero(adapter->rx_buffer_area,
3867	    sizeof(struct em_buffer) * adapter->num_rx_desc);
3868
3869	error = bus_dma_tag_create(bus_get_dma_tag(dev),        /* parent */
3870				1, 0,			/* alignment, bounds */
3871				BUS_SPACE_MAXADDR,	/* lowaddr */
3872				BUS_SPACE_MAXADDR,	/* highaddr */
3873				NULL, NULL,		/* filter, filterarg */
3874				MCLBYTES,		/* maxsize */
3875				1,			/* nsegments */
3876				MCLBYTES,		/* maxsegsize */
3877				0,			/* flags */
3878				NULL,			/* lockfunc */
3879				NULL,			/* lockarg */
3880				&adapter->rxtag);
3881	if (error) {
3882		device_printf(dev, "%s: bus_dma_tag_create failed %d\n",
3883		    __func__, error);
3884		goto fail;
3885	}
3886
3887	/* Create the spare map (used by getbuf) */
3888	error = bus_dmamap_create(adapter->rxtag, BUS_DMA_NOWAIT,
3889	     &adapter->rx_sparemap);
3890	if (error) {
3891		device_printf(dev, "%s: bus_dmamap_create failed: %d\n",
3892		    __func__, error);
3893		goto fail;
3894	}
3895
3896	rx_buffer = adapter->rx_buffer_area;
3897	for (i = 0; i < adapter->num_rx_desc; i++, rx_buffer++) {
3898		error = bus_dmamap_create(adapter->rxtag, BUS_DMA_NOWAIT,
3899		    &rx_buffer->map);
3900		if (error) {
3901			device_printf(dev, "%s: bus_dmamap_create failed: %d\n",
3902			    __func__, error);
3903			goto fail;
3904		}
3905	}
3906
3907	/* Setup the initial buffers */
3908	for (i = 0; i < adapter->num_rx_desc; i++) {
3909		error = em_get_buf(adapter, i);
3910		if (error)
3911			goto fail;
3912	}
3913	bus_dmamap_sync(adapter->rxdma.dma_tag, adapter->rxdma.dma_map,
3914	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3915
3916	return (0);
3917
3918fail:
3919	em_free_receive_structures(adapter);
3920	return (error);
3921}
3922
3923/*********************************************************************
3924 *
3925 *  Allocate and initialize receive structures.
3926 *
3927 **********************************************************************/
3928static int
3929em_setup_receive_structures(struct adapter *adapter)
3930{
3931	int error;
3932
3933	bzero(adapter->rx_desc_base,
3934	    (sizeof(struct e1000_rx_desc)) * adapter->num_rx_desc);
3935
	if ((error = em_allocate_receive_structures(adapter)) != 0)
3937		return (error);
3938
3939	/* Setup our descriptor pointers */
3940	adapter->next_rx_desc_to_check = 0;
3941
3942	return (0);
3943}
3944
3945/*********************************************************************
3946 *
3947 *  Enable receive unit.
3948 *
3949 **********************************************************************/
3950static void
3951em_initialize_receive_unit(struct adapter *adapter)
3952{
3953	struct ifnet	*ifp = adapter->ifp;
3954	uint64_t	bus_addr;
3955	uint32_t	reg_rctl;
3956	uint32_t	reg_rxcsum;
3957
3958	INIT_DEBUGOUT("em_initialize_receive_unit: begin");
3959
3960	/*
3961	 * Make sure receives are disabled while setting
3962	 * up the descriptor ring
3963	 */
3964	reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
3965	E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl & ~E1000_RCTL_EN);
3966
	if (adapter->hw.mac.type >= e1000_82540) {
3968		E1000_WRITE_REG(&adapter->hw, E1000_RADV,
3969		    adapter->rx_abs_int_delay.value);
3970		/*
3971		 * Set the interrupt throttling rate. Value is calculated
3972		 * as DEFAULT_ITR = 1/(MAX_INTS_PER_SEC * 256ns)
3973		 */
3974#define MAX_INTS_PER_SEC	8000
3975#define DEFAULT_ITR	     1000000000/(MAX_INTS_PER_SEC * 256)
3976		E1000_WRITE_REG(&adapter->hw, E1000_ITR, DEFAULT_ITR);
3977	}
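	/*
	 * With the defaults above, DEFAULT_ITR = 10^9 / (8000 * 256) = 488.
	 * The ITR register counts in 256 ns units, so this enforces a
	 * floor of roughly 125 us between interrupts, i.e. at most about
	 * 8000 interrupts per second.
	 */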
3978
3979	/* Setup the Base and Length of the Rx Descriptor Ring */
3980	bus_addr = adapter->rxdma.dma_paddr;
3981	E1000_WRITE_REG(&adapter->hw, E1000_RDLEN, adapter->num_rx_desc *
3982			sizeof(struct e1000_rx_desc));
3983	E1000_WRITE_REG(&adapter->hw, E1000_RDBAH, (uint32_t)(bus_addr >> 32));
3984	E1000_WRITE_REG(&adapter->hw, E1000_RDBAL, (uint32_t)bus_addr);
3985
3986	/* Setup the Receive Control Register */
3987	reg_rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
3988	reg_rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_LBM_NO |
3989		   E1000_RCTL_RDMTS_HALF |
3990		   (adapter->hw.mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
3991
3992	if (e1000_tbi_sbp_enabled_82543(&adapter->hw))
3993		reg_rctl |= E1000_RCTL_SBP;
3994	else
3995		reg_rctl &= ~E1000_RCTL_SBP;
3996
3997	switch (adapter->rx_buffer_len) {
3998	default:
3999	case 2048:
4000		reg_rctl |= E1000_RCTL_SZ_2048;
4001		break;
4002	case 4096:
4003		reg_rctl |= E1000_RCTL_SZ_4096 |
4004		    E1000_RCTL_BSEX | E1000_RCTL_LPE;
4005		break;
4006	case 8192:
4007		reg_rctl |= E1000_RCTL_SZ_8192 |
4008		    E1000_RCTL_BSEX | E1000_RCTL_LPE;
4009		break;
4010	case 16384:
4011		reg_rctl |= E1000_RCTL_SZ_16384 |
4012		    E1000_RCTL_BSEX | E1000_RCTL_LPE;
4013		break;
4014	}
4015
4016	if (ifp->if_mtu > ETHERMTU)
4017		reg_rctl |= E1000_RCTL_LPE;
4018	else
4019		reg_rctl &= ~E1000_RCTL_LPE;
4020
4021	/* Enable 82543 Receive Checksum Offload for TCP and UDP */
4022	if ((adapter->hw.mac.type >= e1000_82543) &&
4023	    (ifp->if_capenable & IFCAP_RXCSUM)) {
4024		reg_rxcsum = E1000_READ_REG(&adapter->hw, E1000_RXCSUM);
4025		reg_rxcsum |= (E1000_RXCSUM_IPOFL | E1000_RXCSUM_TUOFL);
4026		E1000_WRITE_REG(&adapter->hw, E1000_RXCSUM, reg_rxcsum);
4027	}
4028
	/*
	** XXX TEMPORARY WORKAROUND: on some systems with the 82573,
	** such as the Lenovo X60, long latencies are observed. This
	** change eliminates the problem, but since having positive
	** values in RDTR is a known source of problems on other
	** platforms, another solution is being sought.
	*/
4036	if (adapter->hw.mac.type == e1000_82573)
4037		E1000_WRITE_REG(&adapter->hw, E1000_RDTR, 0x20);
4038
4039	/* Enable Receives */
4040	E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
4041
4042	/*
4043	 * Setup the HW Rx Head and
4044	 * Tail Descriptor Pointers
4045	 */
4046	E1000_WRITE_REG(&adapter->hw, E1000_RDH, 0);
4047	E1000_WRITE_REG(&adapter->hw, E1000_RDT, adapter->num_rx_desc - 1);
4048
4049	return;
4050}
4051
4052/*********************************************************************
4053 *
4054 *  Free receive related data structures.
4055 *
4056 **********************************************************************/
4057static void
4058em_free_receive_structures(struct adapter *adapter)
4059{
4060	struct em_buffer *rx_buffer;
4061	int i;
4062
4063	INIT_DEBUGOUT("free_receive_structures: begin");
4064
4065	if (adapter->rx_sparemap) {
4066		bus_dmamap_destroy(adapter->rxtag, adapter->rx_sparemap);
4067		adapter->rx_sparemap = NULL;
4068	}
4069
4070	/* Cleanup any existing buffers */
4071	if (adapter->rx_buffer_area != NULL) {
4072		rx_buffer = adapter->rx_buffer_area;
4073		for (i = 0; i < adapter->num_rx_desc; i++, rx_buffer++) {
4074			if (rx_buffer->m_head != NULL) {
4075				bus_dmamap_sync(adapter->rxtag, rx_buffer->map,
4076				    BUS_DMASYNC_POSTREAD);
4077				bus_dmamap_unload(adapter->rxtag,
4078				    rx_buffer->map);
4079				m_freem(rx_buffer->m_head);
4080				rx_buffer->m_head = NULL;
4081			} else if (rx_buffer->map != NULL)
4082				bus_dmamap_unload(adapter->rxtag,
4083				    rx_buffer->map);
4084			if (rx_buffer->map != NULL) {
4085				bus_dmamap_destroy(adapter->rxtag,
4086				    rx_buffer->map);
4087				rx_buffer->map = NULL;
4088			}
4089		}
4090	}
4091
4092	if (adapter->rx_buffer_area != NULL) {
4093		free(adapter->rx_buffer_area, M_DEVBUF);
4094		adapter->rx_buffer_area = NULL;
4095	}
4096
4097	if (adapter->rxtag != NULL) {
4098		bus_dma_tag_destroy(adapter->rxtag);
4099		adapter->rxtag = NULL;
4100	}
4101}
4102
4103/*********************************************************************
4104 *
 *  This routine executes in interrupt context. It replenishes
 *  the mbufs in the descriptor ring and passes data that has been
 *  DMA'd into host memory up to the upper layer.
4108 *
4109 *  We loop at most count times if count is > 0, or until done if
4110 *  count < 0.
4111 *
4112 *********************************************************************/
4113static int
4114em_rxeof(struct adapter *adapter, int count)
4115{
4116	struct ifnet	*ifp;
4117	struct mbuf	*mp;
4118	uint8_t		accept_frame = 0;
4119	uint8_t		eop = 0;
4120	uint16_t 	len, desc_len, prev_len_adj;
4121	int		i;
4122
4123	/* Pointer to the receive descriptor being examined. */
4124	struct e1000_rx_desc   *current_desc;
4125	uint8_t		status;
4126
4127	ifp = adapter->ifp;
4128	i = adapter->next_rx_desc_to_check;
4129	current_desc = &adapter->rx_desc_base[i];
4130	bus_dmamap_sync(adapter->rxdma.dma_tag, adapter->rxdma.dma_map,
4131	    BUS_DMASYNC_POSTREAD);
4132
4133	if (!((current_desc->status) & E1000_RXD_STAT_DD))
4134		return (0);
4135
4136	while ((current_desc->status & E1000_RXD_STAT_DD) &&
4137	    (count != 0) &&
4138	    (ifp->if_drv_flags & IFF_DRV_RUNNING)) {
4139		struct mbuf *m = NULL;
4140
4141		mp = adapter->rx_buffer_area[i].m_head;
4142		/*
4143		 * Can't defer bus_dmamap_sync(9) because TBI_ACCEPT
4144		 * needs to access the last received byte in the mbuf.
4145		 */
4146		bus_dmamap_sync(adapter->rxtag, adapter->rx_buffer_area[i].map,
4147		    BUS_DMASYNC_POSTREAD);
4148
4149		accept_frame = 1;
4150		prev_len_adj = 0;
4151		desc_len = le16toh(current_desc->length);
4152		status = current_desc->status;
4153		if (status & E1000_RXD_STAT_EOP) {
4154			count--;
4155			eop = 1;
4156			if (desc_len < ETHER_CRC_LEN) {
4157				len = 0;
4158				prev_len_adj = ETHER_CRC_LEN - desc_len;
4159			} else
4160				len = desc_len - ETHER_CRC_LEN;
4161		} else {
4162			eop = 0;
4163			len = desc_len;
4164		}
4165
4166		if (current_desc->errors & E1000_RXD_ERR_FRAME_ERR_MASK) {
4167			uint8_t		last_byte;
4168			uint32_t	pkt_len = desc_len;
4169
4170			if (adapter->fmp != NULL)
4171				pkt_len += adapter->fmp->m_pkthdr.len;
4172
4173			last_byte = *(mtod(mp, caddr_t) + desc_len - 1);
4174			if (TBI_ACCEPT(&adapter->hw, status,
4175			    current_desc->errors, pkt_len, last_byte)) {
4176				e1000_tbi_adjust_stats_82543(&adapter->hw,
4177				    &adapter->stats, pkt_len,
4178				    adapter->hw.mac.addr);
4179				if (len > 0)
4180					len--;
4181			} else
4182				accept_frame = 0;
4183		}
4184
4185		if (accept_frame) {
4186			if (em_get_buf(adapter, i) != 0) {
4187				ifp->if_iqdrops++;
4188				goto discard;
4189			}
4190
4191			/* Assign correct length to the current fragment */
4192			mp->m_len = len;
4193
4194			if (adapter->fmp == NULL) {
4195				mp->m_pkthdr.len = len;
4196				adapter->fmp = mp; /* Store the first mbuf */
4197				adapter->lmp = mp;
4198			} else {
4199				/* Chain mbuf's together */
4200				mp->m_flags &= ~M_PKTHDR;
4201				/*
4202				 * Adjust length of previous mbuf in chain if
4203				 * we received less than 4 bytes in the last
4204				 * descriptor.
4205				 */
4206				if (prev_len_adj > 0) {
4207					adapter->lmp->m_len -= prev_len_adj;
4208					adapter->fmp->m_pkthdr.len -=
4209					    prev_len_adj;
4210				}
4211				adapter->lmp->m_next = mp;
4212				adapter->lmp = adapter->lmp->m_next;
4213				adapter->fmp->m_pkthdr.len += len;
4214			}
4215
4216			if (eop) {
4217				adapter->fmp->m_pkthdr.rcvif = ifp;
4218				ifp->if_ipackets++;
4219				em_receive_checksum(adapter, current_desc,
4220				    adapter->fmp);
4221#ifndef __NO_STRICT_ALIGNMENT
4222				if (adapter->hw.mac.max_frame_size >
4223				    (MCLBYTES - ETHER_ALIGN) &&
4224				    em_fixup_rx(adapter) != 0)
4225					goto skip;
4226#endif
4227				if (status & E1000_RXD_STAT_VP) {
4228					adapter->fmp->m_pkthdr.ether_vtag =
4229					    (le16toh(current_desc->special) &
4230					    E1000_RXD_SPC_VLAN_MASK);
4231					adapter->fmp->m_flags |= M_VLANTAG;
4232				}
4233#ifndef __NO_STRICT_ALIGNMENT
4234skip:
4235#endif
4236				m = adapter->fmp;
4237				adapter->fmp = NULL;
4238				adapter->lmp = NULL;
4239			}
4240		} else {
4241			ifp->if_ierrors++;
4242discard:
4243			/* Reuse loaded DMA map and just update mbuf chain */
4244			mp = adapter->rx_buffer_area[i].m_head;
4245			mp->m_len = mp->m_pkthdr.len = MCLBYTES;
4246			mp->m_data = mp->m_ext.ext_buf;
4247			mp->m_next = NULL;
4248			if (adapter->hw.mac.max_frame_size <=
4249			    (MCLBYTES - ETHER_ALIGN))
4250				m_adj(mp, ETHER_ALIGN);
4251			if (adapter->fmp != NULL) {
4252				m_freem(adapter->fmp);
4253				adapter->fmp = NULL;
4254				adapter->lmp = NULL;
4255			}
4256			m = NULL;
4257		}
4258
		/* Zero out the receive descriptor's status. */
4260		current_desc->status = 0;
4261		bus_dmamap_sync(adapter->rxdma.dma_tag, adapter->rxdma.dma_map,
4262		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4263
4264		/* Advance our pointers to the next descriptor. */
4265		if (++i == adapter->num_rx_desc)
4266			i = 0;
4267		if (m != NULL) {
4268			adapter->next_rx_desc_to_check = i;
4269#ifdef DEVICE_POLLING
4270			EM_UNLOCK(adapter);
4271			(*ifp->if_input)(ifp, m);
4272			EM_LOCK(adapter);
4273#else
4274			/* Already running unlocked */
4275			(*ifp->if_input)(ifp, m);
4276#endif
4277			i = adapter->next_rx_desc_to_check;
4278		}
4279		current_desc = &adapter->rx_desc_base[i];
4280	}
4281	adapter->next_rx_desc_to_check = i;
4282
	/* Advance the E1000's Receive Queue #0 "Tail Pointer". */
4284	if (--i < 0)
4285		i = adapter->num_rx_desc - 1;
4286	E1000_WRITE_REG(&adapter->hw, E1000_RDT, i);
4287	if (!((current_desc->status) & E1000_RXD_STAT_DD))
4288		return (0);
4289
4290	return (1);
4291}
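/*
 * Worked example (illustrative only): with 2048-byte buffers, a
 * 2046-byte frame arrives as 2050 bytes including the 4-byte CRC and
 * spans two descriptors of 2048 and 2 bytes.  The EOP descriptor's
 * length (2) is smaller than ETHER_CRC_LEN, so len becomes 0 and
 * prev_len_adj = 2 trims the previous mbuf from 2048 to 2046 bytes,
 * leaving m_pkthdr.len equal to the frame length without the CRC.
 */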
4292
4293#ifndef __NO_STRICT_ALIGNMENT
/*
 * When jumbo frames are enabled we should realign the entire payload on
 * architectures with strict alignment. This is a serious design mistake
 * in the 8254x, as it nullifies the benefit of DMA. The 8254x only
 * allows the RX buffer size to be 2048/4096/8192/16384, whereas what we
 * really want is 2048 - ETHER_ALIGN so that the payload is aligned. On
 * architectures without strict alignment restrictions the 8254x still
 * performs unaligned memory accesses, which reduces performance as well.
 * To avoid copying an entire frame just to align it, we allocate a new
 * mbuf and copy only the ethernet header into it. The new mbuf is then
 * prepended to the existing mbuf chain.
 *
 * Be aware that best performance from the 8254x is achieved only when
 * jumbo frames are not used at all on architectures with strict
 * alignment.
 */
4308static int
4309em_fixup_rx(struct adapter *adapter)
4310{
4311	struct mbuf *m, *n;
4312	int error;
4313
4314	error = 0;
4315	m = adapter->fmp;
4316	if (m->m_len <= (MCLBYTES - ETHER_HDR_LEN)) {
4317		bcopy(m->m_data, m->m_data + ETHER_HDR_LEN, m->m_len);
4318		m->m_data += ETHER_HDR_LEN;
4319	} else {
4320		MGETHDR(n, M_DONTWAIT, MT_DATA);
4321		if (n != NULL) {
4322			bcopy(m->m_data, n->m_data, ETHER_HDR_LEN);
4323			m->m_data += ETHER_HDR_LEN;
4324			m->m_len -= ETHER_HDR_LEN;
4325			n->m_len = ETHER_HDR_LEN;
4326			M_MOVE_PKTHDR(n, m);
4327			n->m_next = m;
4328			adapter->fmp = n;
4329		} else {
4330			adapter->dropped_pkts++;
4331			m_freem(adapter->fmp);
4332			adapter->fmp = NULL;
4333			error = ENOMEM;
4334		}
4335	}
4336
4337	return (error);
4338}
4339#endif
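/*
 * Worked example (illustrative only), covering the in-place branch of
 * em_fixup_rx(): a frame received at offset 0 of a cluster has its IP
 * header at offset 14, which is not 32-bit aligned.  Sliding the whole
 * frame forward by ETHER_HDR_LEN leaves the Ethernet header at bytes
 * 14..27 and the IP header at offset 28, a 4-byte boundary that
 * strict-alignment architectures can access directly.
 */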
4340
4341/*********************************************************************
4342 *
4343 *  Verify that the hardware indicated that the checksum is valid.
4344 *  Inform the stack about the status of checksum so that stack
4345 *  doesn't spend time verifying the checksum.
4346 *
4347 *********************************************************************/
4348static void
4349em_receive_checksum(struct adapter *adapter,
4350	    struct e1000_rx_desc *rx_desc, struct mbuf *mp)
4351{
4352	/* 82543 or newer only */
4353	if ((adapter->hw.mac.type < e1000_82543) ||
4354	    /* Ignore Checksum bit is set */
4355	    (rx_desc->status & E1000_RXD_STAT_IXSM)) {
4356		mp->m_pkthdr.csum_flags = 0;
4357		return;
4358	}
4359
4360	if (rx_desc->status & E1000_RXD_STAT_IPCS) {
4361		/* Did it pass? */
4362		if (!(rx_desc->errors & E1000_RXD_ERR_IPE)) {
4363			/* IP Checksum Good */
4364			mp->m_pkthdr.csum_flags = CSUM_IP_CHECKED;
4365			mp->m_pkthdr.csum_flags |= CSUM_IP_VALID;
4366
4367		} else {
4368			mp->m_pkthdr.csum_flags = 0;
4369		}
4370	}
4371
4372	if (rx_desc->status & E1000_RXD_STAT_TCPCS) {
4373		/* Did it pass? */
4374		if (!(rx_desc->errors & E1000_RXD_ERR_TCPE)) {
4375			mp->m_pkthdr.csum_flags |=
4376			(CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
4377			mp->m_pkthdr.csum_data = htons(0xffff);
4378		}
4379	}
4380}
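/*
 * Setting csum_data to 0xffff together with CSUM_DATA_VALID and
 * CSUM_PSEUDO_HDR tells the TCP/UDP input paths that the full checksum,
 * pseudo-header included, has already been verified, so the stack skips
 * its own in_cksum() pass over the payload.
 */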
4381
4382
4383static void
4384em_enable_vlans(struct adapter *adapter)
4385{
4386	uint32_t ctrl;
4387
4388	E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);
4389
4390	ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
4391	ctrl |= E1000_CTRL_VME;
4392	E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
4393}
4394
4395static void
4396em_enable_intr(struct adapter *adapter)
4397{
4398	E1000_WRITE_REG(&adapter->hw, E1000_IMS,
4399	    (IMS_ENABLE_MASK));
4400}
4401
4402static void
4403em_disable_intr(struct adapter *adapter)
4404{
4405	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
4406}
4407
/*
 * Bit of a misnomer: what this really means is to enable OS
 * management of the system, i.e. to disable the special hardware
 * management features.
 */
4413static void
4414em_init_manageability(struct adapter *adapter)
4415{
4416	/* A shared code workaround */
4417#define E1000_82542_MANC2H E1000_MANC2H
4418	if (adapter->has_manage) {
4419		int manc2h = E1000_READ_REG(&adapter->hw, E1000_MANC2H);
4420		int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
4421
4422		/* disable hardware interception of ARP */
4423		manc &= ~(E1000_MANC_ARP_EN);
4424
		/* enable receiving management packets to the host */
		if (adapter->hw.mac.type >= e1000_82571) {
4427			manc |= E1000_MANC_EN_MNG2HOST;
4428#define E1000_MNG2HOST_PORT_623 (1 << 5)
4429#define E1000_MNG2HOST_PORT_664 (1 << 6)
4430			manc2h |= E1000_MNG2HOST_PORT_623;
4431			manc2h |= E1000_MNG2HOST_PORT_664;
4432			E1000_WRITE_REG(&adapter->hw, E1000_MANC2H, manc2h);
4433		}
4434
4435		E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
4436	}
4437}
4438
4439/*
4440 * Give control back to hardware management
4441 * controller if there is one.
4442 */
4443static void
4444em_release_manageability(struct adapter *adapter)
4445{
4446	if (adapter->has_manage) {
4447		int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
4448
4449		/* re-enable hardware interception of ARP */
4450		manc |= E1000_MANC_ARP_EN;
4451
4452		if (adapter->hw.mac.type >= e1000_82571)
4453			manc &= ~E1000_MANC_EN_MNG2HOST;
4454
4455		E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
4456	}
4457}
4458
4459/*
4460 * em_get_hw_control sets {CTRL_EXT|FWSM}:DRV_LOAD bit.
4461 * For ASF and Pass Through versions of f/w this means that
4462 * the driver is loaded. For AMT version (only with 82573)
4463 * of the f/w this means that the network i/f is open.
4464 *
4465 */
4466static void
4467em_get_hw_control(struct adapter *adapter)
4468{
4469	u32 ctrl_ext, swsm;
4470
4471	/* Let firmware know the driver has taken over */
4472	switch (adapter->hw.mac.type) {
4473	case e1000_82573:
4474		swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM);
4475		E1000_WRITE_REG(&adapter->hw, E1000_SWSM,
4476		    swsm | E1000_SWSM_DRV_LOAD);
4477		break;
4478	case e1000_82571:
4479	case e1000_82572:
4480	case e1000_80003es2lan:
4481	case e1000_ich8lan:
4482	case e1000_ich9lan:
4483		ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4484		E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
4485		    ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
4486		break;
4487	default:
4488		break;
4489	}
4490}
4491
4492/*
4493 * em_release_hw_control resets {CTRL_EXT|FWSM}:DRV_LOAD bit.
4494 * For ASF and Pass Through versions of f/w this means that the
 * driver is no longer loaded. For the AMT version of the f/w (only
 * with 82573) this means that the network i/f is closed.
4497 *
4498 */
4499static void
4500em_release_hw_control(struct adapter *adapter)
4501{
4502	u32 ctrl_ext, swsm;
4503
	/* Let firmware take over control of h/w */
4505	switch (adapter->hw.mac.type) {
4506	case e1000_82573:
4507		swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM);
4508		E1000_WRITE_REG(&adapter->hw, E1000_SWSM,
4509		    swsm & ~E1000_SWSM_DRV_LOAD);
4510		break;
4511	case e1000_82571:
4512	case e1000_82572:
4513	case e1000_80003es2lan:
4514	case e1000_ich8lan:
4515	case e1000_ich9lan:
4516		ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4517		E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
4518		    ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
4519		break;
4520	default:
4521		break;
4522
4523	}
4524}
4525
4526static int
4527em_is_valid_ether_addr(uint8_t *addr)
4528{
4529	char zero_addr[6] = { 0, 0, 0, 0, 0, 0 };
4530
4531	if ((addr[0] & 1) || (!bcmp(addr, zero_addr, ETHER_ADDR_LEN))) {
4532		return (FALSE);
4533	}
4534
4535	return (TRUE);
4536}
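/*
 * For example, this rejects 00:00:00:00:00:00 (the all-zero compare)
 * and any address with the multicast/broadcast bit set in the first
 * octet, such as ff:ff:ff:ff:ff:ff or 01:00:5e:xx:xx:xx.
 */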
4537
4538/*
 * NOTE: the following routines use the e1000
 *	naming style because they are provided to
 *	the shared code, which expects that prefix
 *	rather than 'em'
4542 */
4543
4544void
4545e1000_write_pci_cfg(struct e1000_hw *hw, uint32_t reg, uint16_t *value)
4546{
4547	pci_write_config(((struct e1000_osdep *)hw->back)->dev, reg, *value, 2);
4548}
4549
4550void
4551e1000_read_pci_cfg(struct e1000_hw *hw, uint32_t reg, uint16_t *value)
4552{
4553	*value = pci_read_config(((struct e1000_osdep *)hw->back)->dev, reg, 2);
4554}
4555
4556void
4557e1000_pci_set_mwi(struct e1000_hw *hw)
4558{
4559	pci_write_config(((struct e1000_osdep *)hw->back)->dev, PCIR_COMMAND,
4560	    (hw->bus.pci_cmd_word | CMD_MEM_WRT_INVALIDATE), 2);
4561}
4562
4563void
4564e1000_pci_clear_mwi(struct e1000_hw *hw)
4565{
4566	pci_write_config(((struct e1000_osdep *)hw->back)->dev, PCIR_COMMAND,
4567	    (hw->bus.pci_cmd_word & ~CMD_MEM_WRT_INVALIDATE), 2);
4568}
4569
4570/*
4571 * Read the PCI Express capabilities
4572 */
4573int32_t
4574e1000_read_pcie_cap_reg(struct e1000_hw *hw, uint32_t reg, uint16_t *value)
4575{
4576	int32_t		error = E1000_SUCCESS;
4577	uint16_t	cap_off;
4578
4579	switch (hw->mac.type) {
4580
4581		case e1000_82571:
4582		case e1000_82572:
4583		case e1000_82573:
4584		case e1000_80003es2lan:
4585			cap_off = 0xE0;
4586			e1000_read_pci_cfg(hw, cap_off + reg, value);
4587			break;
4588		default:
			error = E1000_NOT_IMPLEMENTED;
4590			break;
4591	}
4592
4593	return (error);
4594}
4595
4596int32_t
4597e1000_alloc_zeroed_dev_spec_struct(struct e1000_hw *hw, uint32_t size)
4598{
4599	int32_t error = 0;
4600
	hw->dev_spec = malloc(size, M_DEVBUF, M_NOWAIT | M_ZERO);
	if (hw->dev_spec == NULL)
		error = ENOMEM;
	/*
	 * No bzero() is needed here: M_ZERO already cleared the
	 * allocation, and bzero()ing a failed (NULL) allocation
	 * would panic.
	 */
4605
4606	return (error);
4607}
4608
4609void
4610e1000_free_dev_spec_struct(struct e1000_hw *hw)
4611{
4612	if (hw->dev_spec != NULL)
4613		free(hw->dev_spec, M_DEVBUF);
4614	return;
4615}
4616
4617/*
4618 * Enable PCI Wake On Lan capability
4619 */
4620void
4621em_enable_wakeup(device_t dev)
4622{
4623	u16     cap, status;
4624	u8      id;
4625
	/* First find the capabilities pointer */
4627	cap = pci_read_config(dev, PCIR_CAP_PTR, 2);
4628	/* Read the PM Capabilities */
4629	id = pci_read_config(dev, cap, 1);
4630	if (id != PCIY_PMG)     /* Something wrong */
4631		return;
4632	/* OK, we have the power capabilities, so
4633	   now get the status register */
4634	cap += PCIR_POWER_STATUS;
4635	status = pci_read_config(dev, cap, 2);
4636	status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE;
4637	pci_write_config(dev, cap, status, 2);
4638	return;
4639}
4640
4641
4642/*********************************************************************
4643* 82544 Coexistence issue workaround.
4644*    There are 2 issues.
4645*       1. Transmit Hang issue.
*    To detect this issue, the following equation can be used:
*	  SIZE[3:0] + ADDR[2:0] = SUM[3:0].
*	  If SUM[3:0] is between 1 and 4, we will have this issue.
*
*       2. DAC issue.
*    To detect this issue, the same equation can be used:
*	  SIZE[3:0] + ADDR[2:0] = SUM[3:0].
*	  If SUM[3:0] is between 9 and 0xC, we will have this issue.
*
*    WORKAROUND:
*	  Make sure the buffer's ending address is not
*	  1,2,3,4 (Hang) or 9,a,b,c (DAC).
4659*
4660*************************************************************************/
4661static uint32_t
4662em_fill_descriptors (bus_addr_t address, uint32_t length,
4663		PDESC_ARRAY desc_array)
4664{
	/*
	 * The issue is sensitive to both length and address, so
	 * check the address first.
	 */
4667	uint32_t safe_terminator;
4668	if (length <= 4) {
4669		desc_array->descriptor[0].address = address;
4670		desc_array->descriptor[0].length = length;
4671		desc_array->elements = 1;
4672		return (desc_array->elements);
4673	}
4674	safe_terminator = (uint32_t)((((uint32_t)address & 0x7) +
4675	    (length & 0xF)) & 0xF);
	/* If it does not fall in 0x1-0x4 or 0x9-0xC, one descriptor is safe. */
	if (safe_terminator == 0 ||
	    (safe_terminator > 4 && safe_terminator < 9) ||
	    (safe_terminator > 0xC && safe_terminator <= 0xF)) {
4682		desc_array->descriptor[0].address = address;
4683		desc_array->descriptor[0].length = length;
4684		desc_array->elements = 1;
4685		return (desc_array->elements);
4686	}
4687
4688	desc_array->descriptor[0].address = address;
4689	desc_array->descriptor[0].length = length - 4;
4690	desc_array->descriptor[1].address = address + (length - 4);
4691	desc_array->descriptor[1].length = 4;
4692	desc_array->elements = 2;
4693	return (desc_array->elements);
4694}
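/*
 * Worked example (illustrative only): address = 0x1000, length = 0x14.
 * safe_terminator = ((0x1000 & 0x7) + (0x14 & 0xF)) & 0xF = 4, which is
 * in the problem range 1..4, so the buffer is split into two
 * descriptors, (0x1000, 0x10) and (0x1010, 0x4), giving the frame a
 * 4-byte final descriptor that avoids the bad ending alignment.
 */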
4695
4696/**********************************************************************
4697 *
4698 *  Update the board statistics counters.
4699 *
4700 **********************************************************************/
4701static void
4702em_update_stats_counters(struct adapter *adapter)
4703{
4704	struct ifnet   *ifp;
4705
	if (adapter->hw.media_type == e1000_media_type_copper ||
4707	   (E1000_READ_REG(&adapter->hw, E1000_STATUS) & E1000_STATUS_LU)) {
4708		adapter->stats.symerrs += E1000_READ_REG(&adapter->hw, E1000_SYMERRS);
4709		adapter->stats.sec += E1000_READ_REG(&adapter->hw, E1000_SEC);
4710	}
4711	adapter->stats.crcerrs += E1000_READ_REG(&adapter->hw, E1000_CRCERRS);
4712	adapter->stats.mpc += E1000_READ_REG(&adapter->hw, E1000_MPC);
4713	adapter->stats.scc += E1000_READ_REG(&adapter->hw, E1000_SCC);
4714	adapter->stats.ecol += E1000_READ_REG(&adapter->hw, E1000_ECOL);
4715
4716	adapter->stats.mcc += E1000_READ_REG(&adapter->hw, E1000_MCC);
4717	adapter->stats.latecol += E1000_READ_REG(&adapter->hw, E1000_LATECOL);
4718	adapter->stats.colc += E1000_READ_REG(&adapter->hw, E1000_COLC);
4719	adapter->stats.dc += E1000_READ_REG(&adapter->hw, E1000_DC);
4720	adapter->stats.rlec += E1000_READ_REG(&adapter->hw, E1000_RLEC);
4721	adapter->stats.xonrxc += E1000_READ_REG(&adapter->hw, E1000_XONRXC);
4722	adapter->stats.xontxc += E1000_READ_REG(&adapter->hw, E1000_XONTXC);
4723	adapter->stats.xoffrxc += E1000_READ_REG(&adapter->hw, E1000_XOFFRXC);
4724	adapter->stats.xofftxc += E1000_READ_REG(&adapter->hw, E1000_XOFFTXC);
4725	adapter->stats.fcruc += E1000_READ_REG(&adapter->hw, E1000_FCRUC);
4726	adapter->stats.prc64 += E1000_READ_REG(&adapter->hw, E1000_PRC64);
4727	adapter->stats.prc127 += E1000_READ_REG(&adapter->hw, E1000_PRC127);
4728	adapter->stats.prc255 += E1000_READ_REG(&adapter->hw, E1000_PRC255);
4729	adapter->stats.prc511 += E1000_READ_REG(&adapter->hw, E1000_PRC511);
4730	adapter->stats.prc1023 += E1000_READ_REG(&adapter->hw, E1000_PRC1023);
4731	adapter->stats.prc1522 += E1000_READ_REG(&adapter->hw, E1000_PRC1522);
4732	adapter->stats.gprc += E1000_READ_REG(&adapter->hw, E1000_GPRC);
4733	adapter->stats.bprc += E1000_READ_REG(&adapter->hw, E1000_BPRC);
4734	adapter->stats.mprc += E1000_READ_REG(&adapter->hw, E1000_MPRC);
4735	adapter->stats.gptc += E1000_READ_REG(&adapter->hw, E1000_GPTC);
4736
4737	/* For the 64-bit byte counters the low dword must be read first. */
4738	/* Both registers clear on the read of the high dword */
4739
4740	adapter->stats.gorcl += E1000_READ_REG(&adapter->hw, E1000_GORCL);
4741	adapter->stats.gorch += E1000_READ_REG(&adapter->hw, E1000_GORCH);
4742	adapter->stats.gotcl += E1000_READ_REG(&adapter->hw, E1000_GOTCL);
4743	adapter->stats.gotch += E1000_READ_REG(&adapter->hw, E1000_GOTCH);
4744
4745	adapter->stats.rnbc += E1000_READ_REG(&adapter->hw, E1000_RNBC);
4746	adapter->stats.ruc += E1000_READ_REG(&adapter->hw, E1000_RUC);
4747	adapter->stats.rfc += E1000_READ_REG(&adapter->hw, E1000_RFC);
4748	adapter->stats.roc += E1000_READ_REG(&adapter->hw, E1000_ROC);
4749	adapter->stats.rjc += E1000_READ_REG(&adapter->hw, E1000_RJC);
4750
4751	adapter->stats.torl += E1000_READ_REG(&adapter->hw, E1000_TORL);
4752	adapter->stats.torh += E1000_READ_REG(&adapter->hw, E1000_TORH);
4753	adapter->stats.totl += E1000_READ_REG(&adapter->hw, E1000_TOTL);
4754	adapter->stats.toth += E1000_READ_REG(&adapter->hw, E1000_TOTH);
4755
4756	adapter->stats.tpr += E1000_READ_REG(&adapter->hw, E1000_TPR);
4757	adapter->stats.tpt += E1000_READ_REG(&adapter->hw, E1000_TPT);
4758	adapter->stats.ptc64 += E1000_READ_REG(&adapter->hw, E1000_PTC64);
4759	adapter->stats.ptc127 += E1000_READ_REG(&adapter->hw, E1000_PTC127);
4760	adapter->stats.ptc255 += E1000_READ_REG(&adapter->hw, E1000_PTC255);
4761	adapter->stats.ptc511 += E1000_READ_REG(&adapter->hw, E1000_PTC511);
4762	adapter->stats.ptc1023 += E1000_READ_REG(&adapter->hw, E1000_PTC1023);
4763	adapter->stats.ptc1522 += E1000_READ_REG(&adapter->hw, E1000_PTC1522);
4764	adapter->stats.mptc += E1000_READ_REG(&adapter->hw, E1000_MPTC);
4765	adapter->stats.bptc += E1000_READ_REG(&adapter->hw, E1000_BPTC);
4766
4767	if (adapter->hw.mac.type >= e1000_82543) {
4768		adapter->stats.algnerrc +=
4769		E1000_READ_REG(&adapter->hw, E1000_ALGNERRC);
4770		adapter->stats.rxerrc +=
4771		E1000_READ_REG(&adapter->hw, E1000_RXERRC);
4772		adapter->stats.tncrs +=
4773		E1000_READ_REG(&adapter->hw, E1000_TNCRS);
4774		adapter->stats.cexterr +=
4775		E1000_READ_REG(&adapter->hw, E1000_CEXTERR);
4776		adapter->stats.tsctc +=
4777		E1000_READ_REG(&adapter->hw, E1000_TSCTC);
4778		adapter->stats.tsctfc +=
4779		E1000_READ_REG(&adapter->hw, E1000_TSCTFC);
4780	}
4781	ifp = adapter->ifp;
4782
4783	ifp->if_collisions = adapter->stats.colc;
4784
4785	/* Rx Errors */
4786	ifp->if_ierrors = adapter->dropped_pkts + adapter->stats.rxerrc +
4787	    adapter->stats.crcerrs + adapter->stats.algnerrc +
4788	    adapter->stats.ruc + adapter->stats.roc +
4789	    adapter->stats.mpc + adapter->stats.cexterr;
4790
4791	/* Tx Errors */
4792	ifp->if_oerrors = adapter->stats.ecol +
4793	    adapter->stats.latecol + adapter->watchdog_events;
4794}
4795
4796
4797/**********************************************************************
4798 *
4799 *  This routine is called only when em_display_debug_stats is enabled.
 *  It provides a way to take a look at important statistics
4801 *  maintained by the driver and hardware.
4802 *
4803 **********************************************************************/
4804static void
4805em_print_debug_info(struct adapter *adapter)
4806{
4807	device_t dev = adapter->dev;
4808	uint8_t *hw_addr = adapter->hw.hw_addr;
4809
4810	device_printf(dev, "Adapter hardware address = %p \n", hw_addr);
4811	device_printf(dev, "CTRL = 0x%x RCTL = 0x%x \n",
4812	    E1000_READ_REG(&adapter->hw, E1000_CTRL),
4813	    E1000_READ_REG(&adapter->hw, E1000_RCTL));
4814	device_printf(dev, "Packet buffer = Tx=%dk Rx=%dk \n",
	    ((E1000_READ_REG(&adapter->hw, E1000_PBA) & 0xffff0000) >> 16),
	    (E1000_READ_REG(&adapter->hw, E1000_PBA) & 0xffff));
4817	device_printf(dev, "Flow control watermarks high = %d low = %d\n",
4818	    adapter->hw.mac.fc_high_water,
4819	    adapter->hw.mac.fc_low_water);
4820	device_printf(dev, "tx_int_delay = %d, tx_abs_int_delay = %d\n",
4821	    E1000_READ_REG(&adapter->hw, E1000_TIDV),
4822	    E1000_READ_REG(&adapter->hw, E1000_TADV));
4823	device_printf(dev, "rx_int_delay = %d, rx_abs_int_delay = %d\n",
4824	    E1000_READ_REG(&adapter->hw, E1000_RDTR),
4825	    E1000_READ_REG(&adapter->hw, E1000_RADV));
4826	device_printf(dev, "fifo workaround = %lld, fifo_reset_count = %lld\n",
4827	    (long long)adapter->tx_fifo_wrk_cnt,
4828	    (long long)adapter->tx_fifo_reset_cnt);
4829	device_printf(dev, "hw tdh = %d, hw tdt = %d\n",
4830	    E1000_READ_REG(&adapter->hw, E1000_TDH),
4831	    E1000_READ_REG(&adapter->hw, E1000_TDT));
4832	device_printf(dev, "hw rdh = %d, hw rdt = %d\n",
4833	    E1000_READ_REG(&adapter->hw, E1000_RDH),
4834	    E1000_READ_REG(&adapter->hw, E1000_RDT));
4835	device_printf(dev, "Num Tx descriptors avail = %d\n",
4836	    adapter->num_tx_desc_avail);
4837	device_printf(dev, "Tx Descriptors not avail1 = %ld\n",
4838	    adapter->no_tx_desc_avail1);
4839	device_printf(dev, "Tx Descriptors not avail2 = %ld\n",
4840	    adapter->no_tx_desc_avail2);
4841	device_printf(dev, "Std mbuf failed = %ld\n",
4842	    adapter->mbuf_alloc_failed);
4843	device_printf(dev, "Std mbuf cluster failed = %ld\n",
4844	    adapter->mbuf_cluster_failed);
4845	device_printf(dev, "Driver dropped packets = %ld\n",
4846	    adapter->dropped_pkts);
4847	device_printf(dev, "Driver tx dma failure in encap = %ld\n",
4848		adapter->no_tx_dma_setup);
4849}
4850
4851static void
4852em_print_hw_stats(struct adapter *adapter)
4853{
4854	device_t dev = adapter->dev;
4855
4856	device_printf(dev, "Excessive collisions = %lld\n",
4857	    (long long)adapter->stats.ecol);
#if	(DEBUG_HW > 0)  /* Don't output these errors normally */
4859	device_printf(dev, "Symbol errors = %lld\n",
4860	    (long long)adapter->stats.symerrs);
4861#endif
4862	device_printf(dev, "Sequence errors = %lld\n",
4863	    (long long)adapter->stats.sec);
4864	device_printf(dev, "Defer count = %lld\n",
4865	    (long long)adapter->stats.dc);
4866	device_printf(dev, "Missed Packets = %lld\n",
4867	    (long long)adapter->stats.mpc);
4868	device_printf(dev, "Receive No Buffers = %lld\n",
4869	    (long long)adapter->stats.rnbc);
	/* RLEC is inaccurate on some hardware, so calculate our own. */
4871	device_printf(dev, "Receive Length Errors = %lld\n",
4872	    ((long long)adapter->stats.roc + (long long)adapter->stats.ruc));
4873	device_printf(dev, "Receive errors = %lld\n",
4874	    (long long)adapter->stats.rxerrc);
4875	device_printf(dev, "Crc errors = %lld\n",
4876	    (long long)adapter->stats.crcerrs);
4877	device_printf(dev, "Alignment errors = %lld\n",
4878	    (long long)adapter->stats.algnerrc);
4879	device_printf(dev, "Carrier extension errors = %lld\n",
4880	    (long long)adapter->stats.cexterr);
4881	device_printf(dev, "RX overruns = %ld\n", adapter->rx_overruns);
4882	device_printf(dev, "watchdog timeouts = %ld\n",
4883	    adapter->watchdog_events);
4884	device_printf(dev, "XON Rcvd = %lld\n",
4885	    (long long)adapter->stats.xonrxc);
4886	device_printf(dev, "XON Xmtd = %lld\n",
4887	    (long long)adapter->stats.xontxc);
4888	device_printf(dev, "XOFF Rcvd = %lld\n",
4889	    (long long)adapter->stats.xoffrxc);
4890	device_printf(dev, "XOFF Xmtd = %lld\n",
4891	    (long long)adapter->stats.xofftxc);
4892	device_printf(dev, "Good Packets Rcvd = %lld\n",
4893	    (long long)adapter->stats.gprc);
4894	device_printf(dev, "Good Packets Xmtd = %lld\n",
4895	    (long long)adapter->stats.gptc);
4896	device_printf(dev, "TSO Contexts Xmtd = %lld\n",
4897	    (long long)adapter->stats.tsctc);
4898	device_printf(dev, "TSO Contexts Failed = %lld\n",
4899	    (long long)adapter->stats.tsctfc);
4900}
4901
4902static int
4903em_sysctl_debug_info(SYSCTL_HANDLER_ARGS)
4904{
4905	struct adapter *adapter;
4906	int error;
4907	int result;
4908
4909	result = -1;
4910	error = sysctl_handle_int(oidp, &result, 0, req);
4911
4912	if (error || !req->newptr)
4913		return (error);
4914
4915	if (result == 1) {
4916		adapter = (struct adapter *)arg1;
4917		em_print_debug_info(adapter);
4918	}
4919
4920	return (error);
4921}
4922
4923
4924static int
4925em_sysctl_stats(SYSCTL_HANDLER_ARGS)
4926{
4927	struct adapter *adapter;
4928	int error;
4929	int result;
4930
4931	result = -1;
4932	error = sysctl_handle_int(oidp, &result, 0, req);
4933
4934	if (error || !req->newptr)
4935		return (error);
4936
4937	if (result == 1) {
4938		adapter = (struct adapter *)arg1;
4939		em_print_hw_stats(adapter);
4940	}
4941
4942	return (error);
4943}
4944
4945static int
4946em_sysctl_int_delay(SYSCTL_HANDLER_ARGS)
4947{
4948	struct em_int_delay_info *info;
4949	struct adapter *adapter;
4950	uint32_t regval;
4951	int error;
4952	int usecs;
4953	int ticks;
4954
4955	info = (struct em_int_delay_info *)arg1;
4956	usecs = info->value;
4957	error = sysctl_handle_int(oidp, &usecs, 0, req);
4958	if (error != 0 || req->newptr == NULL)
4959		return (error);
4960	if (usecs < 0 || usecs > EM_TICKS_TO_USECS(65535))
4961		return (EINVAL);
4962	info->value = usecs;
4963	ticks = EM_USECS_TO_TICKS(usecs);
4964
4965	adapter = info->adapter;
4966
4967	EM_LOCK(adapter);
4968	regval = E1000_READ_OFFSET(&adapter->hw, info->offset);
4969	regval = (regval & ~0xffff) | (ticks & 0xffff);
4970	/* Handle a few special cases. */
4971	switch (info->offset) {
4972	case E1000_RDTR:
4973		break;
4974	case E1000_TIDV:
4975		if (ticks == 0) {
4976			adapter->txd_cmd &= ~E1000_TXD_CMD_IDE;
4977			/* Don't write 0 into the TIDV register. */
4978			regval++;
4979		} else
4980			if (adapter->hw.mac.type != e1000_82575)
4981				adapter->txd_cmd |= E1000_TXD_CMD_IDE;
4982		break;
4983	}
4984	E1000_WRITE_OFFSET(&adapter->hw, info->offset, regval);
4985	EM_UNLOCK(adapter);
4986	return (0);
4987}
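/*
 * Assuming the EM_USECS_TO_TICKS()/EM_TICKS_TO_USECS() macros in
 * if_em.h convert using the hardware's 1.024 us delay-timer
 * granularity, a sysctl write of, say, 32 us lands in the register as
 * roughly 31 ticks; the range check above keeps the result within the
 * 16-bit field shared by RDTR, RADV, TIDV and TADV.
 */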
4988
4989static void
4990em_add_int_delay_sysctl(struct adapter *adapter, const char *name,
4991	const char *description, struct em_int_delay_info *info,
4992	int offset, int value)
4993{
4994	info->adapter = adapter;
4995	info->offset = offset;
4996	info->value = value;
4997	SYSCTL_ADD_PROC(device_get_sysctl_ctx(adapter->dev),
4998	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
4999	    OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW,
5000	    info, 0, em_sysctl_int_delay, "I", description);
5001}
5002
5003#ifndef DEVICE_POLLING
5004static void
5005em_add_rx_process_limit(struct adapter *adapter, const char *name,
5006	const char *description, int *limit, int value)
5007{
5008	*limit = value;
5009	SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
5010	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
5011	    OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW, limit, value, description);
5012}
5013#endif
5014