/* if_em.c revision 169240 */
1/**************************************************************************
2
3Copyright (c) 2001-2007, Intel Corporation
4All rights reserved.
5
6Redistribution and use in source and binary forms, with or without
7modification, are permitted provided that the following conditions are met:
8
9 1. Redistributions of source code must retain the above copyright notice,
10    this list of conditions and the following disclaimer.
11
12 2. Redistributions in binary form must reproduce the above copyright
13    notice, this list of conditions and the following disclaimer in the
14    documentation and/or other materials provided with the distribution.
15
16 3. Neither the name of the Intel Corporation nor the names of its
17    contributors may be used to endorse or promote products derived from
18    this software without specific prior written permission.
19
20THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
24LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30POSSIBILITY OF SUCH DAMAGE.
31
32***************************************************************************/
33
/* $FreeBSD: head/sys/dev/em/if_em.c 169240 2007-05-04 00:00:12Z jfv $ */
35
36#ifdef HAVE_KERNEL_OPTION_HEADERS
37#include "opt_device_polling.h"
38#endif
39
40#include <sys/param.h>
41#include <sys/systm.h>
42#include <sys/bus.h>
43#include <sys/endian.h>
44#include <sys/kernel.h>
45#include <sys/kthread.h>
46#include <sys/malloc.h>
47#include <sys/mbuf.h>
48#include <sys/module.h>
49#include <sys/rman.h>
50#include <sys/socket.h>
51#include <sys/sockio.h>
52#include <sys/sysctl.h>
53#include <sys/taskqueue.h>
54
55#include <machine/bus.h>
56#include <machine/resource.h>
57
58#include <net/bpf.h>
59#include <net/ethernet.h>
60#include <net/if.h>
61#include <net/if_arp.h>
62#include <net/if_dl.h>
63#include <net/if_media.h>
64
65#include <net/if_types.h>
66#include <net/if_vlan_var.h>
67
68#include <netinet/in_systm.h>
69#include <netinet/in.h>
70#include <netinet/if_ether.h>
71#include <netinet/ip.h>
72#include <netinet/ip6.h>
73#include <netinet/tcp.h>
74#include <netinet/udp.h>
75
76#include <machine/in_cksum.h>
77#include <dev/pci/pcivar.h>
78#include <dev/pci/pcireg.h>
79
80#include "e1000_api.h"
81#include "e1000_82575.h"
82#include "if_em.h"
83
/*********************************************************************
 *  Set this to one to display debug statistics
 *********************************************************************/
int	em_display_debug_stats = 0;	/* non-zero enables debug stat printing */

/*********************************************************************
 *  Driver version:
 *********************************************************************/
char em_driver_version[] = "Version - 6.5.0";	/* appended to the probe description */
93
94
/*********************************************************************
 *  PCI Device ID Table
 *
 *  Used by probe to select devices to load on
 *  Last field stores an index into e1000_strings
 *  Last entry must be all 0s
 *
 *  { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
 *********************************************************************/

static em_vendor_info_t em_vendor_info_array[] =
{
	/* Intel(R) PRO/1000 Network Connection */
	/* 82540 family */
	{ 0x8086, E1000_DEV_ID_82540EM,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82540EM_LOM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82540EP,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82540EP_LOM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82540EP_LP,	PCI_ANY_ID, PCI_ANY_ID, 0},

	/* 82541 family */
	{ 0x8086, E1000_DEV_ID_82541EI,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82541ER,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82541ER_LOM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82541EI_MOBILE,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82541GI,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82541GI_LF,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82541GI_MOBILE,	PCI_ANY_ID, PCI_ANY_ID, 0},

	{ 0x8086, E1000_DEV_ID_82542,		PCI_ANY_ID, PCI_ANY_ID, 0},

	{ 0x8086, E1000_DEV_ID_82543GC_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82543GC_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},

	{ 0x8086, E1000_DEV_ID_82544EI_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82544EI_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82544GC_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82544GC_LOM,	PCI_ANY_ID, PCI_ANY_ID, 0},

	{ 0x8086, E1000_DEV_ID_82545EM_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82545EM_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82545GM_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82545GM_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82545GM_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},

	/* 82546: dual-port parts (see WOL special-casing in em_attach) */
	{ 0x8086, E1000_DEV_ID_82546EB_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82546EB_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82546EB_QUAD_COPPER, PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82546GB_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82546GB_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82546GB_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82546GB_PCIE,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82546GB_QUAD_COPPER, PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82546GB_QUAD_COPPER_KSP3,
						PCI_ANY_ID, PCI_ANY_ID, 0},

	{ 0x8086, E1000_DEV_ID_82547EI,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82547EI_MOBILE,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82547GI,		PCI_ANY_ID, PCI_ANY_ID, 0},

	/* PCI Express parts: 82571/82572/82573 */
	{ 0x8086, E1000_DEV_ID_82571EB_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER_LP,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_FIBER,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82572EI_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82572EI_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82572EI_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82572EI,		PCI_ANY_ID, PCI_ANY_ID, 0},

	{ 0x8086, E1000_DEV_ID_82573E,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82573E_IAMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82573L,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_SPT,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_SPT,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_DPT,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_DPT,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	/* ICH8 LOM variants */
	{ 0x8086, E1000_DEV_ID_ICH8_IGP_M_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IGP_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IGP_C,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IFE,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IFE_GT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IFE_G,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IGP_M,	PCI_ANY_ID, PCI_ANY_ID, 0},

	{ 0x8086, E1000_DEV_ID_ICH9_IGP_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IGP_C,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IFE,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IFE_GT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IFE_G,	PCI_ANY_ID, PCI_ANY_ID, 0},

	{ 0x8086, E1000_DEV_ID_82575EB_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82575EB_FIBER_SERDES,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82575EM_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82575EM_FIBER_SERDES,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82575GB_QUAD_COPPER,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	/* required last entry */
	{ 0, 0, 0, 0, 0}
};
203
/*********************************************************************
 *  Table of branding strings for all supported NICs.
 *********************************************************************/

/* Indexed by the last field of em_vendor_info_array entries */
static char *em_strings[] = {
	"Intel(R) PRO/1000 Network Connection"
};
211
/*********************************************************************
 *  Function prototypes
 *********************************************************************/
/* newbus device entry points */
static int	em_probe(device_t);
static int	em_attach(device_t);
static int	em_detach(device_t);
static int	em_shutdown(device_t);
static int	em_suspend(device_t);
static int	em_resume(device_t);
/* ifnet entry points and periodic housekeeping */
static void	em_start(struct ifnet *);
static void	em_start_locked(struct ifnet *ifp);
static int	em_ioctl(struct ifnet *, u_long, caddr_t);
static void	em_watchdog(struct adapter *);
static void	em_init(void *);
static void	em_init_locked(struct adapter *);
static void	em_stop(void *);
static void	em_media_status(struct ifnet *, struct ifmediareq *);
static int	em_media_change(struct ifnet *);
/* hardware/bus setup and teardown */
static void	em_identify_hardware(struct adapter *);
static int	em_allocate_pci_resources(struct adapter *);
static int	em_allocate_intr(struct adapter *);
static void	em_free_intr(struct adapter *);
static void	em_free_pci_resources(struct adapter *);
static void	em_local_timer(void *);
static int	em_hardware_init(struct adapter *);
static void	em_setup_interface(device_t, struct adapter *);
/* transmit/receive ring management */
static int	em_setup_transmit_structures(struct adapter *);
static void	em_initialize_transmit_unit(struct adapter *);
static int	em_setup_receive_structures(struct adapter *);
static void	em_initialize_receive_unit(struct adapter *);
static void	em_enable_intr(struct adapter *);
static void	em_disable_intr(struct adapter *);
static void	em_free_transmit_structures(struct adapter *);
static void	em_free_receive_structures(struct adapter *);
static void	em_update_stats_counters(struct adapter *);
static void	em_txeof(struct adapter *);
static int	em_allocate_receive_structures(struct adapter *);
static int	em_allocate_transmit_structures(struct adapter *);
static int	em_rxeof(struct adapter *, int);
#ifndef __NO_STRICT_ALIGNMENT
static int	em_fixup_rx(struct adapter *);
#endif
/* checksum offload / TSO helpers */
static void	em_receive_checksum(struct adapter *, struct e1000_rx_desc *,
		    struct mbuf *);
static void	em_transmit_checksum_setup(struct adapter *, struct mbuf *,
		    uint32_t *, uint32_t *);
static boolean_t em_tso_setup(struct adapter *, struct mbuf *, uint32_t *,
		    uint32_t *);
static boolean_t em_tso_adv_setup(struct adapter *, struct mbuf *, uint32_t *);
static void	em_set_promisc(struct adapter *);
static void	em_disable_promisc(struct adapter *);
static void	em_set_multi(struct adapter *);
static void	em_print_hw_stats(struct adapter *);
static void	em_update_link_status(struct adapter *);
static int	em_get_buf(struct adapter *, int);
static void	em_enable_vlans(struct adapter *);
static int	em_encap(struct adapter *, struct mbuf **);
static int	em_adv_encap(struct adapter *, struct mbuf **);
static void	em_tx_adv_ctx_setup(struct adapter *, struct mbuf *);
static void	em_smartspeed(struct adapter *);
/* 82547 TX FIFO workaround (half-duplex hang erratum) */
static int	em_82547_fifo_workaround(struct adapter *, int);
static void	em_82547_update_fifo_head(struct adapter *, int);
static int	em_82547_tx_fifo_reset(struct adapter *);
static void	em_82547_move_tail(void *);
static int	em_dma_malloc(struct adapter *, bus_size_t,
		    struct em_dma_alloc *, int);
static void	em_dma_free(struct adapter *, struct em_dma_alloc *);
static void	em_print_debug_info(struct adapter *);
static int 	em_is_valid_ether_addr(uint8_t *);
static int	em_sysctl_stats(SYSCTL_HANDLER_ARGS);
static int	em_sysctl_debug_info(SYSCTL_HANDLER_ARGS);
static uint32_t	em_fill_descriptors (bus_addr_t address, uint32_t length,
		    PDESC_ARRAY desc_array);
static int	em_sysctl_int_delay(SYSCTL_HANDLER_ARGS);
static void	em_add_int_delay_sysctl(struct adapter *, const char *,
		    const char *, struct em_int_delay_info *, int, int);
/* Management and WOL Support */
static void	em_init_manageability(struct adapter *);
static void	em_release_manageability(struct adapter *);
static void     em_get_hw_control(struct adapter *);
static void     em_release_hw_control(struct adapter *);
static void     em_enable_wakeup(device_t);

#ifdef DEVICE_POLLING
static poll_handler_t em_poll;
static void	em_intr(void *);
#else
/* fast-interrupt + taskqueue path used when polling is not compiled in */
static int	em_intr_fast(void *);
static void	em_add_rx_process_limit(struct adapter *, const char *,
		    const char *, int *, int);
static void	em_handle_rxtx(void *context, int pending);
static void	em_handle_link(void *context, int pending);
#endif
305
/*********************************************************************
 *  FreeBSD Device Interface Entry Points
 *********************************************************************/

/* newbus method table: maps generic device operations to em_* handlers */
static device_method_t em_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe, em_probe),
	DEVMETHOD(device_attach, em_attach),
	DEVMETHOD(device_detach, em_detach),
	DEVMETHOD(device_shutdown, em_shutdown),
	DEVMETHOD(device_suspend, em_suspend),
	DEVMETHOD(device_resume, em_resume),
	{0, 0}	/* required terminator */
};

static driver_t em_driver = {
	"em", em_methods, sizeof(struct adapter),
};

static devclass_t em_devclass;
/* Attach to the pci bus; module requires pci and ether to be loaded */
DRIVER_MODULE(em, pci, em_driver, em_devclass, 0, 0);
MODULE_DEPEND(em, pci, 1, 1, 1);
MODULE_DEPEND(em, ether, 1, 1, 1);
329
/*********************************************************************
 *  Tunable default values.
 *********************************************************************/

/* Hardware interrupt-delay registers count in 1.024 us ticks */
#define EM_TICKS_TO_USECS(ticks)	((1024 * (ticks) + 500) / 1000)
#define EM_USECS_TO_TICKS(usecs)	((1000 * (usecs) + 512) / 1024)
#define M_TSO_LEN			66

/* Defaults come from EM_* constants in if_em.h; overridable via loader.conf */
static int em_tx_int_delay_dflt = EM_TICKS_TO_USECS(EM_TIDV);
static int em_rx_int_delay_dflt = EM_TICKS_TO_USECS(EM_RDTR);
static int em_tx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_TADV);
static int em_rx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_RADV);
static int em_rxd = EM_DEFAULT_RXD;
static int em_txd = EM_DEFAULT_TXD;
static int em_smart_pwr_down = FALSE;

TUNABLE_INT("hw.em.tx_int_delay", &em_tx_int_delay_dflt);
TUNABLE_INT("hw.em.rx_int_delay", &em_rx_int_delay_dflt);
TUNABLE_INT("hw.em.tx_abs_int_delay", &em_tx_abs_int_delay_dflt);
TUNABLE_INT("hw.em.rx_abs_int_delay", &em_rx_abs_int_delay_dflt);
TUNABLE_INT("hw.em.rxd", &em_rxd);
TUNABLE_INT("hw.em.txd", &em_txd);
TUNABLE_INT("hw.em.smart_pwr_down", &em_smart_pwr_down);
#ifndef DEVICE_POLLING
/* How many packets rxeof tries to clean at a time */
static int em_rx_process_limit = 100;
TUNABLE_INT("hw.em.rx_process_limit", &em_rx_process_limit);
#endif
/* Global used in WOL setup with multiport cards */
static int global_quad_port_a = 0;
360
361/*********************************************************************
362 *  Device identification routine
363 *
364 *  em_probe determines if the driver should be loaded on
365 *  adapter based on PCI vendor/device id of the adapter.
366 *
367 *  return BUS_PROBE_DEFAULT on success, positive on failure
368 *********************************************************************/
369
370static int
371em_probe(device_t dev)
372{
373	char		adapter_name[60];
374	uint16_t	pci_vendor_id = 0;
375	uint16_t	pci_device_id = 0;
376	uint16_t	pci_subvendor_id = 0;
377	uint16_t	pci_subdevice_id = 0;
378	em_vendor_info_t *ent;
379
380	INIT_DEBUGOUT("em_probe: begin");
381
382	pci_vendor_id = pci_get_vendor(dev);
383	if (pci_vendor_id != EM_VENDOR_ID)
384		return (ENXIO);
385
386	pci_device_id = pci_get_device(dev);
387	pci_subvendor_id = pci_get_subvendor(dev);
388	pci_subdevice_id = pci_get_subdevice(dev);
389
390	ent = em_vendor_info_array;
391	while (ent->vendor_id != 0) {
392		if ((pci_vendor_id == ent->vendor_id) &&
393		    (pci_device_id == ent->device_id) &&
394
395		    ((pci_subvendor_id == ent->subvendor_id) ||
396		    (ent->subvendor_id == PCI_ANY_ID)) &&
397
398		    ((pci_subdevice_id == ent->subdevice_id) ||
399		    (ent->subdevice_id == PCI_ANY_ID))) {
400			sprintf(adapter_name, "%s %s",
401				em_strings[ent->index],
402				em_driver_version);
403			device_set_desc_copy(dev, adapter_name);
404			return (BUS_PROBE_DEFAULT);
405		}
406		ent++;
407	}
408
409	return (ENXIO);
410}
411
412/*********************************************************************
413 *  Device initialization routine
414 *
415 *  The attach entry point is called when the driver is being loaded.
416 *  This routine identifies the type of hardware, allocates all resources
417 *  and initializes the hardware.
418 *
419 *  return 0 on success, positive on failure
420 *********************************************************************/
421
422static int
423em_attach(device_t dev)
424{
425	struct adapter	*adapter;
426	int		tsize, rsize;
427	int		error = 0;
428	u16		eeprom_data, device_id;
429
430	INIT_DEBUGOUT("em_attach: begin");
431
432	adapter = device_get_softc(dev);
433	adapter->dev = adapter->osdep.dev = dev;
434	EM_LOCK_INIT(adapter, device_get_nameunit(dev));
435
436	/* SYSCTL stuff */
437	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
438	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
439	    OID_AUTO, "debug_info", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
440	    em_sysctl_debug_info, "I", "Debug Information");
441
442	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
443	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
444	    OID_AUTO, "stats", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
445	    em_sysctl_stats, "I", "Statistics");
446
447	callout_init_mtx(&adapter->timer, &adapter->mtx, 0);
448	callout_init_mtx(&adapter->tx_fifo_timer, &adapter->mtx, 0);
449
450	/* Determine hardware revision */
451	em_identify_hardware(adapter);
452
453	/* Setup PCI resources */
454	if (em_allocate_pci_resources(adapter)) {
455		device_printf(dev, "Allocation of PCI resources failed\n");
456		error = ENXIO;
457		goto err_pci;
458	}
459
460	/*
461	** For ICH8 and family we need to
462	** map the flash memory, and this
463	** must happen after the MAC is
464	** identified
465	*/
466	if ((adapter->hw.mac.type == e1000_ich8lan) ||
467	    (adapter->hw.mac.type == e1000_ich9lan)) {
468		int rid = EM_BAR_TYPE_FLASH;
469		adapter->flash_mem = bus_alloc_resource_any(dev,
470		    SYS_RES_MEMORY, &rid, RF_ACTIVE);
471		/* This is used in the shared code */
472		adapter->hw.flash_address = (u8 *)adapter->flash_mem;
473		adapter->osdep.flash_bus_space_tag =
474		    rman_get_bustag(adapter->flash_mem);
475		adapter->osdep.flash_bus_space_handle =
476		    rman_get_bushandle(adapter->flash_mem);
477	}
478
479	/* Do Shared Code initialization */
480	if (e1000_setup_init_funcs(&adapter->hw, TRUE)) {
481		device_printf(dev, "Setup of Shared code failed\n");
482		error = ENXIO;
483		goto err_pci;
484	}
485
486	e1000_get_bus_info(&adapter->hw);
487
488	/* Set up some sysctls for the tunable interrupt delays */
489	em_add_int_delay_sysctl(adapter, "rx_int_delay",
490	    "receive interrupt delay in usecs", &adapter->rx_int_delay,
491	    E1000_REGISTER(&adapter->hw, E1000_RDTR), em_rx_int_delay_dflt);
492	em_add_int_delay_sysctl(adapter, "tx_int_delay",
493	    "transmit interrupt delay in usecs", &adapter->tx_int_delay,
494	    E1000_REGISTER(&adapter->hw, E1000_TIDV), em_tx_int_delay_dflt);
495	if (adapter->hw.mac.type >= e1000_82540) {
496		em_add_int_delay_sysctl(adapter, "rx_abs_int_delay",
497		    "receive interrupt delay limit in usecs",
498		    &adapter->rx_abs_int_delay,
499		    E1000_REGISTER(&adapter->hw, E1000_RADV),
500		    em_rx_abs_int_delay_dflt);
501		em_add_int_delay_sysctl(adapter, "tx_abs_int_delay",
502		    "transmit interrupt delay limit in usecs",
503		    &adapter->tx_abs_int_delay,
504		    E1000_REGISTER(&adapter->hw, E1000_TADV),
505		    em_tx_abs_int_delay_dflt);
506	}
507
508#ifndef DEVICE_POLLING
509	/* Sysctls for limiting the amount of work done in the taskqueue */
510	em_add_rx_process_limit(adapter, "rx_processing_limit",
511	    "max number of rx packets to process", &adapter->rx_process_limit,
512	    em_rx_process_limit);
513#endif
514
515	/*
516	 * Validate number of transmit and receive descriptors. It
517	 * must not exceed hardware maximum, and must be multiple
518	 * of E1000_DBA_ALIGN.
519	 */
520	if (((em_txd * sizeof(struct e1000_tx_desc)) % EM_DBA_ALIGN) != 0 ||
521	    (adapter->hw.mac.type >= e1000_82544 && em_txd > EM_MAX_TXD) ||
522	    (adapter->hw.mac.type < e1000_82544 && em_txd > EM_MAX_TXD_82543) ||
523	    (em_txd < EM_MIN_TXD)) {
524		device_printf(dev, "Using %d TX descriptors instead of %d!\n",
525		    EM_DEFAULT_TXD, em_txd);
526		adapter->num_tx_desc = EM_DEFAULT_TXD;
527	} else
528		adapter->num_tx_desc = em_txd;
529	if (((em_rxd * sizeof(struct e1000_rx_desc)) % EM_DBA_ALIGN) != 0 ||
530	    (adapter->hw.mac.type >= e1000_82544 && em_rxd > EM_MAX_RXD) ||
531	    (adapter->hw.mac.type < e1000_82544 && em_rxd > EM_MAX_RXD_82543) ||
532	    (em_rxd < EM_MIN_RXD)) {
533		device_printf(dev, "Using %d RX descriptors instead of %d!\n",
534		    EM_DEFAULT_RXD, em_rxd);
535		adapter->num_rx_desc = EM_DEFAULT_RXD;
536	} else
537		adapter->num_rx_desc = em_rxd;
538
539	adapter->hw.mac.autoneg = DO_AUTO_NEG;
540	adapter->hw.phy.wait_for_link = FALSE;
541	adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
542	adapter->rx_buffer_len = 2048;
543
544	e1000_init_script_state_82541(&adapter->hw, TRUE);
545	e1000_set_tbi_compatibility_82543(&adapter->hw, TRUE);
546
547	/* Copper options */
548	if (adapter->hw.media_type == e1000_media_type_copper) {
549		adapter->hw.phy.mdix = AUTO_ALL_MODES;
550		adapter->hw.phy.disable_polarity_correction = FALSE;
551		adapter->hw.phy.ms_type = EM_MASTER_SLAVE;
552	}
553
554	/*
555	 * Set the max frame size assuming standard ethernet
556	 * sized frames.
557	 */
558	adapter->hw.mac.max_frame_size =
559	    ETHERMTU + ETHER_HDR_LEN + ETHERNET_FCS_SIZE;
560
561	adapter->hw.mac.min_frame_size = ETH_ZLEN + ETHERNET_FCS_SIZE;
562
563	/*
564	 * This controls when hardware reports transmit completion
565	 * status.
566	 */
567	adapter->hw.mac.report_tx_early = 1;
568
569	tsize = roundup2(adapter->num_tx_desc * sizeof(struct e1000_tx_desc),
570	    EM_DBA_ALIGN);
571
572	/* Allocate Transmit Descriptor ring */
573	if (em_dma_malloc(adapter, tsize, &adapter->txdma, BUS_DMA_NOWAIT)) {
574		device_printf(dev, "Unable to allocate tx_desc memory\n");
575		error = ENOMEM;
576		goto err_tx_desc;
577	}
578	adapter->tx_desc_base =
579	    (struct e1000_tx_desc *)adapter->txdma.dma_vaddr;
580
581	rsize = roundup2(adapter->num_rx_desc * sizeof(struct e1000_rx_desc),
582	    EM_DBA_ALIGN);
583
584	/* Allocate Receive Descriptor ring */
585	if (em_dma_malloc(adapter, rsize, &adapter->rxdma, BUS_DMA_NOWAIT)) {
586		device_printf(dev, "Unable to allocate rx_desc memory\n");
587		error = ENOMEM;
588		goto err_rx_desc;
589	}
590	adapter->rx_desc_base =
591	    (struct e1000_rx_desc *)adapter->rxdma.dma_vaddr;
592
593	/* Make sure we have a good EEPROM before we read from it */
594	if (e1000_validate_nvm_checksum(&adapter->hw) < 0) {
595		/*
596		** Some PCI-E parts fail the first check due to
597		** the link being in sleep state, call it again,
598		** if it fails a second time its a real issue.
599		*/
600		if (e1000_validate_nvm_checksum(&adapter->hw) < 0) {
601			device_printf(dev,
602			    "The EEPROM Checksum Is Not Valid\n");
603			error = EIO;
604			goto err_hw_init;
605		}
606	}
607
608	if (e1000_read_part_num(&adapter->hw, &(adapter->part_num)) < 0) {
609		device_printf(dev, "EEPROM read error "
610		    "reading part number\n");
611		error = EIO;
612		goto err_hw_init;
613	}
614
615	/* Initialize the hardware */
616	if (em_hardware_init(adapter)) {
617		device_printf(dev, "Unable to initialize the hardware\n");
618		error = EIO;
619		goto err_hw_init;
620	}
621
622	/* Copy the permanent MAC address out of the EEPROM */
623	if (e1000_read_mac_addr(&adapter->hw) < 0) {
624		device_printf(dev, "EEPROM read error while reading MAC"
625		    " address\n");
626		error = EIO;
627		goto err_hw_init;
628	}
629
630	if (!em_is_valid_ether_addr(adapter->hw.mac.addr)) {
631		device_printf(dev, "Invalid MAC address\n");
632		error = EIO;
633		goto err_hw_init;
634	}
635
636	/* Setup OS specific network interface */
637	em_setup_interface(dev, adapter);
638
639	em_allocate_intr(adapter);
640
641	/* Initialize statistics */
642	em_update_stats_counters(adapter);
643
644	adapter->hw.mac.get_link_status = 1;
645	em_update_link_status(adapter);
646
647	/* Indicate SOL/IDER usage */
648	if (e1000_check_reset_block(&adapter->hw))
649		device_printf(dev,
650		    "PHY reset is blocked due to SOL/IDER session.\n");
651
652	/* Determine if we have to control management hardware */
653	adapter->has_manage = e1000_enable_mng_pass_thru(&adapter->hw);
654
655	/*
656	 * Setup Wake-on-Lan
657	 */
658	switch (adapter->hw.mac.type) {
659
660	case e1000_82542:
661	case e1000_82543:
662		break;
663	case e1000_82546:
664	case e1000_82546_rev_3:
665	case e1000_82571:
666	case e1000_80003es2lan:
667		if (adapter->hw.bus.func == 1)
668			e1000_read_nvm(&adapter->hw,
669			    NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data);
670		else
671			e1000_read_nvm(&adapter->hw,
672			    NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
673		eeprom_data &= EM_EEPROM_APME;
674		break;
675	default:
676		/* APME bit in EEPROM is mapped to WUC.APME */
677		eeprom_data = E1000_READ_REG(&adapter->hw, E1000_WUC) &
678		    E1000_WUC_APME;
679		break;
680	}
681	if (eeprom_data)
682		adapter->wol = E1000_WUFC_MAG;
683	/*
684         * We have the eeprom settings, now apply the special cases
685         * where the eeprom may be wrong or the board won't support
686         * wake on lan on a particular port
687	 */
688	device_id = pci_get_device(dev);
689        switch (device_id) {
690	case E1000_DEV_ID_82546GB_PCIE:
691		adapter->wol = 0;
692		break;
693	case E1000_DEV_ID_82546EB_FIBER:
694	case E1000_DEV_ID_82546GB_FIBER:
695	case E1000_DEV_ID_82571EB_FIBER:
696		/* Wake events only supported on port A for dual fiber
697		 * regardless of eeprom setting */
698		if (E1000_READ_REG(&adapter->hw, E1000_STATUS) &
699		    E1000_STATUS_FUNC_1)
700			adapter->wol = 0;
701		break;
702	case E1000_DEV_ID_82546GB_QUAD_COPPER_KSP3:
703	case E1000_DEV_ID_82571EB_QUAD_COPPER:
704	case E1000_DEV_ID_82571EB_QUAD_FIBER:
705	case E1000_DEV_ID_82571EB_QUAD_COPPER_LP:
706                /* if quad port adapter, disable WoL on all but port A */
707		if (global_quad_port_a != 0)
708			adapter->wol = 0;
709		/* Reset for multiple quad port adapters */
710		if (++global_quad_port_a == 4)
711			global_quad_port_a = 0;
712                break;
713	}
714
715	/* Do we need workaround for 82544 PCI-X adapter? */
716	if (adapter->hw.bus.type == e1000_bus_type_pcix &&
717	    adapter->hw.mac.type == e1000_82544)
718		adapter->pcix_82544 = TRUE;
719	else
720		adapter->pcix_82544 = FALSE;
721
722	/* Get control from any management/hw control */
723	if (((adapter->hw.mac.type != e1000_82573) &&
724	    (adapter->hw.mac.type != e1000_ich8lan) &&
725	    (adapter->hw.mac.type != e1000_ich9lan)) ||
726	    !e1000_check_mng_mode(&adapter->hw))
727		em_get_hw_control(adapter);
728
729	/* Tell the stack that the interface is not active */
730	adapter->ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
731
732	INIT_DEBUGOUT("em_attach: end");
733
734	return (0);
735
736err_hw_init:
737	em_release_hw_control(adapter);
738	e1000_remove_device(&adapter->hw);
739	em_dma_free(adapter, &adapter->rxdma);
740err_rx_desc:
741	em_dma_free(adapter, &adapter->txdma);
742err_tx_desc:
743err_pci:
744	em_free_intr(adapter);
745	em_free_pci_resources(adapter);
746	EM_LOCK_DESTROY(adapter);
747
748	return (error);
749}
750
751/*********************************************************************
752 *  Device removal routine
753 *
754 *  The detach entry point is called when the driver is being removed.
755 *  This routine stops the adapter and deallocates all the resources
756 *  that were allocated for driver operation.
757 *
758 *  return 0 on success, positive on failure
759 *********************************************************************/
760
static int
em_detach(device_t dev)
{
	struct adapter	*adapter = device_get_softc(dev);
	struct ifnet	*ifp = adapter->ifp;

	INIT_DEBUGOUT("em_detach: begin");

#ifdef DEVICE_POLLING
	/* Stop polling before the interrupt path is torn down */
	if (ifp->if_capenable & IFCAP_POLLING)
		ether_poll_deregister(ifp);
#endif

	/* Quiesce and free the interrupt before stopping the adapter */
	em_disable_intr(adapter);
	em_free_intr(adapter);
	EM_LOCK(adapter);
	adapter->in_detach = 1;	/* blocks further ioctl/timer work */
	em_stop(adapter);
	e1000_phy_hw_reset(&adapter->hw);

	em_release_manageability(adapter);
	/*
	 * Give hardware control back to firmware, except when a
	 * management-capable part (82573/ICH8/ICH9) is actively in
	 * management mode.  Same condition as used in em_attach.
	 */
	if (((adapter->hw.mac.type != e1000_82573) &&
	    (adapter->hw.mac.type != e1000_ich8lan) &&
	    (adapter->hw.mac.type != e1000_ich9lan)) ||
	    !e1000_check_mng_mode(&adapter->hw))
		em_release_hw_control(adapter);
	/* Arm Wake-on-LAN if it was configured during attach */
	if (adapter->wol) {
		E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
		E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
		em_enable_wakeup(dev);
	}

	EM_UNLOCK(adapter);
	ether_ifdetach(adapter->ifp);

	/* Drain callouts after ifdetach so no new ticks get scheduled */
	callout_drain(&adapter->timer);
	callout_drain(&adapter->tx_fifo_timer);

	em_free_pci_resources(adapter);
	bus_generic_detach(dev);
	if_free(ifp);

	e1000_remove_device(&adapter->hw);
	em_free_transmit_structures(adapter);
	em_free_receive_structures(adapter);

	/* Free Transmit Descriptor ring */
	if (adapter->tx_desc_base) {
		em_dma_free(adapter, &adapter->txdma);
		adapter->tx_desc_base = NULL;
	}

	/* Free Receive Descriptor ring */
	if (adapter->rx_desc_base) {
		em_dma_free(adapter, &adapter->rxdma);
		adapter->rx_desc_base = NULL;
	}

	EM_LOCK_DESTROY(adapter);

	return (0);
}
823
824/*********************************************************************
825 *
826 *  Shutdown entry point
827 *
828 **********************************************************************/
829
830static int
831em_shutdown(device_t dev)
832{
833	return em_suspend(dev);
834}
835
836/*
837 * Suspend/resume device methods.
838 */
839static int
840em_suspend(device_t dev)
841{
842	struct adapter *adapter = device_get_softc(dev);
843
844	EM_LOCK(adapter);
845	em_stop(adapter);
846
847        em_release_manageability(adapter);
848        if (((adapter->hw.mac.type != e1000_82573) &&
849            (adapter->hw.mac.type != e1000_ich8lan) &&
850            (adapter->hw.mac.type != e1000_ich9lan)) ||
851            !e1000_check_mng_mode(&adapter->hw))
852                em_release_hw_control(adapter);
853        if (adapter->wol) {
854                E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
855                E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
856                em_enable_wakeup(dev);
857        }
858
859	EM_UNLOCK(adapter);
860
861	return bus_generic_suspend(dev);
862}
863
864static int
865em_resume(device_t dev)
866{
867	struct adapter *adapter = device_get_softc(dev);
868	struct ifnet *ifp = adapter->ifp;
869
870	EM_LOCK(adapter);
871	em_init_locked(adapter);
872
873        /* Get control from any management/hw control */
874	if (((adapter->hw.mac.type != e1000_82573) &&
875	    (adapter->hw.mac.type != e1000_ich8lan) &&
876	    (adapter->hw.mac.type != e1000_ich9lan)) ||
877	    !e1000_check_mng_mode(&adapter->hw))
878		em_get_hw_control(adapter);
879	em_init_manageability(adapter);
880
881	if ((ifp->if_flags & IFF_UP) &&
882	    (ifp->if_drv_flags & IFF_DRV_RUNNING))
883		em_start_locked(ifp);
884
885	EM_UNLOCK(adapter);
886
887	return bus_generic_resume(dev);
888}
889
890
891/*********************************************************************
892 *  Transmit entry point
893 *
894 *  em_start is called by the stack to initiate a transmit.
895 *  The driver will remain in this routine as long as there are
896 *  packets to transmit and transmit resources are available.
897 *  In case resources are not available stack is notified and
898 *  the packet is requeued.
899 **********************************************************************/
900
/*
 * Locked transmit start: drains the interface send queue, handing
 * each frame to the configured transmit routine (adapter->em_xmit)
 * until the queue is empty or TX resources run out.  The adapter
 * lock must be held by the caller.
 */
static void
em_start_locked(struct ifnet *ifp)
{
	struct adapter	*adapter = ifp->if_softc;
	struct mbuf	*m_head;

	EM_LOCK_ASSERT(adapter);

	/* Nothing to do unless running and not marked output-active. */
	if ((ifp->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
	    IFF_DRV_RUNNING)
		return;
	if (!adapter->link_active)
		return;

	while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {

		IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
		if (m_head == NULL)
			break;
		/*
		 *  Encapsulation can modify our pointer, and or make it
		 *  NULL on failure.  In that event, we can't requeue.
		 *
		 *  We now use a pointer to accommodate legacy and
		 *  advanced transmit functions.
		 */
		if (adapter->em_xmit(adapter, &m_head)) {
			if (m_head == NULL)
				break;
			/* Out of descriptors: requeue and stall the queue. */
			ifp->if_drv_flags |= IFF_DRV_OACTIVE;
			IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
			break;
		}

		/* Send a copy of the frame to the BPF listener */
		BPF_MTAP(ifp, m_head);

		/* Set timeout in case hardware has problems transmitting. */
		adapter->watchdog_timer = EM_TX_TIMEOUT;
	}
}
942
943static void
944em_start(struct ifnet *ifp)
945{
946	struct adapter *adapter = ifp->if_softc;
947
948	EM_LOCK(adapter);
949	if (ifp->if_drv_flags & IFF_DRV_RUNNING)
950		em_start_locked(ifp);
951	EM_UNLOCK(adapter);
952}
953
954/*********************************************************************
955 *  Ioctl entry point
956 *
957 *  em_ioctl is called when the user wants to configure the
958 *  interface.
959 *
960 *  return 0 on success, positive on failure
961 **********************************************************************/
962
/*
 * Interface ioctl entry point.  Dispatches on the ioctl command;
 * anything not handled here is passed through to ether_ioctl().
 * Returns 0 on success, an errno value on failure.
 */
static int
em_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
{
	struct adapter	*adapter = ifp->if_softc;
	struct ifreq *ifr = (struct ifreq *)data;
	struct ifaddr *ifa = (struct ifaddr *)data;
	int error = 0;

	/* Refuse new work while the device is being detached. */
	if (adapter->in_detach)
		return (error);

	switch (command) {
	case SIOCSIFADDR:
	case SIOCGIFADDR:
		if (ifa->ifa_addr->sa_family == AF_INET) {
			/*
			 * XXX
			 * Since resetting hardware takes a very long time
			 * and results in link renegotiation we only
			 * initialize the hardware when it is absolutely
			 * required.
			 */
			ifp->if_flags |= IFF_UP;
			if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) {
				EM_LOCK(adapter);
				em_init_locked(adapter);
				EM_UNLOCK(adapter);
			}
			arp_ifinit(ifp, ifa);
		} else
			error = ether_ioctl(ifp, command, data);
		break;
	case SIOCSIFMTU:
	    {
		int max_frame_size;
		uint16_t eeprom_data = 0;

		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)");

		EM_LOCK(adapter);
		/* Jumbo frame limits vary with the MAC type. */
		switch (adapter->hw.mac.type) {
		case e1000_82573:
			/*
			 * 82573 only supports jumbo frames
			 * if ASPM is disabled.
			 */
			e1000_read_nvm(&adapter->hw,
			    NVM_INIT_3GIO_3, 1, &eeprom_data);
			if (eeprom_data & NVM_WORD1A_ASPM_MASK) {
				max_frame_size = ETHER_MAX_LEN;
				break;
			}
			/* Allow Jumbo frames - fall thru */
		case e1000_82571:
		case e1000_82572:
		case e1000_ich9lan:
		case e1000_82575:
		case e1000_80003es2lan:	/* Limit Jumbo Frame size */
			max_frame_size = 9234;
			break;
		case e1000_ich8lan:
			/* ICH8 does not support jumbo frames */
			max_frame_size = ETHER_MAX_LEN;
			break;
		default:
			max_frame_size = MAX_JUMBO_FRAME_SIZE;
		}
		if (ifr->ifr_mtu > max_frame_size - ETHER_HDR_LEN -
		    ETHER_CRC_LEN) {
			EM_UNLOCK(adapter);
			error = EINVAL;
			break;
		}

		ifp->if_mtu = ifr->ifr_mtu;
		adapter->hw.mac.max_frame_size =
		ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
		/* Reinitialize so the new frame size takes effect. */
		em_init_locked(adapter);
		EM_UNLOCK(adapter);
		break;
	    }
	case SIOCSIFFLAGS:
		IOCTL_DEBUGOUT("ioctl rcv'd:\
		    SIOCSIFFLAGS (Set Interface Flags)");
		EM_LOCK(adapter);
		if (ifp->if_flags & IFF_UP) {
			if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) {
				/*
				 * Only promiscuity changed: apply it
				 * without a full reinitialization.
				 */
				if ((ifp->if_flags ^ adapter->if_flags) &
				    IFF_PROMISC) {
					em_disable_promisc(adapter);
					em_set_promisc(adapter);
				}
			} else
				em_init_locked(adapter);
		} else
			if (ifp->if_drv_flags & IFF_DRV_RUNNING)
				em_stop(adapter);
		adapter->if_flags = ifp->if_flags;
		EM_UNLOCK(adapter);
		break;
	case SIOCADDMULTI:
	case SIOCDELMULTI:
		IOCTL_DEBUGOUT("ioctl rcv'd: SIOC(ADD|DEL)MULTI");
		if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
			EM_LOCK(adapter);
			em_disable_intr(adapter);
			em_set_multi(adapter);
			/* 82542 rev 2 requires an RX unit reset here. */
			if (adapter->hw.mac.type == e1000_82542 &&
	    		    adapter->hw.revision_id == E1000_REVISION_2) {
				em_initialize_receive_unit(adapter);
			}
#ifdef DEVICE_POLLING
			if (!(ifp->if_capenable & IFCAP_POLLING))
#endif
				em_enable_intr(adapter);
			EM_UNLOCK(adapter);
		}
		break;
	case SIOCSIFMEDIA:
		/* Check SOL/IDER usage */
		EM_LOCK(adapter);
		if (e1000_check_reset_block(&adapter->hw)) {
			EM_UNLOCK(adapter);
			device_printf(adapter->dev, "Media change is"
			    " blocked due to SOL/IDER session.\n");
			break;
		}
		EM_UNLOCK(adapter);
		/* FALLTHROUGH */
	case SIOCGIFMEDIA:
		IOCTL_DEBUGOUT("ioctl rcv'd: \
		    SIOCxIFMEDIA (Get/Set Interface Media)");
		error = ifmedia_ioctl(ifp, ifr, &adapter->media, command);
		break;
	case SIOCSIFCAP:
	    {
		int mask, reinit;

		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFCAP (Set Capabilities)");
		reinit = 0;
		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
#ifdef DEVICE_POLLING
		if (mask & IFCAP_POLLING) {
			if (ifr->ifr_reqcap & IFCAP_POLLING) {
				error = ether_poll_register(em_poll, ifp);
				if (error)
					return (error);
				EM_LOCK(adapter);
				em_disable_intr(adapter);
				ifp->if_capenable |= IFCAP_POLLING;
				EM_UNLOCK(adapter);
			} else {
				error = ether_poll_deregister(ifp);
				/* Enable interrupt even in error case */
				EM_LOCK(adapter);
				em_enable_intr(adapter);
				ifp->if_capenable &= ~IFCAP_POLLING;
				EM_UNLOCK(adapter);
			}
		}
#endif
		if (mask & IFCAP_HWCSUM) {
			ifp->if_capenable ^= IFCAP_HWCSUM;
			reinit = 1;
		}
		if (mask & IFCAP_TSO4) {
			ifp->if_capenable ^= IFCAP_TSO4;
			reinit = 1;
		}
		if (mask & IFCAP_VLAN_HWTAGGING) {
			ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
			reinit = 1;
		}
		/* Capability changes require a full reinit to take effect. */
		if (reinit && (ifp->if_drv_flags & IFF_DRV_RUNNING))
			em_init(adapter);
		VLAN_CAPABILITIES(ifp);
		break;
	    }
	default:
		error = ether_ioctl(ifp, command, data);
		break;
	}

	return (error);
}
1147
1148/*********************************************************************
1149 *  Watchdog timer:
1150 *
1151 *  This routine is called from the local timer every second.
1152 *  As long as transmit descriptors are being cleaned the value
1153 *  is non-zero and we do nothing. Reaching 0 indicates a tx hang
1154 *  and we then reset the device.
1155 *
1156 **********************************************************************/
1157
static void
em_watchdog(struct adapter *adapter)
{

	EM_LOCK_ASSERT(adapter);

	/*
	** The timer is set to 5 every time start queues a packet.
	** Then txeof keeps resetting to 5 as long as it cleans at
	** least one descriptor.
	** Finally, anytime all descriptors are clean the timer is
	** set to 0.
	*/
	if (adapter->watchdog_timer == 0 || --adapter->watchdog_timer)
		return;

	/* If we are in this routine because of pause frames, then
	 * don't reset the hardware.
	 */
	if (E1000_READ_REG(&adapter->hw, E1000_STATUS) &
	    E1000_STATUS_TXOFF) {
		adapter->watchdog_timer = EM_TX_TIMEOUT;
		return;
	}

	/*
	 * NOTE: only the message below is conditional on the link
	 * being up; the interface is reinitialized either way.
	 */
	if (e1000_check_for_link(&adapter->hw) == 0)
		device_printf(adapter->dev, "watchdog timeout -- resetting\n");
	adapter->ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
	adapter->watchdog_events++;

	em_init_locked(adapter);
}
1190
1191/*********************************************************************
1192 *  Init entry point
1193 *
1194 *  This routine is used in two ways. It is used by the stack as
1195 *  init entry point in network interface structure. It is also used
1196 *  by the driver as a hw/sw initialization routine to get to a
1197 *  consistent state.
1198 *
1199 *  return 0 on success, positive on failure
1200 **********************************************************************/
1201
/*
 * Locked hardware/software (re)initialization.  Stops the adapter,
 * sizes the packet buffer, resets the MAC, and re-arms transmit,
 * receive, multicast and interrupt state.  The adapter lock must
 * be held by the caller.
 */
static void
em_init_locked(struct adapter *adapter)
{
	struct ifnet	*ifp = adapter->ifp;
	device_t	dev = adapter->dev;
	uint32_t	pba;

	INIT_DEBUGOUT("em_init: begin");

	EM_LOCK_ASSERT(adapter);

	em_stop(adapter);

	/*
	 * Packet Buffer Allocation (PBA)
	 * Writing PBA sets the receive portion of the buffer
	 * the remainder is used for the transmit buffer.
	 *
	 * Devices before the 82547 had a Packet Buffer of 64K.
	 *   Default allocation: PBA=48K for Rx, leaving 16K for Tx.
	 * After the 82547 the buffer was reduced to 40K.
	 *   Default allocation: PBA=30K for Rx, leaving 10K for Tx.
	 *   Note: default does not leave enough room for Jumbo Frame >10k.
	 */
	switch (adapter->hw.mac.type) {
	case e1000_82547:
	case e1000_82547_rev_2: /* 82547: Total Packet Buffer is 40K */
		if (adapter->hw.mac.max_frame_size > 8192)
			pba = E1000_PBA_22K; /* 22K for Rx, 18K for Tx */
		else
			pba = E1000_PBA_30K; /* 30K for Rx, 10K for Tx */
		/* 82547 tx FIFO workaround bookkeeping (see em_82547_*) */
		adapter->tx_fifo_head = 0;
		adapter->tx_head_addr = pba << EM_TX_HEAD_ADDR_SHIFT;
		adapter->tx_fifo_size =
		    (E1000_PBA_40K - pba) << EM_PBA_BYTES_SHIFT;
		break;
	/* Total Packet Buffer on these is 48K */
	case e1000_82571:
	case e1000_82572:
	case e1000_82575:
	case e1000_80003es2lan:
			pba = E1000_PBA_32K; /* 32K for Rx, 16K for Tx */
		break;
	case e1000_82573: /* 82573: Total Packet Buffer is 32K */
			pba = E1000_PBA_12K; /* 12K for Rx, 20K for Tx */
		break;
	case e1000_ich9lan:
#define E1000_PBA_10K	0x000A
		pba = E1000_PBA_10K;
		break;
	case e1000_ich8lan:
		pba = E1000_PBA_8K;
		break;
	default:
		/* Devices before 82547 had a Packet Buffer of 64K.   */
		if (adapter->hw.mac.max_frame_size > 8192)
			pba = E1000_PBA_40K; /* 40K for Rx, 24K for Tx */
		else
			pba = E1000_PBA_48K; /* 48K for Rx, 16K for Tx */
	}

	INIT_DEBUGOUT1("em_init: pba=%dK",pba);
	E1000_WRITE_REG(&adapter->hw, E1000_PBA, pba);

	/* Get the latest mac address, User can use a LAA */
        bcopy(IF_LLADDR(adapter->ifp), adapter->hw.mac.addr,
              ETHER_ADDR_LEN);

	/* Initialize the hardware */
	if (em_hardware_init(adapter)) {
		device_printf(dev, "Unable to initialize the hardware\n");
		return;
	}
	em_update_link_status(adapter);

	if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING)
		em_enable_vlans(adapter);

	/* Set hardware offload abilities */
	ifp->if_hwassist = 0;
	if (adapter->hw.mac.type >= e1000_82543) {
		if (ifp->if_capenable & IFCAP_TXCSUM)
			ifp->if_hwassist |= EM_CHECKSUM_FEATURES;
		if (ifp->if_capenable & IFCAP_TSO)
			ifp->if_hwassist |= EM_TCPSEG_FEATURES;
	}

	/* Configure for OS presence */
	em_init_manageability(adapter);

	/* Prepare transmit descriptors and buffers */
	if (em_setup_transmit_structures(adapter)) {
		device_printf(dev, "Could not setup transmit structures\n");
		em_stop(adapter);
		return;
	}
	em_initialize_transmit_unit(adapter);

	/* Setup Multicast table */
	em_set_multi(adapter);

	/* Prepare receive descriptors and buffers */
	if (em_setup_receive_structures(adapter)) {
		device_printf(dev, "Could not setup receive structures\n");
		em_stop(adapter);
		return;
	}
	em_initialize_receive_unit(adapter);

	/* Don't lose promiscuous settings */
	em_set_promisc(adapter);

	ifp->if_drv_flags |= IFF_DRV_RUNNING;
	ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;

	/* Start the per-second local timer and clear hw statistics. */
	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
	e1000_clear_hw_cntrs_base_generic(&adapter->hw);

#ifdef DEVICE_POLLING
	/*
	 * Only enable interrupts if we are not polling, make sure
	 * they are off otherwise.
	 */
	if (ifp->if_capenable & IFCAP_POLLING)
		em_disable_intr(adapter);
	else
#endif /* DEVICE_POLLING */
		em_enable_intr(adapter);

	/* Don't reset the phy next time init gets called */
	adapter->hw.phy.reset_disable = TRUE;
}
1334
/*
 * Externally visible init entry point: simply wraps
 * em_init_locked() with the adapter lock.
 */
static void
em_init(void *arg)
{
	struct adapter *adapter = (struct adapter *)arg;

	EM_LOCK(adapter);
	em_init_locked(adapter);
	EM_UNLOCK(adapter);
}
1344
1345
1346#ifdef DEVICE_POLLING
1347/*********************************************************************
1348 *
1349 *  Legacy polling routine
1350 *
1351 *********************************************************************/
static void
em_poll(struct ifnet *ifp, enum poll_cmd cmd, int count)
{
	struct adapter *adapter = ifp->if_softc;
	uint32_t reg_icr;

	EM_LOCK(adapter);
	/* The interface may have been stopped while polling was enabled. */
	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
		EM_UNLOCK(adapter);
		return;
	}

	if (cmd == POLL_AND_CHECK_STATUS) {
		/* Pick up link-state changes that interrupts would report. */
		reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
		if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
			callout_stop(&adapter->timer);
			adapter->hw.mac.get_link_status = 1;
			e1000_check_for_link(&adapter->hw);
			em_update_link_status(adapter);
			callout_reset(&adapter->timer, hz,
			    em_local_timer, adapter);
		}
	}
	/* Service RX (up to 'count' packets) and TX completions. */
	em_rxeof(adapter, count);
	em_txeof(adapter);

	/* Restart transmission if anything is queued. */
	if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
		em_start_locked(ifp);
	EM_UNLOCK(adapter);
}
1382
1383/*********************************************************************
1384 *
1385 *  Legacy Interrupt Service routine
1386 *
1387 *********************************************************************/
1388
static void
em_intr(void *arg)
{
	struct adapter	*adapter = arg;
	struct ifnet	*ifp;
	uint32_t	reg_icr;

	EM_LOCK(adapter);
	ifp = adapter->ifp;

	/* In polling mode all of this work is done by em_poll(). */
	if (ifp->if_capenable & IFCAP_POLLING) {
		EM_UNLOCK(adapter);
		return;
	}

	/* Loop until the (read-to-clear) ICR reports no more causes. */
	for (;;) {
		reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);

		/* On 82571+, INT_ASSERTED tells us the interrupt is ours. */
		if (adapter->hw.mac.type >= e1000_82571 &&
	    	    (reg_icr & E1000_ICR_INT_ASSERTED) == 0)
			break;
		else if (reg_icr == 0)
			break;

		/*
		 * XXX: some laptops trigger several spurious interrupts
		 * on em(4) when in the resume cycle. The ICR register
		 * reports all-ones value in this case. Processing such
		 * interrupts would lead to a freeze. I don't know why.
		 */
		if (reg_icr == 0xffffffff)
			break;

		if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
			em_rxeof(adapter, -1);
			em_txeof(adapter);
		}

		/* Link status change */
		if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
			callout_stop(&adapter->timer);
			adapter->hw.mac.get_link_status = 1;
			e1000_check_for_link(&adapter->hw);
			em_update_link_status(adapter);
			callout_reset(&adapter->timer, hz,
			    em_local_timer, adapter);
		}

		/* Count receiver overruns for statistics. */
		if (reg_icr & E1000_ICR_RXO)
			adapter->rx_overruns++;
	}

	if (ifp->if_drv_flags & IFF_DRV_RUNNING &&
	    !IFQ_DRV_IS_EMPTY(&ifp->if_snd))
		em_start_locked(ifp);
	EM_UNLOCK(adapter);
}
1446
1447#else /* if not DEVICE_POLLING, then fast interrupt routines only */
1448
1449static void
1450em_handle_link(void *context, int pending)
1451{
1452	struct adapter	*adapter = context;
1453	struct ifnet *ifp;
1454
1455	ifp = adapter->ifp;
1456
1457	EM_LOCK(adapter);
1458	if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) {
1459		EM_UNLOCK(adapter);
1460		return;
1461	}
1462
1463	callout_stop(&adapter->timer);
1464	adapter->hw.mac.get_link_status = 1;
1465	e1000_check_for_link(&adapter->hw);
1466	em_update_link_status(adapter);
1467	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
1468	EM_UNLOCK(adapter);
1469}
1470
/*
 * Taskqueue handler for RX/TX work queued by em_intr_fast().
 * Runs under Giant for the network stack; re-enables interrupts
 * when done.
 */
static void
em_handle_rxtx(void *context, int pending)
{
	struct adapter	*adapter = context;
	struct ifnet	*ifp;

	NET_LOCK_GIANT();
	ifp = adapter->ifp;

	/*
	 * TODO:
	 * It should be possible to run the tx clean loop without the lock.
	 */
	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
		/*
		 * A nonzero return means the RX process limit was hit,
		 * so requeue ourselves for the remaining work instead
		 * of looping here.
		 */
		if (em_rxeof(adapter, adapter->rx_process_limit) != 0)
			taskqueue_enqueue(adapter->tq, &adapter->rxtx_task);
		EM_LOCK(adapter);
		em_txeof(adapter);

		if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
			em_start_locked(ifp);
		EM_UNLOCK(adapter);
	}

	/* Interrupts were masked in em_intr_fast(); unmask them now. */
	em_enable_intr(adapter);
	NET_UNLOCK_GIANT();
}
1498
1499/*********************************************************************
1500 *
1501 *  Fast Interrupt Service routine
1502 *
1503 *********************************************************************/
1504static int
1505em_intr_fast(void *arg)
1506{
1507	struct adapter	*adapter = arg;
1508	struct ifnet	*ifp;
1509	uint32_t	reg_icr;
1510
1511	ifp = adapter->ifp;
1512
1513	reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1514
1515	/* Hot eject?  */
1516	if (reg_icr == 0xffffffff)
1517		return (FILTER_STRAY);
1518
1519	/* Definitely not our interrupt.  */
1520	if (reg_icr == 0x0)
1521		return (FILTER_STRAY);
1522
1523	/*
1524	 * Starting with the 82571 chip, bit 31 should be used to
1525	 * determine whether the interrupt belongs to us.
1526	 */
1527	if (adapter->hw.mac.type >= e1000_82571 &&
1528	    (reg_icr & E1000_ICR_INT_ASSERTED) == 0)
1529		return (FILTER_STRAY);
1530
1531	/*
1532	 * Mask interrupts until the taskqueue is finished running.  This is
1533	 * cheap, just assume that it is needed.  This also works around the
1534	 * MSI message reordering errata on certain systems.
1535	 */
1536	em_disable_intr(adapter);
1537	taskqueue_enqueue(adapter->tq, &adapter->rxtx_task);
1538
1539	/* Link status change */
1540	if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC))
1541		taskqueue_enqueue(taskqueue_fast, &adapter->link_task);
1542
1543	if (reg_icr & E1000_ICR_RXO)
1544		adapter->rx_overruns++;
1545	return (FILTER_HANDLED);
1546}
1547#endif /* ! DEVICE_POLLING */
1548
1549/*********************************************************************
1550 *
1551 *  Media Ioctl callback
1552 *
1553 *  This routine is called whenever the user queries the status of
1554 *  the interface using ifconfig.
1555 *
1556 **********************************************************************/
1557static void
1558em_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
1559{
1560	struct adapter *adapter = ifp->if_softc;
1561	u_char fiber_type = IFM_1000_SX;
1562
1563	INIT_DEBUGOUT("em_media_status: begin");
1564
1565	EM_LOCK(adapter);
1566	e1000_check_for_link(&adapter->hw);
1567	em_update_link_status(adapter);
1568
1569	ifmr->ifm_status = IFM_AVALID;
1570	ifmr->ifm_active = IFM_ETHER;
1571
1572	if (!adapter->link_active) {
1573		EM_UNLOCK(adapter);
1574		return;
1575	}
1576
1577	ifmr->ifm_status |= IFM_ACTIVE;
1578
1579	if ((adapter->hw.media_type == e1000_media_type_fiber) ||
1580	    (adapter->hw.media_type == e1000_media_type_internal_serdes)) {
1581		if (adapter->hw.mac.type == e1000_82545)
1582			fiber_type = IFM_1000_LX;
1583		ifmr->ifm_active |= fiber_type | IFM_FDX;
1584	} else {
1585		switch (adapter->link_speed) {
1586		case 10:
1587			ifmr->ifm_active |= IFM_10_T;
1588			break;
1589		case 100:
1590			ifmr->ifm_active |= IFM_100_TX;
1591			break;
1592		case 1000:
1593			ifmr->ifm_active |= IFM_1000_T;
1594			break;
1595		}
1596		if (adapter->link_duplex == FULL_DUPLEX)
1597			ifmr->ifm_active |= IFM_FDX;
1598		else
1599			ifmr->ifm_active |= IFM_HDX;
1600	}
1601	EM_UNLOCK(adapter);
1602}
1603
1604/*********************************************************************
1605 *
1606 *  Media Ioctl callback
1607 *
1608 *  This routine is called when the user changes speed/duplex using
 *  media/mediaopt option with ifconfig.
1610 *
1611 **********************************************************************/
static int
em_media_change(struct ifnet *ifp)
{
	struct adapter *adapter = ifp->if_softc;
	struct ifmedia  *ifm = &adapter->media;

	INIT_DEBUGOUT("em_media_change: begin");

	if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
		return (EINVAL);

	EM_LOCK(adapter);
	switch (IFM_SUBTYPE(ifm->ifm_media)) {
	case IFM_AUTO:
		adapter->hw.mac.autoneg = DO_AUTO_NEG;
		adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
		break;
	case IFM_1000_LX:
	case IFM_1000_SX:
	case IFM_1000_T:
		/* Gigabit always autonegotiates; only advertise 1000/FD. */
		adapter->hw.mac.autoneg = DO_AUTO_NEG;
		adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
		break;
	case IFM_100_TX:
		/* Force 100 Mb/s at the requested duplex. */
		adapter->hw.mac.autoneg = FALSE;
		adapter->hw.phy.autoneg_advertised = 0;
		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
			adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_FULL;
		else
			adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_HALF;
		break;
	case IFM_10_T:
		/* Force 10 Mb/s at the requested duplex. */
		adapter->hw.mac.autoneg = FALSE;
		adapter->hw.phy.autoneg_advertised = 0;
		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
			adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_FULL;
		else
			adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_HALF;
		break;
	default:
		/* NOTE(review): unsupported media is only logged, not
		 * rejected; the reinit below still runs. */
		device_printf(adapter->dev, "Unsupported media type\n");
	}

	/* As the speed/duplex settings may have changed we need to
	 * reset the PHY.
	 */
	adapter->hw.phy.reset_disable = FALSE;

	em_init_locked(adapter);
	EM_UNLOCK(adapter);

	return (0);
}
1665
1666/*********************************************************************
1667 *
1668 *  This routine maps the mbufs to tx descriptors.
1669 *
1670 *  return 0 on success, positive on failure
1671 **********************************************************************/
1672
1673static int
1674em_encap(struct adapter *adapter, struct mbuf **m_headp)
1675{
1676	struct ifnet		*ifp = adapter->ifp;
1677	bus_dma_segment_t	segs[EM_MAX_SCATTER];
1678	bus_dmamap_t		map;
1679	struct em_buffer	*tx_buffer, *tx_buffer_mapped;
1680	struct e1000_tx_desc	*ctxd = NULL;
1681	struct mbuf		*m_head;
1682	uint32_t		txd_upper, txd_lower, txd_used, txd_saved;
1683	int			nsegs, i, j, first, last = 0;
1684	int			error, do_tso, tso_desc = 0;
1685
1686	m_head = *m_headp;
1687	txd_upper = txd_lower = txd_used = txd_saved = 0;
1688
1689	do_tso = ((m_head->m_pkthdr.csum_flags & CSUM_TSO) != 0);
1690
1691        /*
1692         * Force a cleanup if number of TX descriptors
1693         * available hits the threshold
1694         */
1695	if (adapter->num_tx_desc_avail <= EM_TX_CLEANUP_THRESHOLD) {
1696		em_txeof(adapter);
1697		/* Now do we at least have a minimal? */
1698		if (adapter->num_tx_desc_avail <= EM_TX_OP_THRESHOLD) {
1699			adapter->no_tx_desc_avail1++;
1700			return (ENOBUFS);
1701		}
1702	}
1703
1704
1705	/*
1706	 * TSO workaround:
1707	 *  If an mbuf is only header we need
1708	 *     to pull 4 bytes of data into it.
1709	 */
1710	if (do_tso && (m_head->m_len <= M_TSO_LEN)) {
1711		m_head = m_pullup(m_head, M_TSO_LEN + 4);
1712		*m_headp = m_head;
1713		if (m_head == NULL)
1714			return (ENOBUFS);
1715	}
1716
1717	/*
1718	 * Map the packet for DMA
1719	 *
1720	 * Capture the first descriptor index,
1721	 * this descriptor will have the index
1722	 * of the EOP which is the only one that
1723	 * now gets a DONE bit writeback.
1724	 */
1725	first = adapter->next_avail_tx_desc;
1726	tx_buffer = &adapter->tx_buffer_area[first];
1727	tx_buffer_mapped = tx_buffer;
1728	map = tx_buffer->map;
1729
1730	error = bus_dmamap_load_mbuf_sg(adapter->txtag, map,
1731	    *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
1732
1733	/*
1734	 * There are two types of errors we can (try) to handle:
1735	 * - EFBIG means the mbuf chain was too long and bus_dma ran
1736	 *   out of segments.  Defragment the mbuf chain and try again.
1737	 * - ENOMEM means bus_dma could not obtain enough bounce buffers
1738	 *   at this point in time.  Defer sending and try again later.
1739	 * All other errors, in particular EINVAL, are fatal and prevent the
1740	 * mbuf chain from ever going through.  Drop it and report error.
1741	 */
1742	if (error == EFBIG) {
1743		struct mbuf *m;
1744
1745		m = m_defrag(*m_headp, M_DONTWAIT);
1746		if (m == NULL) {
1747			adapter->mbuf_alloc_failed++;
1748			m_freem(*m_headp);
1749			*m_headp = NULL;
1750			return (ENOBUFS);
1751		}
1752		*m_headp = m;
1753
1754		/* Try it again */
1755		error = bus_dmamap_load_mbuf_sg(adapter->txtag, map,
1756		    *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
1757
1758		if (error == ENOMEM) {
1759			adapter->no_tx_dma_setup++;
1760			return (error);
1761		} else if (error != 0) {
1762			adapter->no_tx_dma_setup++;
1763			m_freem(*m_headp);
1764			*m_headp = NULL;
1765			return (error);
1766		}
1767	} else if (error == ENOMEM) {
1768		adapter->no_tx_dma_setup++;
1769		return (error);
1770	} else if (error != 0) {
1771		adapter->no_tx_dma_setup++;
1772		m_freem(*m_headp);
1773		*m_headp = NULL;
1774		return (error);
1775	}
1776
1777	/*
1778	 * TSO Hardware workaround, if this packet is not
1779	 * TSO, and is only a single descriptor long, and
1780	 * it follows a TSO burst, then we need to add a
1781	 * sentinel descriptor to prevent premature writeback.
1782	 */
1783	if ((do_tso == 0) && (adapter->tx_tso == TRUE)) {
1784		if (nsegs == 1)
1785			tso_desc = TRUE;
1786		adapter->tx_tso = FALSE;
1787	}
1788
1789        if (nsegs > (adapter->num_tx_desc_avail - 2)) {
1790                adapter->no_tx_desc_avail2++;
1791		bus_dmamap_unload(adapter->txtag, map);
1792		return (ENOBUFS);
1793        }
1794	m_head = *m_headp;
1795
1796	/* Do hardware assists */
1797	if (ifp->if_hwassist > 0) {
1798        	if (do_tso && em_tso_setup(adapter, m_head,
1799		    &txd_upper, &txd_lower)) {
1800			/* we need to make a final sentinel transmit desc */
1801			tso_desc = TRUE;
1802		} else
1803			em_transmit_checksum_setup(adapter,  m_head,
1804			    &txd_upper, &txd_lower);
1805	}
1806
1807	i = adapter->next_avail_tx_desc;
1808	if (adapter->pcix_82544)
1809		txd_saved = i;
1810
1811	/* Set up our transmit descriptors */
1812	for (j = 0; j < nsegs; j++) {
1813		bus_size_t seg_len;
1814		bus_addr_t seg_addr;
1815		/* If adapter is 82544 and on PCIX bus */
1816		if(adapter->pcix_82544) {
1817			DESC_ARRAY	desc_array;
1818			uint32_t	array_elements, counter;
1819			/*
1820			 * Check the Address and Length combination and
1821			 * split the data accordingly
1822			 */
1823			array_elements = em_fill_descriptors(segs[j].ds_addr,
1824			    segs[j].ds_len, &desc_array);
1825			for (counter = 0; counter < array_elements; counter++) {
1826				if (txd_used == adapter->num_tx_desc_avail) {
1827					adapter->next_avail_tx_desc = txd_saved;
1828					adapter->no_tx_desc_avail2++;
1829					bus_dmamap_unload(adapter->txtag, map);
1830					return (ENOBUFS);
1831				}
1832				tx_buffer = &adapter->tx_buffer_area[i];
1833				ctxd = &adapter->tx_desc_base[i];
1834				ctxd->buffer_addr = htole64(
1835				    desc_array.descriptor[counter].address);
1836				ctxd->lower.data = htole32(
1837				    (adapter->txd_cmd | txd_lower | (uint16_t)
1838				    desc_array.descriptor[counter].length));
1839				ctxd->upper.data =
1840				    htole32((txd_upper));
1841				last = i;
1842				if (++i == adapter->num_tx_desc)
1843                                         i = 0;
1844				tx_buffer->m_head = NULL;
1845				tx_buffer->next_eop = -1;
1846				txd_used++;
1847                        }
1848		} else {
1849			tx_buffer = &adapter->tx_buffer_area[i];
1850			ctxd = &adapter->tx_desc_base[i];
1851			seg_addr = segs[j].ds_addr;
1852			seg_len  = segs[j].ds_len;
1853			/*
1854			** TSO Workaround:
1855			** If this is the last descriptor, we want to
1856			** split it so we have a small final sentinel
1857			*/
1858			if (tso_desc && (j == (nsegs -1)) && (seg_len > 8)) {
1859				seg_len -= 4;
1860				ctxd->buffer_addr = htole64(seg_addr);
1861				ctxd->lower.data = htole32(
1862				adapter->txd_cmd | txd_lower | seg_len);
1863				ctxd->upper.data =
1864				    htole32(txd_upper);
1865				if (++i == adapter->num_tx_desc)
1866					i = 0;
1867				/* Now make the sentinel */
1868				++txd_used; /* using an extra txd */
1869				ctxd = &adapter->tx_desc_base[i];
1870				tx_buffer = &adapter->tx_buffer_area[i];
1871				ctxd->buffer_addr =
1872				    htole64(seg_addr + seg_len);
1873				ctxd->lower.data = htole32(
1874				adapter->txd_cmd | txd_lower | 4);
1875				ctxd->upper.data =
1876				    htole32(txd_upper);
1877				last = i;
1878				if (++i == adapter->num_tx_desc)
1879					i = 0;
1880			} else {
1881				ctxd->buffer_addr = seg_addr;
1882				ctxd->lower.data = htole32(
1883				adapter->txd_cmd | txd_lower | seg_len);
1884				ctxd->upper.data =
1885				    htole32(txd_upper);
1886				last = i;
1887				if (++i == adapter->num_tx_desc)
1888					i = 0;
1889			}
1890			tx_buffer->m_head = NULL;
1891			tx_buffer->next_eop = -1;
1892		}
1893	}
1894
1895	adapter->next_avail_tx_desc = i;
1896	if (adapter->pcix_82544)
1897		adapter->num_tx_desc_avail -= txd_used;
1898	else {
1899		adapter->num_tx_desc_avail -= nsegs;
1900		if (tso_desc) /* TSO used an extra for sentinel */
1901			adapter->num_tx_desc_avail -= txd_used;
1902	}
1903
1904	if (m_head->m_flags & M_VLANTAG) {
1905		/* Set the vlan id. */
1906		ctxd->upper.fields.special =
1907		    htole16(m_head->m_pkthdr.ether_vtag);
1908                /* Tell hardware to add tag */
1909                ctxd->lower.data |= htole32(E1000_TXD_CMD_VLE);
1910        }
1911
1912        tx_buffer->m_head = m_head;
1913	tx_buffer_mapped->map = tx_buffer->map;
1914	tx_buffer->map = map;
1915        bus_dmamap_sync(adapter->txtag, map, BUS_DMASYNC_PREWRITE);
1916
1917        /*
1918         * Last Descriptor of Packet
1919	 * needs End Of Packet (EOP)
1920	 * and Report Status (RS)
1921         */
1922        ctxd->lower.data |=
1923	    htole32(E1000_TXD_CMD_EOP | E1000_TXD_CMD_RS);
1924	/*
1925	 * Keep track in the first buffer which
1926	 * descriptor will be written back
1927	 */
1928	tx_buffer = &adapter->tx_buffer_area[first];
1929	tx_buffer->next_eop = last;
1930
1931	/*
1932	 * Advance the Transmit Descriptor Tail (TDT), this tells the E1000
1933	 * that this frame is available to transmit.
1934	 */
1935	bus_dmamap_sync(adapter->txdma.dma_tag, adapter->txdma.dma_map,
1936	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1937	if (adapter->hw.mac.type == e1000_82547 &&
1938	    adapter->link_duplex == HALF_DUPLEX)
1939		em_82547_move_tail(adapter);
1940	else {
1941		E1000_WRITE_REG(&adapter->hw, E1000_TDT, i);
1942		if (adapter->hw.mac.type == e1000_82547)
1943			em_82547_update_fifo_head(adapter,
1944			    m_head->m_pkthdr.len);
1945	}
1946
1947	return (0);
1948}
1949
1950/*********************************************************************
1951 *
 *  This routine maps the mbufs to Advanced TX descriptors
 *  used by the 82575 adapter. It also needs no workarounds.
1954 *
1955 **********************************************************************/
static int
em_adv_encap(struct adapter *adapter, struct mbuf **m_headp)
{
	bus_dma_segment_t	segs[EM_MAX_SCATTER];
	bus_dmamap_t		map;
	struct em_buffer	*tx_buffer, *tx_buffer_mapped;
	union e1000_adv_tx_desc	*txd = NULL;
	struct mbuf		*m_head;
	u32			olinfo_status = 0, cmd_type_len = 0;
	u32			do_tso, paylen = 0;
	int			nsegs, i, j, error, first, last = 0;

	m_head = *m_headp;

	/* Non-zero when the stack requested TCP segmentation offload */
	do_tso = ((m_head->m_pkthdr.csum_flags & CSUM_TSO) != 0);

	/* Set basic descriptor constants */
	cmd_type_len |= E1000_ADVTXD_DTYP_DATA;
	cmd_type_len |= E1000_ADVTXD_DCMD_IFCS | E1000_ADVTXD_DCMD_DEXT;

        /*
         * Force a cleanup if number of TX descriptors
         * available hits the threshold
         */
	if (adapter->num_tx_desc_avail <= EM_TX_CLEANUP_THRESHOLD) {
		em_txeof(adapter);
		/* Now do we at least have a minimal? */
		if (adapter->num_tx_desc_avail <= EM_TX_OP_THRESHOLD) {
			adapter->no_tx_desc_avail1++;
			return (ENOBUFS);
		}
	}

	/*
         * Map the packet for DMA.
	 *
	 * Capture the first descriptor index,
	 * this descriptor will have the index
	 * of the EOP which is the only one that
	 * now gets a DONE bit writeback.
	 */
	first = adapter->next_avail_tx_desc;
	tx_buffer = &adapter->tx_buffer_area[first];
	tx_buffer_mapped = tx_buffer;
	map = tx_buffer->map;

	error = bus_dmamap_load_mbuf_sg(adapter->txtag, map,
	    *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);

	/* Too many scatter segments: defragment the chain and retry once */
	if (error == EFBIG) {
		struct mbuf *m;

		m = m_defrag(*m_headp, M_DONTWAIT);
		if (m == NULL) {
			adapter->mbuf_alloc_failed++;
			m_freem(*m_headp);
			*m_headp = NULL;
			return (ENOBUFS);
		}
		*m_headp = m;

		/* Try it again */
		error = bus_dmamap_load_mbuf_sg(adapter->txtag, map,
		    *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);

		/* ENOMEM is transient: keep the mbuf so the caller can retry */
		if (error == ENOMEM) {
			adapter->no_tx_dma_setup++;
			return (error);
		} else if (error != 0) {
			adapter->no_tx_dma_setup++;
			m_freem(*m_headp);
			*m_headp = NULL;
			return (error);
		}
	} else if (error == ENOMEM) {
		/* Transient failure: mbuf is preserved for a later retry */
		adapter->no_tx_dma_setup++;
		return (error);
	} else if (error != 0) {
		adapter->no_tx_dma_setup++;
		m_freem(*m_headp);
		*m_headp = NULL;
		return (error);
	}

	/* Check again to be sure we have enough descriptors */
        if (nsegs > (adapter->num_tx_desc_avail - 2)) {
                adapter->no_tx_desc_avail2++;
		bus_dmamap_unload(adapter->txtag, map);
		return (ENOBUFS);
        }
	/* m_defrag() may have replaced the chain; reload the head pointer */
	m_head = *m_headp;

        /*
         * Set up the context descriptor:
         * used when any hardware offload is done.
	 * This includes CSUM, VLAN, and TSO. It
	 * will use the first descriptor.
         */
	if (m_head->m_pkthdr.csum_flags) {
		/* All offloads set this */
		olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
		/* First try TSO */
		if ((do_tso) && em_tso_adv_setup(adapter, m_head, &paylen)) {
			cmd_type_len |= E1000_ADVTXD_DCMD_TSE;
			olinfo_status |= E1000_TXD_POPTS_IXSM << 8;
			olinfo_status |= paylen << E1000_ADVTXD_PAYLEN_SHIFT;
		} else	/* Just checksum offload */
			em_tx_adv_ctx_setup(adapter, m_head);
	}

	/* Set up our transmit descriptors */
	i = adapter->next_avail_tx_desc;
	for (j = 0; j < nsegs; j++) {
		bus_size_t seg_len;
		bus_addr_t seg_addr;

		tx_buffer = &adapter->tx_buffer_area[i];
		txd = (union e1000_adv_tx_desc *)&adapter->tx_desc_base[i];
		seg_addr = segs[j].ds_addr;
		seg_len  = segs[j].ds_len;

		txd->read.buffer_addr = htole64(seg_addr);
		txd->read.cmd_type_len = htole32(
		    adapter->txd_cmd | cmd_type_len | seg_len);
		txd->read.olinfo_status = htole32(olinfo_status);
		/* Remember the final descriptor: it gets EOP|RS below */
		last = i;
		if (++i == adapter->num_tx_desc)
			i = 0;
		tx_buffer->m_head = NULL;
		tx_buffer->next_eop = -1;
	}

	adapter->next_avail_tx_desc = i;
	adapter->num_tx_desc_avail -= nsegs;

	/*
	 * The mbuf rides with the last buffer; the dmamap taken from the
	 * first buffer follows it so em_txeof() can unload it on completion.
	 */
        tx_buffer->m_head = m_head;
	tx_buffer_mapped->map = tx_buffer->map;
	tx_buffer->map = map;
        bus_dmamap_sync(adapter->txtag, map, BUS_DMASYNC_PREWRITE);

        /*
         * Last Descriptor of Packet
	 * needs End Of Packet (EOP)
	 * and Report Status (RS)
         */
        txd->read.cmd_type_len |=
	    htole32(E1000_TXD_CMD_EOP | E1000_TXD_CMD_RS);
	/*
	 * Keep track in the first buffer which
	 * descriptor will be written back
	 */
	tx_buffer = &adapter->tx_buffer_area[first];
	tx_buffer->next_eop = last;

	/*
	 * Advance the Transmit Descriptor Tail (TDT), this tells the E1000
	 * that this frame is available to transmit.
	 */
	bus_dmamap_sync(adapter->txdma.dma_tag, adapter->txdma.dma_map,
	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
	E1000_WRITE_REG(&adapter->hw, E1000_TDT, i);

	return (0);

}
2121
2122/*********************************************************************
2123 *
2124 * 82547 workaround to avoid controller hang in half-duplex environment.
2125 * The workaround is to avoid queuing a large packet that would span
2126 * the internal Tx FIFO ring boundary. We need to reset the FIFO pointers
2127 * in this case. We do that only when FIFO is quiescent.
2128 *
2129 **********************************************************************/
2130static void
2131em_82547_move_tail(void *arg)
2132{
2133	struct adapter *adapter = arg;
2134	uint16_t hw_tdt;
2135	uint16_t sw_tdt;
2136	struct e1000_tx_desc *tx_desc;
2137	uint16_t length = 0;
2138	boolean_t eop = 0;
2139
2140	EM_LOCK_ASSERT(adapter);
2141
2142	hw_tdt = E1000_READ_REG(&adapter->hw, E1000_TDT);
2143	sw_tdt = adapter->next_avail_tx_desc;
2144
2145	while (hw_tdt != sw_tdt) {
2146		tx_desc = &adapter->tx_desc_base[hw_tdt];
2147		length += tx_desc->lower.flags.length;
2148		eop = tx_desc->lower.data & E1000_TXD_CMD_EOP;
2149		if (++hw_tdt == adapter->num_tx_desc)
2150			hw_tdt = 0;
2151
2152		if (eop) {
2153			if (em_82547_fifo_workaround(adapter, length)) {
2154				adapter->tx_fifo_wrk_cnt++;
2155				callout_reset(&adapter->tx_fifo_timer, 1,
2156					em_82547_move_tail, adapter);
2157				break;
2158			}
2159			E1000_WRITE_REG(&adapter->hw, E1000_TDT, hw_tdt);
2160			em_82547_update_fifo_head(adapter, length);
2161			length = 0;
2162		}
2163	}
2164}
2165
2166static int
2167em_82547_fifo_workaround(struct adapter *adapter, int len)
2168{
2169	int fifo_space, fifo_pkt_len;
2170
2171	fifo_pkt_len = roundup2(len + EM_FIFO_HDR, EM_FIFO_HDR);
2172
2173	if (adapter->link_duplex == HALF_DUPLEX) {
2174		fifo_space = adapter->tx_fifo_size - adapter->tx_fifo_head;
2175
2176		if (fifo_pkt_len >= (EM_82547_PKT_THRESH + fifo_space)) {
2177			if (em_82547_tx_fifo_reset(adapter))
2178				return (0);
2179			else
2180				return (1);
2181		}
2182	}
2183
2184	return (0);
2185}
2186
2187static void
2188em_82547_update_fifo_head(struct adapter *adapter, int len)
2189{
2190	int fifo_pkt_len = roundup2(len + EM_FIFO_HDR, EM_FIFO_HDR);
2191
2192	/* tx_fifo_head is always 16 byte aligned */
2193	adapter->tx_fifo_head += fifo_pkt_len;
2194	if (adapter->tx_fifo_head >= adapter->tx_fifo_size) {
2195		adapter->tx_fifo_head -= adapter->tx_fifo_size;
2196	}
2197}
2198
2199
2200static int
2201em_82547_tx_fifo_reset(struct adapter *adapter)
2202{
2203	uint32_t tctl;
2204
2205	if ((E1000_READ_REG(&adapter->hw, E1000_TDT) ==
2206	    E1000_READ_REG(&adapter->hw, E1000_TDH)) &&
2207	    (E1000_READ_REG(&adapter->hw, E1000_TDFT) ==
2208	    E1000_READ_REG(&adapter->hw, E1000_TDFH)) &&
2209	    (E1000_READ_REG(&adapter->hw, E1000_TDFTS) ==
2210	    E1000_READ_REG(&adapter->hw, E1000_TDFHS)) &&
2211	    (E1000_READ_REG(&adapter->hw, E1000_TDFPC) == 0)) {
2212		/* Disable TX unit */
2213		tctl = E1000_READ_REG(&adapter->hw, E1000_TCTL);
2214		E1000_WRITE_REG(&adapter->hw, E1000_TCTL,
2215		    tctl & ~E1000_TCTL_EN);
2216
2217		/* Reset FIFO pointers */
2218		E1000_WRITE_REG(&adapter->hw, E1000_TDFT,
2219		    adapter->tx_head_addr);
2220		E1000_WRITE_REG(&adapter->hw, E1000_TDFH,
2221		    adapter->tx_head_addr);
2222		E1000_WRITE_REG(&adapter->hw, E1000_TDFTS,
2223		    adapter->tx_head_addr);
2224		E1000_WRITE_REG(&adapter->hw, E1000_TDFHS,
2225		    adapter->tx_head_addr);
2226
2227		/* Re-enable TX unit */
2228		E1000_WRITE_REG(&adapter->hw, E1000_TCTL, tctl);
2229		E1000_WRITE_FLUSH(&adapter->hw);
2230
2231		adapter->tx_fifo_head = 0;
2232		adapter->tx_fifo_reset_cnt++;
2233
2234		return (TRUE);
2235	}
2236	else {
2237		return (FALSE);
2238	}
2239}
2240
2241static void
2242em_set_promisc(struct adapter *adapter)
2243{
2244	struct ifnet	*ifp = adapter->ifp;
2245	uint32_t	reg_rctl;
2246
2247	reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2248
2249	if (ifp->if_flags & IFF_PROMISC) {
2250		reg_rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
2251		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2252	} else if (ifp->if_flags & IFF_ALLMULTI) {
2253		reg_rctl |= E1000_RCTL_MPE;
2254		reg_rctl &= ~E1000_RCTL_UPE;
2255		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2256	}
2257}
2258
2259static void
2260em_disable_promisc(struct adapter *adapter)
2261{
2262	uint32_t	reg_rctl;
2263
2264	reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2265
2266	reg_rctl &=  (~E1000_RCTL_UPE);
2267	reg_rctl &=  (~E1000_RCTL_MPE);
2268	E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2269}
2270
2271
2272/*********************************************************************
2273 *  Multicast Update
2274 *
2275 *  This routine is called whenever multicast address list is updated.
2276 *
2277 **********************************************************************/
2278
2279static void
2280em_set_multi(struct adapter *adapter)
2281{
2282	struct ifnet	*ifp = adapter->ifp;
2283	struct ifmultiaddr *ifma;
2284	uint32_t reg_rctl = 0;
2285	uint8_t  mta[512]; /* Largest MTS is 4096 bits */
2286	int mcnt = 0;
2287
2288	IOCTL_DEBUGOUT("em_set_multi: begin");
2289
2290	if (adapter->hw.mac.type == e1000_82542 &&
2291	    adapter->hw.revision_id == E1000_REVISION_2) {
2292		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2293		if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
2294			e1000_pci_clear_mwi(&adapter->hw);
2295		reg_rctl |= E1000_RCTL_RST;
2296		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2297		msec_delay(5);
2298	}
2299
2300	IF_ADDR_LOCK(ifp);
2301	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
2302		if (ifma->ifma_addr->sa_family != AF_LINK)
2303			continue;
2304
2305		if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
2306			break;
2307
2308		bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
2309		    &mta[mcnt * ETH_ADDR_LEN], ETH_ADDR_LEN);
2310		mcnt++;
2311	}
2312	IF_ADDR_UNLOCK(ifp);
2313
2314	if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) {
2315		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2316		reg_rctl |= E1000_RCTL_MPE;
2317		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2318	} else
2319		e1000_mc_addr_list_update(&adapter->hw, mta,
2320		    mcnt, 1, adapter->hw.mac.rar_entry_count);
2321
2322	if (adapter->hw.mac.type == e1000_82542 &&
2323	    adapter->hw.revision_id == E1000_REVISION_2) {
2324		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2325		reg_rctl &= ~E1000_RCTL_RST;
2326		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2327		msec_delay(5);
2328		if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
2329			e1000_pci_set_mwi(&adapter->hw);
2330	}
2331}
2332
2333
2334/*********************************************************************
2335 *  Timer routine
2336 *
2337 *  This routine checks for link status and updates statistics.
2338 *
2339 **********************************************************************/
2340
2341static void
2342em_local_timer(void *arg)
2343{
2344	struct adapter	*adapter = arg;
2345	struct ifnet	*ifp = adapter->ifp;
2346
2347	EM_LOCK_ASSERT(adapter);
2348
2349	e1000_check_for_link(&adapter->hw);
2350	em_update_link_status(adapter);
2351	em_update_stats_counters(adapter);
2352	if (em_display_debug_stats && ifp->if_drv_flags & IFF_DRV_RUNNING)
2353		em_print_hw_stats(adapter);
2354	em_smartspeed(adapter);
2355	/*
2356	 * Each second we check the watchdog to
2357	 * protect against hardware hangs.
2358	 */
2359	em_watchdog(adapter);
2360
2361	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
2362
2363}
2364
2365static void
2366em_update_link_status(struct adapter *adapter)
2367{
2368	struct ifnet *ifp = adapter->ifp;
2369	device_t dev = adapter->dev;
2370
2371	if (E1000_READ_REG(&adapter->hw, E1000_STATUS) &
2372	    E1000_STATUS_LU) {
2373		if (adapter->link_active == 0) {
2374			e1000_get_speed_and_duplex(&adapter->hw,
2375			    &adapter->link_speed, &adapter->link_duplex);
2376			/* Check if we must disable SPEED_MODE bit on PCI-E */
2377			if ((adapter->link_speed != SPEED_1000) &&
2378			    ((adapter->hw.mac.type == e1000_82571) ||
2379			    (adapter->hw.mac.type == e1000_82572))) {
2380				int tarc0;
2381
2382				tarc0 = E1000_READ_REG(&adapter->hw,
2383				    E1000_TARC0);
2384				tarc0 &= ~SPEED_MODE_BIT;
2385				E1000_WRITE_REG(&adapter->hw,
2386				    E1000_TARC0, tarc0);
2387			}
2388			if (bootverbose)
2389				device_printf(dev, "Link is up %d Mbps %s\n",
2390				    adapter->link_speed,
2391				    ((adapter->link_duplex == FULL_DUPLEX) ?
2392				    "Full Duplex" : "Half Duplex"));
2393			adapter->link_active = 1;
2394			adapter->smartspeed = 0;
2395			ifp->if_baudrate = adapter->link_speed * 1000000;
2396			if_link_state_change(ifp, LINK_STATE_UP);
2397		}
2398	} else {
2399		if (adapter->link_active == 1) {
2400			ifp->if_baudrate = adapter->link_speed = 0;
2401			adapter->link_duplex = 0;
2402			if (bootverbose)
2403				device_printf(dev, "Link is Down\n");
2404			adapter->link_active = 0;
2405			if_link_state_change(ifp, LINK_STATE_DOWN);
2406		}
2407	}
2408}
2409
2410/*********************************************************************
2411 *
2412 *  This routine disables all traffic on the adapter by issuing a
2413 *  global reset on the MAC and deallocates TX/RX buffers.
2414 *
2415 **********************************************************************/
2416
2417static void
2418em_stop(void *arg)
2419{
2420	struct adapter	*adapter = arg;
2421	struct ifnet	*ifp = adapter->ifp;
2422
2423	EM_LOCK_ASSERT(adapter);
2424
2425	INIT_DEBUGOUT("em_stop: begin");
2426
2427	em_disable_intr(adapter);
2428	callout_stop(&adapter->timer);
2429	callout_stop(&adapter->tx_fifo_timer);
2430	em_free_transmit_structures(adapter);
2431	em_free_receive_structures(adapter);
2432
2433	/* Tell the stack that the interface is no longer active */
2434	ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
2435
2436	e1000_reset_hw(&adapter->hw);
2437	if (adapter->hw.mac.type >= e1000_82544)
2438		E1000_WRITE_REG(&adapter->hw, E1000_WUC, 0);
2439}
2440
2441
2442/*********************************************************************
2443 *
2444 *  Determine hardware revision.
2445 *
2446 **********************************************************************/
2447static void
2448em_identify_hardware(struct adapter *adapter)
2449{
2450	device_t dev = adapter->dev;
2451
2452	/* Make sure our PCI config space has the necessary stuff set */
2453	adapter->hw.bus.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2);
2454	if ((adapter->hw.bus.pci_cmd_word & PCIM_CMD_BUSMASTEREN) == 0 &&
2455	    (adapter->hw.bus.pci_cmd_word & PCIM_CMD_MEMEN)) {
2456		device_printf(dev, "Memory Access and/or Bus Master bits "
2457		    "were not set!\n");
2458		adapter->hw.bus.pci_cmd_word |=
2459		(PCIM_CMD_BUSMASTEREN | PCIM_CMD_MEMEN);
2460		pci_write_config(dev, PCIR_COMMAND,
2461		    adapter->hw.bus.pci_cmd_word, 2);
2462	}
2463
2464	/* Save off the information about this board */
2465	adapter->hw.vendor_id = pci_get_vendor(dev);
2466	adapter->hw.device_id = pci_get_device(dev);
2467	adapter->hw.revision_id = pci_read_config(dev, PCIR_REVID, 1);
2468	adapter->hw.subsystem_vendor_id =
2469	    pci_read_config(dev, PCIR_SUBVEND_0, 2);
2470	adapter->hw.subsystem_device_id = pci_read_config(dev, PCIR_SUBDEV_0, 2);
2471
2472	/* Do Shared Code Init and Setup */
2473	if (e1000_set_mac_type(&adapter->hw)) {
2474		device_printf(dev, "Setup init failure\n");
2475		return;
2476	}
2477}
2478
static int
em_allocate_pci_resources(struct adapter *adapter)
{
	device_t	dev = adapter->dev;
	int		val, rid;

	/* Map the register space in BAR 0 */
	rid = PCIR_BAR(0);
	adapter->res_memory = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
	    &rid, RF_ACTIVE);
	if (adapter->res_memory == NULL) {
		device_printf(dev, "Unable to allocate bus resource: memory\n");
		return (ENXIO);
	}
	adapter->osdep.mem_bus_space_tag =
	    rman_get_bustag(adapter->res_memory);
	adapter->osdep.mem_bus_space_handle =
	    rman_get_bushandle(adapter->res_memory);
	/* Shared code accesses the registers through this handle */
	adapter->hw.hw_addr = (uint8_t *)&adapter->osdep.mem_bus_space_handle;

	/* Only older adapters use IO mapping */
	if (adapter->hw.mac.type <= e1000_82543) {
		/* Figure out where our I/O BAR is by scanning config space */
		for (rid = PCIR_BAR(0); rid < PCIR_CIS;) {
			val = pci_read_config(dev, rid, 4);
			if (EM_BAR_TYPE(val) == EM_BAR_TYPE_IO) {
				adapter->io_rid = rid;
				break;
			}
			rid += 4;
			/* check for 64bit BAR */
			if (EM_BAR_MEM_TYPE(val) == EM_BAR_MEM_TYPE_64BIT)
				rid += 4;
		}
		if (rid >= PCIR_CIS) {
			device_printf(dev, "Unable to locate IO BAR\n");
			return (ENXIO);
		}
		adapter->res_ioport = bus_alloc_resource_any(dev,
		    SYS_RES_IOPORT, &adapter->io_rid, RF_ACTIVE);
		if (adapter->res_ioport == NULL) {
			device_printf(dev, "Unable to allocate bus resource: "
			    "ioport\n");
			return (ENXIO);
		}
		adapter->hw.io_base = 0;
		adapter->osdep.io_bus_space_tag =
		    rman_get_bustag(adapter->res_ioport);
		adapter->osdep.io_bus_space_handle =
		    rman_get_bushandle(adapter->res_ioport);
	}

	/*
	 * Setup MSI/X or MSI if PCI Express
	 * only the latest can use MSI/X and
	 * real support for it is forthcoming
	 */
	adapter->msi = 0; /* Set defaults */
	rid = 0x0;
	if (adapter->hw.mac.type >= e1000_82575) {
		/*
		 * Eventually this will be used
		 * for Multiqueue, for now we will
		 * just use one vector.
		 */
        	val = pci_msix_count(dev);
		if ((val) && pci_alloc_msix(dev, &val) == 0) {
                	rid = 1;
                	adapter->msi = 1;
		}
	} else if (adapter->hw.bus.type == e1000_bus_type_pci_express) {
        	val = pci_msi_count(dev);
        	if (val == 1 && pci_alloc_msi(dev, &val) == 0) {
                	rid = 1;
                	adapter->msi = 1;
        	}
	}
	/* rid 1 selects the MSI/MSI-X vector, rid 0 the legacy INTx line */
	adapter->res_interrupt = bus_alloc_resource_any(dev,
	    SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
	if (adapter->res_interrupt == NULL) {
		device_printf(dev, "Unable to allocate bus resource: "
		    "interrupt\n");
		return (ENXIO);
	}

	adapter->hw.back = &adapter->osdep;

	return (0);
}
2567
2568/*********************************************************************
2569 *
2570 *  Setup the appropriate Interrupt handlers.
2571 *
2572 **********************************************************************/
int
em_allocate_intr(struct adapter *adapter)
{
	device_t dev = adapter->dev;
	int error;

	/* Manually turn off all interrupts */
	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);

#ifdef DEVICE_POLLING
	/* We do Legacy setup */
	/* Skipped if a handler is already registered (re-init path) */
	if (adapter->int_handler_tag == NULL &&
	    (error = bus_setup_intr(dev, adapter->res_interrupt,
	    INTR_TYPE_NET | INTR_MPSAFE, NULL, em_intr, adapter,
	    &adapter->int_handler_tag)) != 0) {
		device_printf(dev, "Failed to register interrupt handler");
		return (error);
	}

#else
	/*
	 * Try allocating a fast interrupt and the associated deferred
	 * processing contexts.
	 */
	TASK_INIT(&adapter->rxtx_task, 0, em_handle_rxtx, adapter);
	TASK_INIT(&adapter->link_task, 0, em_handle_link, adapter);
	adapter->tq = taskqueue_create_fast("em_taskq", M_NOWAIT,
	    taskqueue_thread_enqueue, &adapter->tq);
	taskqueue_start_threads(&adapter->tq, 1, PI_NET, "%s taskq",
	    device_get_nameunit(adapter->dev));
	/* Filter (fast) handler only; real work runs from the taskqueue */
	if ((error = bus_setup_intr(dev, adapter->res_interrupt,
	    INTR_TYPE_NET, em_intr_fast, NULL, adapter,
	    &adapter->int_handler_tag)) != 0) {
		device_printf(dev, "Failed to register fast interrupt "
			    "handler: %d\n", error);
		taskqueue_free(adapter->tq);
		adapter->tq = NULL;
		return (error);
	}
#endif

	em_enable_intr(adapter);
	return (0);
}
2617
2618static void
2619em_free_intr(struct adapter *adapter)
2620{
2621	device_t dev = adapter->dev;
2622
2623	if (adapter->res_interrupt != NULL) {
2624		bus_teardown_intr(dev, adapter->res_interrupt,
2625			adapter->int_handler_tag);
2626		adapter->int_handler_tag = NULL;
2627	}
2628	if (adapter->tq != NULL) {
2629		taskqueue_drain(adapter->tq, &adapter->rxtx_task);
2630		taskqueue_drain(taskqueue_fast, &adapter->link_task);
2631		taskqueue_free(adapter->tq);
2632		adapter->tq = NULL;
2633	}
2634}
2635
2636static void
2637em_free_pci_resources(struct adapter *adapter)
2638{
2639	device_t dev = adapter->dev;
2640
2641	if (adapter->res_interrupt != NULL)
2642		bus_release_resource(dev, SYS_RES_IRQ,
2643		    0, adapter->res_interrupt);
2644
2645	if (adapter->msi)
2646		pci_release_msi(dev);
2647
2648	if (adapter->res_memory != NULL)
2649		bus_release_resource(dev, SYS_RES_MEMORY,
2650		    PCIR_BAR(0), adapter->res_memory);
2651
2652	if (adapter->flash_mem != NULL)
2653		bus_release_resource(dev, SYS_RES_MEMORY,
2654		    EM_FLASH, adapter->flash_mem);
2655
2656	if (adapter->res_ioport != NULL)
2657		bus_release_resource(dev, SYS_RES_IOPORT,
2658		    adapter->io_rid, adapter->res_ioport);
2659}
2660
2661/*********************************************************************
2662 *
2663 *  Initialize the hardware to a configuration
2664 *  as specified by the adapter structure.
2665 *
2666 **********************************************************************/
static int
em_hardware_init(struct adapter *adapter)
{
	device_t dev = adapter->dev;
	uint16_t rx_buffer_size;

	INIT_DEBUGOUT("em_hardware_init: begin");

	/* Issue a global reset */
	e1000_reset_hw(&adapter->hw);

	/* When hardware is reset, fifo_head is also reset */
	adapter->tx_fifo_head = 0;

	/* Set up smart power down as default off on newer adapters. */
	if (!em_smart_pwr_down && (adapter->hw.mac.type == e1000_82571 ||
	    adapter->hw.mac.type == e1000_82572)) {
		uint16_t phy_tmp = 0;

		/* Speed up time to link by disabling smart power down. */
		e1000_read_phy_reg(&adapter->hw,
		    IGP02E1000_PHY_POWER_MGMT, &phy_tmp);
		phy_tmp &= ~IGP02E1000_PM_SPD;
		e1000_write_phy_reg(&adapter->hw,
		    IGP02E1000_PHY_POWER_MGMT, phy_tmp);
	}

	/*
	 * These parameters control the automatic generation (Tx) and
	 * response (Rx) to Ethernet PAUSE frames.
	 * - High water mark should allow for at least two frames to be
	 *   received after sending an XOFF.
	 * - Low water mark works best when it is very near the high water mark.
	 *   This allows the receiver to restart by sending XON when it has
	 *   drained a bit. Here we use an arbitary value of 1500 which will
	 *   restart after one full frame is pulled from the buffer. There
	 *   could be several smaller frames in the buffer and if so they will
	 *   not trigger the XON until their total number reduces the buffer
	 *   by 1500.
	 * - The pause time is fairly large at 1000 x 512ns = 512 usec.
	 */
	/* PBA holds the packet-buffer size in KB; convert to bytes */
	rx_buffer_size = ((E1000_READ_REG(&adapter->hw, E1000_PBA) &
	    0xffff) << 10 );

	adapter->hw.mac.fc_high_water = rx_buffer_size -
	    roundup2(adapter->hw.mac.max_frame_size, 1024);
	adapter->hw.mac.fc_low_water = adapter->hw.mac.fc_high_water - 1500;
	/* 80003es2lan gets the maximum pause time; others the driver default */
	if (adapter->hw.mac.type == e1000_80003es2lan)
		adapter->hw.mac.fc_pause_time = 0xFFFF;
	else
		adapter->hw.mac.fc_pause_time = EM_FC_PAUSE_TIME;
	adapter->hw.mac.fc_send_xon = TRUE;
	adapter->hw.mac.fc = e1000_fc_full;

	if (e1000_init_hw(&adapter->hw) < 0) {
		device_printf(dev, "Hardware Initialization Failed\n");
		return (EIO);
	}

	e1000_check_for_link(&adapter->hw);

	return (0);
}
2730
2731/*********************************************************************
2732 *
2733 *  Setup networking device structure and register an interface.
2734 *
2735 **********************************************************************/
static void
em_setup_interface(device_t dev, struct adapter *adapter)
{
	struct ifnet   *ifp;

	INIT_DEBUGOUT("em_setup_interface: begin");

	ifp = adapter->ifp = if_alloc(IFT_ETHER);
	if (ifp == NULL)
		panic("%s: can not if_alloc()", device_get_nameunit(dev));
	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
	ifp->if_mtu = ETHERMTU;
	ifp->if_init =  em_init;
	ifp->if_softc = adapter;
	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
	ifp->if_ioctl = em_ioctl;
	ifp->if_start = em_start;
	/* Send queue depth tracks the TX ring size */
	IFQ_SET_MAXLEN(&ifp->if_snd, adapter->num_tx_desc - 1);
	ifp->if_snd.ifq_drv_maxlen = adapter->num_tx_desc - 1;
	IFQ_SET_READY(&ifp->if_snd);

	ether_ifattach(ifp, adapter->hw.mac.addr);

	ifp->if_capabilities = ifp->if_capenable = 0;

	/* Checksum offload on 82543 and later */
	if (adapter->hw.mac.type >= e1000_82543) {
		ifp->if_capabilities |= IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM;
		ifp->if_capenable |= IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM;
	}

	/* Enable TSO if available */
	/* NOTE(review): 82547 excluded, presumably because of its TX FIFO
	 * workaround -- confirm before changing */
	if ((adapter->hw.mac.type > e1000_82544) &&
	    (adapter->hw.mac.type != e1000_82547)) {
		ifp->if_capabilities |= IFCAP_TSO4;
		ifp->if_capenable |= IFCAP_TSO4;
	}

	/*
	 * Tell the upper layer(s) we support long frames.
	 */
	ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);
	ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU;
	ifp->if_capenable |= IFCAP_VLAN_MTU;

#ifdef DEVICE_POLLING
	ifp->if_capabilities |= IFCAP_POLLING;
#endif

	/*
	 * Specify the media types supported by this adapter and register
	 * callbacks to update media and link information
	 */
	ifmedia_init(&adapter->media, IFM_IMASK,
	    em_media_change, em_media_status);
	if ((adapter->hw.media_type == e1000_media_type_fiber) ||
	    (adapter->hw.media_type == e1000_media_type_internal_serdes)) {
		u_char fiber_type = IFM_1000_SX;	/* default type */

		/* 82545 fiber parts are LX rather than SX */
		if (adapter->hw.mac.type == e1000_82545)
			fiber_type = IFM_1000_LX;
		ifmedia_add(&adapter->media, IFM_ETHER | fiber_type | IFM_FDX,
			    0, NULL);
		ifmedia_add(&adapter->media, IFM_ETHER | fiber_type, 0, NULL);
	} else {
		/* Copper: advertise 10/100 plus gigabit where the PHY allows */
		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T, 0, NULL);
		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T | IFM_FDX,
			    0, NULL);
		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX,
			    0, NULL);
		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
			    0, NULL);
		/* The ife PHY (10/100 only) gets no 1000_T entries */
		if (adapter->hw.phy.type != e1000_phy_ife) {
			ifmedia_add(&adapter->media,
				IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
			ifmedia_add(&adapter->media,
				IFM_ETHER | IFM_1000_T, 0, NULL);
		}
	}
	ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
	ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
}
2817
2818
2819/*********************************************************************
2820 *
2821 *  Workaround for SmartSpeed on 82541 and 82547 controllers
2822 *
2823 **********************************************************************/
static void
em_smartspeed(struct adapter *adapter)
{
	uint16_t phy_tmp;

	/* Only relevant while link is down on an IGP PHY autonegotiating 1000FD */
	if (adapter->link_active || (adapter->hw.phy.type != e1000_phy_igp) ||
	    adapter->hw.mac.autoneg == 0 ||
	    (adapter->hw.phy.autoneg_advertised & ADVERTISE_1000_FULL) == 0)
		return;

	if (adapter->smartspeed == 0) {
		/* If Master/Slave config fault is asserted twice,
		 * we assume back-to-back */
		e1000_read_phy_reg(&adapter->hw, PHY_1000T_STATUS, &phy_tmp);
		if (!(phy_tmp & SR_1000T_MS_CONFIG_FAULT))
			return;
		e1000_read_phy_reg(&adapter->hw, PHY_1000T_STATUS, &phy_tmp);
		if (phy_tmp & SR_1000T_MS_CONFIG_FAULT) {
			e1000_read_phy_reg(&adapter->hw, PHY_1000T_CTRL, &phy_tmp);
			/* Stop forcing master mode and restart autoneg */
			if(phy_tmp & CR_1000T_MS_ENABLE) {
				phy_tmp &= ~CR_1000T_MS_ENABLE;
				e1000_write_phy_reg(&adapter->hw, PHY_1000T_CTRL,
				    phy_tmp);
				adapter->smartspeed++;
				if(adapter->hw.mac.autoneg &&
				   !e1000_phy_setup_autoneg(&adapter->hw) &&
				   !e1000_read_phy_reg(&adapter->hw, PHY_CONTROL,
				    &phy_tmp)) {
					phy_tmp |= (MII_CR_AUTO_NEG_EN |
						    MII_CR_RESTART_AUTO_NEG);
					e1000_write_phy_reg(&adapter->hw, PHY_CONTROL,
					    phy_tmp);
				}
			}
		}
		return;
	} else if(adapter->smartspeed == EM_SMARTSPEED_DOWNSHIFT) {
		/* If still no link, perhaps using 2/3 pair cable */
		/* Re-enable master mode and restart autoneg once more */
		e1000_read_phy_reg(&adapter->hw, PHY_1000T_CTRL, &phy_tmp);
		phy_tmp |= CR_1000T_MS_ENABLE;
		e1000_write_phy_reg(&adapter->hw, PHY_1000T_CTRL, phy_tmp);
		if(adapter->hw.mac.autoneg &&
		   !e1000_phy_setup_autoneg(&adapter->hw) &&
		   !e1000_read_phy_reg(&adapter->hw, PHY_CONTROL, &phy_tmp)) {
			phy_tmp |= (MII_CR_AUTO_NEG_EN |
				    MII_CR_RESTART_AUTO_NEG);
			e1000_write_phy_reg(&adapter->hw, PHY_CONTROL, phy_tmp);
		}
	}
	/* Restart process after EM_SMARTSPEED_MAX iterations */
	if(adapter->smartspeed++ == EM_SMARTSPEED_MAX)
		adapter->smartspeed = 0;
}
2877
2878
2879/*
2880 * Manage DMA'able memory.
2881 */
2882static void
2883em_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
2884{
2885	if (error)
2886		return;
2887	*(bus_addr_t *) arg = segs[0].ds_addr;
2888}
2889
2890static int
2891em_dma_malloc(struct adapter *adapter, bus_size_t size,
2892        struct em_dma_alloc *dma, int mapflags)
2893{
2894	int error;
2895
2896	error = bus_dma_tag_create(bus_get_dma_tag(adapter->dev), /* parent */
2897				EM_DBA_ALIGN, 0,	/* alignment, bounds */
2898				BUS_SPACE_MAXADDR,	/* lowaddr */
2899				BUS_SPACE_MAXADDR,	/* highaddr */
2900				NULL, NULL,		/* filter, filterarg */
2901				size,			/* maxsize */
2902				1,			/* nsegments */
2903				size,			/* maxsegsize */
2904				0,			/* flags */
2905				NULL,			/* lockfunc */
2906				NULL,			/* lockarg */
2907				&dma->dma_tag);
2908	if (error) {
2909		device_printf(adapter->dev,
2910		    "%s: bus_dma_tag_create failed: %d\n",
2911		    __func__, error);
2912		goto fail_0;
2913	}
2914
2915	error = bus_dmamem_alloc(dma->dma_tag, (void**) &dma->dma_vaddr,
2916	    BUS_DMA_NOWAIT, &dma->dma_map);
2917	if (error) {
2918		device_printf(adapter->dev,
2919		    "%s: bus_dmamem_alloc(%ju) failed: %d\n",
2920		    __func__, (uintmax_t)size, error);
2921		goto fail_2;
2922	}
2923
2924	dma->dma_paddr = 0;
2925	error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
2926	    size, em_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT);
2927	if (error || dma->dma_paddr == 0) {
2928		device_printf(adapter->dev,
2929		    "%s: bus_dmamap_load failed: %d\n",
2930		    __func__, error);
2931		goto fail_3;
2932	}
2933
2934	return (0);
2935
2936fail_3:
2937	bus_dmamap_unload(dma->dma_tag, dma->dma_map);
2938fail_2:
2939	bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
2940	bus_dma_tag_destroy(dma->dma_tag);
2941fail_0:
2942	dma->dma_map = NULL;
2943	dma->dma_tag = NULL;
2944
2945	return (error);
2946}
2947
static void
em_dma_free(struct adapter *adapter, struct em_dma_alloc *dma)
{
	/*
	 * Release a DMA area set up by em_dma_malloc(): finish any DMA,
	 * unload the map, free the backing memory, destroy the tag.
	 * Safe to call on a partially set up (or already freed) 'dma';
	 * the cleared pointers make repeat calls no-ops.
	 */
	if (dma->dma_tag == NULL)
		return;
	if (dma->dma_map != NULL) {
		bus_dmamap_sync(dma->dma_tag, dma->dma_map,
		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
		bus_dmamap_unload(dma->dma_tag, dma->dma_map);
		bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
		dma->dma_map = NULL;
	}
	bus_dma_tag_destroy(dma->dma_tag);
	dma->dma_tag = NULL;
}
2963
2964
2965/*********************************************************************
2966 *
2967 *  Allocate memory for tx_buffer structures. The tx_buffer stores all
2968 *  the information needed to transmit a packet on the wire.
2969 *
2970 **********************************************************************/
2971static int
2972em_allocate_transmit_structures(struct adapter *adapter)
2973{
2974	device_t dev = adapter->dev;
2975
2976	adapter->tx_buffer_area = malloc(sizeof(struct em_buffer) *
2977	    adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO);
2978	if (adapter->tx_buffer_area == NULL) {
2979		device_printf(dev, "Unable to allocate tx_buffer memory\n");
2980		return (ENOMEM);
2981	}
2982
2983	bzero(adapter->tx_buffer_area,
2984	    (sizeof(struct em_buffer)) * adapter->num_tx_desc);
2985
2986	return (0);
2987}
2988
2989/*********************************************************************
2990 *
2991 *  Initialize transmit structures.
2992 *
2993 **********************************************************************/
static int
em_setup_transmit_structures(struct adapter *adapter)
{
	device_t dev = adapter->dev;
	struct em_buffer *tx_buffer;
	int error, i;

	/*
	 * Create DMA tags for tx descriptors
	 */
	if ((error = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */
				1, 0,			/* alignment, bounds */
				BUS_SPACE_MAXADDR,	/* lowaddr */
				BUS_SPACE_MAXADDR,	/* highaddr */
				NULL, NULL,		/* filter, filterarg */
				EM_TSO_SIZE,		/* maxsize */
				EM_MAX_SCATTER,		/* nsegments */
				EM_TSO_SEG_SIZE,	/* maxsegsize */
				0,			/* flags */
				NULL,		/* lockfunc */
				NULL,		/* lockarg */
				&adapter->txtag)) != 0) {
		device_printf(dev, "Unable to allocate TX DMA tag\n");
		goto fail;
	}

	/* Allocate the per-descriptor tx_buffer bookkeeping array */
	if ((error = em_allocate_transmit_structures(adapter)) != 0)
		goto fail;

	/* Clear the old ring contents */
	bzero(adapter->tx_desc_base,
	    (sizeof(struct e1000_tx_desc)) * adapter->num_tx_desc);

	/* Create the descriptor buffer dma maps */
	tx_buffer = adapter->tx_buffer_area;
	for (i = 0; i < adapter->num_tx_desc; i++) {
		error = bus_dmamap_create(adapter->txtag, 0, &tx_buffer->map);
		if (error != 0) {
			device_printf(dev, "Unable to create TX DMA map\n");
			goto fail;
		}
		tx_buffer->next_eop = -1;	/* no packet pending here yet */
		tx_buffer++;
	}

	/* Ring starts empty: producer and consumer indices at zero */
	adapter->next_avail_tx_desc = 0;
	adapter->next_tx_to_clean = 0;

	/* Set number of descriptors available */
	adapter->num_tx_desc_avail = adapter->num_tx_desc;

	/* Push the cleared ring out to the hardware-visible memory */
	bus_dmamap_sync(adapter->txdma.dma_tag, adapter->txdma.dma_map,
	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);

	return (0);

fail:
	/* em_free_transmit_structures() tolerates partial allocation */
	em_free_transmit_structures(adapter);
	return (error);
}
3054
3055/*********************************************************************
3056 *
3057 *  Enable transmit unit.
3058 *
3059 **********************************************************************/
3060static void
3061em_initialize_transmit_unit(struct adapter *adapter)
3062{
3063	uint32_t	tctl, tarc, tipg = 0;
3064	uint64_t	bus_addr;
3065
3066	 INIT_DEBUGOUT("em_initialize_transmit_unit: begin");
3067	/* Setup the Base and Length of the Tx Descriptor Ring */
3068	bus_addr = adapter->txdma.dma_paddr;
3069	E1000_WRITE_REG(&adapter->hw, E1000_TDLEN,
3070	    adapter->num_tx_desc * sizeof(struct e1000_tx_desc));
3071	E1000_WRITE_REG(&adapter->hw, E1000_TDBAH, (uint32_t)(bus_addr >> 32));
3072	E1000_WRITE_REG(&adapter->hw, E1000_TDBAL, (uint32_t)bus_addr);
3073
3074	/* Setup the HW Tx Head and Tail descriptor pointers */
3075	E1000_WRITE_REG(&adapter->hw, E1000_TDT, 0);
3076	E1000_WRITE_REG(&adapter->hw, E1000_TDH, 0);
3077
3078	HW_DEBUGOUT2("Base = %x, Length = %x\n",
3079	    E1000_READ_REG(&adapter->hw, E1000_TDBAL),
3080	    E1000_READ_REG(&adapter->hw, E1000_TDLEN));
3081
3082	/* Set the default values for the Tx Inter Packet Gap timer */
3083	switch (adapter->hw.mac.type) {
3084	case e1000_82542:
3085		tipg = DEFAULT_82542_TIPG_IPGT;
3086		tipg |= DEFAULT_82542_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT;
3087		tipg |= DEFAULT_82542_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT;
3088		break;
3089	case e1000_80003es2lan:
3090		tipg = DEFAULT_82543_TIPG_IPGR1;
3091		tipg |= DEFAULT_80003ES2LAN_TIPG_IPGR2 <<
3092		    E1000_TIPG_IPGR2_SHIFT;
3093		break;
3094	default:
3095		if ((adapter->hw.media_type == e1000_media_type_fiber) ||
3096		    (adapter->hw.media_type ==
3097		    e1000_media_type_internal_serdes))
3098			tipg = DEFAULT_82543_TIPG_IPGT_FIBER;
3099		else
3100			tipg = DEFAULT_82543_TIPG_IPGT_COPPER;
3101		tipg |= DEFAULT_82543_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT;
3102		tipg |= DEFAULT_82543_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT;
3103	}
3104
3105	E1000_WRITE_REG(&adapter->hw, E1000_TIPG, tipg);
3106	E1000_WRITE_REG(&adapter->hw, E1000_TIDV, adapter->tx_int_delay.value);
3107	if(adapter->hw.mac.type >= e1000_82540)
3108		E1000_WRITE_REG(&adapter->hw, E1000_TADV,
3109		    adapter->tx_abs_int_delay.value);
3110
3111	if ((adapter->hw.mac.type == e1000_82571) ||
3112	    (adapter->hw.mac.type == e1000_82572)) {
3113		tarc = E1000_READ_REG(&adapter->hw, E1000_TARC0);
3114		tarc |= SPEED_MODE_BIT;
3115		E1000_WRITE_REG(&adapter->hw, E1000_TARC0, tarc);
3116	} else if (adapter->hw.mac.type == e1000_80003es2lan) {
3117		tarc = E1000_READ_REG(&adapter->hw, E1000_TARC0);
3118		tarc |= 1;
3119		E1000_WRITE_REG(&adapter->hw, E1000_TARC0, tarc);
3120		tarc = E1000_READ_REG(&adapter->hw, E1000_TARC1);
3121		tarc |= 1;
3122		E1000_WRITE_REG(&adapter->hw, E1000_TARC1, tarc);
3123	}
3124
3125	/* Program the Transmit Control Register */
3126	tctl = E1000_READ_REG(&adapter->hw, E1000_TCTL);
3127	tctl &= ~E1000_TCTL_CT;
3128	tctl = E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN |
3129		   (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT);
3130
3131	if (adapter->hw.mac.type >= e1000_82571)
3132		tctl |= E1000_TCTL_MULR;
3133
3134	/* This write will effectively turn on the transmit unit. */
3135	E1000_WRITE_REG(&adapter->hw, E1000_TCTL, tctl);
3136
3137	/* Setup Transmit Descriptor Base Settings */
3138	adapter->txd_cmd = E1000_TXD_CMD_IFCS;
3139
3140	if ((adapter->tx_int_delay.value > 0) &&
3141	    (adapter->hw.mac.type != e1000_82575))
3142		adapter->txd_cmd |= E1000_TXD_CMD_IDE;
3143
3144        /* Set the function pointer for the transmit routine */
3145        if (adapter->hw.mac.type >= e1000_82575)
3146                adapter->em_xmit = em_adv_encap;
3147        else
3148                adapter->em_xmit = em_encap;
3149}
3150
3151/*********************************************************************
3152 *
3153 *  Free all transmit related data structures.
3154 *
3155 **********************************************************************/
static void
em_free_transmit_structures(struct adapter *adapter)
{
	struct em_buffer *tx_buffer;
	int i;

	INIT_DEBUGOUT("free_transmit_structures: begin");

	/*
	 * Tear down everything em_setup_transmit_structures() built:
	 * any in-flight mbufs, the per-descriptor DMA maps, the
	 * tx_buffer array, and finally the TX DMA tag.  Tolerates
	 * partially built state (used on the setup failure path).
	 */
	if (adapter->tx_buffer_area != NULL) {
		tx_buffer = adapter->tx_buffer_area;
		for (i = 0; i < adapter->num_tx_desc; i++, tx_buffer++) {
			if (tx_buffer->m_head != NULL) {
				/* Packet still mapped: finish DMA then free */
				bus_dmamap_sync(adapter->txtag, tx_buffer->map,
				    BUS_DMASYNC_POSTWRITE);
				bus_dmamap_unload(adapter->txtag,
				    tx_buffer->map);
				m_freem(tx_buffer->m_head);
				tx_buffer->m_head = NULL;
			} else if (tx_buffer->map != NULL)
				bus_dmamap_unload(adapter->txtag,
				    tx_buffer->map);
			if (tx_buffer->map != NULL) {
				bus_dmamap_destroy(adapter->txtag,
				    tx_buffer->map);
				tx_buffer->map = NULL;
			}
		}
	}
	if (adapter->tx_buffer_area != NULL) {
		free(adapter->tx_buffer_area, M_DEVBUF);
		adapter->tx_buffer_area = NULL;
	}
	if (adapter->txtag != NULL) {
		bus_dma_tag_destroy(adapter->txtag);
		adapter->txtag = NULL;
	}
}
3193
3194/*********************************************************************
3195 *
3196 *  The offload context needs to be set when we transfer the first
3197 *  packet of a particular protocol (TCP/UDP). This routine has been
3198 *  enhanced to deal with inserted VLAN headers, and IPV6 (not complete)
3199 *
3200 **********************************************************************/
3201static void
3202em_transmit_checksum_setup(struct adapter *adapter, struct mbuf *mp,
3203    uint32_t *txd_upper, uint32_t *txd_lower)
3204{
3205	struct e1000_context_desc *TXD;
3206	struct em_buffer *tx_buffer;
3207	struct ether_vlan_header *eh;
3208	struct ip *ip;
3209	struct ip6_hdr *ip6;
3210	struct tcp_hdr *th;
3211	int curr_txd, ehdrlen, hdr_len, ip_hlen;
3212	uint32_t cmd = 0;
3213	uint16_t etype;
3214	uint8_t ipproto;
3215
3216	/* Setup checksum offload context. */
3217	curr_txd = adapter->next_avail_tx_desc;
3218	tx_buffer = &adapter->tx_buffer_area[curr_txd];
3219	TXD = (struct e1000_context_desc *) &adapter->tx_desc_base[curr_txd];
3220
3221	*txd_lower = E1000_TXD_CMD_DEXT |	/* Extended descr type */
3222		     E1000_TXD_DTYP_D;		/* Data descr */
3223
3224	/*
3225	 * Determine where frame payload starts.
3226	 * Jump over vlan headers if already present,
3227	 * helpful for QinQ too.
3228	 */
3229	eh = mtod(mp, struct ether_vlan_header *);
3230	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
3231		etype = ntohs(eh->evl_proto);
3232		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3233	} else {
3234		etype = ntohs(eh->evl_encap_proto);
3235		ehdrlen = ETHER_HDR_LEN;
3236	}
3237
3238	/*
3239	 * We only support TCP/UDP for IPv4 and IPv6 for the moment.
3240	 * TODO: Support SCTP too when it hits the tree.
3241	 */
3242	switch (etype) {
3243	case ETHERTYPE_IP:
3244		ip = (struct ip *)(mp->m_data + ehdrlen);
3245		ip_hlen = ip->ip_hl << 2;
3246
3247		/* Setup of IP header checksum. */
3248		if (mp->m_pkthdr.csum_flags & CSUM_IP) {
3249			/*
3250			 * Start offset for header checksum calculation.
3251			 * End offset for header checksum calculation.
3252			 * Offset of place to put the checksum.
3253			 */
3254			TXD->lower_setup.ip_fields.ipcss = ehdrlen;
3255			TXD->lower_setup.ip_fields.ipcse =
3256			    htole16(ehdrlen + ip_hlen);
3257			TXD->lower_setup.ip_fields.ipcso =
3258			    ehdrlen + offsetof(struct ip, ip_sum);
3259			cmd |= E1000_TXD_CMD_IP;
3260			*txd_upper |= E1000_TXD_POPTS_IXSM << 8;
3261		}
3262
3263		if (mp->m_len < ehdrlen + ip_hlen)
3264			return;	/* failure */
3265
3266		hdr_len = ehdrlen + ip_hlen;
3267		ipproto = ip->ip_p;
3268
3269		break;
3270	case ETHERTYPE_IPV6:
3271		ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
3272		ip_hlen = sizeof(struct ip6_hdr); /* XXX: No header stacking. */
3273
3274		if (mp->m_len < ehdrlen + ip_hlen)
3275			return;	/* failure */
3276
3277		/* IPv6 doesn't have a header checksum. */
3278
3279		hdr_len = ehdrlen + ip_hlen;
3280		ipproto = ip6->ip6_nxt;
3281
3282		break;
3283	default:
3284		*txd_upper = 0;
3285		*txd_lower = 0;
3286		return;
3287	}
3288
3289	switch (ipproto) {
3290	case IPPROTO_TCP:
3291		if (mp->m_pkthdr.csum_flags & CSUM_TCP) {
3292			/*
3293			 * Start offset for payload checksum calculation.
3294			 * End offset for payload checksum calculation.
3295			 * Offset of place to put the checksum.
3296			 */
3297			th = (struct tcp_hdr *)(mp->m_data + hdr_len);
3298			TXD->upper_setup.tcp_fields.tucss = hdr_len;
3299			TXD->upper_setup.tcp_fields.tucse = htole16(0);
3300			TXD->upper_setup.tcp_fields.tucso =
3301			    hdr_len + offsetof(struct tcphdr, th_sum);
3302			cmd |= E1000_TXD_CMD_TCP;
3303			*txd_upper |= E1000_TXD_POPTS_TXSM << 8;
3304		}
3305		break;
3306	case IPPROTO_UDP:
3307		if (mp->m_pkthdr.csum_flags & CSUM_UDP) {
3308			/*
3309			 * Start offset for header checksum calculation.
3310			 * End offset for header checksum calculation.
3311			 * Offset of place to put the checksum.
3312			 */
3313			TXD->upper_setup.tcp_fields.tucss = hdr_len;
3314			TXD->upper_setup.tcp_fields.tucse = htole16(0);
3315			TXD->upper_setup.tcp_fields.tucso =
3316			    hdr_len + offsetof(struct udphdr, uh_sum);
3317			*txd_upper |= E1000_TXD_POPTS_TXSM << 8;
3318		}
3319		break;
3320	default:
3321		break;
3322	}
3323
3324	TXD->tcp_seg_setup.data = htole32(0);
3325	TXD->cmd_and_length =
3326	    htole32(adapter->txd_cmd | E1000_TXD_CMD_DEXT | cmd);
3327	tx_buffer->m_head = NULL;
3328	tx_buffer->next_eop = -1;
3329
3330	if (++curr_txd == adapter->num_tx_desc)
3331		curr_txd = 0;
3332
3333	adapter->num_tx_desc_avail--;
3334	adapter->next_avail_tx_desc = curr_txd;
3335}
3336
3337/**********************************************************************
3338 *
3339 *  Setup work for hardware segmentation offload (TSO)
3340 *
3341 **********************************************************************/
static boolean_t
em_tso_setup(struct adapter *adapter, struct mbuf *mp, uint32_t *txd_upper,
   uint32_t *txd_lower)
{
	struct e1000_context_desc *TXD;
	struct em_buffer *tx_buffer;
	struct ether_vlan_header *eh;
	struct ip *ip;
	struct ip6_hdr *ip6;
	struct tcphdr *th;
	int curr_txd, ehdrlen, hdr_len, ip_hlen, isip6;
	uint16_t etype;

	/*
	 * Build a legacy TSO context descriptor for 'mp'.  Returns TRUE
	 * and fills *txd_upper / *txd_lower when a context descriptor was
	 * consumed; returns FALSE when the frame is too small to need
	 * TSO, the headers do not fit in the first mbuf, the protocol is
	 * not TCP, or the ethertype is unsupported (IPv6 TSO is stubbed
	 * out below).
	 */
	/*
	 * XXX: This is not really correct as the stack would not have
	 * set up all checksums.
	 * XXX: Return FALSE is not sufficient as we may have to return
	 * in true failure cases as well.  Should do -1 (failure), 0 (no)
	 * and 1 (success).
	 */
	if (mp->m_pkthdr.len <= EM_TX_BUFFER_SIZE)
		return FALSE;	/* 0 */

	/*
	 * This function could/should be extended to support IP/IPv6
	 * fragmentation as well.  But as they say, one step at a time.
	 */

	/*
	 * Determine where frame payload starts.
	 * Jump over vlan headers if already present,
	 * helpful for QinQ too.
	 */
	eh = mtod(mp, struct ether_vlan_header *);
	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
		etype = ntohs(eh->evl_proto);
		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
	} else {
		etype = ntohs(eh->evl_encap_proto);
		ehdrlen = ETHER_HDR_LEN;
	}

	/* Ensure we have at least the IP+TCP header in the first mbuf. */
	if (mp->m_len < ehdrlen + sizeof(struct ip) + sizeof(struct tcphdr))
		return FALSE;	/* -1 */

	/*
	 * We only support TCP for IPv4 and IPv6 (notyet) for the moment.
	 * TODO: Support SCTP too when it hits the tree.
	 */
	switch (etype) {
	case ETHERTYPE_IP:
		isip6 = 0;
		ip = (struct ip *)(mp->m_data + ehdrlen);
		if (ip->ip_p != IPPROTO_TCP)
			return FALSE;	/* 0 */
		/* Hardware recomputes these; zero them for the split */
		ip->ip_len = 0;
		ip->ip_sum = 0;
		ip_hlen = ip->ip_hl << 2;
		if (mp->m_len < ehdrlen + ip_hlen + sizeof(struct tcphdr))
			return FALSE;	/* -1 */
		th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
		/* Seed th_sum with the pseudo-header sum for hardware TSO */
#if 1
		th->th_sum = in_pseudo(ip->ip_src.s_addr,
		    ip->ip_dst.s_addr, htons(IPPROTO_TCP));
#else
		th->th_sum = mp->m_pkthdr.csum_data;
#endif
		break;
	case ETHERTYPE_IPV6:
		isip6 = 1;
		/* NOTE: code below this return is intentionally unreachable */
		return FALSE;			/* Not supported yet. */
		ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
		if (ip6->ip6_nxt != IPPROTO_TCP)
			return FALSE;	/* 0 */
		ip6->ip6_plen = 0;
		ip_hlen = sizeof(struct ip6_hdr); /* XXX: no header stacking. */
		if (mp->m_len < ehdrlen + ip_hlen + sizeof(struct tcphdr))
			return FALSE;	/* -1 */
		th = (struct tcphdr *)((caddr_t)ip6 + ip_hlen);
#if 0
		th->th_sum = in6_pseudo(ip6->ip6_src, ip->ip6_dst,
		    htons(IPPROTO_TCP));	/* XXX: function notyet. */
#else
		th->th_sum = mp->m_pkthdr.csum_data;
#endif
		break;
	default:
		return FALSE;
	}
	hdr_len = ehdrlen + ip_hlen + (th->th_off << 2);

	*txd_lower = (E1000_TXD_CMD_DEXT |	/* Extended descr type */
		      E1000_TXD_DTYP_D |	/* Data descr type */
		      E1000_TXD_CMD_TSE);	/* Do TSE on this packet */

	/* IP and/or TCP header checksum calculation and insertion. */
	*txd_upper = ((isip6 ? 0 : E1000_TXD_POPTS_IXSM) |
		      E1000_TXD_POPTS_TXSM) << 8;

	curr_txd = adapter->next_avail_tx_desc;
	tx_buffer = &adapter->tx_buffer_area[curr_txd];
	TXD = (struct e1000_context_desc *) &adapter->tx_desc_base[curr_txd];

	/* IPv6 doesn't have a header checksum. */
	if (!isip6) {
		/*
		 * Start offset for header checksum calculation.
		 * End offset for header checksum calculation.
		 * Offset of place put the checksum.
		 */
		TXD->lower_setup.ip_fields.ipcss = ehdrlen;
		TXD->lower_setup.ip_fields.ipcse =
		    htole16(ehdrlen + ip_hlen - 1);
		TXD->lower_setup.ip_fields.ipcso =
		    ehdrlen + offsetof(struct ip, ip_sum);
	}
	/*
	 * Start offset for payload checksum calculation.
	 * End offset for payload checksum calculation.
	 * Offset of place to put the checksum.
	 */
	TXD->upper_setup.tcp_fields.tucss =
	    ehdrlen + ip_hlen;
	TXD->upper_setup.tcp_fields.tucse = 0;
	TXD->upper_setup.tcp_fields.tucso =
	    ehdrlen + ip_hlen + offsetof(struct tcphdr, th_sum);
	/*
	 * Payload size per packet w/o any headers.
	 * Length of all headers up to payload.
	 */
	TXD->tcp_seg_setup.fields.mss = htole16(mp->m_pkthdr.tso_segsz);
	TXD->tcp_seg_setup.fields.hdr_len = hdr_len;

	TXD->cmd_and_length = htole32(adapter->txd_cmd |
				E1000_TXD_CMD_DEXT |	/* Extended descr */
				E1000_TXD_CMD_TSE |	/* TSE context */
				(isip6 ? 0 : E1000_TXD_CMD_IP) | /* Do IP csum */
				E1000_TXD_CMD_TCP |	/* Do TCP checksum */
				(mp->m_pkthdr.len - (hdr_len))); /* Total len */

	/* Consume this ring slot for the context descriptor */
	tx_buffer->m_head = NULL;
	tx_buffer->next_eop = -1;

	if (++curr_txd == adapter->num_tx_desc)
		curr_txd = 0;

	adapter->num_tx_desc_avail--;
	adapter->next_avail_tx_desc = curr_txd;
	adapter->tx_tso = TRUE;

	return TRUE;
}
3495
3496
3497/**********************************************************************
3498 *
3499 *  Setup work for hardware segmentation offload (TSO) on
3500 *  adapters using advanced tx descriptors
3501 *
3502 **********************************************************************/
static boolean_t
em_tso_adv_setup(struct adapter *adapter, struct mbuf *mp, u32 *paylen)
{
	struct e1000_adv_tx_context_desc *TXD;
	struct em_buffer        *tx_buffer;
	u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0;
	u32 mss_l4len_idx = 0;
	u16 vtag = 0;
	int ctxd, ehdrlen, hdrlen, ip_hlen, tcp_hlen;
	struct ether_vlan_header *eh;
	struct ip *ip;
	struct tcphdr *th;

	/*
	 * Build an advanced (82575-style) TSO context descriptor.
	 * IPv4/TCP only.  Returns FALSE for frames that don't need TSO,
	 * short first mbufs, or non-TCP payloads; on success sets
	 * *paylen to the TCP payload length (used by the data
	 * descriptor in em_adv_encap) and consumes one ring slot.
	 */
	if (mp->m_pkthdr.len <= EM_TX_BUFFER_SIZE)
		return FALSE;

	/*
	 * Determine where frame payload starts.
	 * Jump over vlan headers if already present
	 */
	eh = mtod(mp, struct ether_vlan_header *);
	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN))
		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
	else
		ehdrlen = ETHER_HDR_LEN;

	/* Ensure we have at least the IP+TCP header in the first mbuf. */
	if (mp->m_len < ehdrlen + sizeof(struct ip) + sizeof(struct tcphdr))
		return FALSE;

	/* Only supports IPV4 for now */
	ctxd = adapter->next_avail_tx_desc;
	tx_buffer = &adapter->tx_buffer_area[ctxd];
	TXD = (struct e1000_adv_tx_context_desc *) &adapter->tx_desc_base[ctxd];

	ip = (struct ip *)(mp->m_data + ehdrlen);
	if (ip->ip_p != IPPROTO_TCP)
                return FALSE;   /* 0 */
	/* Hardware recomputes these fields during segmentation */
	ip->ip_len = 0;
	ip->ip_sum = 0;
	ip_hlen = ip->ip_hl << 2;
	th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
	/* Seed th_sum with the pseudo-header sum, as hardware TSO expects */
	th->th_sum = in_pseudo(ip->ip_src.s_addr,
	    ip->ip_dst.s_addr, htons(IPPROTO_TCP));
	tcp_hlen = th->th_off << 2;
	hdrlen = ehdrlen + ip_hlen + tcp_hlen;
	/* Calculate payload, this is used in the transmit desc in encap */
	*paylen = mp->m_pkthdr.len - hdrlen;

	/* VLAN MACLEN IPLEN */
	if (mp->m_flags & M_VLANTAG) {
		vtag = htole16(mp->m_pkthdr.ether_vtag);
		vlan_macip_lens |= (vtag << E1000_ADVTXD_VLAN_SHIFT);
	}
	vlan_macip_lens |= (ehdrlen << E1000_ADVTXD_MACLEN_SHIFT);
	vlan_macip_lens |= ip_hlen;
	TXD->vlan_macip_lens |= htole32(vlan_macip_lens);

	/* ADV DTYPE TUCMD */
	type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
	type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP;
	type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4;
	TXD->type_tucmd_mlhl |= htole32(type_tucmd_mlhl);

	/* MSS L4LEN IDX */
	mss_l4len_idx |= (mp->m_pkthdr.tso_segsz << E1000_ADVTXD_MSS_SHIFT);
	mss_l4len_idx |= (tcp_hlen << E1000_ADVTXD_L4LEN_SHIFT);
	TXD->mss_l4len_idx = htole32(mss_l4len_idx);

	TXD->seqnum_seed = htole32(0);
	tx_buffer->m_head = NULL;
	tx_buffer->next_eop = -1;

	/* Advance past the consumed context descriptor */
	if (++ctxd == adapter->num_tx_desc)
		ctxd = 0;

	adapter->num_tx_desc_avail--;
	adapter->next_avail_tx_desc = ctxd;
	return TRUE;
}
3583
3584
3585/*********************************************************************
3586 *
3587 *  Advanced Context Descriptor setup for VLAN or CSUM
3588 *
3589 **********************************************************************/
3590
static void
em_tx_adv_ctx_setup(struct adapter *adapter, struct mbuf *mp)
{
	struct e1000_adv_tx_context_desc *TXD;
	struct em_buffer        *tx_buffer;
	uint32_t vlan_macip_lens = 0, type_tucmd_mlhl = 0;
	struct ether_vlan_header *eh;
	struct ip *ip;
	struct ip6_hdr *ip6;
	int  ehdrlen, ip_hlen;
	u16	etype;
	u8	ipproto;

	int ctxd = adapter->next_avail_tx_desc;
	u16 vtag = 0;

	tx_buffer = &adapter->tx_buffer_area[ctxd];
	TXD = (struct e1000_adv_tx_context_desc *) &adapter->tx_desc_base[ctxd];

	/*
	** In advanced descriptors the vlan tag must
	** be placed into the descriptor itself.
	*/
	if (mp->m_flags & M_VLANTAG) {
		vtag = htole16(mp->m_pkthdr.ether_vtag);
		vlan_macip_lens |= (vtag << E1000_ADVTXD_VLAN_SHIFT);
	}

	/*
	 * Determine where frame payload starts.
	 * Jump over vlan headers if already present,
	 * helpful for QinQ too.
	 */
	eh = mtod(mp, struct ether_vlan_header *);
	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
		etype = ntohs(eh->evl_proto);
		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
	} else {
		etype = ntohs(eh->evl_encap_proto);
		ehdrlen = ETHER_HDR_LEN;
	}

	/* Set the ether header length */
	vlan_macip_lens |= ehdrlen << E1000_ADVTXD_MACLEN_SHIFT;

	switch (etype) {
		case ETHERTYPE_IP:
			ip = (struct ip *)(mp->m_data + ehdrlen);
			ip_hlen = ip->ip_hl << 2;
			if (mp->m_len < ehdrlen + ip_hlen)
				return; /* failure */
			ipproto = ip->ip_p;
			type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4;
			break;
		case ETHERTYPE_IPV6:
			ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
			ip_hlen = sizeof(struct ip6_hdr);
			if (mp->m_len < ehdrlen + ip_hlen)
				return; /* failure */
			ipproto = ip6->ip6_nxt;
			type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV6;
			break;
		default:
			return;
	}

	vlan_macip_lens |= ip_hlen;
	type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT;

	switch (ipproto) {
		case IPPROTO_TCP:
			if (mp->m_pkthdr.csum_flags & CSUM_TCP)
				type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP;
			break;
		case IPPROTO_UDP:
			/*
			 * NOTE(review): this sets the TCP L4 type for UDP
			 * checksum offload; later e1000/igb drivers use a
			 * distinct UDP L4T encoding here -- confirm against
			 * the 82575 datasheet.
			 */
			if (mp->m_pkthdr.csum_flags & CSUM_UDP)
				type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP;
			break;
	}

	/* Now copy bits into descriptor */
	TXD->vlan_macip_lens |= htole32(vlan_macip_lens);
	TXD->type_tucmd_mlhl |= htole32(type_tucmd_mlhl);
	TXD->seqnum_seed = htole32(0);
	TXD->mss_l4len_idx = htole32(0);

	tx_buffer->m_head = NULL;
	tx_buffer->next_eop = -1;

	/* We've consumed the first desc, adjust counters */
	if (++ctxd == adapter->num_tx_desc)
		ctxd = 0;
	adapter->next_avail_tx_desc = ctxd;
	--adapter->num_tx_desc_avail;

        return;
}
3688
3689
3690/**********************************************************************
3691 *
3692 *  Examine each tx_buffer in the used queue. If the hardware is done
3693 *  processing the packet then free associated resources. The
3694 *  tx_buffer is put back on the free queue.
3695 *
3696 **********************************************************************/
static void
em_txeof(struct adapter *adapter)
{
        int first, last, done, num_avail;
        struct em_buffer *tx_buffer;
        struct e1000_tx_desc   *tx_desc, *eop_desc;
	struct ifnet   *ifp = adapter->ifp;

	EM_LOCK_ASSERT(adapter);

	/* Ring already fully clean: nothing to reclaim */
        if (adapter->num_tx_desc_avail == adapter->num_tx_desc)
                return;

        num_avail = adapter->num_tx_desc_avail;
        first = adapter->next_tx_to_clean;
        tx_desc = &adapter->tx_desc_base[first];
        tx_buffer = &adapter->tx_buffer_area[first];
	last = tx_buffer->next_eop;
        eop_desc = &adapter->tx_desc_base[last];

	/*
	 * What this does is get the index of the
	 * first descriptor AFTER the EOP of the
	 * first packet, that way we can do the
	 * simple comparison on the inner while loop.
	 */
	if (++last == adapter->num_tx_desc)
 		last = 0;
	done = last;

	/* Pull the latest descriptor writebacks from the hardware */
        bus_dmamap_sync(adapter->txdma.dma_tag, adapter->txdma.dma_map,
            BUS_DMASYNC_POSTREAD);

	/* Outer loop: one iteration per completed packet (DD set on EOP) */
        while (eop_desc->upper.fields.status & E1000_TXD_STAT_DD) {
		/* We clean the range of the packet */
		while (first != done) {
                	tx_desc->upper.data = 0;
                	tx_desc->lower.data = 0;
                	tx_desc->buffer_addr = 0;
                	num_avail++;

			if (tx_buffer->m_head) {
				ifp->if_opackets++;
				bus_dmamap_sync(adapter->txtag,
				    tx_buffer->map,
				    BUS_DMASYNC_POSTWRITE);
				bus_dmamap_unload(adapter->txtag,
				    tx_buffer->map);

                        	m_freem(tx_buffer->m_head);
                        	tx_buffer->m_head = NULL;
                	}
			tx_buffer->next_eop = -1;

	                if (++first == adapter->num_tx_desc)
				first = 0;

	                tx_buffer = &adapter->tx_buffer_area[first];
			tx_desc = &adapter->tx_desc_base[first];
		}
		/* See if we can continue to the next packet */
		last = tx_buffer->next_eop;
		if (last != -1) {
        		eop_desc = &adapter->tx_desc_base[last];
			/* Get new done point */
			if (++last == adapter->num_tx_desc) last = 0;
			done = last;
		} else
			break;
        }
	/* Hand the cleared descriptors back to the hardware */
        bus_dmamap_sync(adapter->txdma.dma_tag, adapter->txdma.dma_map,
            BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);

        adapter->next_tx_to_clean = first;

        /*
         * If we have enough room, clear IFF_DRV_OACTIVE to tell the stack
         * that it is OK to send packets.
         * If there are no pending descriptors, clear the timeout. Otherwise,
         * if some descriptors have been freed, restart the timeout.
         */
        if (num_avail > EM_TX_CLEANUP_THRESHOLD) {
                ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
		/* All clean, turn off the timer */
                if (num_avail == adapter->num_tx_desc)
			adapter->watchdog_timer = 0;
		/* Some cleaned, reset the timer */
                else if (num_avail != adapter->num_tx_desc_avail)
			adapter->watchdog_timer = EM_TX_TIMEOUT;
        }
        adapter->num_tx_desc_avail = num_avail;
        return;
}
3790
3791/*********************************************************************
3792 *
3793 *  Get a buffer from system mbuf buffer pool.
3794 *
3795 **********************************************************************/
static int
em_get_buf(struct adapter *adapter, int i)
{
	struct mbuf		*m;
	bus_dma_segment_t	segs[1];
	bus_dmamap_t		map;
	struct em_buffer	*rx_buffer;
	int			error, nsegs;

	/*
	 * Refresh RX ring slot 'i' with a fresh mbuf cluster: allocate
	 * the cluster, load it through the spare DMA map, swap the spare
	 * map with the slot's map, and write the new bus address into
	 * the RX descriptor.  On failure the slot is left untouched and
	 * ENOBUFS (or the bus_dma error) is returned.
	 */
	m = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR);
	if (m == NULL) {
		adapter->mbuf_cluster_failed++;
		return (ENOBUFS);
	}
	m->m_len = m->m_pkthdr.len = MCLBYTES;

	/* Align the IP header when the frame fits after the adjustment */
	if (adapter->hw.mac.max_frame_size <= (MCLBYTES - ETHER_ALIGN))
		m_adj(m, ETHER_ALIGN);

	/*
	 * Using memory from the mbuf cluster pool, invoke the
	 * bus_dma machinery to arrange the memory mapping.
	 */
	error = bus_dmamap_load_mbuf_sg(adapter->rxtag,
	    adapter->rx_sparemap, m, segs, &nsegs, BUS_DMA_NOWAIT);
	if (error != 0) {
		m_free(m);
		return (error);
	}

	/* If nsegs is wrong then the stack is corrupt. */
	KASSERT(nsegs == 1, ("Too many segments returned!"));

	rx_buffer = &adapter->rx_buffer_area[i];
	if (rx_buffer->m_head != NULL)
		bus_dmamap_unload(adapter->rxtag, rx_buffer->map);

	/* Swap the slot's (now idle) map with the loaded spare map */
	map = rx_buffer->map;
	rx_buffer->map = adapter->rx_sparemap;
	adapter->rx_sparemap = map;
	bus_dmamap_sync(adapter->rxtag, rx_buffer->map, BUS_DMASYNC_PREREAD);
	rx_buffer->m_head = m;

	adapter->rx_desc_base[i].buffer_addr = htole64(segs[0].ds_addr);
	return (0);
}
3842
3843/*********************************************************************
3844 *
3845 *  Allocate memory for rx_buffer structures. Since we use one
3846 *  rx_buffer per received packet, the maximum number of rx_buffer's
3847 *  that we'll need is equal to the number of receive descriptors
3848 *  that we've allocated.
3849 *
3850 **********************************************************************/
3851static int
3852em_allocate_receive_structures(struct adapter *adapter)
3853{
3854	device_t dev = adapter->dev;
3855	struct em_buffer *rx_buffer;
3856	int i, error;
3857
3858	adapter->rx_buffer_area = malloc(sizeof(struct em_buffer) *
3859	    adapter->num_rx_desc, M_DEVBUF, M_NOWAIT);
3860	if (adapter->rx_buffer_area == NULL) {
3861		device_printf(dev, "Unable to allocate rx_buffer memory\n");
3862		return (ENOMEM);
3863	}
3864
3865	bzero(adapter->rx_buffer_area,
3866	    sizeof(struct em_buffer) * adapter->num_rx_desc);
3867
3868	error = bus_dma_tag_create(bus_get_dma_tag(dev),        /* parent */
3869				1, 0,			/* alignment, bounds */
3870				BUS_SPACE_MAXADDR,	/* lowaddr */
3871				BUS_SPACE_MAXADDR,	/* highaddr */
3872				NULL, NULL,		/* filter, filterarg */
3873				MCLBYTES,		/* maxsize */
3874				1,			/* nsegments */
3875				MCLBYTES,		/* maxsegsize */
3876				0,			/* flags */
3877				NULL,			/* lockfunc */
3878				NULL,			/* lockarg */
3879				&adapter->rxtag);
3880	if (error) {
3881		device_printf(dev, "%s: bus_dma_tag_create failed %d\n",
3882		    __func__, error);
3883		goto fail;
3884	}
3885
3886	/* Create the spare map (used by getbuf) */
3887	error = bus_dmamap_create(adapter->rxtag, BUS_DMA_NOWAIT,
3888	     &adapter->rx_sparemap);
3889	if (error) {
3890		device_printf(dev, "%s: bus_dmamap_create failed: %d\n",
3891		    __func__, error);
3892		goto fail;
3893	}
3894
3895	rx_buffer = adapter->rx_buffer_area;
3896	for (i = 0; i < adapter->num_rx_desc; i++, rx_buffer++) {
3897		error = bus_dmamap_create(adapter->rxtag, BUS_DMA_NOWAIT,
3898		    &rx_buffer->map);
3899		if (error) {
3900			device_printf(dev, "%s: bus_dmamap_create failed: %d\n",
3901			    __func__, error);
3902			goto fail;
3903		}
3904	}
3905
3906	/* Setup the initial buffers */
3907	for (i = 0; i < adapter->num_rx_desc; i++) {
3908		error = em_get_buf(adapter, i);
3909		if (error)
3910			goto fail;
3911	}
3912	bus_dmamap_sync(adapter->rxdma.dma_tag, adapter->rxdma.dma_map,
3913	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3914
3915	return (0);
3916
3917fail:
3918	em_free_receive_structures(adapter);
3919	return (error);
3920}
3921
3922/*********************************************************************
3923 *
3924 *  Allocate and initialize receive structures.
3925 *
3926 **********************************************************************/
3927static int
3928em_setup_receive_structures(struct adapter *adapter)
3929{
3930	int error;
3931
3932	bzero(adapter->rx_desc_base,
3933	    (sizeof(struct e1000_rx_desc)) * adapter->num_rx_desc);
3934
3935	if ((error = em_allocate_receive_structures(adapter)) !=0)
3936		return (error);
3937
3938	/* Setup our descriptor pointers */
3939	adapter->next_rx_desc_to_check = 0;
3940
3941	return (0);
3942}
3943
3944/*********************************************************************
3945 *
3946 *  Enable receive unit.
3947 *
3948 **********************************************************************/
static void
em_initialize_receive_unit(struct adapter *adapter)
{
	struct ifnet	*ifp = adapter->ifp;
	uint64_t	bus_addr;
	uint32_t	reg_rctl;
	uint32_t	reg_rxcsum;

	INIT_DEBUGOUT("em_initialize_receive_unit: begin");

	/*
	 * Make sure receives are disabled while setting
	 * up the descriptor ring
	 */
	reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
	E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl & ~E1000_RCTL_EN);

	/* 82540 and later support the absolute delay and ITR registers */
	if(adapter->hw.mac.type >= e1000_82540) {
		E1000_WRITE_REG(&adapter->hw, E1000_RADV,
		    adapter->rx_abs_int_delay.value);
		/*
		 * Set the interrupt throttling rate. Value is calculated
		 * as DEFAULT_ITR = 1/(MAX_INTS_PER_SEC * 256ns)
		 */
#define MAX_INTS_PER_SEC	8000
#define DEFAULT_ITR	     1000000000/(MAX_INTS_PER_SEC * 256)
		E1000_WRITE_REG(&adapter->hw, E1000_ITR, DEFAULT_ITR);
	}

	/* Setup the Base and Length of the Rx Descriptor Ring */
	bus_addr = adapter->rxdma.dma_paddr;
	E1000_WRITE_REG(&adapter->hw, E1000_RDLEN, adapter->num_rx_desc *
			sizeof(struct e1000_rx_desc));
	E1000_WRITE_REG(&adapter->hw, E1000_RDBAH, (uint32_t)(bus_addr >> 32));
	E1000_WRITE_REG(&adapter->hw, E1000_RDBAL, (uint32_t)bus_addr);

	/*
	 * Setup the Receive Control Register: clear then set the
	 * multicast offset field, enable receiver, accept broadcast,
	 * no loopback, interrupt at half-full descriptor threshold.
	 */
	reg_rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
	reg_rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_LBM_NO |
		   E1000_RCTL_RDMTS_HALF |
		   (adapter->hw.mac.mc_filter_type << E1000_RCTL_MO_SHIFT);

	/* Store bad packets only when the 82543 TBI workaround wants them */
	if (e1000_tbi_sbp_enabled_82543(&adapter->hw))
		reg_rctl |= E1000_RCTL_SBP;
	else
		reg_rctl &= ~E1000_RCTL_SBP;

	/* Hardware buffer size; sizes above 2048 need BSEX scaling */
	switch (adapter->rx_buffer_len) {
	default:
	case 2048:
		reg_rctl |= E1000_RCTL_SZ_2048;
		break;
	case 4096:
		reg_rctl |= E1000_RCTL_SZ_4096 |
		    E1000_RCTL_BSEX | E1000_RCTL_LPE;
		break;
	case 8192:
		reg_rctl |= E1000_RCTL_SZ_8192 |
		    E1000_RCTL_BSEX | E1000_RCTL_LPE;
		break;
	case 16384:
		reg_rctl |= E1000_RCTL_SZ_16384 |
		    E1000_RCTL_BSEX | E1000_RCTL_LPE;
		break;
	}

	/* Long Packet Enable follows the configured MTU */
	if (ifp->if_mtu > ETHERMTU)
		reg_rctl |= E1000_RCTL_LPE;
	else
		reg_rctl &= ~E1000_RCTL_LPE;

	/* Enable 82543 Receive Checksum Offload for TCP and UDP */
	if ((adapter->hw.mac.type >= e1000_82543) &&
	    (ifp->if_capenable & IFCAP_RXCSUM)) {
		reg_rxcsum = E1000_READ_REG(&adapter->hw, E1000_RXCSUM);
		reg_rxcsum |= (E1000_RXCSUM_IPOFL | E1000_RXCSUM_TUOFL);
		E1000_WRITE_REG(&adapter->hw, E1000_RXCSUM, reg_rxcsum);
	}

	/*
	** XXX TEMPORARY WORKAROUND: on some systems with 82573
	** long latencies are observed, like Lenovo X60. This
	** change eliminates the problem, but since having positive
	** values in RDTR is a known source of problems on other
	** platforms another solution is being sought.
	*/
	if (adapter->hw.mac.type == e1000_82573)
		E1000_WRITE_REG(&adapter->hw, E1000_RDTR, 0x20);

	/* Enable Receives */
	E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);

	/*
	 * Setup the HW Rx Head and
	 * Tail Descriptor Pointers
	 */
	E1000_WRITE_REG(&adapter->hw, E1000_RDH, 0);
	E1000_WRITE_REG(&adapter->hw, E1000_RDT, adapter->num_rx_desc - 1);

	return;
}
4050
4051/*********************************************************************
4052 *
4053 *  Free receive related data structures.
4054 *
4055 **********************************************************************/
4056static void
4057em_free_receive_structures(struct adapter *adapter)
4058{
4059	struct em_buffer *rx_buffer;
4060	int i;
4061
4062	INIT_DEBUGOUT("free_receive_structures: begin");
4063
4064	if (adapter->rx_sparemap) {
4065		bus_dmamap_destroy(adapter->rxtag, adapter->rx_sparemap);
4066		adapter->rx_sparemap = NULL;
4067	}
4068
4069	/* Cleanup any existing buffers */
4070	if (adapter->rx_buffer_area != NULL) {
4071		rx_buffer = adapter->rx_buffer_area;
4072		for (i = 0; i < adapter->num_rx_desc; i++, rx_buffer++) {
4073			if (rx_buffer->m_head != NULL) {
4074				bus_dmamap_sync(adapter->rxtag, rx_buffer->map,
4075				    BUS_DMASYNC_POSTREAD);
4076				bus_dmamap_unload(adapter->rxtag,
4077				    rx_buffer->map);
4078				m_freem(rx_buffer->m_head);
4079				rx_buffer->m_head = NULL;
4080			} else if (rx_buffer->map != NULL)
4081				bus_dmamap_unload(adapter->rxtag,
4082				    rx_buffer->map);
4083			if (rx_buffer->map != NULL) {
4084				bus_dmamap_destroy(adapter->rxtag,
4085				    rx_buffer->map);
4086				rx_buffer->map = NULL;
4087			}
4088		}
4089	}
4090
4091	if (adapter->rx_buffer_area != NULL) {
4092		free(adapter->rx_buffer_area, M_DEVBUF);
4093		adapter->rx_buffer_area = NULL;
4094	}
4095
4096	if (adapter->rxtag != NULL) {
4097		bus_dma_tag_destroy(adapter->rxtag);
4098		adapter->rxtag = NULL;
4099	}
4100}
4101
4102/*********************************************************************
4103 *
4104 *  This routine executes in interrupt context. It replenishes
4105 *  the mbufs in the descriptor and sends data which has been
4106 *  dma'ed into host memory to upper layer.
4107 *
4108 *  We loop at most count times if count is > 0, or until done if
4109 *  count < 0.
4110 *
4111 *********************************************************************/
static int
em_rxeof(struct adapter *adapter, int count)
{
	struct ifnet	*ifp;
	struct mbuf	*mp;
	uint8_t		accept_frame = 0;
	uint8_t		eop = 0;
	uint16_t 	len, desc_len, prev_len_adj;
	int		i;

	/* Pointer to the receive descriptor being examined. */
	struct e1000_rx_desc   *current_desc;
	uint8_t		status;

	ifp = adapter->ifp;
	i = adapter->next_rx_desc_to_check;
	current_desc = &adapter->rx_desc_base[i];
	bus_dmamap_sync(adapter->rxdma.dma_tag, adapter->rxdma.dma_map,
	    BUS_DMASYNC_POSTREAD);

	/* Nothing pending: the next descriptor is not done yet. */
	if (!((current_desc->status) & E1000_RXD_STAT_DD))
		return (0);

	/*
	 * Process descriptors while they are done (DD set), the
	 * budget is not exhausted (count is only decremented on
	 * EOP, so a negative count means "until done"), and the
	 * interface is still running.
	 */
	while ((current_desc->status & E1000_RXD_STAT_DD) &&
	    (count != 0) &&
	    (ifp->if_drv_flags & IFF_DRV_RUNNING)) {
		struct mbuf *m = NULL;

		mp = adapter->rx_buffer_area[i].m_head;
		/*
		 * Can't defer bus_dmamap_sync(9) because TBI_ACCEPT
		 * needs to access the last received byte in the mbuf.
		 */
		bus_dmamap_sync(adapter->rxtag, adapter->rx_buffer_area[i].map,
		    BUS_DMASYNC_POSTREAD);

		accept_frame = 1;
		prev_len_adj = 0;
		desc_len = le16toh(current_desc->length);
		status = current_desc->status;
		if (status & E1000_RXD_STAT_EOP) {
			count--;
			eop = 1;
			/*
			 * Strip the CRC from the last fragment; if the
			 * fragment is shorter than the CRC, the excess
			 * must come off the previous mbuf in the chain.
			 */
			if (desc_len < ETHER_CRC_LEN) {
				len = 0;
				prev_len_adj = ETHER_CRC_LEN - desc_len;
			} else
				len = desc_len - ETHER_CRC_LEN;
		} else {
			eop = 0;
			len = desc_len;
		}

		/* 82543 TBI workaround: maybe accept "bad" frames. */
		if (current_desc->errors & E1000_RXD_ERR_FRAME_ERR_MASK) {
			uint8_t		last_byte;
			uint32_t	pkt_len = desc_len;

			if (adapter->fmp != NULL)
				pkt_len += adapter->fmp->m_pkthdr.len;

			last_byte = *(mtod(mp, caddr_t) + desc_len - 1);
			if (TBI_ACCEPT(&adapter->hw, status,
			    current_desc->errors, pkt_len, last_byte)) {
				e1000_tbi_adjust_stats_82543(&adapter->hw,
				    &adapter->stats, pkt_len,
				    adapter->hw.mac.addr);
				if (len > 0)
					len--;
			} else
				accept_frame = 0;
		}

		if (accept_frame) {
			/* Refill slot i before handing mp up the stack. */
			if (em_get_buf(adapter, i) != 0) {
				ifp->if_iqdrops++;
				goto discard;
			}

			/* Assign correct length to the current fragment */
			mp->m_len = len;

			/* fmp = first mbuf of the frame, lmp = last so far */
			if (adapter->fmp == NULL) {
				mp->m_pkthdr.len = len;
				adapter->fmp = mp; /* Store the first mbuf */
				adapter->lmp = mp;
			} else {
				/* Chain mbuf's together */
				mp->m_flags &= ~M_PKTHDR;
				/*
				 * Adjust length of previous mbuf in chain if
				 * we received less than 4 bytes in the last
				 * descriptor.
				 */
				if (prev_len_adj > 0) {
					adapter->lmp->m_len -= prev_len_adj;
					adapter->fmp->m_pkthdr.len -=
					    prev_len_adj;
				}
				adapter->lmp->m_next = mp;
				adapter->lmp = adapter->lmp->m_next;
				adapter->fmp->m_pkthdr.len += len;
			}

			if (eop) {
				adapter->fmp->m_pkthdr.rcvif = ifp;
				ifp->if_ipackets++;
				em_receive_checksum(adapter, current_desc,
				    adapter->fmp);
#ifndef __NO_STRICT_ALIGNMENT
				/* Realign jumbo frames for strict-align CPUs */
				if (adapter->hw.mac.max_frame_size >
				    (MCLBYTES - ETHER_ALIGN) &&
				    em_fixup_rx(adapter) != 0)
					goto skip;
#endif
				if (status & E1000_RXD_STAT_VP) {
					adapter->fmp->m_pkthdr.ether_vtag =
					    (le16toh(current_desc->special) &
					    E1000_RXD_SPC_VLAN_MASK);
					adapter->fmp->m_flags |= M_VLANTAG;
				}
#ifndef __NO_STRICT_ALIGNMENT
skip:
#endif
				m = adapter->fmp;
				adapter->fmp = NULL;
				adapter->lmp = NULL;
			}
		} else {
			ifp->if_ierrors++;
discard:
			/* Reuse loaded DMA map and just update mbuf chain */
			mp = adapter->rx_buffer_area[i].m_head;
			mp->m_len = mp->m_pkthdr.len = MCLBYTES;
			mp->m_data = mp->m_ext.ext_buf;
			mp->m_next = NULL;
			if (adapter->hw.mac.max_frame_size <=
			    (MCLBYTES - ETHER_ALIGN))
				m_adj(mp, ETHER_ALIGN);
			/* Drop any partially assembled frame. */
			if (adapter->fmp != NULL) {
				m_freem(adapter->fmp);
				adapter->fmp = NULL;
				adapter->lmp = NULL;
			}
			m = NULL;
		}

		/* Zero out the receive descriptors status. */
		current_desc->status = 0;
		bus_dmamap_sync(adapter->rxdma.dma_tag, adapter->rxdma.dma_map,
		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);

		/* Advance our pointers to the next descriptor. */
		if (++i == adapter->num_rx_desc)
			i = 0;
		/*
		 * Hand the completed frame to the stack.  The lock is
		 * dropped around if_input, so re-read the ring index
		 * afterwards in case it moved.
		 */
		if (m != NULL) {
			adapter->next_rx_desc_to_check = i;
#ifdef DEVICE_POLLING
			EM_UNLOCK(adapter);
			(*ifp->if_input)(ifp, m);
			EM_LOCK(adapter);
#else
			/* Already running unlocked */
			(*ifp->if_input)(ifp, m);
#endif
			i = adapter->next_rx_desc_to_check;
		}
		current_desc = &adapter->rx_desc_base[i];
	}
	adapter->next_rx_desc_to_check = i;

	/* Advance the E1000's Receive Queue #0  "Tail Pointer". */
	if (--i < 0)
		i = adapter->num_rx_desc - 1;
	E1000_WRITE_REG(&adapter->hw, E1000_RDT, i);
	if (!((current_desc->status) & E1000_RXD_STAT_DD))
		return (0);

	/* More work remains (budget exhausted or interface stopped). */
	return (1);
}
4291
4292#ifndef __NO_STRICT_ALIGNMENT
4293/*
4294 * When jumbo frames are enabled we should realign entire payload on
4295 * architecures with strict alignment. This is serious design mistake of 8254x
4296 * as it nullifies DMA operations. 8254x just allows RX buffer size to be
4297 * 2048/4096/8192/16384. What we really want is 2048 - ETHER_ALIGN to align its
4298 * payload. On architecures without strict alignment restrictions 8254x still
4299 * performs unaligned memory access which would reduce the performance too.
4300 * To avoid copying over an entire frame to align, we allocate a new mbuf and
4301 * copy ethernet header to the new mbuf. The new mbuf is prepended into the
4302 * existing mbuf chain.
4303 *
4304 * Be aware, best performance of the 8254x is achived only when jumbo frame is
4305 * not used at all on architectures with strict alignment.
4306 */
static int
em_fixup_rx(struct adapter *adapter)
{
	struct mbuf *m, *n;
	int error;

	error = 0;
	m = adapter->fmp;
	if (m->m_len <= (MCLBYTES - ETHER_HDR_LEN)) {
		/*
		 * Room left in the cluster: slide the whole fragment
		 * forward by ETHER_HDR_LEN so the payload after the
		 * Ethernet header becomes aligned.
		 * NOTE(review): source and destination overlap here;
		 * assumes kernel bcopy() handles overlapping copies.
		 */
		bcopy(m->m_data, m->m_data + ETHER_HDR_LEN, m->m_len);
		m->m_data += ETHER_HDR_LEN;
	} else {
		/*
		 * No room: prepend a fresh mbuf carrying just the
		 * Ethernet header, leaving the (now aligned) payload
		 * in the original mbuf.
		 */
		MGETHDR(n, M_DONTWAIT, MT_DATA);
		if (n != NULL) {
			bcopy(m->m_data, n->m_data, ETHER_HDR_LEN);
			m->m_data += ETHER_HDR_LEN;
			m->m_len -= ETHER_HDR_LEN;
			n->m_len = ETHER_HDR_LEN;
			/* Transfer the packet header (len, flags) to n. */
			M_MOVE_PKTHDR(n, m);
			n->m_next = m;
			adapter->fmp = n;
		} else {
			/* Allocation failed: drop the whole frame. */
			adapter->dropped_pkts++;
			m_freem(adapter->fmp);
			adapter->fmp = NULL;
			error = ENOMEM;
		}
	}

	return (error);
}
4338#endif
4339
4340/*********************************************************************
4341 *
4342 *  Verify that the hardware indicated that the checksum is valid.
4343 *  Inform the stack about the status of checksum so that stack
4344 *  doesn't spend time verifying the checksum.
4345 *
4346 *********************************************************************/
4347static void
4348em_receive_checksum(struct adapter *adapter,
4349	    struct e1000_rx_desc *rx_desc, struct mbuf *mp)
4350{
4351	/* 82543 or newer only */
4352	if ((adapter->hw.mac.type < e1000_82543) ||
4353	    /* Ignore Checksum bit is set */
4354	    (rx_desc->status & E1000_RXD_STAT_IXSM)) {
4355		mp->m_pkthdr.csum_flags = 0;
4356		return;
4357	}
4358
4359	if (rx_desc->status & E1000_RXD_STAT_IPCS) {
4360		/* Did it pass? */
4361		if (!(rx_desc->errors & E1000_RXD_ERR_IPE)) {
4362			/* IP Checksum Good */
4363			mp->m_pkthdr.csum_flags = CSUM_IP_CHECKED;
4364			mp->m_pkthdr.csum_flags |= CSUM_IP_VALID;
4365
4366		} else {
4367			mp->m_pkthdr.csum_flags = 0;
4368		}
4369	}
4370
4371	if (rx_desc->status & E1000_RXD_STAT_TCPCS) {
4372		/* Did it pass? */
4373		if (!(rx_desc->errors & E1000_RXD_ERR_TCPE)) {
4374			mp->m_pkthdr.csum_flags |=
4375			(CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
4376			mp->m_pkthdr.csum_data = htons(0xffff);
4377		}
4378	}
4379}
4380
4381
4382static void
4383em_enable_vlans(struct adapter *adapter)
4384{
4385	uint32_t ctrl;
4386
4387	E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);
4388
4389	ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
4390	ctrl |= E1000_CTRL_VME;
4391	E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
4392}
4393
/* Unmask the standard set of interrupt causes. */
static void
em_enable_intr(struct adapter *adapter)
{
	E1000_WRITE_REG(&adapter->hw, E1000_IMS,
	    (IMS_ENABLE_MASK));
}
4400
/* Mask off (disable) every interrupt cause. */
static void
em_disable_intr(struct adapter *adapter)
{
	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
}
4406
4407/*
4408 * Bit of a misnomer, what this really means is
4409 * to enable OS management of the system... aka
4410 * to disable special hardware management features
4411 */
static void
em_init_manageability(struct adapter *adapter)
{
	/* A shared code workaround */
#define E1000_82542_MANC2H E1000_MANC2H
	if (adapter->has_manage) {
		int manc2h = E1000_READ_REG(&adapter->hw, E1000_MANC2H);
		int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);

		/* disable hardware interception of ARP */
		manc &= ~(E1000_MANC_ARP_EN);

                /* enable receiving management packets to the host */
                if (adapter->hw.mac.type >= e1000_82571) {
			manc |= E1000_MANC_EN_MNG2HOST;
#define E1000_MNG2HOST_PORT_623 (1 << 5)
#define E1000_MNG2HOST_PORT_664 (1 << 6)
			/* forward ports 623 and 664 to the host */
			manc2h |= E1000_MNG2HOST_PORT_623;
			manc2h |= E1000_MNG2HOST_PORT_664;
			E1000_WRITE_REG(&adapter->hw, E1000_MANC2H, manc2h);
		}

		E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
	}
}
4437
4438/*
4439 * Give control back to hardware management
4440 * controller if there is one.
4441 */
4442static void
4443em_release_manageability(struct adapter *adapter)
4444{
4445	if (adapter->has_manage) {
4446		int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
4447
4448		/* re-enable hardware interception of ARP */
4449		manc |= E1000_MANC_ARP_EN;
4450
4451		if (adapter->hw.mac.type >= e1000_82571)
4452			manc &= ~E1000_MANC_EN_MNG2HOST;
4453
4454		E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
4455	}
4456}
4457
4458/*
4459 * em_get_hw_control sets {CTRL_EXT|FWSM}:DRV_LOAD bit.
4460 * For ASF and Pass Through versions of f/w this means that
4461 * the driver is loaded. For AMT version (only with 82573)
4462 * of the f/w this means that the network i/f is open.
4463 *
4464 */
4465static void
4466em_get_hw_control(struct adapter *adapter)
4467{
4468	u32 ctrl_ext, swsm;
4469
4470	/* Let firmware know the driver has taken over */
4471	switch (adapter->hw.mac.type) {
4472	case e1000_82573:
4473		swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM);
4474		E1000_WRITE_REG(&adapter->hw, E1000_SWSM,
4475		    swsm | E1000_SWSM_DRV_LOAD);
4476		break;
4477	case e1000_82571:
4478	case e1000_82572:
4479	case e1000_80003es2lan:
4480	case e1000_ich8lan:
4481	case e1000_ich9lan:
4482		ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4483		E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
4484		    ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
4485		break;
4486	default:
4487		break;
4488	}
4489}
4490
4491/*
4492 * em_release_hw_control resets {CTRL_EXT|FWSM}:DRV_LOAD bit.
4493 * For ASF and Pass Through versions of f/w this means that the
4494 * driver is no longer loaded. For AMT version (only with 82573) i
4495 * of the f/w this means that the network i/f is closed.
4496 *
4497 */
4498static void
4499em_release_hw_control(struct adapter *adapter)
4500{
4501	u32 ctrl_ext, swsm;
4502
4503	/* Let firmware taken over control of h/w */
4504	switch (adapter->hw.mac.type) {
4505	case e1000_82573:
4506		swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM);
4507		E1000_WRITE_REG(&adapter->hw, E1000_SWSM,
4508		    swsm & ~E1000_SWSM_DRV_LOAD);
4509		break;
4510	case e1000_82571:
4511	case e1000_82572:
4512	case e1000_80003es2lan:
4513	case e1000_ich8lan:
4514	case e1000_ich9lan:
4515		ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4516		E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
4517		    ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
4518		break;
4519	default:
4520		break;
4521
4522	}
4523}
4524
4525static int
4526em_is_valid_ether_addr(uint8_t *addr)
4527{
4528	char zero_addr[6] = { 0, 0, 0, 0, 0, 0 };
4529
4530	if ((addr[0] & 1) || (!bcmp(addr, zero_addr, ETHER_ADDR_LEN))) {
4531		return (FALSE);
4532	}
4533
4534	return (TRUE);
4535}
4536
4537/*
4538 * NOTE: the following routines using the e1000
4539 * 	naming style are provided to the shared
4540 *	code which expects that rather than 'em'
4541 */
4542
4543void
4544e1000_write_pci_cfg(struct e1000_hw *hw, uint32_t reg, uint16_t *value)
4545{
4546	pci_write_config(((struct e1000_osdep *)hw->back)->dev, reg, *value, 2);
4547}
4548
4549void
4550e1000_read_pci_cfg(struct e1000_hw *hw, uint32_t reg, uint16_t *value)
4551{
4552	*value = pci_read_config(((struct e1000_osdep *)hw->back)->dev, reg, 2);
4553}
4554
4555void
4556e1000_pci_set_mwi(struct e1000_hw *hw)
4557{
4558	pci_write_config(((struct e1000_osdep *)hw->back)->dev, PCIR_COMMAND,
4559	    (hw->bus.pci_cmd_word | CMD_MEM_WRT_INVALIDATE), 2);
4560}
4561
4562void
4563e1000_pci_clear_mwi(struct e1000_hw *hw)
4564{
4565	pci_write_config(((struct e1000_osdep *)hw->back)->dev, PCIR_COMMAND,
4566	    (hw->bus.pci_cmd_word & ~CMD_MEM_WRT_INVALIDATE), 2);
4567}
4568
4569/*
4570 * Read the PCI Express capabilities
4571 */
4572int32_t
4573e1000_read_pcie_cap_reg(struct e1000_hw *hw, uint32_t reg, uint16_t *value)
4574{
4575	int32_t		error = E1000_SUCCESS;
4576	uint16_t	cap_off;
4577
4578	switch (hw->mac.type) {
4579
4580		case e1000_82571:
4581		case e1000_82572:
4582		case e1000_82573:
4583		case e1000_80003es2lan:
4584			cap_off = 0xE0;
4585			e1000_read_pci_cfg(hw, cap_off + reg, value);
4586			break;
4587		default:
4588			error = ~E1000_NOT_IMPLEMENTED;
4589			break;
4590	}
4591
4592	return (error);
4593}
4594
4595int32_t
4596e1000_alloc_zeroed_dev_spec_struct(struct e1000_hw *hw, uint32_t size)
4597{
4598	int32_t error = 0;
4599
4600	hw->dev_spec = malloc(size, M_DEVBUF, M_NOWAIT | M_ZERO);
4601	if (hw->dev_spec == NULL)
4602		error = ENOMEM;
4603	bzero(hw->dev_spec, size);
4604
4605	return (error);
4606}
4607
4608void
4609e1000_free_dev_spec_struct(struct e1000_hw *hw)
4610{
4611	if (hw->dev_spec != NULL)
4612		free(hw->dev_spec, M_DEVBUF);
4613	return;
4614}
4615
4616/*
4617 * Enable PCI Wake On Lan capability
4618 */
4619void
4620em_enable_wakeup(device_t dev)
4621{
4622	u16     cap, status;
4623	u8      id;
4624
4625	/* First find the capabilities pointer*/
4626	cap = pci_read_config(dev, PCIR_CAP_PTR, 2);
4627	/* Read the PM Capabilities */
4628	id = pci_read_config(dev, cap, 1);
4629	if (id != PCIY_PMG)     /* Something wrong */
4630		return;
4631	/* OK, we have the power capabilities, so
4632	   now get the status register */
4633	cap += PCIR_POWER_STATUS;
4634	status = pci_read_config(dev, cap, 2);
4635	status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE;
4636	pci_write_config(dev, cap, status, 2);
4637	return;
4638}
4639
4640
4641/*********************************************************************
4642* 82544 Coexistence issue workaround.
4643*    There are 2 issues.
4644*       1. Transmit Hang issue.
4645*    To detect this issue, following equation can be used...
4646*	  SIZE[3:0] + ADDR[2:0] = SUM[3:0].
4647*	  If SUM[3:0] is in between 1 to 4, we will have this issue.
4648*
4649*       2. DAC issue.
4650*    To detect this issue, following equation can be used...
4651*	  SIZE[3:0] + ADDR[2:0] = SUM[3:0].
4652*	  If SUM[3:0] is in between 9 to c, we will have this issue.
4653*
4654*
4655*    WORKAROUND:
4656*	  Make sure we do not have ending address
4657*	  as 1,2,3,4(Hang) or 9,a,b,c (DAC)
4658*
4659*************************************************************************/
4660static uint32_t
4661em_fill_descriptors (bus_addr_t address, uint32_t length,
4662		PDESC_ARRAY desc_array)
4663{
4664	/* Since issue is sensitive to length and address.*/
4665	/* Let us first check the address...*/
4666	uint32_t safe_terminator;
4667	if (length <= 4) {
4668		desc_array->descriptor[0].address = address;
4669		desc_array->descriptor[0].length = length;
4670		desc_array->elements = 1;
4671		return (desc_array->elements);
4672	}
4673	safe_terminator = (uint32_t)((((uint32_t)address & 0x7) +
4674	    (length & 0xF)) & 0xF);
4675	/* if it does not fall between 0x1 to 0x4 and 0x9 to 0xC then return */
4676	if (safe_terminator == 0   ||
4677	(safe_terminator > 4   &&
4678	safe_terminator < 9)   ||
4679	(safe_terminator > 0xC &&
4680	safe_terminator <= 0xF)) {
4681		desc_array->descriptor[0].address = address;
4682		desc_array->descriptor[0].length = length;
4683		desc_array->elements = 1;
4684		return (desc_array->elements);
4685	}
4686
4687	desc_array->descriptor[0].address = address;
4688	desc_array->descriptor[0].length = length - 4;
4689	desc_array->descriptor[1].address = address + (length - 4);
4690	desc_array->descriptor[1].length = 4;
4691	desc_array->elements = 2;
4692	return (desc_array->elements);
4693}
4694
4695/**********************************************************************
4696 *
4697 *  Update the board statistics counters.
4698 *
4699 **********************************************************************/
static void
em_update_stats_counters(struct adapter *adapter)
{
	struct ifnet   *ifp;

	/*
	 * Symbol/sequence error counters are only read on copper,
	 * or on fiber/serdes when link is up.
	 */
	if(adapter->hw.media_type == e1000_media_type_copper ||
	   (E1000_READ_REG(&adapter->hw, E1000_STATUS) & E1000_STATUS_LU)) {
		adapter->stats.symerrs += E1000_READ_REG(&adapter->hw, E1000_SYMERRS);
		adapter->stats.sec += E1000_READ_REG(&adapter->hw, E1000_SEC);
	}
	/* Error and flow-control counters */
	adapter->stats.crcerrs += E1000_READ_REG(&adapter->hw, E1000_CRCERRS);
	adapter->stats.mpc += E1000_READ_REG(&adapter->hw, E1000_MPC);
	adapter->stats.scc += E1000_READ_REG(&adapter->hw, E1000_SCC);
	adapter->stats.ecol += E1000_READ_REG(&adapter->hw, E1000_ECOL);

	adapter->stats.mcc += E1000_READ_REG(&adapter->hw, E1000_MCC);
	adapter->stats.latecol += E1000_READ_REG(&adapter->hw, E1000_LATECOL);
	adapter->stats.colc += E1000_READ_REG(&adapter->hw, E1000_COLC);
	adapter->stats.dc += E1000_READ_REG(&adapter->hw, E1000_DC);
	adapter->stats.rlec += E1000_READ_REG(&adapter->hw, E1000_RLEC);
	adapter->stats.xonrxc += E1000_READ_REG(&adapter->hw, E1000_XONRXC);
	adapter->stats.xontxc += E1000_READ_REG(&adapter->hw, E1000_XONTXC);
	adapter->stats.xoffrxc += E1000_READ_REG(&adapter->hw, E1000_XOFFRXC);
	adapter->stats.xofftxc += E1000_READ_REG(&adapter->hw, E1000_XOFFTXC);
	adapter->stats.fcruc += E1000_READ_REG(&adapter->hw, E1000_FCRUC);
	/* Packets-received-by-size histogram */
	adapter->stats.prc64 += E1000_READ_REG(&adapter->hw, E1000_PRC64);
	adapter->stats.prc127 += E1000_READ_REG(&adapter->hw, E1000_PRC127);
	adapter->stats.prc255 += E1000_READ_REG(&adapter->hw, E1000_PRC255);
	adapter->stats.prc511 += E1000_READ_REG(&adapter->hw, E1000_PRC511);
	adapter->stats.prc1023 += E1000_READ_REG(&adapter->hw, E1000_PRC1023);
	adapter->stats.prc1522 += E1000_READ_REG(&adapter->hw, E1000_PRC1522);
	adapter->stats.gprc += E1000_READ_REG(&adapter->hw, E1000_GPRC);
	adapter->stats.bprc += E1000_READ_REG(&adapter->hw, E1000_BPRC);
	adapter->stats.mprc += E1000_READ_REG(&adapter->hw, E1000_MPRC);
	adapter->stats.gptc += E1000_READ_REG(&adapter->hw, E1000_GPTC);

	/* For the 64-bit byte counters the low dword must be read first. */
	/* Both registers clear on the read of the high dword */

	adapter->stats.gorcl += E1000_READ_REG(&adapter->hw, E1000_GORCL);
	adapter->stats.gorch += E1000_READ_REG(&adapter->hw, E1000_GORCH);
	adapter->stats.gotcl += E1000_READ_REG(&adapter->hw, E1000_GOTCL);
	adapter->stats.gotch += E1000_READ_REG(&adapter->hw, E1000_GOTCH);

	adapter->stats.rnbc += E1000_READ_REG(&adapter->hw, E1000_RNBC);
	adapter->stats.ruc += E1000_READ_REG(&adapter->hw, E1000_RUC);
	adapter->stats.rfc += E1000_READ_REG(&adapter->hw, E1000_RFC);
	adapter->stats.roc += E1000_READ_REG(&adapter->hw, E1000_ROC);
	adapter->stats.rjc += E1000_READ_REG(&adapter->hw, E1000_RJC);

	/* Total octets received/transmitted (64-bit pairs) */
	adapter->stats.torl += E1000_READ_REG(&adapter->hw, E1000_TORL);
	adapter->stats.torh += E1000_READ_REG(&adapter->hw, E1000_TORH);
	adapter->stats.totl += E1000_READ_REG(&adapter->hw, E1000_TOTL);
	adapter->stats.toth += E1000_READ_REG(&adapter->hw, E1000_TOTH);

	/* Packets-transmitted-by-size histogram */
	adapter->stats.tpr += E1000_READ_REG(&adapter->hw, E1000_TPR);
	adapter->stats.tpt += E1000_READ_REG(&adapter->hw, E1000_TPT);
	adapter->stats.ptc64 += E1000_READ_REG(&adapter->hw, E1000_PTC64);
	adapter->stats.ptc127 += E1000_READ_REG(&adapter->hw, E1000_PTC127);
	adapter->stats.ptc255 += E1000_READ_REG(&adapter->hw, E1000_PTC255);
	adapter->stats.ptc511 += E1000_READ_REG(&adapter->hw, E1000_PTC511);
	adapter->stats.ptc1023 += E1000_READ_REG(&adapter->hw, E1000_PTC1023);
	adapter->stats.ptc1522 += E1000_READ_REG(&adapter->hw, E1000_PTC1522);
	adapter->stats.mptc += E1000_READ_REG(&adapter->hw, E1000_MPTC);
	adapter->stats.bptc += E1000_READ_REG(&adapter->hw, E1000_BPTC);

	/* Counters that only exist on 82543 and newer MACs */
	if (adapter->hw.mac.type >= e1000_82543) {
		adapter->stats.algnerrc +=
		E1000_READ_REG(&adapter->hw, E1000_ALGNERRC);
		adapter->stats.rxerrc +=
		E1000_READ_REG(&adapter->hw, E1000_RXERRC);
		adapter->stats.tncrs +=
		E1000_READ_REG(&adapter->hw, E1000_TNCRS);
		adapter->stats.cexterr +=
		E1000_READ_REG(&adapter->hw, E1000_CEXTERR);
		adapter->stats.tsctc +=
		E1000_READ_REG(&adapter->hw, E1000_TSCTC);
		adapter->stats.tsctfc +=
		E1000_READ_REG(&adapter->hw, E1000_TSCTFC);
	}
	ifp = adapter->ifp;

	/* Fold the hardware counters into the ifnet statistics */
	ifp->if_collisions = adapter->stats.colc;

	/* Rx Errors */
	ifp->if_ierrors = adapter->dropped_pkts + adapter->stats.rxerrc +
	    adapter->stats.crcerrs + adapter->stats.algnerrc +
	    adapter->stats.ruc + adapter->stats.roc +
	    adapter->stats.mpc + adapter->stats.cexterr;

	/* Tx Errors */
	ifp->if_oerrors = adapter->stats.ecol +
	    adapter->stats.latecol + adapter->watchdog_events;
}
4794
4795
4796/**********************************************************************
4797 *
4798 *  This routine is called only when em_display_debug_stats is enabled.
4799 *  This routine provides a way to take a look at important statistics
4800 *  maintained by the driver and hardware.
4801 *
4802 **********************************************************************/
4803static void
4804em_print_debug_info(struct adapter *adapter)
4805{
4806	device_t dev = adapter->dev;
4807	uint8_t *hw_addr = adapter->hw.hw_addr;
4808
4809	device_printf(dev, "Adapter hardware address = %p \n", hw_addr);
4810	device_printf(dev, "CTRL = 0x%x RCTL = 0x%x \n",
4811	    E1000_READ_REG(&adapter->hw, E1000_CTRL),
4812	    E1000_READ_REG(&adapter->hw, E1000_RCTL));
4813	device_printf(dev, "Packet buffer = Tx=%dk Rx=%dk \n",
4814	    ((E1000_READ_REG(&adapter->hw, E1000_PBA) & 0xffff0000) >> 16),\
4815	    (E1000_READ_REG(&adapter->hw, E1000_PBA) & 0xffff) );
4816	device_printf(dev, "Flow control watermarks high = %d low = %d\n",
4817	    adapter->hw.mac.fc_high_water,
4818	    adapter->hw.mac.fc_low_water);
4819	device_printf(dev, "tx_int_delay = %d, tx_abs_int_delay = %d\n",
4820	    E1000_READ_REG(&adapter->hw, E1000_TIDV),
4821	    E1000_READ_REG(&adapter->hw, E1000_TADV));
4822	device_printf(dev, "rx_int_delay = %d, rx_abs_int_delay = %d\n",
4823	    E1000_READ_REG(&adapter->hw, E1000_RDTR),
4824	    E1000_READ_REG(&adapter->hw, E1000_RADV));
4825	device_printf(dev, "fifo workaround = %lld, fifo_reset_count = %lld\n",
4826	    (long long)adapter->tx_fifo_wrk_cnt,
4827	    (long long)adapter->tx_fifo_reset_cnt);
4828	device_printf(dev, "hw tdh = %d, hw tdt = %d\n",
4829	    E1000_READ_REG(&adapter->hw, E1000_TDH),
4830	    E1000_READ_REG(&adapter->hw, E1000_TDT));
4831	device_printf(dev, "hw rdh = %d, hw rdt = %d\n",
4832	    E1000_READ_REG(&adapter->hw, E1000_RDH),
4833	    E1000_READ_REG(&adapter->hw, E1000_RDT));
4834	device_printf(dev, "Num Tx descriptors avail = %d\n",
4835	    adapter->num_tx_desc_avail);
4836	device_printf(dev, "Tx Descriptors not avail1 = %ld\n",
4837	    adapter->no_tx_desc_avail1);
4838	device_printf(dev, "Tx Descriptors not avail2 = %ld\n",
4839	    adapter->no_tx_desc_avail2);
4840	device_printf(dev, "Std mbuf failed = %ld\n",
4841	    adapter->mbuf_alloc_failed);
4842	device_printf(dev, "Std mbuf cluster failed = %ld\n",
4843	    adapter->mbuf_cluster_failed);
4844	device_printf(dev, "Driver dropped packets = %ld\n",
4845	    adapter->dropped_pkts);
4846	device_printf(dev, "Driver tx dma failure in encap = %ld\n",
4847		adapter->no_tx_dma_setup);
4848}
4849
4850static void
4851em_print_hw_stats(struct adapter *adapter)
4852{
4853	device_t dev = adapter->dev;
4854
4855	device_printf(dev, "Excessive collisions = %lld\n",
4856	    (long long)adapter->stats.ecol);
4857#if	(DEBUG_HW > 0)  /* Dont output these errors normally */
4858	device_printf(dev, "Symbol errors = %lld\n",
4859	    (long long)adapter->stats.symerrs);
4860#endif
4861	device_printf(dev, "Sequence errors = %lld\n",
4862	    (long long)adapter->stats.sec);
4863	device_printf(dev, "Defer count = %lld\n",
4864	    (long long)adapter->stats.dc);
4865	device_printf(dev, "Missed Packets = %lld\n",
4866	    (long long)adapter->stats.mpc);
4867	device_printf(dev, "Receive No Buffers = %lld\n",
4868	    (long long)adapter->stats.rnbc);
4869	/* RLEC is inaccurate on some hardware, calculate our own. */
4870	device_printf(dev, "Receive Length Errors = %lld\n",
4871	    ((long long)adapter->stats.roc + (long long)adapter->stats.ruc));
4872	device_printf(dev, "Receive errors = %lld\n",
4873	    (long long)adapter->stats.rxerrc);
4874	device_printf(dev, "Crc errors = %lld\n",
4875	    (long long)adapter->stats.crcerrs);
4876	device_printf(dev, "Alignment errors = %lld\n",
4877	    (long long)adapter->stats.algnerrc);
4878	device_printf(dev, "Carrier extension errors = %lld\n",
4879	    (long long)adapter->stats.cexterr);
4880	device_printf(dev, "RX overruns = %ld\n", adapter->rx_overruns);
4881	device_printf(dev, "watchdog timeouts = %ld\n",
4882	    adapter->watchdog_events);
4883	device_printf(dev, "XON Rcvd = %lld\n",
4884	    (long long)adapter->stats.xonrxc);
4885	device_printf(dev, "XON Xmtd = %lld\n",
4886	    (long long)adapter->stats.xontxc);
4887	device_printf(dev, "XOFF Rcvd = %lld\n",
4888	    (long long)adapter->stats.xoffrxc);
4889	device_printf(dev, "XOFF Xmtd = %lld\n",
4890	    (long long)adapter->stats.xofftxc);
4891	device_printf(dev, "Good Packets Rcvd = %lld\n",
4892	    (long long)adapter->stats.gprc);
4893	device_printf(dev, "Good Packets Xmtd = %lld\n",
4894	    (long long)adapter->stats.gptc);
4895	device_printf(dev, "TSO Contexts Xmtd = %lld\n",
4896	    (long long)adapter->stats.tsctc);
4897	device_printf(dev, "TSO Contexts Failed = %lld\n",
4898	    (long long)adapter->stats.tsctfc);
4899}
4900
4901static int
4902em_sysctl_debug_info(SYSCTL_HANDLER_ARGS)
4903{
4904	struct adapter *adapter;
4905	int error;
4906	int result;
4907
4908	result = -1;
4909	error = sysctl_handle_int(oidp, &result, 0, req);
4910
4911	if (error || !req->newptr)
4912		return (error);
4913
4914	if (result == 1) {
4915		adapter = (struct adapter *)arg1;
4916		em_print_debug_info(adapter);
4917	}
4918
4919	return (error);
4920}
4921
4922
4923static int
4924em_sysctl_stats(SYSCTL_HANDLER_ARGS)
4925{
4926	struct adapter *adapter;
4927	int error;
4928	int result;
4929
4930	result = -1;
4931	error = sysctl_handle_int(oidp, &result, 0, req);
4932
4933	if (error || !req->newptr)
4934		return (error);
4935
4936	if (result == 1) {
4937		adapter = (struct adapter *)arg1;
4938		em_print_hw_stats(adapter);
4939	}
4940
4941	return (error);
4942}
4943
/*
 * Sysctl handler for the interrupt-delay tunables.  Converts the
 * user-supplied microsecond value into device ticks and performs a
 * read-modify-write of the low 16 bits of the delay register named
 * by info->offset, under the adapter lock.
 */
static int
em_sysctl_int_delay(SYSCTL_HANDLER_ARGS)
{
	struct em_int_delay_info *info;
	struct adapter *adapter;
	uint32_t regval;
	int error;
	int usecs;
	int ticks;

	info = (struct em_int_delay_info *)arg1;
	usecs = info->value;
	/* Report the current value; on a write, usecs receives the new one. */
	error = sysctl_handle_int(oidp, &usecs, 0, req);
	if (error != 0 || req->newptr == NULL)
		return (error);
	/* The delay field is 16 bits wide, so cap at 65535 ticks. */
	if (usecs < 0 || usecs > EM_TICKS_TO_USECS(65535))
		return (EINVAL);
	info->value = usecs;
	ticks = EM_USECS_TO_TICKS(usecs);

	adapter = info->adapter;

	EM_LOCK(adapter);
	/* Preserve the upper register bits; only the delay field changes. */
	regval = E1000_READ_OFFSET(&adapter->hw, info->offset);
	regval = (regval & ~0xffff) | (ticks & 0xffff);
	/* Handle a few special cases. */
	switch (info->offset) {
	case E1000_RDTR:
		break;
	case E1000_TIDV:
		if (ticks == 0) {
			/* Delay of 0 disables the IDE bit on descriptors. */
			adapter->txd_cmd &= ~E1000_TXD_CMD_IDE;
			/* Don't write 0 into the TIDV register. */
			regval++;
		} else
			if (adapter->hw.mac.type != e1000_82575)
				adapter->txd_cmd |= E1000_TXD_CMD_IDE;
		break;
	}
	E1000_WRITE_OFFSET(&adapter->hw, info->offset, regval);
	EM_UNLOCK(adapter);
	return (0);
}
4987
4988static void
4989em_add_int_delay_sysctl(struct adapter *adapter, const char *name,
4990	const char *description, struct em_int_delay_info *info,
4991	int offset, int value)
4992{
4993	info->adapter = adapter;
4994	info->offset = offset;
4995	info->value = value;
4996	SYSCTL_ADD_PROC(device_get_sysctl_ctx(adapter->dev),
4997	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
4998	    OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW,
4999	    info, 0, em_sysctl_int_delay, "I", description);
5000}
5001
5002#ifndef DEVICE_POLLING
5003static void
5004em_add_rx_process_limit(struct adapter *adapter, const char *name,
5005	const char *description, int *limit, int value)
5006{
5007	*limit = value;
5008	SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
5009	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
5010	    OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW, limit, value, description);
5011}
5012#endif
5013