/******************************************************************************

  Copyright (c) 2001-2008, Intel Corporation
  All rights reserved.

  Redistribution and use in source and binary forms, with or without
  modification, are permitted provided that the following conditions are met:

   1. Redistributions of source code must retain the above copyright notice,
      this list of conditions and the following disclaimer.

   2. Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.

   3. Neither the name of the Intel Corporation nor the names of its
      contributors may be used to endorse or promote products derived from
      this software without specific prior written permission.

  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  POSSIBILITY OF SUCH DAMAGE.

******************************************************************************/
/*$FreeBSD: head/sys/dev/igb/if_igb.c 176679 2008-03-01 03:25:33Z jfv $*/

#ifdef HAVE_KERNEL_OPTION_HEADERS
#include "opt_device_polling.h"
#endif

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/bus.h>
#include <sys/endian.h>
#include <sys/kernel.h>
#include <sys/kthread.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/module.h>
#include <sys/rman.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/sysctl.h>
#include <sys/taskqueue.h>
#include <sys/pcpu.h>
#include <machine/bus.h>
#include <machine/resource.h>

#include <net/bpf.h>
#include <net/ethernet.h>
#include <net/if.h>
#include <net/if_arp.h>
#include <net/if_dl.h>
#include <net/if_media.h>

#include <net/if_types.h>
#include <net/if_vlan_var.h>

#include <netinet/in_systm.h>
#include <netinet/in.h>
#include <netinet/if_ether.h>
#include <netinet/ip.h>
#include <netinet/ip6.h>
#include <netinet/tcp.h>
#include <netinet/udp.h>

#include <machine/in_cksum.h>
#include <dev/pci/pcivar.h>
#include <dev/pci/pcireg.h>

#include "e1000_api.h"
#include "e1000_82575.h"
#include "if_igb.h"

/*********************************************************************
 *  Set this to one to display debug statistics
 *********************************************************************/
int	igb_display_debug_stats = 0;

/*********************************************************************
 *  Driver version:
 *********************************************************************/
char igb_driver_version[] = "1.1.4";


/*********************************************************************
 *  PCI Device ID Table
 *
 *  Used by probe to select devices to load on
 *  Last field stores an index into igb_strings
 *  Last entry must be all 0s
 *
 *  { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
 *********************************************************************/

static igb_vendor_info_t igb_vendor_info_array[] =
{
	{ 0x8086, E1000_DEV_ID_82575EB_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82575EB_FIBER_SERDES,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82575GB_QUAD_COPPER,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	/* required last entry */
	{ 0, 0, 0, 0, 0}
};

/*********************************************************************
 *  Table of branding strings for all supported NICs.
 *********************************************************************/

static char *igb_strings[] = {
	"Intel(R) PRO/1000 Network Connection"
};

/*********************************************************************
 *  Function prototypes
 *********************************************************************/
static int	igb_probe(device_t);
static int	igb_attach(device_t);
static int	igb_detach(device_t);
static int	igb_shutdown(device_t);
static int	igb_suspend(device_t);
static int	igb_resume(device_t);
static void	igb_start(struct ifnet *);
static void	igb_start_locked(struct tx_ring *, struct ifnet *ifp);
static int	igb_ioctl(struct ifnet *, u_long, caddr_t);
static void	igb_watchdog(struct adapter *);
static void	igb_init(void *);
static void	igb_init_locked(struct adapter *);
static void	igb_stop(void *);
static void	igb_media_status(struct ifnet *, struct ifmediareq *);
static int	igb_media_change(struct ifnet *);
static void	igb_identify_hardware(struct adapter *);
static int	igb_allocate_pci_resources(struct adapter *);
static int	igb_allocate_msix(struct adapter *);
static int	igb_allocate_legacy(struct adapter *);
static int	igb_setup_msix(struct adapter *);
static void	igb_free_pci_resources(struct adapter *);
static void	igb_local_timer(void *);
static int	igb_hardware_init(struct adapter *);
static void	igb_setup_interface(device_t, struct adapter *);
static int	igb_allocate_queues(struct adapter *);
static void	igb_configure_queues(struct adapter *);

static int	igb_allocate_transmit_buffers(struct tx_ring *);
static void	igb_setup_transmit_structures(struct adapter *);
static void	igb_setup_transmit_ring(struct tx_ring *);
static void	igb_initialize_transmit_units(struct adapter *);
static void	igb_free_transmit_structures(struct adapter *);
static void	igb_free_transmit_buffers(struct tx_ring *);

static int	igb_allocate_receive_buffers(struct rx_ring *);
static int	igb_setup_receive_structures(struct adapter *);
static int	igb_setup_receive_ring(struct rx_ring *);
static void	igb_initialize_receive_units(struct adapter *);
static void	igb_free_receive_structures(struct adapter *);
static void	igb_free_receive_buffers(struct rx_ring *);

static void	igb_enable_intr(struct adapter *);
static void	igb_disable_intr(struct adapter *);
static void	igb_update_stats_counters(struct adapter *);
static bool	igb_txeof(struct tx_ring *);
static bool	igb_rxeof(struct rx_ring *, int);
#ifndef __NO_STRICT_ALIGNMENT
static int	igb_fixup_rx(struct rx_ring *);
#endif
static void	igb_rx_checksum(u32, struct mbuf *);
static bool	igb_tx_ctx_setup(struct tx_ring *, struct mbuf *);
static bool	igb_tso_setup(struct tx_ring *, struct mbuf *, u32 *);
static void	igb_set_promisc(struct adapter *);
static void	igb_disable_promisc(struct adapter *);
static void	igb_set_multi(struct adapter *);
static void	igb_print_hw_stats(struct adapter *);
static void	igb_update_link_status(struct adapter *);
static int	igb_get_buf(struct rx_ring *, int);
static void	igb_enable_hw_vlans(struct adapter *);
static int	igb_xmit(struct tx_ring *, struct mbuf **);
static int	igb_dma_malloc(struct adapter *, bus_size_t,
		    struct igb_dma_alloc *, int);
static void	igb_dma_free(struct adapter *, struct igb_dma_alloc *);
static void	igb_print_debug_info(struct adapter *);
static void	igb_print_nvm_info(struct adapter *);
static int 	igb_is_valid_ether_addr(u8 *);
static int	igb_sysctl_stats(SYSCTL_HANDLER_ARGS);
static int	igb_sysctl_debug_info(SYSCTL_HANDLER_ARGS);
static int	igb_sysctl_int_delay(SYSCTL_HANDLER_ARGS);
static void	igb_add_int_delay_sysctl(struct adapter *, const char *,
		    const char *, struct igb_int_delay_info *, int, int);
/* Management and WOL Support */
static void	igb_init_manageability(struct adapter *);
static void	igb_release_manageability(struct adapter *);
static void     igb_get_hw_control(struct adapter *);
static void     igb_release_hw_control(struct adapter *);
static void     igb_enable_wakeup(device_t);


static int	igb_irq_fast(void *);
static void	igb_add_rx_process_limit(struct adapter *, const char *,
		    const char *, int *, int);
static void	igb_handle_rxtx(void *context, int pending);
static void	igb_handle_tx(void *context, int pending);
static void	igb_handle_rx(void *context, int pending);
static void	igb_handle_link(void *context, int pending);

/* These are MSIX only irq handlers */
static void	igb_msix_rx(void *);
static void	igb_msix_tx(void *);
static void	igb_msix_link(void *);

#ifdef DEVICE_POLLING
static poll_handler_t igb_poll;
#endif

/*********************************************************************
 *  FreeBSD Device Interface Entry Points
 *********************************************************************/

static device_method_t igb_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe, igb_probe),
	DEVMETHOD(device_attach, igb_attach),
	DEVMETHOD(device_detach, igb_detach),
	DEVMETHOD(device_shutdown, igb_shutdown),
	DEVMETHOD(device_suspend, igb_suspend),
	DEVMETHOD(device_resume, igb_resume),
	{0, 0}
};

static driver_t igb_driver = {
	"igb", igb_methods, sizeof(struct adapter),
};

static devclass_t igb_devclass;
DRIVER_MODULE(igb, pci, igb_driver, igb_devclass, 0, 0);
MODULE_DEPEND(igb, pci, 1, 1, 1);
MODULE_DEPEND(igb, ether, 1, 1, 1);

/*********************************************************************
 *  Tunable default values.
 *********************************************************************/

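/*
 * The interrupt-delay registers count in units of 1.024 usecs, so these
 * macros convert between microseconds and register ticks, rounding to
 * the nearest value.
 */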
#define IGB_TICKS_TO_USECS(ticks)	((1024 * (ticks) + 500) / 1000)
#define IGB_USECS_TO_TICKS(usecs)	((1000 * (usecs) + 512) / 1024)
#define M_TSO_LEN			66

/* Allow common code without TSO */
#ifndef CSUM_TSO
#define CSUM_TSO	0
#endif

static int igb_tx_int_delay_dflt = IGB_TICKS_TO_USECS(IGB_TIDV);
static int igb_rx_int_delay_dflt = IGB_TICKS_TO_USECS(IGB_RDTR);
static int igb_tx_abs_int_delay_dflt = IGB_TICKS_TO_USECS(IGB_TADV);
static int igb_rx_abs_int_delay_dflt = IGB_TICKS_TO_USECS(IGB_RADV);
static int igb_rxd = IGB_DEFAULT_RXD;
static int igb_txd = IGB_DEFAULT_TXD;
static int igb_smart_pwr_down = FALSE;
TUNABLE_INT("hw.igb.tx_int_delay", &igb_tx_int_delay_dflt);
TUNABLE_INT("hw.igb.rx_int_delay", &igb_rx_int_delay_dflt);
TUNABLE_INT("hw.igb.tx_abs_int_delay", &igb_tx_abs_int_delay_dflt);
TUNABLE_INT("hw.igb.rx_abs_int_delay", &igb_rx_abs_int_delay_dflt);
TUNABLE_INT("hw.igb.rxd", &igb_rxd);
TUNABLE_INT("hw.igb.txd", &igb_txd);
TUNABLE_INT("hw.igb.smart_pwr_down", &igb_smart_pwr_down);

/* These auto configure if set to 0, based on number of cpus */
extern int mp_ncpus;
static int igb_tx_queues = 1;
static int igb_rx_queues = 1;
TUNABLE_INT("hw.igb.tx_queues", &igb_tx_queues);
TUNABLE_INT("hw.igb.rx_queues", &igb_rx_queues);

/* How many packets rxeof tries to clean at a time */
static int igb_rx_process_limit = 100;
TUNABLE_INT("hw.igb.rx_process_limit", &igb_rx_process_limit);

/*********************************************************************
 *  Device identification routine
 *
 *  igb_probe determines if the driver should be loaded on the
 *  adapter, based on the PCI vendor/device ID of the adapter.
 *
 *  return BUS_PROBE_DEFAULT on success, positive on failure
 *********************************************************************/

static int
igb_probe(device_t dev)
{
	char		adapter_name[60];
	uint16_t	pci_vendor_id = 0;
	uint16_t	pci_device_id = 0;
	uint16_t	pci_subvendor_id = 0;
	uint16_t	pci_subdevice_id = 0;
	igb_vendor_info_t *ent;

	INIT_DEBUGOUT("igb_probe: begin");

	pci_vendor_id = pci_get_vendor(dev);
	if (pci_vendor_id != IGB_VENDOR_ID)
		return (ENXIO);

	pci_device_id = pci_get_device(dev);
	pci_subvendor_id = pci_get_subvendor(dev);
	pci_subdevice_id = pci_get_subdevice(dev);

	ent = igb_vendor_info_array;
	while (ent->vendor_id != 0) {
		if ((pci_vendor_id == ent->vendor_id) &&
		    (pci_device_id == ent->device_id) &&

		    ((pci_subvendor_id == ent->subvendor_id) ||
		    (ent->subvendor_id == PCI_ANY_ID)) &&

		    ((pci_subdevice_id == ent->subdevice_id) ||
		    (ent->subdevice_id == PCI_ANY_ID))) {
			sprintf(adapter_name, "%s %s",
				igb_strings[ent->index],
				igb_driver_version);
			device_set_desc_copy(dev, adapter_name);
			return (BUS_PROBE_DEFAULT);
		}
		ent++;
	}

	return (ENXIO);
}

/*********************************************************************
 *  Device initialization routine
 *
 *  The attach entry point is called when the driver is being loaded.
 *  This routine identifies the type of hardware, allocates all resources
 *  and initializes the hardware.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/

static int
igb_attach(device_t dev)
{
	struct adapter	*adapter;
	int		error = 0;
	u16		eeprom_data;

	INIT_DEBUGOUT("igb_attach: begin");

	adapter = device_get_softc(dev);
	adapter->dev = adapter->osdep.dev = dev;
	IGB_CORE_LOCK_INIT(adapter, device_get_nameunit(dev));

	/* SYSCTL stuff */
	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
	    OID_AUTO, "debug", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
	    igb_sysctl_debug_info, "I", "Debug Information");

	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
	    OID_AUTO, "stats", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
	    igb_sysctl_stats, "I", "Statistics");

	callout_init_mtx(&adapter->timer, &adapter->core_mtx, 0);

	/* Determine hardware and mac info */
	igb_identify_hardware(adapter);

	/* Setup PCI resources */
	if (igb_allocate_pci_resources(adapter)) {
		device_printf(dev, "Allocation of PCI resources failed\n");
		error = ENXIO;
		goto err_pci;
	}

	/* Do Shared Code initialization */
	if (e1000_setup_init_funcs(&adapter->hw, TRUE)) {
		device_printf(dev, "Setup of Shared code failed\n");
		error = ENXIO;
		goto err_pci;
	}

	e1000_get_bus_info(&adapter->hw);

	/* Set up some sysctls for the tunable interrupt delays */
	igb_add_int_delay_sysctl(adapter, "rx_int_delay",
	    "receive interrupt delay in usecs", &adapter->rx_int_delay,
	    E1000_REGISTER(&adapter->hw, E1000_RDTR), igb_rx_int_delay_dflt);
	igb_add_int_delay_sysctl(adapter, "tx_int_delay",
	    "transmit interrupt delay in usecs", &adapter->tx_int_delay,
	    E1000_REGISTER(&adapter->hw, E1000_TIDV), igb_tx_int_delay_dflt);
	igb_add_int_delay_sysctl(adapter, "rx_abs_int_delay",
	    "receive interrupt delay limit in usecs",
	    &adapter->rx_abs_int_delay,
	    E1000_REGISTER(&adapter->hw, E1000_RADV),
	    igb_rx_abs_int_delay_dflt);
	igb_add_int_delay_sysctl(adapter, "tx_abs_int_delay",
	    "transmit interrupt delay limit in usecs",
	    &adapter->tx_abs_int_delay,
	    E1000_REGISTER(&adapter->hw, E1000_TADV),
	    igb_tx_abs_int_delay_dflt);

	/* Sysctls for limiting the amount of work done in the taskqueue */
	igb_add_rx_process_limit(adapter, "rx_processing_limit",
	    "max number of rx packets to process", &adapter->rx_process_limit,
	    igb_rx_process_limit);

	/*
	 * Validate the number of transmit and receive descriptors. It
	 * must not exceed the hardware maximum and must be a multiple
	 * of IGB_DBA_ALIGN.
	 */
	if (((igb_txd * sizeof(struct e1000_tx_desc)) % IGB_DBA_ALIGN) != 0 ||
	    (igb_txd > IGB_MAX_TXD) || (igb_txd < IGB_MIN_TXD)) {
		device_printf(dev, "Using %d TX descriptors instead of %d!\n",
		    IGB_DEFAULT_TXD, igb_txd);
		adapter->num_tx_desc = IGB_DEFAULT_TXD;
	} else
		adapter->num_tx_desc = igb_txd;
	if (((igb_rxd * sizeof(struct e1000_rx_desc)) % IGB_DBA_ALIGN) != 0 ||
	    (igb_rxd > IGB_MAX_RXD) || (igb_rxd < IGB_MIN_RXD)) {
		device_printf(dev, "Using %d RX descriptors instead of %d!\n",
		    IGB_DEFAULT_RXD, igb_rxd);
		adapter->num_rx_desc = IGB_DEFAULT_RXD;
	} else
		adapter->num_rx_desc = igb_rxd;

	adapter->hw.mac.autoneg = DO_AUTO_NEG;
	adapter->hw.phy.autoneg_wait_to_complete = FALSE;
	adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
	adapter->rx_buffer_len = 2048;

	/* Copper options */
	if (adapter->hw.phy.media_type == e1000_media_type_copper) {
		adapter->hw.phy.mdix = AUTO_ALL_MODES;
		adapter->hw.phy.disable_polarity_correction = FALSE;
		adapter->hw.phy.ms_type = IGB_MASTER_SLAVE;
	}

	/*
	 * Set the frame limits assuming
	 * standard ethernet sized frames.
	 */
	adapter->max_frame_size = ETHERMTU + ETHER_HDR_LEN + ETHERNET_FCS_SIZE;
	adapter->min_frame_size = ETH_ZLEN + ETHERNET_FCS_SIZE;

	/*
	 * This controls when hardware reports transmit completion
	 * status.
	 */
	adapter->hw.mac.report_tx_early = 1;

	/*
	** Allocate and Setup Queues
	*/
	if (igb_allocate_queues(adapter)) {
		error = ENOMEM;
		goto err_hw_init;
	}

	/* Make sure we have a good EEPROM before we read from it */
	if (e1000_validate_nvm_checksum(&adapter->hw) < 0) {
		/*
		** Some PCI-E parts fail the first check due to
		** the link being in sleep state; call it again,
		** and if it fails a second time it's a real issue.
		*/
		if (e1000_validate_nvm_checksum(&adapter->hw) < 0) {
			device_printf(dev,
			    "The EEPROM Checksum Is Not Valid\n");
			error = EIO;
			goto err_late;
		}
	}

	/* Initialize the hardware */
	if (igb_hardware_init(adapter)) {
		device_printf(dev, "Unable to initialize the hardware\n");
		error = EIO;
		goto err_late;
	}

	/* Copy the permanent MAC address out of the EEPROM */
	if (e1000_read_mac_addr(&adapter->hw) < 0) {
		device_printf(dev, "EEPROM read error while reading MAC"
		    " address\n");
		error = EIO;
		goto err_late;
	}

	if (!igb_is_valid_ether_addr(adapter->hw.mac.addr)) {
		device_printf(dev, "Invalid MAC address\n");
		error = EIO;
		goto err_late;
	}

	/*
	** Configure Interrupts
	*/
	if (adapter->msix > 1) /* MSIX */
		error = igb_allocate_msix(adapter);
	else /* MSI or Legacy */
		error = igb_allocate_legacy(adapter);
	if (error)
		goto err_late;

	/* Setup OS specific network interface */
	igb_setup_interface(dev, adapter);

	/* Initialize statistics */
	igb_update_stats_counters(adapter);

	adapter->hw.mac.get_link_status = 1;
	igb_update_link_status(adapter);

	/* Indicate SOL/IDER usage */
	if (e1000_check_reset_block(&adapter->hw))
		device_printf(dev,
		    "PHY reset is blocked due to SOL/IDER session.\n");

	/* Determine if we have to control management hardware */
	adapter->has_manage = e1000_enable_mng_pass_thru(&adapter->hw);

	/*
	 * Setup Wake-on-Lan
	 */
	/* APME bit in EEPROM is mapped to WUC.APME */
	eeprom_data = E1000_READ_REG(&adapter->hw, E1000_WUC) & E1000_WUC_APME;
	if (eeprom_data)
		adapter->wol = E1000_WUFC_MAG;

	/* Tell the stack that the interface is not active */
	adapter->ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);

	INIT_DEBUGOUT("igb_attach: end");

	return (0);

err_late:
	igb_free_transmit_structures(adapter);
	igb_free_receive_structures(adapter);
	igb_release_hw_control(adapter);
err_hw_init:
	e1000_remove_device(&adapter->hw);
err_pci:
	igb_free_pci_resources(adapter);
	IGB_CORE_LOCK_DESTROY(adapter);

	return (error);
}

/*********************************************************************
 *  Device removal routine
 *
 *  The detach entry point is called when the driver is being removed.
 *  This routine stops the adapter and deallocates all the resources
 *  that were allocated for driver operation.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/

static int
igb_detach(device_t dev)
{
	struct adapter	*adapter = device_get_softc(dev);
	struct ifnet	*ifp = adapter->ifp;

	INIT_DEBUGOUT("igb_detach: begin");

	/* Make sure VLANS are not using driver */
	if (adapter->ifp->if_vlantrunk != NULL) {
		device_printf(dev,"Vlan in use, detach first\n");
		return (EBUSY);
	}

#ifdef DEVICE_POLLING
	if (ifp->if_capenable & IFCAP_POLLING)
		ether_poll_deregister(ifp);
#endif

	IGB_CORE_LOCK(adapter);
	adapter->in_detach = 1;
	igb_stop(adapter);
	IGB_CORE_UNLOCK(adapter);

	e1000_phy_hw_reset(&adapter->hw);

	/* Give control back to firmware */
	igb_release_manageability(adapter);
	igb_release_hw_control(adapter);

	if (adapter->wol) {
		E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
		E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
		igb_enable_wakeup(dev);
	}

	ether_ifdetach(adapter->ifp);

	callout_drain(&adapter->timer);

	igb_free_pci_resources(adapter);
	bus_generic_detach(dev);
	if_free(ifp);

	e1000_remove_device(&adapter->hw);
	igb_free_transmit_structures(adapter);
	igb_free_receive_structures(adapter);

	IGB_CORE_LOCK_DESTROY(adapter);

	return (0);
}

/*********************************************************************
 *
 *  Shutdown entry point
 *
 **********************************************************************/

static int
igb_shutdown(device_t dev)
{
	return igb_suspend(dev);
}

/*
 * Suspend/resume device methods.
 */
static int
igb_suspend(device_t dev)
{
	struct adapter *adapter = device_get_softc(dev);

	IGB_CORE_LOCK(adapter);

	igb_stop(adapter);

        igb_release_manageability(adapter);
	igb_release_hw_control(adapter);

        if (adapter->wol) {
                E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
                E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
                igb_enable_wakeup(dev);
        }

	IGB_CORE_UNLOCK(adapter);

	return bus_generic_suspend(dev);
}

static int
igb_resume(device_t dev)
{
	struct adapter *adapter = device_get_softc(dev);
	struct ifnet *ifp = adapter->ifp;

	IGB_CORE_LOCK(adapter);
	igb_init_locked(adapter);
	igb_init_manageability(adapter);

	if ((ifp->if_flags & IFF_UP) &&
	    (ifp->if_drv_flags & IFF_DRV_RUNNING))
		igb_start(ifp);

	IGB_CORE_UNLOCK(adapter);

	return bus_generic_resume(dev);
}


/*********************************************************************
 *  Transmit entry point
 *
 *  igb_start is called by the stack to initiate a transmit.
 *  The driver will remain in this routine as long as there are
 *  packets to transmit and transmit resources are available.
 *  In case resources are not available stack is notified and
 *  the packet is requeued.
 **********************************************************************/

static void
igb_start_locked(struct tx_ring *txr, struct ifnet *ifp)
{
	struct adapter	*adapter = ifp->if_softc;
	struct mbuf	*m_head;

	IGB_TX_LOCK_ASSERT(txr);

	if ((ifp->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
	    IFF_DRV_RUNNING)
		return;
	if (!adapter->link_active)
		return;

	while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {

		IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
		if (m_head == NULL)
			break;
		/*
		 *  Encapsulation can modify our pointer, and or make it
		 *  NULL on failure.  In that event, we can't requeue.
		 */
		if (igb_xmit(txr, &m_head)) {
			if (m_head == NULL)
				break;
			ifp->if_drv_flags |= IFF_DRV_OACTIVE;
			IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
			break;
		}

		/* Send a copy of the frame to the BPF listener */
		ETHER_BPF_MTAP(ifp, m_head);

		/* Set timeout in case hardware has problems transmitting. */
		txr->watchdog_timer = IGB_TX_TIMEOUT;
	}
}

static void
igb_start(struct ifnet *ifp)
{
	struct adapter	*adapter = ifp->if_softc;
	struct tx_ring	*txr;
	u32		queue = 0;

	/*
	** This is really just here for testing
	** TX multiqueue, ultimately what is
	** needed is the flow support in the stack
	** and appropriate logic here to deal with
	** it. -jfv
	*/
	if (adapter->num_tx_queues > 1)
		queue = (curcpu % adapter->num_tx_queues);

	txr = &adapter->tx_rings[queue];
	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
		IGB_TX_LOCK(txr);
		igb_start_locked(txr, ifp);
		IGB_TX_UNLOCK(txr);
	}
}

/*********************************************************************
 *  Ioctl entry point
 *
 *  igb_ioctl is called when the user wants to configure the
 *  interface.
 *
 *  return 0 on success, positive on failure
 **********************************************************************/

static int
igb_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
{
	struct adapter	*adapter = ifp->if_softc;
	struct ifreq *ifr = (struct ifreq *)data;
	struct ifaddr *ifa = (struct ifaddr *)data;
	int error = 0;

	if (adapter->in_detach)
		return (error);

	switch (command) {
	case SIOCSIFADDR:
		if (ifa->ifa_addr->sa_family == AF_INET) {
			/*
			 * XXX
			 * Since resetting hardware takes a very long time
			 * and results in link renegotiation, we only
			 * initialize the hardware when it is absolutely
			 * required.
			 */
			ifp->if_flags |= IFF_UP;
			if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) {
				IGB_CORE_LOCK(adapter);
				igb_init_locked(adapter);
				IGB_CORE_UNLOCK(adapter);
			}
			arp_ifinit(ifp, ifa);
		} else
			error = ether_ioctl(ifp, command, data);
		break;
	case SIOCSIFMTU:
	    {
		int max_frame_size;

		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)");

		IGB_CORE_LOCK(adapter);
		max_frame_size = 9234;
		if (ifr->ifr_mtu > max_frame_size - ETHER_HDR_LEN -
		    ETHER_CRC_LEN) {
			IGB_CORE_UNLOCK(adapter);
			error = EINVAL;
			break;
		}

		ifp->if_mtu = ifr->ifr_mtu;
		adapter->max_frame_size =
		    ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
		igb_init_locked(adapter);
		IGB_CORE_UNLOCK(adapter);
		break;
	    }
	case SIOCSIFFLAGS:
		IOCTL_DEBUGOUT("ioctl rcv'd:\
		    SIOCSIFFLAGS (Set Interface Flags)");
		IGB_CORE_LOCK(adapter);
		if (ifp->if_flags & IFF_UP) {
			if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) {
				if ((ifp->if_flags ^ adapter->if_flags) &
				    IFF_PROMISC) {
					igb_disable_promisc(adapter);
					igb_set_promisc(adapter);
				}
			} else
				igb_init_locked(adapter);
		} else
			if (ifp->if_drv_flags & IFF_DRV_RUNNING)
				igb_stop(adapter);
		adapter->if_flags = ifp->if_flags;
		IGB_CORE_UNLOCK(adapter);
		break;
	case SIOCADDMULTI:
	case SIOCDELMULTI:
		IOCTL_DEBUGOUT("ioctl rcv'd: SIOC(ADD|DEL)MULTI");
		if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
			IGB_CORE_LOCK(adapter);
			igb_disable_intr(adapter);
			igb_set_multi(adapter);
#ifdef DEVICE_POLLING
			if (!(ifp->if_capenable & IFCAP_POLLING))
#endif
				igb_enable_intr(adapter);
			IGB_CORE_UNLOCK(adapter);
		}
		break;
	case SIOCSIFMEDIA:
		/* Check SOL/IDER usage */
		IGB_CORE_LOCK(adapter);
		if (e1000_check_reset_block(&adapter->hw)) {
			IGB_CORE_UNLOCK(adapter);
			device_printf(adapter->dev, "Media change is"
			    " blocked due to SOL/IDER session.\n");
			break;
		}
		IGB_CORE_UNLOCK(adapter);
	case SIOCGIFMEDIA:
		IOCTL_DEBUGOUT("ioctl rcv'd: \
		    SIOCxIFMEDIA (Get/Set Interface Media)");
		error = ifmedia_ioctl(ifp, ifr, &adapter->media, command);
		break;
	case SIOCSIFCAP:
	    {
		int mask, reinit;

		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFCAP (Set Capabilities)");
		reinit = 0;
		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
#ifdef DEVICE_POLLING
		if (mask & IFCAP_POLLING) {
			if (ifr->ifr_reqcap & IFCAP_POLLING) {
				error = ether_poll_register(igb_poll, ifp);
				if (error)
					return (error);
				IGB_CORE_LOCK(adapter);
				igb_disable_intr(adapter);
				ifp->if_capenable |= IFCAP_POLLING;
				IGB_CORE_UNLOCK(adapter);
			} else {
				error = ether_poll_deregister(ifp);
				/* Enable interrupt even in error case */
				IGB_CORE_LOCK(adapter);
				igb_enable_intr(adapter);
				ifp->if_capenable &= ~IFCAP_POLLING;
				IGB_CORE_UNLOCK(adapter);
			}
		}
#endif
		if (mask & IFCAP_HWCSUM) {
			ifp->if_capenable ^= IFCAP_HWCSUM;
			reinit = 1;
		}
		if (mask & IFCAP_TSO4) {
			ifp->if_capenable ^= IFCAP_TSO4;
			reinit = 1;
		}
		if (mask & IFCAP_VLAN_HWTAGGING) {
			ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
			reinit = 1;
		}
		if (reinit && (ifp->if_drv_flags & IFF_DRV_RUNNING))
			igb_init(adapter);
		VLAN_CAPABILITIES(ifp);
		break;
	    }


	default:
		error = ether_ioctl(ifp, command, data);
		break;
	}

	return (error);
}

/*********************************************************************
 *  Watchdog timer:
 *
 *  This routine is called from the local timer every second.
 *  As long as transmit descriptors are being cleaned the value
 *  is non-zero and we do nothing. Reaching 0 indicates a tx hang
 *  and we then reset the device.
 *
 **********************************************************************/

static void
igb_watchdog(struct adapter *adapter)
{
	struct tx_ring	*txr = adapter->tx_rings;
	bool		tx_hang = FALSE;

	IGB_CORE_LOCK_ASSERT(adapter);

	/*
	** The timer is set to 5 every time start() queues a packet.
	** Then txeof keeps resetting it as long as it cleans at
	** least one descriptor.
	** Finally, anytime all descriptors are clean the timer is
	** set to 0.
	**
	** With TX Multiqueue we need to check every queue's timer,
	** if any time out we do the reset.
	*/
	for (int i = 0; i < adapter->num_tx_queues; i++, txr++) {
		if (txr->watchdog_timer == 0 ||
		    (--txr->watchdog_timer))
			continue;
		else {
			tx_hang = TRUE;
			break;
		}
	}
	if (tx_hang == FALSE)
		return;

	/* If we are in this routine because of pause frames, then
	 * don't reset the hardware.
	 */
	if (E1000_READ_REG(&adapter->hw, E1000_STATUS) &
	    E1000_STATUS_TXOFF) {
		txr = adapter->tx_rings; /* reset pointer */
		for (int i = 0; i < adapter->num_tx_queues; i++, txr++)
			txr->watchdog_timer = IGB_TX_TIMEOUT;
		return;
	}

	if (e1000_check_for_link(&adapter->hw) == 0)
		device_printf(adapter->dev, "watchdog timeout -- resetting\n");

	txr = adapter->tx_rings; /* reset pointer before dumping queue state */
	for (int i = 0; i < adapter->num_tx_queues; i++, txr++) {
		device_printf(adapter->dev, "Queue(%d) tdh = %d, tdt = %d\n",
		    i, E1000_READ_REG(&adapter->hw, E1000_TDH(i)),
		    E1000_READ_REG(&adapter->hw, E1000_TDT(i)));
		device_printf(adapter->dev, "Queue(%d) desc avail = %d,"
		    " Next Desc to Clean = %d\n", i, txr->tx_avail,
		    txr->next_to_clean);
	}

	adapter->ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
	adapter->watchdog_events++;

	igb_init_locked(adapter);
}

/*********************************************************************
 *  Init entry point
 *
 *  This routine is used in two ways. It is used by the stack as
 *  init entry point in network interface structure. It is also used
 *  by the driver as a hw/sw initialization routine to get to a
 *  consistent state.
 *
 *  return 0 on success, positive on failure
 **********************************************************************/

static void
igb_init_locked(struct adapter *adapter)
{
	struct ifnet	*ifp = adapter->ifp;
	device_t	dev = adapter->dev;
	u32		pba = 0;

	INIT_DEBUGOUT("igb_init: begin");

	IGB_CORE_LOCK_ASSERT(adapter);

	igb_stop(adapter);

	/*
	 * Packet Buffer Allocation (PBA)
	 * Writing PBA sets the receive portion of the buffer;
	 * the remainder is used for the transmit buffer.
	 */
	if (adapter->hw.mac.type == e1000_82575) {
		pba = E1000_PBA_32K; /* 32K for Rx, 16K for Tx */
		INIT_DEBUGOUT1("igb_init: pba=%dK", pba);
		E1000_WRITE_REG(&adapter->hw, E1000_PBA, pba);
	}

	/* Get the latest mac address, User can use a LAA */
        bcopy(IF_LLADDR(adapter->ifp), adapter->hw.mac.addr,
              ETHER_ADDR_LEN);

	/* Put the address into the Receive Address Array */
	e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);

	/* Initialize the hardware */
	if (igb_hardware_init(adapter)) {
		device_printf(dev, "Unable to initialize the hardware\n");
		return;
	}
	igb_update_link_status(adapter);

	/* Setup VLAN support, basic and offload if available */
	E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);
	if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING)
		igb_enable_hw_vlans(adapter);

	/* Set hardware offload abilities */
	ifp->if_hwassist = 0;
	if (ifp->if_capenable & IFCAP_TXCSUM)
		ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
	if (ifp->if_capenable & IFCAP_TSO4)
		ifp->if_hwassist |= CSUM_TSO;

	/* Configure for OS presence */
	igb_init_manageability(adapter);

	/* Prepare transmit descriptors and buffers */
	igb_setup_transmit_structures(adapter);
	igb_initialize_transmit_units(adapter);

	/* Setup Multicast table */
	igb_set_multi(adapter);

	/* Prepare receive descriptors and buffers */
	if (igb_setup_receive_structures(adapter)) {
		device_printf(dev, "Could not setup receive structures\n");
		igb_stop(adapter);
		return;
	}
	igb_initialize_receive_units(adapter);

	/* Don't lose promiscuous settings */
	igb_set_promisc(adapter);

	ifp->if_drv_flags |= IFF_DRV_RUNNING;
	ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;

	callout_reset(&adapter->timer, hz, igb_local_timer, adapter);
	e1000_clear_hw_cntrs_base_generic(&adapter->hw);

	if (adapter->msix > 1) /* Set up queue routing */
		igb_configure_queues(adapter);

#ifdef DEVICE_POLLING
	/*
	 * Only enable interrupts if we are not polling, make sure
	 * they are off otherwise.
	 */
	if (ifp->if_capenable & IFCAP_POLLING)
		igb_disable_intr(adapter);
	else
#endif /* DEVICE_POLLING */
	{
		E1000_READ_REG(&adapter->hw, E1000_ICR);
		igb_enable_intr(adapter);
	}


	/* Don't reset the phy next time init gets called */
	adapter->hw.phy.reset_disable = TRUE;
}

static void
igb_init(void *arg)
{
	struct adapter *adapter = arg;

	IGB_CORE_LOCK(adapter);
	igb_init_locked(adapter);
	IGB_CORE_UNLOCK(adapter);
}


#ifdef DEVICE_POLLING
/*********************************************************************
 *
 *  Legacy polling routine
 *
 *********************************************************************/
static void
igb_poll(struct ifnet *ifp, enum poll_cmd cmd, int count)
{
	struct adapter *adapter = ifp->if_softc;
	struct rx_ring *rxr = adapter->rx_rings;
	struct tx_ring *txr = adapter->tx_rings;
	uint32_t reg_icr;

	IGB_CORE_LOCK(adapter);
	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
		IGB_CORE_UNLOCK(adapter);
		return;
	}

	if (cmd == POLL_AND_CHECK_STATUS) {
		reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
		if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
			callout_stop(&adapter->timer);
			adapter->hw.mac.get_link_status = 1;
			e1000_check_for_link(&adapter->hw);
			igb_update_link_status(adapter);
			callout_reset(&adapter->timer, hz,
			    igb_local_timer, adapter);
		}
	}
	igb_rxeof(rxr, count);
	IGB_CORE_UNLOCK(adapter);

	/* With polling we cannot do multiqueue */
	IGB_TX_LOCK(txr);
	igb_txeof(txr);

	if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
		igb_start_locked(txr, ifp);
	IGB_TX_UNLOCK(txr);
}
#endif /* DEVICE_POLLING */


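/*
 * Deferred link-state task: re-read the link status, update the
 * interface, restart the local timer and rearm the link interrupt.
 */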
static void
igb_handle_link(void *context, int pending)
{
	struct adapter	*adapter = context;
	struct ifnet *ifp;

	ifp = adapter->ifp;

	if (!(ifp->if_drv_flags & IFF_DRV_RUNNING))
		return;

	IGB_CORE_LOCK(adapter);
	callout_stop(&adapter->timer);
	adapter->hw.mac.get_link_status = 1;
	e1000_check_for_link(&adapter->hw);
	igb_update_link_status(adapter);
	callout_reset(&adapter->timer, hz, igb_local_timer, adapter);
	IGB_CORE_UNLOCK(adapter);
	/* Rearm this interrupt */
	E1000_WRITE_REG(&adapter->hw, E1000_IMS, E1000_IMS_LSC);
	E1000_WRITE_REG(&adapter->hw, E1000_EIMS, E1000_EIMS_OTHER);
}

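/*
 * Deferred legacy/MSI task: clean the RX and TX rings, restart
 * transmission, reschedule itself if RX work remains, and then
 * re-enable interrupts.
 */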
static void
igb_handle_rxtx(void *context, int pending)
{
	struct adapter	*adapter = context;
	struct tx_ring	*txr = adapter->tx_rings;
	struct rx_ring	*rxr = adapter->rx_rings;
	struct ifnet	*ifp;

	ifp = adapter->ifp;

	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
		if (igb_rxeof(rxr, adapter->rx_process_limit) != 0)
			taskqueue_enqueue(adapter->tq, &adapter->rxtx_task);
		IGB_TX_LOCK(txr);
		igb_txeof(txr);

		if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
			igb_start_locked(txr, ifp);
		IGB_TX_UNLOCK(txr);
	}

	igb_enable_intr(adapter);
}

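/*
 * Deferred MSIX RX task: clean the ring and reschedule itself
 * while more work remains.
 */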
static void
igb_handle_rx(void *context, int pending)
{
	struct rx_ring	*rxr = context;
	struct adapter	*adapter = rxr->adapter;
	struct ifnet	*ifp = adapter->ifp;

	if (ifp->if_drv_flags & IFF_DRV_RUNNING)
		if (igb_rxeof(rxr, adapter->rx_process_limit) != 0)
			/* More to clean, schedule another task */
			taskqueue_enqueue(adapter->tq, &rxr->rx_task);

}

static void
igb_handle_tx(void *context, int pending)
{
	struct tx_ring	*txr = context;
	struct adapter	*adapter = txr->adapter;
	struct ifnet	*ifp = adapter->ifp;

	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
		IGB_TX_LOCK(txr);
		igb_txeof(txr);
		if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
			igb_start_locked(txr, ifp);
		IGB_TX_UNLOCK(txr);
	}
}


/*********************************************************************
 *
 *  MSI/Legacy Deferred
 *  Interrupt Service routine
 *
 *********************************************************************/
static int
igb_irq_fast(void *arg)
{
	struct adapter	*adapter = arg;
	struct ifnet	*ifp;
	uint32_t	reg_icr;

	ifp = adapter->ifp;

	reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);

	/* Hot eject?  */
	if (reg_icr == 0xffffffff)
		return FILTER_STRAY;

	/* Definitely not our interrupt.  */
	if (reg_icr == 0x0)
		return FILTER_STRAY;

	if ((reg_icr & E1000_ICR_INT_ASSERTED) == 0)
		return FILTER_STRAY;

	/*
	 * Mask interrupts until the taskqueue is finished running.  This is
	 * cheap, just assume that it is needed.  This also works around the
	 * MSI message reordering errata on certain systems.
	 */
	igb_disable_intr(adapter);
	taskqueue_enqueue(adapter->tq, &adapter->rxtx_task);

	/* Link status change */
	if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC))
		taskqueue_enqueue(taskqueue_fast, &adapter->link_task);

	if (reg_icr & E1000_ICR_RXO)
		adapter->rx_overruns++;
	return FILTER_HANDLED;
}


/*********************************************************************
 *
 *  MSIX TX Interrupt Service routine
 *
 **********************************************************************/

static void
igb_msix_tx(void *arg)
{
	struct tx_ring *txr = arg;
	struct adapter *adapter = txr->adapter;
	struct ifnet	*ifp = adapter->ifp;

	++txr->tx_irq;
	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
		IGB_TX_LOCK(txr);
		igb_txeof(txr);
		IGB_TX_UNLOCK(txr);
		taskqueue_enqueue(adapter->tq, &txr->tx_task);
	}
	/* Reenable this interrupt */
	E1000_WRITE_REG(&adapter->hw, E1000_EIMS, txr->eims);
	return;
}

/*********************************************************************
 *
 *  MSIX RX Interrupt Service routine
 *
 **********************************************************************/

static void
igb_msix_rx(void *arg)
{
	struct rx_ring *rxr = arg;
	struct adapter *adapter = rxr->adapter;
	struct ifnet	*ifp = adapter->ifp;

	++rxr->rx_irq;
	if (ifp->if_drv_flags & IFF_DRV_RUNNING)
		if (igb_rxeof(rxr, adapter->rx_process_limit) != 0)
			taskqueue_enqueue(adapter->tq, &rxr->rx_task);
	/* Reenable this interrupt */
	E1000_WRITE_REG(&adapter->hw, E1000_EIMS, rxr->eims);
	return;
}

/*********************************************************************
 *
 *  MSIX Link Interrupt Service routine
 *
 **********************************************************************/

static void
igb_msix_link(void *arg)
{
	struct adapter	*adapter = arg;
	u32       	eicr, icr;

	++adapter->link_irq;
	eicr = E1000_READ_REG(&adapter->hw, E1000_EICR);
	if (eicr & E1000_EIMS_OTHER) {
		icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
		if (!(icr & E1000_ICR_LSC))
			goto spurious;
	}
	taskqueue_enqueue(taskqueue_fast, &adapter->link_task);

spurious:
	E1000_WRITE_REG(&adapter->hw, E1000_IMS, E1000_IMS_LSC);
	E1000_WRITE_REG(&adapter->hw, E1000_EIMS, E1000_EIMS_OTHER);
	return;
}


/*********************************************************************
 *
 *  Media Ioctl callback
 *
 *  This routine is called whenever the user queries the status of
 *  the interface using ifconfig.
 *
 **********************************************************************/
static void
igb_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
{
	struct adapter *adapter = ifp->if_softc;
	u_char fiber_type = IFM_1000_SX;

	INIT_DEBUGOUT("igb_media_status: begin");

	IGB_CORE_LOCK(adapter);
	e1000_check_for_link(&adapter->hw);
	igb_update_link_status(adapter);

	ifmr->ifm_status = IFM_AVALID;
	ifmr->ifm_active = IFM_ETHER;

	if (!adapter->link_active) {
		IGB_CORE_UNLOCK(adapter);
		return;
	}

	ifmr->ifm_status |= IFM_ACTIVE;

	if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
	    (adapter->hw.phy.media_type == e1000_media_type_internal_serdes))
		ifmr->ifm_active |= fiber_type | IFM_FDX;
	else {
		switch (adapter->link_speed) {
		case 10:
			ifmr->ifm_active |= IFM_10_T;
			break;
		case 100:
			ifmr->ifm_active |= IFM_100_TX;
			break;
		case 1000:
			ifmr->ifm_active |= IFM_1000_T;
			break;
		}
		if (adapter->link_duplex == FULL_DUPLEX)
			ifmr->ifm_active |= IFM_FDX;
		else
			ifmr->ifm_active |= IFM_HDX;
	}
	IGB_CORE_UNLOCK(adapter);
}

/*********************************************************************
 *
 *  Media Ioctl callback
 *
 *  This routine is called when the user changes speed/duplex using
 *  the media/mediaopt option with ifconfig.
 *
 **********************************************************************/
static int
igb_media_change(struct ifnet *ifp)
{
	struct adapter *adapter = ifp->if_softc;
	struct ifmedia  *ifm = &adapter->media;

	INIT_DEBUGOUT("igb_media_change: begin");

	if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
		return (EINVAL);

	IGB_CORE_LOCK(adapter);
	switch (IFM_SUBTYPE(ifm->ifm_media)) {
	case IFM_AUTO:
		adapter->hw.mac.autoneg = DO_AUTO_NEG;
		adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
		break;
	case IFM_1000_LX:
	case IFM_1000_SX:
	case IFM_1000_T:
		adapter->hw.mac.autoneg = DO_AUTO_NEG;
		adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
		break;
	case IFM_100_TX:
		adapter->hw.mac.autoneg = FALSE;
		adapter->hw.phy.autoneg_advertised = 0;
		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
			adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_FULL;
		else
			adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_HALF;
		break;
	case IFM_10_T:
		adapter->hw.mac.autoneg = FALSE;
		adapter->hw.phy.autoneg_advertised = 0;
		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
			adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_FULL;
		else
			adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_HALF;
		break;
	default:
		device_printf(adapter->dev, "Unsupported media type\n");
	}

	/* As the speed/duplex settings may have changed, we need to
	 * reset the PHY.
	 */
	adapter->hw.phy.reset_disable = FALSE;

	igb_init_locked(adapter);
	IGB_CORE_UNLOCK(adapter);

	return (0);
}


/*********************************************************************
 *
 *  This routine maps the mbufs to Advanced TX descriptors
 *  used by the 82575 adapter.
 *
 **********************************************************************/

static int
igb_xmit(struct tx_ring *txr, struct mbuf **m_headp)
{
	struct adapter		*adapter = txr->adapter;
	bus_dma_segment_t	segs[IGB_MAX_SCATTER];
	bus_dmamap_t		map;
	struct igb_buffer	*tx_buffer, *tx_buffer_mapped;
	union e1000_adv_tx_desc	*txd = NULL;
	struct mbuf		*m_head;
	u32			olinfo_status = 0, cmd_type_len = 0;
	int			nsegs, i, j, error, first, last = 0;
	u32			hdrlen = 0;

	m_head = *m_headp;


	/* Set basic descriptor constants */
	cmd_type_len |= E1000_ADVTXD_DTYP_DATA;
	cmd_type_len |= E1000_ADVTXD_DCMD_IFCS | E1000_ADVTXD_DCMD_DEXT;
	if (m_head->m_flags & M_VLANTAG)
		cmd_type_len |= E1000_ADVTXD_DCMD_VLE;

        /*
         * Force a cleanup if number of TX descriptors
         * available hits the threshold
         */
	if (txr->tx_avail <= IGB_TX_CLEANUP_THRESHOLD) {
		igb_txeof(txr);
		/* Do we at least have the minimum needed? */
		if (txr->tx_avail <= IGB_TX_OP_THRESHOLD) {
			txr->no_desc_avail++;
			return (ENOBUFS);
		}
	}

	/*
         * Map the packet for DMA.
	 *
	 * Capture the first descriptor index,
	 * this descriptor will have the index
	 * of the EOP which is the only one that
	 * now gets a DONE bit writeback.
	 */
	first = txr->next_avail_desc;
	tx_buffer = &txr->tx_buffers[first];
	tx_buffer_mapped = tx_buffer;
	map = tx_buffer->map;

	error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
	    *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);

	if (error == EFBIG) {
		struct mbuf *m;

		m = m_defrag(*m_headp, M_DONTWAIT);
		if (m == NULL) {
			adapter->mbuf_alloc_failed++;
			m_freem(*m_headp);
			*m_headp = NULL;
			return (ENOBUFS);
		}
		*m_headp = m;

		/* Try it again */
		error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
		    *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);

		if (error == ENOMEM) {
			adapter->no_tx_dma_setup++;
			return (error);
		} else if (error != 0) {
			adapter->no_tx_dma_setup++;
			m_freem(*m_headp);
			*m_headp = NULL;
			return (error);
		}
	} else if (error == ENOMEM) {
		adapter->no_tx_dma_setup++;
		return (error);
	} else if (error != 0) {
		adapter->no_tx_dma_setup++;
		m_freem(*m_headp);
		*m_headp = NULL;
		return (error);
	}

	/* Check again to be sure we have enough descriptors */
        if (nsegs > (txr->tx_avail - 2)) {
                txr->no_desc_avail++;
		bus_dmamap_unload(txr->txtag, map);
		return (ENOBUFS);
        }
	m_head = *m_headp;

        /*
         * Set up the context descriptor:
         * used when any hardware offload is done.
	 * This includes CSUM, VLAN, and TSO. It
	 * will use the first descriptor.
         */
        if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
		if (igb_tso_setup(txr, m_head, &hdrlen)) {
			cmd_type_len |= E1000_ADVTXD_DCMD_TSE;
			olinfo_status |= E1000_TXD_POPTS_IXSM << 8;
			olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
		} else
			return (ENXIO);
	} else if (igb_tx_ctx_setup(txr, m_head)) {
		/* Do all other context descriptor setup */
		olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
	}

	/* Calculate payload length */
	olinfo_status |= ((m_head->m_pkthdr.len - hdrlen)
	    << E1000_ADVTXD_PAYLEN_SHIFT);

	/* Set up our transmit descriptors */
	i = txr->next_avail_desc;
	for (j = 0; j < nsegs; j++) {
		bus_size_t seg_len;
		bus_addr_t seg_addr;

		tx_buffer = &txr->tx_buffers[i];
		txd = (union e1000_adv_tx_desc *)&txr->tx_base[i];
		seg_addr = segs[j].ds_addr;
		seg_len  = segs[j].ds_len;

		txd->read.buffer_addr = htole64(seg_addr);
		txd->read.cmd_type_len = htole32(
		    adapter->txd_cmd | cmd_type_len | seg_len);
		txd->read.olinfo_status = htole32(olinfo_status);
		last = i;
		if (++i == adapter->num_tx_desc)
			i = 0;
		tx_buffer->m_head = NULL;
		tx_buffer->next_eop = -1;
	}

	txr->next_avail_desc = i;
	txr->tx_avail -= nsegs;

        tx_buffer->m_head = m_head;
	tx_buffer_mapped->map = tx_buffer->map;
	tx_buffer->map = map;
        bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);

        /*
         * Last Descriptor of Packet
	 * needs End Of Packet (EOP)
	 * and Report Status (RS)
         */
        txd->read.cmd_type_len |=
	    htole32(E1000_TXD_CMD_EOP | E1000_TXD_CMD_RS);
	/*
	 * Keep track in the first buffer which
	 * descriptor will be written back
	 */
	tx_buffer = &txr->tx_buffers[first];
	tx_buffer->next_eop = last;

	/*
	 * Advance the Transmit Descriptor Tail (TDT), this tells the E1000
	 * that this frame is available to transmit.
	 */
	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
	E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), i);
	++txr->tx_packets;

	return (0);

}

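/*
 * Program unicast/multicast promiscuous reception in RCTL
 * according to the interface flags.
 */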
static void
igb_set_promisc(struct adapter *adapter)
{
	struct ifnet	*ifp = adapter->ifp;
	uint32_t	reg_rctl;

	reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);

	if (ifp->if_flags & IFF_PROMISC) {
		reg_rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
	} else if (ifp->if_flags & IFF_ALLMULTI) {
		reg_rctl |= E1000_RCTL_MPE;
		reg_rctl &= ~E1000_RCTL_UPE;
		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
	}
}

static void
igb_disable_promisc(struct adapter *adapter)
{
	uint32_t	reg_rctl;

	reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);

	reg_rctl &=  (~E1000_RCTL_UPE);
	reg_rctl &=  (~E1000_RCTL_MPE);
	E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
}


/*********************************************************************
 *  Multicast Update
 *
 *  This routine is called whenever multicast address list is updated.
 *
 **********************************************************************/

static void
igb_set_multi(struct adapter *adapter)
{
	struct ifnet	*ifp = adapter->ifp;
	struct ifmultiaddr *ifma;
	uint32_t reg_rctl = 0;
	uint8_t  mta[512]; /* Largest MTA is 4096 bits */
1691	int mcnt = 0;
1692
1693	IOCTL_DEBUGOUT("igb_set_multi: begin");
1694
1695	IF_ADDR_LOCK(ifp);
1696	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
1697		if (ifma->ifma_addr->sa_family != AF_LINK)
1698			continue;
1699
1700		if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
1701			break;
1702
1703		bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
1704		    &mta[mcnt * ETH_ADDR_LEN], ETH_ADDR_LEN);
1705		mcnt++;
1706	}
1707	IF_ADDR_UNLOCK(ifp);
1708
1709	if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) {
1710		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
1711		reg_rctl |= E1000_RCTL_MPE;
1712		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
1713	} else
1714		e1000_update_mc_addr_list(&adapter->hw, mta,
1715		    mcnt, 1, adapter->hw.mac.rar_entry_count);
1716}
1717
1718
1719/*********************************************************************
1720 *  Timer routine
1721 *
1722 *  This routine checks for link status and updates statistics.
1723 *
1724 **********************************************************************/
1725
1726static void
1727igb_local_timer(void *arg)
1728{
1729	struct adapter	*adapter = arg;
1730	struct ifnet	*ifp = adapter->ifp;
1731
1732	IGB_CORE_LOCK_ASSERT(adapter);
1733
1734	e1000_check_for_link(&adapter->hw);
1735	igb_update_link_status(adapter);
1736	igb_update_stats_counters(adapter);
1737
1738	if (igb_display_debug_stats && (ifp->if_drv_flags & IFF_DRV_RUNNING))
1739		igb_print_hw_stats(adapter);
1740
1741	/*
1742	 * Each second we check the watchdog to
1743	 * protect against hardware hangs.
1744	 */
1745	igb_watchdog(adapter);
1746
1747	callout_reset(&adapter->timer, hz, igb_local_timer, adapter);
1748
1749}
1750
1751static void
1752igb_update_link_status(struct adapter *adapter)
1753{
1754	struct ifnet *ifp = adapter->ifp;
1755	device_t dev = adapter->dev;
1756	struct tx_ring *txr = adapter->tx_rings;
1757
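	/* The LU (link up) bit in the STATUS register reflects the current link state. */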
1758	if (E1000_READ_REG(&adapter->hw, E1000_STATUS) &
1759	    E1000_STATUS_LU) {
1760		if (adapter->link_active == 0) {
1761			e1000_get_speed_and_duplex(&adapter->hw,
1762			    &adapter->link_speed, &adapter->link_duplex);
1763			if (bootverbose)
1764				device_printf(dev, "Link is up %d Mbps %s\n",
1765				    adapter->link_speed,
1766				    ((adapter->link_duplex == FULL_DUPLEX) ?
1767				    "Full Duplex" : "Half Duplex"));
1768			adapter->link_active = 1;
1769			ifp->if_baudrate = adapter->link_speed * 1000000;
1770			if_link_state_change(ifp, LINK_STATE_UP);
1771		}
1772	} else {
1773		if (adapter->link_active == 1) {
1774			ifp->if_baudrate = adapter->link_speed = 0;
1775			adapter->link_duplex = 0;
1776			if (bootverbose)
1777				device_printf(dev, "Link is Down\n");
1778			adapter->link_active = 0;
1779			if_link_state_change(ifp, LINK_STATE_DOWN);
1780			/* Turn off watchdogs */
1781			for (int i = 0; i < adapter->num_tx_queues;
1782			    i++, txr++)
1783				txr->watchdog_timer = FALSE;
1784		}
1785	}
1786}
1787
1788/*********************************************************************
1789 *
1790 *  This routine disables all traffic on the adapter by issuing a
1791 *  global reset on the MAC and deallocates TX/RX buffers.
1792 *
1793 **********************************************************************/
1794
1795static void
1796igb_stop(void *arg)
1797{
1798	struct adapter	*adapter = arg;
1799	struct ifnet	*ifp = adapter->ifp;
1800
1801	IGB_CORE_LOCK_ASSERT(adapter);
1802
1803	INIT_DEBUGOUT("igb_stop: begin");
1804
1805	igb_disable_intr(adapter);
1806
1807	callout_stop(&adapter->timer);
1808
1809	/* Tell the stack that the interface is no longer active */
1810	ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
1811
1812
1813	e1000_reset_hw(&adapter->hw);
1814	E1000_WRITE_REG(&adapter->hw, E1000_WUC, 0);
1815}
1816
1817
1818/*********************************************************************
1819 *
1820 *  Determine hardware revision.
1821 *
1822 **********************************************************************/
1823static void
1824igb_identify_hardware(struct adapter *adapter)
1825{
1826	device_t dev = adapter->dev;
1827
1828	/* Make sure our PCI config space has the necessary stuff set */
1829	adapter->hw.bus.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2);
1830	if (!((adapter->hw.bus.pci_cmd_word & PCIM_CMD_BUSMASTEREN) &&
1831	    (adapter->hw.bus.pci_cmd_word & PCIM_CMD_MEMEN))) {
1832		device_printf(dev, "Memory Access and/or Bus Master bits "
1833		    "were not set!\n");
1834		adapter->hw.bus.pci_cmd_word |=
1835		(PCIM_CMD_BUSMASTEREN | PCIM_CMD_MEMEN);
1836		pci_write_config(dev, PCIR_COMMAND,
1837		    adapter->hw.bus.pci_cmd_word, 2);
1838	}
1839
1840	/* Save off the information about this board */
1841	adapter->hw.vendor_id = pci_get_vendor(dev);
1842	adapter->hw.device_id = pci_get_device(dev);
1843	adapter->hw.revision_id = pci_read_config(dev, PCIR_REVID, 1);
1844	adapter->hw.subsystem_vendor_id =
1845	    pci_read_config(dev, PCIR_SUBVEND_0, 2);
1846	adapter->hw.subsystem_device_id =
1847	    pci_read_config(dev, PCIR_SUBDEV_0, 2);
1848
1849	/* Do Shared Code Init and Setup */
1850	if (e1000_set_mac_type(&adapter->hw)) {
1851		device_printf(dev, "Setup init failure\n");
1852		return;
1853	}
1854}
1855
1856static int
1857igb_allocate_pci_resources(struct adapter *adapter)
1858{
1859	device_t	dev = adapter->dev;
1860	int		rid, error = 0;
1861
1862	rid = PCIR_BAR(0);
1863	adapter->pci_mem = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
1864	    &rid, RF_ACTIVE);
1865	if (adapter->pci_mem == NULL) {
1866		device_printf(dev, "Unable to allocate bus resource: memory\n");
1867		return (ENXIO);
1868	}
1869	adapter->osdep.mem_bus_space_tag =
1870	    rman_get_bustag(adapter->pci_mem);
1871	adapter->osdep.mem_bus_space_handle =
1872	    rman_get_bushandle(adapter->pci_mem);
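	/*
	 * The shared code reaches registers through hw.hw_addr; point it at
	 * the bus-space handle so the osdep register macros can use it.
	 */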
1873	adapter->hw.hw_addr = (uint8_t *)&adapter->osdep.mem_bus_space_handle;
1874
1875	/*
1876	** Init the resource arrays
1877	*/
1878	for (int i = 0; i < IGB_MSIX_VEC; i++) {
1879		adapter->rid[i] = i + 1; /* MSI/X RID starts at 1 */
1880		adapter->tag[i] = NULL;
1881		adapter->res[i] = NULL;
1882	}
1883
1884	adapter->num_tx_queues = 1; /* Defaults for Legacy or MSI */
1885	adapter->num_rx_queues = 1;
1886
1887	/* This will setup either MSI/X or MSI */
1888	adapter->msix = igb_setup_msix(adapter);
1889
1890	adapter->hw.back = &adapter->osdep;
1891
1892	return (error);
1893}
1894
1895/*********************************************************************
1896 *
1897 *  Setup the Legacy or MSI Interrupt handler
1898 *
1899 **********************************************************************/
1900static int
1901igb_allocate_legacy(struct adapter *adapter)
1902{
1903	device_t dev = adapter->dev;
1904	int error;
1905
1906	/* Turn off all interrupts */
1907	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
1908
1909	/* Legacy RID at 0 */
1910	if (adapter->msix == 0)
1911		adapter->rid[0] = 0;
1912
1913	/* We allocate a single interrupt resource */
1914	adapter->res[0] = bus_alloc_resource_any(dev,
1915	    SYS_RES_IRQ, &adapter->rid[0], RF_SHAREABLE | RF_ACTIVE);
1916	if (adapter->res[0] == NULL) {
1917		device_printf(dev, "Unable to allocate bus resource: "
1918		    "interrupt\n");
1919		return (ENXIO);
1920	}
1921
1922	/*
1923	 * Try allocating a fast interrupt and the associated deferred
1924	 * processing contexts.
1925	 */
1926	TASK_INIT(&adapter->rxtx_task, 0, igb_handle_rxtx, adapter);
1927	TASK_INIT(&adapter->link_task, 0, igb_handle_link, adapter);
1928	adapter->tq = taskqueue_create_fast("igb_taskq", M_NOWAIT,
1929	    taskqueue_thread_enqueue, &adapter->tq);
1930	taskqueue_start_threads(&adapter->tq, 1, PI_NET, "%s taskq",
1931	    device_get_nameunit(adapter->dev));
1932	if ((error = bus_setup_intr(dev, adapter->res[0],
1933	    INTR_TYPE_NET | INTR_MPSAFE, igb_irq_fast, NULL, adapter,
1934	    &adapter->tag[0])) != 0) {
1935		device_printf(dev, "Failed to register fast interrupt "
1936			    "handler: %d\n", error);
1937		taskqueue_free(adapter->tq);
1938		adapter->tq = NULL;
1939		return (error);
1940	}
1941
1942	return (0);
1943}
1944
1945
1946/*********************************************************************
1947 *
1948 *  Setup the MSIX Interrupt handlers:
1949 *
1950 **********************************************************************/
1951static int
1952igb_allocate_msix(struct adapter *adapter)
1953{
1954	device_t dev = adapter->dev;
1955	struct tx_ring *txr = adapter->tx_rings;
1956	struct rx_ring *rxr = adapter->rx_rings;
1957	int error, vector = 0;
1958
1959	/*
1960	 * Setup the interrupt handlers
1961	 */
1962
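	/*
	 * Vectors are assigned in order: one per TX ring, one per RX ring,
	 * and a final one for link/other causes.
	 */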
1963	/* TX Setup */
1964	for (int i = 0; i < adapter->num_tx_queues; i++, vector++, txr++) {
1965		adapter->res[vector] = bus_alloc_resource_any(dev,
1966		    SYS_RES_IRQ, &adapter->rid[vector],
1967		    RF_SHAREABLE | RF_ACTIVE);
1968		if (adapter->res[vector] == NULL) {
1969			device_printf(dev,
1970			    "Unable to allocate bus resource: "
1971			    "MSIX TX Interrupt\n");
1972			return (ENXIO);
1973		}
1974		error = bus_setup_intr(dev, adapter->res[vector],
1975	    	    INTR_TYPE_NET | INTR_MPSAFE, NULL, igb_msix_tx,
1976		    txr, &adapter->tag[vector]);
1977		if (error) {
1978			adapter->res[vector] = NULL;
1979			device_printf(dev, "Failed to register TX handler");
1980			return (error);
1981		}
1982		/* Make tasklet for deferred handling - one per queue */
1983		TASK_INIT(&txr->tx_task, 0, igb_handle_tx, txr);
1984		if (adapter->hw.mac.type == e1000_82575) {
1985			txr->eims = E1000_EICR_TX_QUEUE0 << i;
1986			/* MSIXBM registers start at 0 */
1987			txr->msix = adapter->rid[vector] - 1;
1988		} else {
1989			txr->eims = 1 << vector;
1990			txr->msix = adapter->rid[vector];
1991		}
1992	}
1993
1994	/* RX Setup */
1995	for (int i = 0; i < adapter->num_rx_queues; i++, vector++, rxr++) {
1996		adapter->res[vector] = bus_alloc_resource_any(dev,
1997		    SYS_RES_IRQ, &adapter->rid[vector],
1998		    RF_SHAREABLE | RF_ACTIVE);
1999		if (adapter->res[vector] == NULL) {
2000			device_printf(dev,
2001			    "Unable to allocate bus resource: "
2002			    "MSIX RX Interrupt\n");
2003			return (ENXIO);
2004		}
2005		error = bus_setup_intr(dev, adapter->res[vector],
2006	    	    INTR_TYPE_NET | INTR_MPSAFE, NULL, igb_msix_rx,
2007		    rxr, &adapter->tag[vector]);
2008		if (error) {
2009			adapter->res[vector] = NULL;
2010			device_printf(dev, "Failed to register RX handler");
2011			return (error);
2012		}
2013		TASK_INIT(&rxr->rx_task, 0, igb_handle_rx, rxr);
2014		if (adapter->hw.mac.type == e1000_82575) {
2015			rxr->eims = E1000_EICR_RX_QUEUE0 << i;
2016			rxr->msix = adapter->rid[vector] - 1;
2017		} else {
2018			rxr->eims = 1 << vector;
2019			rxr->msix = adapter->rid[vector];
2020		}
2021	}
2022
2023	/* And Link */
2024	adapter->res[vector] = bus_alloc_resource_any(dev,
2025	    SYS_RES_IRQ, &adapter->rid[vector],
2026		    RF_SHAREABLE | RF_ACTIVE);
2027	if (adapter->res[vector] == NULL) {
2028		device_printf(dev,
2029		    "Unable to allocate bus resource: "
2030		    "MSIX Link Interrupt\n");
2031		return (ENXIO);
2032	}
2033	if ((error = bus_setup_intr(dev, adapter->res[vector],
2034	    INTR_TYPE_NET | INTR_MPSAFE, NULL, igb_msix_link,
2035	    adapter, &adapter->tag[vector])) != 0) {
2036		device_printf(dev, "Failed to register Link handler");
2037		return (error);
2038	}
2039	if (adapter->hw.mac.type == e1000_82575)
2040		adapter->linkvec = adapter->rid[vector] - 1;
2041	else
2042		adapter->linkvec = adapter->rid[vector];
2043
2044	/* Make tasklet for deferred link interrupt handling */
2045	TASK_INIT(&adapter->link_task, 0, igb_handle_link, adapter);
2046
2047	adapter->tq = taskqueue_create_fast("igb_taskq", M_NOWAIT,
2048	    taskqueue_thread_enqueue, &adapter->tq);
2049	taskqueue_start_threads(&adapter->tq, 1, PI_NET, "%s taskq",
2050	    device_get_nameunit(adapter->dev));
2051
2052	return (0);
2053}
2054
2055static void
2056igb_configure_queues(struct adapter *adapter)
2057{
2058	struct	e1000_hw *hw = &adapter->hw;
2059	struct	tx_ring	*txr;
2060	struct	rx_ring	*rxr;
2061
2062	/* Turn on MSIX */
2063	{ /* 82575 */
2064		int tmp;
2065
2066                /* enable MSI-X PBA support */
2067		tmp = E1000_READ_REG(hw, E1000_CTRL_EXT);
2068                tmp |= E1000_CTRL_EXT_PBA_CLR;
2069                /* Auto-Mask interrupts upon ICR read. */
2070                tmp |= E1000_CTRL_EXT_EIAME;
2071                tmp |= E1000_CTRL_EXT_IRCA;
2072                E1000_WRITE_REG(hw, E1000_CTRL_EXT, tmp);
2073
2074	 	/* Set the interrupt throttling rate. */
2075		for (int i = 0; i < 10; i++)
2076			E1000_WRITE_REG(&adapter->hw,
2077			    E1000_EITR(i), DEFAULT_ITR);
2078
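		/*
		 * MSIXBM(n) holds the set of interrupt-cause (EICR) bits routed
		 * to MSI-X vector n, so each queue gets its own vector below.
		 */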
2079		/* TX */
2080		for (int i = 0; i < adapter->num_tx_queues; i++) {
2081			txr = &adapter->tx_rings[i];
2082			E1000_WRITE_REG(hw, E1000_MSIXBM(txr->msix),
2083			    txr->eims);
2084			adapter->eims_mask |= txr->eims;
2085		}
2086
2087		/* RX */
2088		for (int i = 0; i < adapter->num_rx_queues; i++) {
2089			rxr = &adapter->rx_rings[i];
2090			E1000_WRITE_REG(hw, E1000_MSIXBM(rxr->msix),
2091			    rxr->eims);
2092			adapter->eims_mask |= rxr->eims;
2093		}
2094
2095		/* Link */
2096		E1000_WRITE_REG(hw, E1000_MSIXBM(adapter->linkvec),
2097		    E1000_EIMS_OTHER);
2098		adapter->eims_mask |= E1000_EIMS_OTHER;
2099	}
2100	return;
2101}
2102
2103
2104static void
2105igb_free_pci_resources(struct adapter *adapter)
2106{
2107	device_t dev = adapter->dev;
2108
2109	/* Make sure the for loop below runs once */
2110	if (adapter->msix == 0)
2111		adapter->msix = 1;
2112
2113	/*
2114	 * First release all the interrupt resources:
2115	 *      notice that since these are just kept
2116	 *      in an array we can do the same logic
2117	 *      whether its MSIX or just legacy.
2118	 */
2119	for (int i = 0; i < adapter->msix; i++) {
2120		if (adapter->tag[i] != NULL) {
2121			bus_teardown_intr(dev, adapter->res[i],
2122			    adapter->tag[i]);
2123			adapter->tag[i] = NULL;
2124		}
2125		if (adapter->res[i] != NULL) {
2126			bus_release_resource(dev, SYS_RES_IRQ,
2127			    adapter->rid[i], adapter->res[i]);
2128		}
2129	}
2130
2131	if (adapter->msix)
2132		pci_release_msi(dev);
2133
2134	if (adapter->msix_mem != NULL)
2135		bus_release_resource(dev, SYS_RES_MEMORY,
2136		    PCIR_BAR(IGB_MSIX_BAR), adapter->msix_mem);
2137
2138	if (adapter->pci_mem != NULL)
2139		bus_release_resource(dev, SYS_RES_MEMORY,
2140		    PCIR_BAR(0), adapter->pci_mem);
2141
2142}
2143
2144/*
2145 * Setup Either MSI/X or MSI
2146 */
2147static int
2148igb_setup_msix(struct adapter *adapter)
2149{
2150	device_t dev = adapter->dev;
2151	int rid, want, queues, msgs;
2152
2153	/* First try MSI/X */
2154	rid = PCIR_BAR(IGB_MSIX_BAR);
2155	adapter->msix_mem = bus_alloc_resource_any(dev,
2156	    SYS_RES_MEMORY, &rid, RF_ACTIVE);
2157       	if (!adapter->msix_mem) {
2158		/* May not be enabled */
2159		device_printf(adapter->dev,
2160		    "Unable to map MSIX table\n");
2161		goto msi;
2162	}
2163
2164	msgs = pci_msix_count(dev);
2165	if (msgs == 0) { /* system has msix disabled */
2166		bus_release_resource(dev, SYS_RES_MEMORY,
2167		    PCIR_BAR(IGB_MSIX_BAR), adapter->msix_mem);
2168		adapter->msix_mem = NULL;
2169		goto msi;
2170	}
2171
2172	/* Figure out a reasonable auto config value */
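	/* Reserve one vector for link; split the rest between TX and RX, capped at ncpus. */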
2173	queues = (mp_ncpus > ((msgs-1)/2)) ? (msgs-1)/2 : mp_ncpus;
2174
2175	if (igb_tx_queues == 0)
2176		igb_tx_queues = queues;
2177	if (igb_rx_queues == 0)
2178		igb_rx_queues = queues;
2179	want = igb_tx_queues + igb_rx_queues + 1;
2180	if (msgs >= want)
2181		msgs = want;
2182	else {
2183               	device_printf(adapter->dev,
2184		    "MSIX Configuration Problem, "
2185		    "%d vectors but %d queues wanted!\n",
2186		    msgs, want);
2187		return (ENXIO);
2188	}
2189	if ((msgs) && pci_alloc_msix(dev, &msgs) == 0) {
2190               	device_printf(adapter->dev,
2191		    "Using MSIX interrupts with %d vectors\n", msgs);
2192		adapter->num_tx_queues = igb_tx_queues;
2193		adapter->num_rx_queues = igb_rx_queues;
2194		return (msgs);
2195	}
2196msi:
2197       	msgs = pci_msi_count(dev);
2198       	if (msgs == 1 && pci_alloc_msi(dev, &msgs) == 0)
2199               	device_printf(adapter->dev,"Using MSI interrupt\n");
2200	return (msgs);
2201}
2202
2203/*********************************************************************
2204 *
2205 *  Initialize the hardware to a configuration
2206 *  as specified by the adapter structure.
2207 *
2208 **********************************************************************/
2209static int
2210igb_hardware_init(struct adapter *adapter)
2211{
2212	device_t	dev = adapter->dev;
2213	u32		rx_buffer_size;
2214
2215	INIT_DEBUGOUT("igb_hardware_init: begin");
2216
2217	/* Issue a global reset */
2218	e1000_reset_hw(&adapter->hw);
2219
2220	/* Let the firmware know the OS is in control */
2221	igb_get_hw_control(adapter);
2222
2223	/*
2224	 * These parameters control the automatic generation (Tx) and
2225	 * response (Rx) to Ethernet PAUSE frames.
2226	 * - High water mark should allow for at least two frames to be
2227	 *   received after sending an XOFF.
2228	 * - Low water mark works best when it is very near the high water mark.
2229	 *   This allows the receiver to restart by sending XON when it has
2230	 *   drained a bit. Here we use an arbitrary value of 1500 which will
2231	 *   restart after one full frame is pulled from the buffer. There
2232	 *   could be several smaller frames in the buffer and if so they will
2233	 *   not trigger the XON until their total number reduces the buffer
2234	 *   by 1500.
2235	 * - The pause time is fairly large at 1000 x 512ns = 512 usec.
2236	 */
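	/* The low 16 bits of PBA give the RX packet buffer size in KB; shift by 10 converts to bytes. */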
2237	rx_buffer_size = ((E1000_READ_REG(&adapter->hw,
2238	    E1000_PBA) & 0xffff) << 10);
2239
2240	adapter->hw.fc.high_water = rx_buffer_size -
2241	    roundup2(adapter->max_frame_size, 1024);
2242	adapter->hw.fc.low_water = adapter->hw.fc.high_water - 1500;
2243
2244	adapter->hw.fc.pause_time = IGB_FC_PAUSE_TIME;
2245	adapter->hw.fc.send_xon = TRUE;
2246	adapter->hw.fc.type = e1000_fc_full;
2247
2248	if (e1000_init_hw(&adapter->hw) < 0) {
2249		device_printf(dev, "Hardware Initialization Failed\n");
2250		return (EIO);
2251	}
2252
2253	e1000_check_for_link(&adapter->hw);
2254
2255	return (0);
2256}
2257
2258/*********************************************************************
2259 *
2260 *  Setup networking device structure and register an interface.
2261 *
2262 **********************************************************************/
2263static void
2264igb_setup_interface(device_t dev, struct adapter *adapter)
2265{
2266	struct ifnet   *ifp;
2267
2268	INIT_DEBUGOUT("igb_setup_interface: begin");
2269
2270	ifp = adapter->ifp = if_alloc(IFT_ETHER);
2271	if (ifp == NULL)
2272		panic("%s: can not if_alloc()", device_get_nameunit(dev));
2273	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
2274	ifp->if_mtu = ETHERMTU;
2275	ifp->if_init =  igb_init;
2276	ifp->if_softc = adapter;
2277	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
2278	ifp->if_ioctl = igb_ioctl;
2279	ifp->if_start = igb_start;
2280	IFQ_SET_MAXLEN(&ifp->if_snd, adapter->num_tx_desc - 1);
2281	ifp->if_snd.ifq_drv_maxlen = adapter->num_tx_desc - 1;
2282	IFQ_SET_READY(&ifp->if_snd);
2283
2284	ether_ifattach(ifp, adapter->hw.mac.addr);
2285
2286	ifp->if_capabilities = ifp->if_capenable = 0;
2287
2288	ifp->if_capabilities = IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM;
2289	ifp->if_capabilities |= IFCAP_TSO4;
2290	ifp->if_capenable = ifp->if_capabilities;
2291
2292	/*
2293	 * Tell the upper layer(s) we support long frames.
2294	 */
2295	ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);
2296	ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU;
2297	ifp->if_capenable |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU;
2298
2299#ifdef DEVICE_POLLING
2300	ifp->if_capabilities |= IFCAP_POLLING;
2301#endif
2302
2303	/*
2304	 * Specify the media types supported by this adapter and register
2305	 * callbacks to update media and link information
2306	 */
2307	ifmedia_init(&adapter->media, IFM_IMASK,
2308	    igb_media_change, igb_media_status);
2309	if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
2310	    (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
2311		ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_SX | IFM_FDX,
2312			    0, NULL);
2313		ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_SX, 0, NULL);
2314	} else {
2315		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T, 0, NULL);
2316		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T | IFM_FDX,
2317			    0, NULL);
2318		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX,
2319			    0, NULL);
2320		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
2321			    0, NULL);
2322		if (adapter->hw.phy.type != e1000_phy_ife) {
2323			ifmedia_add(&adapter->media,
2324				IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
2325			ifmedia_add(&adapter->media,
2326				IFM_ETHER | IFM_1000_T, 0, NULL);
2327		}
2328	}
2329	ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
2330	ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
2331}
2332
2333
2334/*
2335 * Manage DMA'able memory.
2336 */
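/*
 * igb_dmamap_cb simply records the bus address of the single segment
 * produced by bus_dmamap_load() into the caller-supplied bus_addr_t.
 */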
2337static void
2338igb_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
2339{
2340	if (error)
2341		return;
2342	*(bus_addr_t *) arg = segs[0].ds_addr;
2343}
2344
2345static int
2346igb_dma_malloc(struct adapter *adapter, bus_size_t size,
2347        struct igb_dma_alloc *dma, int mapflags)
2348{
2349	int error;
2350
2351	error = bus_dma_tag_create(bus_get_dma_tag(adapter->dev), /* parent */
2352				IGB_DBA_ALIGN, 0,	/* alignment, bounds */
2353				BUS_SPACE_MAXADDR,	/* lowaddr */
2354				BUS_SPACE_MAXADDR,	/* highaddr */
2355				NULL, NULL,		/* filter, filterarg */
2356				size,			/* maxsize */
2357				1,			/* nsegments */
2358				size,			/* maxsegsize */
2359				0,			/* flags */
2360				NULL,			/* lockfunc */
2361				NULL,			/* lockarg */
2362				&dma->dma_tag);
2363	if (error) {
2364		device_printf(adapter->dev,
2365		    "%s: bus_dma_tag_create failed: %d\n",
2366		    __func__, error);
2367		goto fail_0;
2368	}
2369
2370	error = bus_dmamem_alloc(dma->dma_tag, (void**) &dma->dma_vaddr,
2371	    BUS_DMA_NOWAIT, &dma->dma_map);
2372	if (error) {
2373		device_printf(adapter->dev,
2374		    "%s: bus_dmamem_alloc(%ju) failed: %d\n",
2375		    __func__, (uintmax_t)size, error);
2376		goto fail_1;	/* nothing was allocated; just destroy the tag */
2377	}
2378
2379	dma->dma_paddr = 0;
2380	error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
2381	    size, igb_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT);
2382	if (error || dma->dma_paddr == 0) {
2383		device_printf(adapter->dev,
2384		    "%s: bus_dmamap_load failed: %d\n",
2385		    __func__, error);
2386		goto fail_3;
2387	}
2388
2389	return (0);
2390
2391fail_3:
2392	bus_dmamap_unload(dma->dma_tag, dma->dma_map);
2394	bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
fail_1:
2395	bus_dma_tag_destroy(dma->dma_tag);
2396fail_0:
2397	dma->dma_map = NULL;
2398	dma->dma_tag = NULL;
2399
2400	return (error);
2401}
2402
2403static void
2404igb_dma_free(struct adapter *adapter, struct igb_dma_alloc *dma)
2405{
2406	if (dma->dma_tag == NULL)
2407		return;
2408	if (dma->dma_map != NULL) {
2409		bus_dmamap_sync(dma->dma_tag, dma->dma_map,
2410		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
2411		bus_dmamap_unload(dma->dma_tag, dma->dma_map);
2412		bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
2413		dma->dma_map = NULL;
2414	}
2415	bus_dma_tag_destroy(dma->dma_tag);
2416	dma->dma_tag = NULL;
2417}
2418
2419
2420/*********************************************************************
2421 *
2422 *  Allocate memory for the transmit and receive rings, and then
2423 *  the descriptors associated with each, called only once at attach.
2424 *
2425 **********************************************************************/
2426static int
2427igb_allocate_queues(struct adapter *adapter)
2428{
2429	device_t dev = adapter->dev;
2430	struct tx_ring *txr;
2431	struct rx_ring *rxr;
2432	int rsize, tsize, error = E1000_SUCCESS;
2433	int txconf = 0, rxconf = 0;
2434	char	name_string[16];
2435
2436	/* First allocate the TX ring struct memory */
2437	if (!(adapter->tx_rings =
2438	    (struct tx_ring *) malloc(sizeof(struct tx_ring) *
2439	    adapter->num_tx_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2440		device_printf(dev, "Unable to allocate TX ring memory\n");
2441		error = ENOMEM;
2442		goto fail;
2443	}
2444	txr = adapter->tx_rings;
2445
2446	/* Next allocate the RX */
2447	if (!(adapter->rx_rings =
2448	    (struct rx_ring *) malloc(sizeof(struct rx_ring) *
2449	    adapter->num_rx_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2450		device_printf(dev, "Unable to allocate RX ring memory\n");
2451		error = ENOMEM;
2452		goto rx_fail;
2453	}
2454	rxr = adapter->rx_rings;
2455
2456	tsize = roundup2(adapter->num_tx_desc *
2457	    sizeof(union e1000_adv_tx_desc), IGB_DBA_ALIGN);
2458	/*
2459	 * Now set up the TX queues, txconf is needed to handle the
2460	 * possibility that things fail midcourse and we need to
2461	 * undo memory gracefully
2462	 */
2463	for (int i = 0; i < adapter->num_tx_queues; i++, txconf++) {
2464		/* Set up some basics */
2465		txr = &adapter->tx_rings[i];
2466		txr->adapter = adapter;
2467		txr->me = i;
2468
2469		/* Initialize the TX lock */
2470		snprintf(name_string, sizeof(name_string), "%s:tx(%d)",
2471		    device_get_nameunit(dev), txr->me);
2472		mtx_init(&txr->tx_mtx, name_string, NULL, MTX_DEF);
2473
2474		if (igb_dma_malloc(adapter, tsize,
2475			&txr->txdma, BUS_DMA_NOWAIT)) {
2476			device_printf(dev,
2477			    "Unable to allocate TX Descriptor memory\n");
2478			error = ENOMEM;
2479			goto err_tx_desc;
2480		}
2481		txr->tx_base = (struct e1000_tx_desc *)txr->txdma.dma_vaddr;
2482		bzero((void *)txr->tx_base, tsize);
2483
2484        	/* Now allocate transmit buffers for the ring */
2485        	if (igb_allocate_transmit_buffers(txr)) {
2486			device_printf(dev,
2487			    "Critical Failure setting up transmit buffers\n");
2488			error = ENOMEM;
2489			goto err_tx_desc;
2490        	}
2491
2492	}
2493
2494	/*
2495	 * Next the RX queues...
2496	 */
2497	rsize = roundup2(adapter->num_rx_desc *
2498	    sizeof(union e1000_adv_rx_desc), IGB_DBA_ALIGN);
2499	for (int i = 0; i < adapter->num_rx_queues; i++, rxconf++) {
2500		rxr = &adapter->rx_rings[i];
2501		rxr->adapter = adapter;
2502		rxr->me = i;
2503
2504		/* Initialize the RX lock */
2505		snprintf(name_string, sizeof(name_string), "%s:rx(%d)",
2506		    device_get_nameunit(dev), rxr->me);
2507		mtx_init(&rxr->rx_mtx, name_string, NULL, MTX_DEF);
2508
2509		if (igb_dma_malloc(adapter, rsize,
2510			&rxr->rxdma, BUS_DMA_NOWAIT)) {
2511			device_printf(dev,
2512			    "Unable to allocate RxDescriptor memory\n");
2513			error = ENOMEM;
2514			goto err_rx_desc;
2515		}
2516		rxr->rx_base = (union e1000_adv_rx_desc *)rxr->rxdma.dma_vaddr;
2517		bzero((void *)rxr->rx_base, rsize);
2518
2519        	/* Allocate receive buffers for the ring*/
2520		if (igb_allocate_receive_buffers(rxr)) {
2521			device_printf(dev,
2522			    "Critical Failure setting up receive buffers\n");
2523			error = ENOMEM;
2524			goto err_rx_desc;
2525		}
2526	}
2527
2528	return (0);
2529
2530err_rx_desc:
2531	for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
2532		igb_dma_free(adapter, &rxr->rxdma);
2533err_tx_desc:
2534	for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
2535		igb_dma_free(adapter, &txr->txdma);
2536	free(adapter->rx_rings, M_DEVBUF);
2537rx_fail:
2538	free(adapter->tx_rings, M_DEVBUF);
2539fail:
2540	return (error);
2541}
2542
2543/*********************************************************************
2544 *
2545 *  Allocate memory for tx_buffer structures. The tx_buffer stores all
2546 *  the information needed to transmit a packet on the wire. This is
2547 *  called only once at attach, setup is done every reset.
2548 *
2549 **********************************************************************/
2550static int
2551igb_allocate_transmit_buffers(struct tx_ring *txr)
2552{
2553	struct adapter *adapter = txr->adapter;
2554	device_t dev = adapter->dev;
2555	struct igb_buffer *txbuf;
2556	int error, i;
2557
2558	/*
2559	 * Setup DMA descriptor areas.
2560	 */
2561	if ((error = bus_dma_tag_create(NULL,		/* parent */
2562			       PAGE_SIZE, 0,		/* alignment, bounds */
2563			       BUS_SPACE_MAXADDR,	/* lowaddr */
2564			       BUS_SPACE_MAXADDR,	/* highaddr */
2565			       NULL, NULL,		/* filter, filterarg */
2566			       IGB_TSO_SIZE,		/* maxsize */
2567			       IGB_MAX_SCATTER,		/* nsegments */
2568			       PAGE_SIZE,		/* maxsegsize */
2569			       0,			/* flags */
2570			       NULL,			/* lockfunc */
2571			       NULL,			/* lockfuncarg */
2572			       &txr->txtag))) {
2573		device_printf(dev,"Unable to allocate TX DMA tag\n");
2574		goto fail;
2575	}
2576
2577	if (!(txr->tx_buffers =
2578	    (struct igb_buffer *) malloc(sizeof(struct igb_buffer) *
2579	    adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2580		device_printf(dev, "Unable to allocate tx_buffer memory\n");
2581		error = ENOMEM;
2582		goto fail;
2583	}
2584
2585        /* Create the descriptor buffer dma maps */
2586	txbuf = txr->tx_buffers;
2587	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
2588		error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
2589		if (error != 0) {
2590			device_printf(dev, "Unable to create TX DMA map\n");
2591			goto fail;
2592		}
2593	}
2594
2595	return 0;
2596fail:
2597	/* We free all, it handles case where we are in the middle */
2598	igb_free_transmit_structures(adapter);
2599	return (error);
2600}
2601
2602/*********************************************************************
2603 *
2604 *  Initialize a transmit ring.
2605 *
2606 **********************************************************************/
2607static void
2608igb_setup_transmit_ring(struct tx_ring *txr)
2609{
2610	struct adapter *adapter = txr->adapter;
2611	struct igb_buffer *txbuf;
2612	int i;
2613
2614	/* Clear the old ring contents */
2615	bzero((void *)txr->tx_base,
2616	      (sizeof(union e1000_adv_tx_desc)) * adapter->num_tx_desc);
2617	/* Reset indices */
2618	txr->next_avail_desc = 0;
2619	txr->next_to_clean = 0;
2620
2621	/* Free any existing tx buffers. */
2622        txbuf = txr->tx_buffers;
2623	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
2624		if (txbuf->m_head != NULL) {
2625			bus_dmamap_sync(txr->txtag, txbuf->map,
2626			    BUS_DMASYNC_POSTWRITE);
2627			bus_dmamap_unload(txr->txtag, txbuf->map);
2628			m_freem(txbuf->m_head);
2629			txbuf->m_head = NULL;
2630		}
2631		/* clear the watch index */
2632		txbuf->next_eop = -1;
2633        }
2634
2635	/* Set number of descriptors available */
2636	txr->tx_avail = adapter->num_tx_desc;
2637
2638	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
2639	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
2640
2641}
2642
2643/*********************************************************************
2644 *
2645 *  Initialize all transmit rings.
2646 *
2647 **********************************************************************/
2648static void
2649igb_setup_transmit_structures(struct adapter *adapter)
2650{
2651	struct tx_ring *txr = adapter->tx_rings;
2652
2653	for (int i = 0; i < adapter->num_tx_queues; i++, txr++)
2654		igb_setup_transmit_ring(txr);
2655
2656	return;
2657}
2658
2659/*********************************************************************
2660 *
2661 *  Enable transmit unit.
2662 *
2663 **********************************************************************/
2664static void
2665igb_initialize_transmit_units(struct adapter *adapter)
2666{
2667	struct tx_ring	*txr = adapter->tx_rings;
2668	u32		tctl, txdctl, tipg = 0;
2669
2670	 INIT_DEBUGOUT("igb_initialize_transmit_units: begin");
2671
2672	/* Setup the Base and Length of the Tx Descriptor Rings */
2673	for (int i = 0; i < adapter->num_tx_queues; i++, txr++) {
2674		u64 bus_addr = txr->txdma.dma_paddr;
2675
2676		E1000_WRITE_REG(&adapter->hw, E1000_TDLEN(i),
2677		    adapter->num_tx_desc * sizeof(struct e1000_tx_desc));
2678		E1000_WRITE_REG(&adapter->hw, E1000_TDBAH(i),
2679		    (uint32_t)(bus_addr >> 32));
2680		E1000_WRITE_REG(&adapter->hw, E1000_TDBAL(i),
2681		    (uint32_t)bus_addr);
2682
2683		/* Setup the HW Tx Head and Tail descriptor pointers */
2684		E1000_WRITE_REG(&adapter->hw, E1000_TDT(i), 0);
2685		E1000_WRITE_REG(&adapter->hw, E1000_TDH(i), 0);
2686
2687		HW_DEBUGOUT2("Base = %x, Length = %x\n",
2688		    E1000_READ_REG(&adapter->hw, E1000_TDBAL(i)),
2689		    E1000_READ_REG(&adapter->hw, E1000_TDLEN(i)));
2690
2691		/* Setup Transmit Descriptor Base Settings */
2692		adapter->txd_cmd = E1000_TXD_CMD_IFCS;
2693
2694		txdctl = E1000_READ_REG(&adapter->hw, E1000_TXDCTL(i));
2695		txdctl |= E1000_TXDCTL_QUEUE_ENABLE;
2696		E1000_WRITE_REG(&adapter->hw, E1000_TXDCTL(i), txdctl);
2697	}
2698
2699	/* Set the default values for the Tx Inter Packet Gap timer */
2700	if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
2701	    (adapter->hw.phy.media_type == e1000_media_type_internal_serdes))
2702		tipg = DEFAULT_82543_TIPG_IPGT_FIBER;
2703	else
2704		tipg = DEFAULT_82543_TIPG_IPGT_COPPER;
2705
2706	tipg |= DEFAULT_82543_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT;
2707	tipg |= DEFAULT_82543_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT;
2708
2709	E1000_WRITE_REG(&adapter->hw, E1000_TIPG, tipg);
2710	E1000_WRITE_REG(&adapter->hw, E1000_TIDV, adapter->tx_int_delay.value);
2711	E1000_WRITE_REG(&adapter->hw, E1000_TADV, adapter->tx_abs_int_delay.value);
2712
2713	/* Program the Transmit Control Register */
2714	tctl = E1000_READ_REG(&adapter->hw, E1000_TCTL);
2715	tctl &= ~E1000_TCTL_CT;
2716	tctl |= (E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN |
2717		   (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT));
2718
2719	/* This write will effectively turn on the transmit unit. */
2720	E1000_WRITE_REG(&adapter->hw, E1000_TCTL, tctl);
2721
2722}
2723
2724/*********************************************************************
2725 *
2726 *  Free all transmit rings.
2727 *
2728 **********************************************************************/
2729static void
2730igb_free_transmit_structures(struct adapter *adapter)
2731{
2732	struct tx_ring *txr = adapter->tx_rings;
2733
2734	for (int i = 0; i < adapter->num_tx_queues; i++, txr++) {
2735		IGB_TX_LOCK(txr);
2736		igb_free_transmit_buffers(txr);
2737		igb_dma_free(adapter, &txr->txdma);
2738		IGB_TX_UNLOCK(txr);
2739		IGB_TX_LOCK_DESTROY(txr);
2740	}
2741	free(adapter->tx_rings, M_DEVBUF);
2742}
2743
2744/*********************************************************************
2745 *
2746 *  Free transmit ring related data structures.
2747 *
2748 **********************************************************************/
2749static void
2750igb_free_transmit_buffers(struct tx_ring *txr)
2751{
2752	struct adapter *adapter = txr->adapter;
2753	struct igb_buffer *tx_buffer;
2754	int             i;
2755
2756	INIT_DEBUGOUT("free_transmit_ring: begin");
2757
2758	if (txr->tx_buffers == NULL)
2759		return;
2760
2761	tx_buffer = txr->tx_buffers;
2762	for (i = 0; i < adapter->num_tx_desc; i++, tx_buffer++) {
2763		if (tx_buffer->m_head != NULL) {
2764			bus_dmamap_sync(txr->txtag, tx_buffer->map,
2765			    BUS_DMASYNC_POSTWRITE);
2766			bus_dmamap_unload(txr->txtag,
2767			    tx_buffer->map);
2768			m_freem(tx_buffer->m_head);
2769			tx_buffer->m_head = NULL;
2770			if (tx_buffer->map != NULL) {
2771				bus_dmamap_destroy(txr->txtag,
2772				    tx_buffer->map);
2773				tx_buffer->map = NULL;
2774			}
2775		} else if (tx_buffer->map != NULL) {
2776			bus_dmamap_unload(txr->txtag,
2777			    tx_buffer->map);
2778			bus_dmamap_destroy(txr->txtag,
2779			    tx_buffer->map);
2780			tx_buffer->map = NULL;
2781		}
2782	}
2783
2784	if (txr->tx_buffers != NULL) {
2785		free(txr->tx_buffers, M_DEVBUF);
2786		txr->tx_buffers = NULL;
2787	}
2788	if (txr->txtag != NULL) {
2789		bus_dma_tag_destroy(txr->txtag);
2790		txr->txtag = NULL;
2791	}
2792	return;
2793}
2794
2795/**********************************************************************
2796 *
2797 *  Setup work for hardware segmentation offload (TSO) on
2798 *  adapters using advanced tx descriptors (82575)
2799 *
2800 **********************************************************************/
2801static boolean_t
2802igb_tso_setup(struct tx_ring *txr, struct mbuf *mp, u32 *hdrlen)
2803{
2804	struct adapter *adapter = txr->adapter;
2805	struct e1000_adv_tx_context_desc *TXD;
2806	struct igb_buffer        *tx_buffer;
2807	u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0;
2808	u32 mss_l4len_idx = 0;
2809	u16 vtag = 0;
2810	int ctxd, ehdrlen, ip_hlen, tcp_hlen;
2811	struct ether_vlan_header *eh;
2812	struct ip *ip;
2813	struct tcphdr *th;
2814
2815
2816	/*
2817	 * Determine where frame payload starts.
2818	 * Jump over vlan headers if already present
2819	 */
2820	eh = mtod(mp, struct ether_vlan_header *);
2821	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN))
2822		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
2823	else
2824		ehdrlen = ETHER_HDR_LEN;
2825
2826	/* Ensure we have at least the IP+TCP header in the first mbuf. */
2827	if (mp->m_len < ehdrlen + sizeof(struct ip) + sizeof(struct tcphdr))
2828		return FALSE;
2829
2830	/* Only supports IPV4 for now */
2831	ctxd = txr->next_avail_desc;
2832	tx_buffer = &txr->tx_buffers[ctxd];
2833	TXD = (struct e1000_adv_tx_context_desc *) &txr->tx_base[ctxd];
2834
2835	ip = (struct ip *)(mp->m_data + ehdrlen);
2836	if (ip->ip_p != IPPROTO_TCP)
2837                return FALSE;   /* 0 */
2838	ip->ip_len = 0;
2839	ip->ip_sum = 0;
2840	ip_hlen = ip->ip_hl << 2;
2841	th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
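	/*
	 * Seed the TCP checksum with the pseudo-header sum so the hardware
	 * can complete the checksum for each segment it generates.
	 */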
2842	th->th_sum = in_pseudo(ip->ip_src.s_addr,
2843	    ip->ip_dst.s_addr, htons(IPPROTO_TCP));
2844	tcp_hlen = th->th_off << 2;
2845	/*
2846	 * Calculate header length, this is used
2847	 * in the transmit desc in igb_xmit
2848	 */
2849	*hdrlen = ehdrlen + ip_hlen + tcp_hlen;
2850
2851	/* VLAN MACLEN IPLEN */
2852	if (mp->m_flags & M_VLANTAG) {
2853		vtag = htole16(mp->m_pkthdr.ether_vtag);
2854		vlan_macip_lens |= (vtag << E1000_ADVTXD_VLAN_SHIFT);
2855	}
2856
2857	vlan_macip_lens |= (ehdrlen << E1000_ADVTXD_MACLEN_SHIFT);
2858	vlan_macip_lens |= ip_hlen;
2859	TXD->vlan_macip_lens |= htole32(vlan_macip_lens);
2860
2861	/* ADV DTYPE TUCMD */
2862	type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
2863	type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP;
2864	type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4;
2865	TXD->type_tucmd_mlhl |= htole32(type_tucmd_mlhl);
2866
2867	/* MSS L4LEN IDX */
2868	mss_l4len_idx |= (mp->m_pkthdr.tso_segsz << E1000_ADVTXD_MSS_SHIFT);
2869	mss_l4len_idx |= (tcp_hlen << E1000_ADVTXD_L4LEN_SHIFT);
2870	TXD->mss_l4len_idx = htole32(mss_l4len_idx);
2871
2872	TXD->seqnum_seed = htole32(0);
2873	tx_buffer->m_head = NULL;
2874	tx_buffer->next_eop = -1;
2875
2876	if (++ctxd == adapter->num_tx_desc)
2877		ctxd = 0;
2878
2879	txr->tx_avail--;
2880	txr->next_avail_desc = ctxd;
2881	return TRUE;
2882}
2883
2884
2885/*********************************************************************
2886 *
2887 *  Context Descriptor setup for VLAN or CSUM
2888 *
2889 **********************************************************************/
2890
2891static boolean_t
2892igb_tx_ctx_setup(struct tx_ring *txr, struct mbuf *mp)
2893{
2894	struct adapter *adapter = txr->adapter;
2895	struct e1000_adv_tx_context_desc *TXD;
2896	struct igb_buffer        *tx_buffer;
2897	uint32_t vlan_macip_lens = 0, type_tucmd_mlhl = 0;
2898	struct ether_vlan_header *eh;
2899	struct ip *ip = NULL;
2900	struct ip6_hdr *ip6;
2901	int  ehdrlen, ip_hlen = 0;
2902	u16	etype;
2903	u8	ipproto = 0;
2904	bool	offload = TRUE;
2905	u16 vtag = 0;
2906
2907	int ctxd = txr->next_avail_desc;
2908	tx_buffer = &txr->tx_buffers[ctxd];
2909	TXD = (struct e1000_adv_tx_context_desc *) &txr->tx_base[ctxd];
2910
2911	if ((mp->m_pkthdr.csum_flags & CSUM_OFFLOAD) == 0)
2912		offload = FALSE; /* Only here to handle VLANs */
2913	/*
2914	** In advanced descriptors the vlan tag must
2915	** be placed into the descriptor itself.
2916	*/
2917	if (mp->m_flags & M_VLANTAG) {
2918		vtag = htole16(mp->m_pkthdr.ether_vtag);
2919		vlan_macip_lens |= (vtag << E1000_ADVTXD_VLAN_SHIFT);
2920	} else if (offload == FALSE)
2921		return FALSE;
2922	/*
2923	 * Determine where frame payload starts.
2924	 * Jump over vlan headers if already present,
2925	 * helpful for QinQ too.
2926	 */
2927	eh = mtod(mp, struct ether_vlan_header *);
2928	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
2929		etype = ntohs(eh->evl_proto);
2930		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
2931	} else {
2932		etype = ntohs(eh->evl_encap_proto);
2933		ehdrlen = ETHER_HDR_LEN;
2934	}
2935
2936	/* Set the ether header length */
2937	vlan_macip_lens |= ehdrlen << E1000_ADVTXD_MACLEN_SHIFT;
2938
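	/* Identify the L3 protocol so the matching TUCMD bits are set below. */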
2939	switch (etype) {
2940		case ETHERTYPE_IP:
2941			ip = (struct ip *)(mp->m_data + ehdrlen);
2942			ip_hlen = ip->ip_hl << 2;
2943			if (mp->m_len < ehdrlen + ip_hlen) {
2944				offload = FALSE;
2945				break;
2946			}
2947			ipproto = ip->ip_p;
2948			type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4;
2949			break;
2950		case ETHERTYPE_IPV6:
2951			ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
2952			ip_hlen = sizeof(struct ip6_hdr);
2953			if (mp->m_len < ehdrlen + ip_hlen)
2954				return FALSE; /* failure */
2955			ipproto = ip6->ip6_nxt;
2956			type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV6;
2957			break;
2958		default:
2959			offload = FALSE;
2960			break;
2961	}
2962
2963	vlan_macip_lens |= ip_hlen;
2964	type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
2965
2966	switch (ipproto) {
2967		case IPPROTO_TCP:
2968			if (mp->m_pkthdr.csum_flags & CSUM_TCP)
2969				type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP;
2970			break;
2971		case IPPROTO_UDP:
2972		{
2973			if (mp->m_pkthdr.csum_flags & CSUM_UDP)
2974				type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_UDP;
2975			break;
2976		}
2977		default:
2978			offload = FALSE;
2979			break;
2980	}
2981
2982	/* Now copy bits into descriptor */
2983	TXD->vlan_macip_lens |= htole32(vlan_macip_lens);
2984	TXD->type_tucmd_mlhl |= htole32(type_tucmd_mlhl);
2985	TXD->seqnum_seed = htole32(0);
2986	TXD->mss_l4len_idx = htole32(0);
2987
2988	tx_buffer->m_head = NULL;
2989	tx_buffer->next_eop = -1;
2990
2991	/* We've consumed the first desc, adjust counters */
2992	if (++ctxd == adapter->num_tx_desc)
2993		ctxd = 0;
2994	txr->next_avail_desc = ctxd;
2995	--txr->tx_avail;
2996
2997        return (offload);
2998}
2999
3000
3001/**********************************************************************
3002 *
3003 *  Examine each tx_buffer in the used queue. If the hardware is done
3004 *  processing the packet then free associated resources. The
3005 *  tx_buffer is put back on the free queue.
3006 *
3007 *  TRUE return means there's work in the ring to clean, FALSE its empty.
3008 **********************************************************************/
3009static bool
3010igb_txeof(struct tx_ring *txr)
3011{
3012	struct adapter	*adapter = txr->adapter;
3013        int first, last, done, num_avail;
3014        struct igb_buffer *tx_buffer;
3015        struct e1000_tx_desc   *tx_desc, *eop_desc;
3016	struct ifnet   *ifp = adapter->ifp;
3017
3018	IGB_TX_LOCK_ASSERT(txr);
3019
3020        if (txr->tx_avail == adapter->num_tx_desc)
3021                return FALSE;
3022
3023        num_avail = txr->tx_avail;
3024        first = txr->next_to_clean;
3025        tx_desc = &txr->tx_base[first];
3026        tx_buffer = &txr->tx_buffers[first];
3027	last = tx_buffer->next_eop;
3028        eop_desc = &txr->tx_base[last];
3029
3030	/*
3031	 * What this does is get the index of the
3032	 * first descriptor AFTER the EOP of the
3033	 * first packet, that way we can do the
3034	 * simple comparison on the inner while loop.
3035	 */
3036	if (++last == adapter->num_tx_desc)
3037 		last = 0;
3038	done = last;
3039
3040        bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3041            BUS_DMASYNC_POSTREAD);
3042
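        /*
         * The DD (descriptor done) bit in the EOP descriptor's write-back
         * status means the hardware has finished with the whole packet.
         */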
3043        while (eop_desc->upper.fields.status & E1000_TXD_STAT_DD) {
3044		/* We clean the range of the packet */
3045		while (first != done) {
3046                	tx_desc->upper.data = 0;
3047                	tx_desc->lower.data = 0;
3048                	tx_desc->buffer_addr = 0;
3049                	num_avail++;
3050
3051			if (tx_buffer->m_head) {
3052				ifp->if_opackets++;
3053				bus_dmamap_sync(txr->txtag,
3054				    tx_buffer->map,
3055				    BUS_DMASYNC_POSTWRITE);
3056				bus_dmamap_unload(txr->txtag,
3057				    tx_buffer->map);
3058
3059                        	m_freem(tx_buffer->m_head);
3060                        	tx_buffer->m_head = NULL;
3061                	}
3062			tx_buffer->next_eop = -1;
3063
3064	                if (++first == adapter->num_tx_desc)
3065				first = 0;
3066
3067	                tx_buffer = &txr->tx_buffers[first];
3068			tx_desc = &txr->tx_base[first];
3069		}
3070		/* See if we can continue to the next packet */
3071		last = tx_buffer->next_eop;
3072		if (last != -1) {
3073        		eop_desc = &txr->tx_base[last];
3074			/* Get new done point */
3075			if (++last == adapter->num_tx_desc)
				last = 0;
3076			done = last;
3077		} else
3078			break;
3079        }
3080        bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3081            BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3082
3083        txr->next_to_clean = first;
3084
3085        /*
3086         * If we have enough room, clear IFF_DRV_OACTIVE to tell the stack
3087         * that it is OK to send packets.
3088         * If there are no pending descriptors, clear the timeout. Otherwise,
3089         * if some descriptors have been freed, restart the timeout.
3090         */
3091        if (num_avail > IGB_TX_CLEANUP_THRESHOLD) {
3092                ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
3093		/* All clean, turn off the timer */
3094                if (num_avail == adapter->num_tx_desc) {
3095			txr->watchdog_timer = 0;
3096        		txr->tx_avail = num_avail;
3097			return FALSE;
3098		}
3099		/* Some cleaned, reset the timer */
3100                else if (num_avail != txr->tx_avail)
3101			txr->watchdog_timer = IGB_TX_TIMEOUT;
3102        }
3103        txr->tx_avail = num_avail;
3104        return TRUE;
3105}
3106
3107
3108/*********************************************************************
3109 *
3110 *  Get a buffer from system mbuf buffer pool.
3111 *
3112 **********************************************************************/
3113static int
3114igb_get_buf(struct rx_ring *rxr, int i)
3115{
3116	struct adapter		*adapter = rxr->adapter;
3117	struct mbuf		*m;
3118	bus_dma_segment_t	segs[1];
3119	bus_dmamap_t		map;
3120	struct igb_buffer	*rx_buffer;
3121	int			error, nsegs;
3122
3123	m = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR);
3124	if (m == NULL) {
3125		adapter->mbuf_cluster_failed++;
3126		return (ENOBUFS);
3127	}
3128	m->m_len = m->m_pkthdr.len = MCLBYTES;
3129
3130	if (adapter->max_frame_size <= (MCLBYTES - ETHER_ALIGN))
3131		m_adj(m, ETHER_ALIGN);
3132
3133	/*
3134	 * Using memory from the mbuf cluster pool, invoke the
3135	 * bus_dma machinery to arrange the memory mapping.
3136	 */
3137	error = bus_dmamap_load_mbuf_sg(rxr->rxtag,
3138	    rxr->rx_spare_map, m, segs, &nsegs, BUS_DMA_NOWAIT);
3139	if (error != 0) {
3140		m_free(m);
3141		return (error);
3142	}
3143
3144	/* If nsegs is wrong then the stack is corrupt. */
3145	KASSERT(nsegs == 1, ("Too many segments returned!"));
3146
3147	rx_buffer = &rxr->rx_buffers[i];
3148	if (rx_buffer->m_head != NULL)
3149		bus_dmamap_unload(rxr->rxtag, rx_buffer->map);
3150
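	/*
	 * The new mbuf was loaded into the spare map above; swap it into
	 * this slot and recycle the old map as the next spare.
	 */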
3151	map = rx_buffer->map;
3152	rx_buffer->map = rxr->rx_spare_map;
3153	rxr->rx_spare_map = map;
3154	bus_dmamap_sync(rxr->rxtag, rx_buffer->map, BUS_DMASYNC_PREREAD);
3155	rx_buffer->m_head = m;
3156
3157	rxr->rx_base[i].read.pkt_addr = htole64(segs[0].ds_addr);
3158	return (0);
3159}
3160
3161
3162/*********************************************************************
3163 *
3164 *  Allocate memory for rx_buffer structures. Since we use one
3165 *  rx_buffer per received packet, the maximum number of rx_buffer's
3166 *  that we'll need is equal to the number of receive descriptors
3167 *  that we've allocated.
3168 *
3169 **********************************************************************/
3170static int
3171igb_allocate_receive_buffers(struct rx_ring *rxr)
3172{
3173	struct	adapter 	*adapter = rxr->adapter;
3174	device_t 		dev = adapter->dev;
3175	struct igb_buffer 	*rxbuf;
3176	int             	i, bsize, error;
3177
3178	bsize = sizeof(struct igb_buffer) * adapter->num_rx_desc;
3179	if (!(rxr->rx_buffers =
3180	    (struct igb_buffer *) malloc(bsize,
3181	    M_DEVBUF, M_NOWAIT | M_ZERO))) {
3182		device_printf(dev, "Unable to allocate rx_buffer memory\n");
3183		error = ENOMEM;
3184		goto fail;
3185	}
3186
3187	if ((error = bus_dma_tag_create(NULL,		/* parent */
3188				   PAGE_SIZE, 0,	/* alignment, bounds */
3189				   BUS_SPACE_MAXADDR,	/* lowaddr */
3190				   BUS_SPACE_MAXADDR,	/* highaddr */
3191				   NULL, NULL,		/* filter, filterarg */
3192				   MCLBYTES,		/* maxsize */
3193				   1,			/* nsegments */
3194				   MCLBYTES,		/* maxsegsize */
3195				   0,			/* flags */
3196				   NULL,		/* lockfunc */
3197				   NULL,		/* lockfuncarg */
3198				   &rxr->rxtag))) {
3199		device_printf(dev, "Unable to create RX Small DMA tag\n");
3200		goto fail;
3201	}
3202
3203	/* Create the spare map (used by getbuf) */
3204        error = bus_dmamap_create(rxr->rxtag, BUS_DMA_NOWAIT,
3205	     &rxr->rx_spare_map);
3206	if (error) {
3207		device_printf(dev, "%s: bus_dmamap_create failed: %d\n",
3208		    __func__, error);
3209		goto fail;
3210	}
3211
3212	for (i = 0; i < adapter->num_rx_desc; i++, rxbuf++) {
3213		rxbuf = &rxr->rx_buffers[i];
3214		error = bus_dmamap_create(rxr->rxtag,
3215		    BUS_DMA_NOWAIT, &rxbuf->map);
3216		if (error) {
3217			device_printf(dev, "Unable to create Small RX DMA map\n");
3218			goto fail;
3219		}
3220	}
3221
3222	return (0);
3223
3224fail:
3225	/* Frees all, but can handle partial completion */
3226	igb_free_receive_structures(adapter);
3227	return (error);
3228}
3229
3230/*********************************************************************
3231 *
3232 *  Initialize a receive ring and its buffers.
3233 *
3234 **********************************************************************/
3235static int
3236igb_setup_receive_ring(struct rx_ring *rxr)
3237{
3238	struct	adapter	*adapter;
3239	struct igb_buffer *rxbuf;
3240	int j, rsize;
3241
3242	adapter = rxr->adapter;
3243	rsize = roundup2(adapter->num_rx_desc *
3244	    sizeof(union e1000_adv_rx_desc), 4096);
3245	/* Clear the ring contents */
3246	bzero((void *)rxr->rx_base, rsize);
3247
3248	/*
3249	** Free any mbufs currently attached to the ring
3250	** before loading fresh receive buffers.
3251	*/
3253	for (int i = 0; i < adapter->num_rx_desc; i++) {
3254		rxbuf = &rxr->rx_buffers[i];
3255		if (rxbuf->m_head != NULL) {
3256			bus_dmamap_sync(rxr->rxtag, rxbuf->map,
3257			    BUS_DMASYNC_POSTREAD);
3258			bus_dmamap_unload(rxr->rxtag, rxbuf->map);
3259			m_freem(rxbuf->m_head);
3260			rxbuf->m_head = NULL;
3261		}
3262	}
3263
3264	for (j = 0; j < adapter->num_rx_desc; j++) {
3265		if (igb_get_buf(rxr, j) != 0) {
3266			rxr->rx_buffers[j].m_head = NULL;
3267			rxr->rx_base[j].read.pkt_addr = 0;
3268			goto fail;
3269		}
3270	}
3271
3272	/* Setup our descriptor indices */
3273	rxr->next_to_check = 0;
3274	rxr->last_cleaned = 0;
3275
3276	bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
3277	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3278
3279	return (0);
3280fail:
3281	/*
3282	 * We need to clean up any buffers allocated so far
3283	 * 'j' is the failing index, decrement it to get the
3284	 * last success.
3285	 */
3286	for (--j; j >= 0; j--) {
3287		rxbuf = &rxr->rx_buffers[j];
3288		if (rxbuf->m_head != NULL) {
3289			bus_dmamap_sync(rxr->rxtag, rxbuf->map,
3290			    BUS_DMASYNC_POSTREAD);
3291			bus_dmamap_unload(rxr->rxtag, rxbuf->map);
3292			m_freem(rxbuf->m_head);
3293			rxbuf->m_head = NULL;
3294		}
3295	}
3296	return (ENOBUFS);
3297}
3298
3299/*********************************************************************
3300 *
3301 *  Initialize all receive rings.
3302 *
3303 **********************************************************************/
3304static int
3305igb_setup_receive_structures(struct adapter *adapter)
3306{
3307	struct rx_ring *rxr = adapter->rx_rings;
3308	int i, j;
3309
3310	for (i = 0; i < adapter->num_rx_queues; i++, rxr++)
3311		if (igb_setup_receive_ring(rxr))
3312			goto fail;
3313
3314	return (0);
3315fail:
3316	/*
3317	 * Free RX buffers allocated so far; only the rings that completed
3318	 * need cleaning, the failing ring has already cleaned up after
3319	 * itself. 'i' is the index of the failed ring, so release rings
3320	 * 0 through i - 1.
3321	 */
3322	rxr = adapter->rx_rings;
3323	for (; i > 0; i--, rxr++) {
3324		for (j = 0; j < adapter->num_rx_desc; j++) {
3325			struct igb_buffer *rxbuf;
3326			rxbuf = &rxr->rx_buffers[j];
3327			if (rxbuf->m_head != NULL) {
3328				bus_dmamap_sync(rxr->rxtag, rxbuf->map,
3329			  	  BUS_DMASYNC_POSTREAD);
3330				bus_dmamap_unload(rxr->rxtag, rxbuf->map);
3331				m_freem(rxbuf->m_head);
3332				rxbuf->m_head = NULL;
3333			}
3334		}
3335	}
3336
3337	return (ENOBUFS);
3338}
3339
3340/*********************************************************************
3341 *
3342 *  Enable receive unit.
3343 *
3344 **********************************************************************/
3345static void
3346igb_initialize_receive_units(struct adapter *adapter)
3347{
3348	struct rx_ring	*rxr = adapter->rx_rings;
3349	struct ifnet	*ifp = adapter->ifp;
3350	u32		rctl, rxcsum, psize;
3351
3352	INIT_DEBUGOUT("igb_initialize_receive_unit: begin");
3353
3354	/*
3355	 * Make sure receives are disabled while setting
3356	 * up the descriptor ring
3357	 */
3358	rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
3359	E1000_WRITE_REG(&adapter->hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
3360
3361	E1000_WRITE_REG(&adapter->hw, E1000_RADV,
3362	    adapter->rx_abs_int_delay.value);
3363
3364	/* Setup the Base and Length of the Rx Descriptor Rings */
3365	for (int i = 0; i < adapter->num_rx_queues; i++, rxr++) {
3366		u64 bus_addr = rxr->rxdma.dma_paddr;
3367		u32 rxdctl, srrctl;
3368
3369		E1000_WRITE_REG(&adapter->hw, E1000_RDLEN(i),
3370		    adapter->num_rx_desc * sizeof(struct e1000_rx_desc));
3371		E1000_WRITE_REG(&adapter->hw, E1000_RDBAH(i),
3372		    (uint32_t)(bus_addr >> 32));
3373		E1000_WRITE_REG(&adapter->hw, E1000_RDBAL(i),
3374		    (uint32_t)bus_addr);
3375		/* Use Advanced Descriptor type */
3376		srrctl = E1000_READ_REG(&adapter->hw, E1000_SRRCTL(i));
3377		srrctl |= E1000_SRRCTL_DESCTYPE_ADV_ONEBUF;
3378		E1000_WRITE_REG(&adapter->hw, E1000_SRRCTL(i), srrctl);
3379		/* Enable this Queue */
3380		rxdctl = E1000_READ_REG(&adapter->hw, E1000_RXDCTL(i));
3381		rxdctl |= E1000_RXDCTL_QUEUE_ENABLE;
3382		rxdctl &= 0xFFF00000;
3383		rxdctl |= IGB_RX_PTHRESH;
3384		rxdctl |= IGB_RX_HTHRESH << 8;
3385		rxdctl |= IGB_RX_WTHRESH << 16;
3386		E1000_WRITE_REG(&adapter->hw, E1000_RXDCTL(i), rxdctl);
3387	}
3388
3389	/*
3390	** Setup for RX MultiQueue
3391	*/
3392	if (adapter->num_rx_queues > 1) {
3393		u32 random[10], mrqc, shift = 0;
3394		union igb_reta {
3395			u32 dword;
3396			u8  bytes[4];
3397		} reta;
3398
3399		arc4rand(&random, sizeof(random), 0);
3400		if (adapter->hw.mac.type == e1000_82575)
3401			shift = 6;
3402		/* Warning FM follows */
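		/*
		 * The redirection table is written one dword (4 entries) at a
		 * time; each byte selects the queue for a hash bucket, with the
		 * 82575 apparently keeping the queue index in the upper bits
		 * (hence shift = 6).
		 */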
3403		for (int i = 0; i < 128; i++) {
3404			reta.bytes[i & 3] =
3405			    (i % adapter->num_rx_queues) << shift;
3406			if ((i & 3) == 3)
3407				E1000_WRITE_REG(&adapter->hw,
3408				    E1000_RETA(i & ~3), reta.dword);
3409		}
3410		/* Now fill in hash table */
3411		mrqc = E1000_MRQC_ENABLE_RSS_4Q;
3412		for (int i = 0; i < 10; i++)
3413			E1000_WRITE_REG_ARRAY(&adapter->hw,
3414			    E1000_RSSRK(0), i, random[i]);
3415
3416		mrqc |= (E1000_MRQC_RSS_FIELD_IPV4 |
3417		    E1000_MRQC_RSS_FIELD_IPV4_TCP);
3418		mrqc |= (E1000_MRQC_RSS_FIELD_IPV6 |
3419		    E1000_MRQC_RSS_FIELD_IPV6_TCP);
3420		mrqc |= (E1000_MRQC_RSS_FIELD_IPV4_UDP |
3421		    E1000_MRQC_RSS_FIELD_IPV6_UDP);
3422		mrqc |= (E1000_MRQC_RSS_FIELD_IPV6_UDP_EX |
3423		    E1000_MRQC_RSS_FIELD_IPV6_TCP_EX);
3424
3425		E1000_WRITE_REG(&adapter->hw, E1000_MRQC, mrqc);
3426
3427		/*
3428		** NOTE: Receive Full-Packet Checksum Offload
3429		** is mutually exclusive with Multiqueue. However
3430		** this is not the same as TCP/IP checksums which
3431		** still work.
3432		*/
3433		rxcsum = E1000_READ_REG(&adapter->hw, E1000_RXCSUM);
3434		rxcsum |= E1000_RXCSUM_PCSD;
3435		E1000_WRITE_REG(&adapter->hw, E1000_RXCSUM, rxcsum);
3436	} else if (ifp->if_capenable & IFCAP_RXCSUM) {
3437		rxcsum = E1000_READ_REG(&adapter->hw, E1000_RXCSUM);
3438		rxcsum |= (E1000_RXCSUM_IPOFL | E1000_RXCSUM_TUOFL);
3439		E1000_WRITE_REG(&adapter->hw, E1000_RXCSUM, rxcsum);
3440	}
3441
3442	/* Setup the Receive Control Register */
3443	rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
3444	rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_LBM_NO |
3445		   E1000_RCTL_RDMTS_HALF |
3446		   (adapter->hw.mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
3447
3448	/* Make sure VLAN Filters are off */
3449	rctl &= ~E1000_RCTL_VFE;
3450
3451	rctl &= ~E1000_RCTL_SBP;
3452
3453	switch (adapter->rx_buffer_len) {
3454	default:
3455	case 2048:
3456		rctl |= E1000_RCTL_SZ_2048;
3457		break;
3458	case 4096:
3459		rctl |= E1000_RCTL_SZ_4096 |
3460		    E1000_RCTL_BSEX | E1000_RCTL_LPE;
3461		break;
3462	case 8192:
3463		rctl |= E1000_RCTL_SZ_8192 |
3464		    E1000_RCTL_BSEX | E1000_RCTL_LPE;
3465		break;
3466	case 16384:
3467		rctl |= E1000_RCTL_SZ_16384 |
3468		    E1000_RCTL_BSEX | E1000_RCTL_LPE;
3469		break;
3470	}
3471
3472	if (ifp->if_mtu > ETHERMTU) {
3473		/* Set maximum packet len */
3474		psize = adapter->max_frame_size;
3475		/* are we on a vlan? */
3476		if (adapter->ifp->if_vlantrunk != NULL)
3477			psize += VLAN_TAG_SIZE;
3478		E1000_WRITE_REG(&adapter->hw, E1000_RLPML, psize);
3479		rctl |= E1000_RCTL_LPE;
3480	} else
3481		rctl &= ~E1000_RCTL_LPE;
3482
3483	/* Enable Receives */
3484	E1000_WRITE_REG(&adapter->hw, E1000_RCTL, rctl);
3485
3486	/*
3487	 * Setup the HW Rx Head and Tail Descriptor Pointers
3488	 *   - needs to be after enable
3489	 */
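	/*
	 * Head starts at 0 and the tail is set to the last descriptor,
	 * leaving all but one descriptor available to the hardware.
	 */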
3490	for (int i = 0; i < adapter->num_rx_queues; i++) {
3491		E1000_WRITE_REG(&adapter->hw, E1000_RDH(i), 0);
3492		E1000_WRITE_REG(&adapter->hw, E1000_RDT(i),
3493		     adapter->num_rx_desc - 1);
3494	}
3495	return;
3496}
3497
3498/*********************************************************************
3499 *
3500 *  Free receive rings.
3501 *
3502 **********************************************************************/
3503static void
3504igb_free_receive_structures(struct adapter *adapter)
3505{
3506	struct rx_ring *rxr = adapter->rx_rings;
3507
3508	for (int i = 0; i < adapter->num_rx_queues; i++, rxr++) {
3509		igb_free_receive_buffers(rxr);
3510		igb_dma_free(adapter, &rxr->rxdma);
3511	}
3512
3513	free(adapter->rx_rings, M_DEVBUF);
3514}
3515
3516/*********************************************************************
3517 *
3518 *  Free receive ring data structures.
3519 *
3520 **********************************************************************/
3521static void
3522igb_free_receive_buffers(struct rx_ring *rxr)
3523{
3524	struct adapter	*adapter = rxr->adapter;
3525	struct igb_buffer *rx_buffer;
3526
3527	INIT_DEBUGOUT("free_receive_buffers: begin");
3528
3529	if (rxr->rx_spare_map) {
3530		bus_dmamap_destroy(rxr->rxtag, rxr->rx_spare_map);
3531		rxr->rx_spare_map = NULL;
3532	}
3533
3534	/* Cleanup any existing buffers */
3535	if (rxr->rx_buffers != NULL) {
3536		rx_buffer = &rxr->rx_buffers[0];
3537		for (int i = 0; i < adapter->num_rx_desc; i++, rx_buffer++) {
3538			if (rx_buffer->m_head != NULL) {
3539				bus_dmamap_sync(rxr->rxtag, rx_buffer->map,
3540				    BUS_DMASYNC_POSTREAD);
3541				bus_dmamap_unload(rxr->rxtag,
3542				    rx_buffer->map);
3543				m_freem(rx_buffer->m_head);
3544				rx_buffer->m_head = NULL;
3545			} else if (rx_buffer->map != NULL)
3546				bus_dmamap_unload(rxr->rxtag,
3547				    rx_buffer->map);
3548			if (rx_buffer->map != NULL) {
3549				bus_dmamap_destroy(rxr->rxtag,
3550				    rx_buffer->map);
3551				rx_buffer->map = NULL;
3552			}
3553		}
3554	}
3555
3556	if (rxr->rx_buffers != NULL) {
3557		free(rxr->rx_buffers, M_DEVBUF);
3558		rxr->rx_buffers = NULL;
3559	}
3560
3561	if (rxr->rxtag != NULL) {
3562		bus_dma_tag_destroy(rxr->rxtag);
3563		rxr->rxtag = NULL;
3564	}
3565}
3566/*********************************************************************
3567 *
3568 *  This routine executes in interrupt context. It replenishes
3569 *  the mbufs in the descriptor ring and passes data that has
3570 *  been DMA'd into host memory up to the network stack.
3571 *
3572 *  We loop at most count times if count is > 0, or until done if
3573 *  count < 0.
3574 *
3575 *  Return TRUE if all clean, FALSE otherwise
3576 *********************************************************************/
3577static bool
3578igb_rxeof(struct rx_ring *rxr, int count)
3579{
3580	struct adapter	*adapter = rxr->adapter;
3581	struct ifnet	*ifp;
3582	struct mbuf	*mp;
3583	uint8_t		accept_frame = 0;
3584	uint8_t		eop = 0;
3585	uint16_t 	len, desc_len, prev_len_adj;
3586	int		i;
3587	union e1000_adv_rx_desc   *cur;
3588	u32		staterr;
3589
3590	IGB_RX_LOCK(rxr);
3591	ifp = adapter->ifp;
3592	i = rxr->next_to_check;
3593	cur = &rxr->rx_base[i];
3594	staterr = cur->wb.upper.status_error;
3595
3596	bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
3597	    BUS_DMASYNC_POSTREAD);
3598
3599	if (!(staterr & E1000_RXD_STAT_DD)) {
3600		IGB_RX_UNLOCK(rxr);
3601		return FALSE;
3602	}
3603
3604	while ((staterr & E1000_RXD_STAT_DD) &&
3605	    (count != 0) &&
3606	    (ifp->if_drv_flags & IFF_DRV_RUNNING)) {
3607		struct mbuf *m = NULL;
3608
3609		mp = rxr->rx_buffers[i].m_head;
3610		/*
3611		 * Can't defer bus_dmamap_sync(9) because TBI_ACCEPT
3612		 * needs to access the last received byte in the mbuf.
3613		 */
3614		bus_dmamap_sync(rxr->rxtag, rxr->rx_buffers[i].map,
3615		    BUS_DMASYNC_POSTREAD);
3616
3617		accept_frame = 1;
3618		prev_len_adj = 0;
3619		desc_len = le16toh(cur->wb.upper.length);
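		/*
		 * On the last descriptor of a frame (EOP) strip the 4-byte
		 * Ethernet CRC; if that descriptor held fewer than 4 bytes,
		 * the remainder is trimmed from the previous mbuf below via
		 * prev_len_adj.
		 */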
3620		if (staterr & E1000_RXD_STAT_EOP) {
3621			count--;
3622			eop = 1;
3623			if (desc_len < ETHER_CRC_LEN) {
3624				len = 0;
3625				prev_len_adj = ETHER_CRC_LEN - desc_len;
3626			} else
3627				len = desc_len - ETHER_CRC_LEN;
3628		} else {
3629			eop = 0;
3630			len = desc_len;
3631		}
3632
3633		if (staterr & E1000_RXDEXT_ERR_FRAME_ERR_MASK) {
3634			u32	pkt_len = desc_len;
3635
3636			if (rxr->fmp != NULL)
3637				pkt_len += rxr->fmp->m_pkthdr.len;
3638
3639			accept_frame = 0;
3640		}
3641
3642		if (accept_frame) {
3643			if (igb_get_buf(rxr, i) != 0) {
3644				ifp->if_iqdrops++;
3645				goto discard;
3646			}
3647
3648			/* Assign correct length to the current fragment */
3649			mp->m_len = len;
3650
3651			if (rxr->fmp == NULL) {
3652				mp->m_pkthdr.len = len;
3653				rxr->fmp = mp; /* Store the first mbuf */
3654				rxr->lmp = mp;
3655			} else {
3656				/* Chain mbuf's together */
3657				mp->m_flags &= ~M_PKTHDR;
3658				/*
3659				 * Adjust length of previous mbuf in chain if
3660				 * we received less than 4 bytes in the last
3661				 * descriptor.
3662				 */
3663				if (prev_len_adj > 0) {
3664					rxr->lmp->m_len -= prev_len_adj;
3665					rxr->fmp->m_pkthdr.len -=
3666					    prev_len_adj;
3667				}
3668				rxr->lmp->m_next = mp;
3669				rxr->lmp = rxr->lmp->m_next;
3670				rxr->fmp->m_pkthdr.len += len;
3671			}
3672
3673			if (eop) {
3674				rxr->fmp->m_pkthdr.rcvif = ifp;
3675				ifp->if_ipackets++;
3676				rxr->rx_packets++;
3677				rxr->rx_bytes += rxr->fmp->m_pkthdr.len;
3678
3679				igb_rx_checksum(staterr, rxr->fmp);
3680#ifndef __NO_STRICT_ALIGNMENT
3681				if (adapter->max_frame_size >
3682				    (MCLBYTES - ETHER_ALIGN) &&
3683				    igb_fixup_rx(rxr) != 0)
3684					goto skip;
3685#endif
3686				if (staterr & E1000_RXD_STAT_VP) {
3687					rxr->fmp->m_pkthdr.ether_vtag =
3688					    le16toh(cur->wb.upper.vlan);
3689					rxr->fmp->m_flags |= M_VLANTAG;
3690				}
3691#ifndef __NO_STRICT_ALIGNMENT
3692skip:
3693#endif
3694				m = rxr->fmp;
3695				rxr->fmp = NULL;
3696				rxr->lmp = NULL;
3697			}
3698		} else {
3699			ifp->if_ierrors++;
3700discard:
3701			/* Reuse loaded DMA map and just update mbuf chain */
3702			mp = rxr->rx_buffers[i].m_head;
3703			mp->m_len = mp->m_pkthdr.len = MCLBYTES;
3704			mp->m_data = mp->m_ext.ext_buf;
3705			mp->m_next = NULL;
3706			if (adapter->max_frame_size <=
3707			    (MCLBYTES - ETHER_ALIGN))
3708				m_adj(mp, ETHER_ALIGN);
3709			if (rxr->fmp != NULL) {
3710				m_freem(rxr->fmp);
3711				rxr->fmp = NULL;
3712				rxr->lmp = NULL;
3713			}
3714			m = NULL;
3715		}
3716
3717		/* Zero out the receive descriptors status. */
3718		cur->wb.upper.status_error = 0;
3719		bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
3720		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3721
3722		rxr->last_cleaned = i; /* For updating tail */
3723
3724		/* Advance our pointers to the next descriptor. */
3725		if (++i == adapter->num_rx_desc)
3726			i = 0;
3727
3728		if (m != NULL) {
3729			rxr->next_to_check = i;
3730			/* Pass up to the stack */
3731			IGB_RX_UNLOCK(rxr);
3732			(*ifp->if_input)(ifp, m);
3733			IGB_RX_LOCK(rxr);
3734			i = rxr->next_to_check;
3735		}
3736		/* Get the next descriptor */
3737		cur = &rxr->rx_base[i];
3738		staterr = cur->wb.upper.status_error;
3739	}
3740	rxr->next_to_check = i;
3741
3742	if (--i < 0)
3743		i = adapter->num_rx_desc - 1;
3744
3745	/* Advance this ring's "Tail Pointer" to return cleaned descriptors to hardware. */
3746	E1000_WRITE_REG(&adapter->hw, E1000_RDT(rxr->me), rxr->last_cleaned);
3747	IGB_RX_UNLOCK(rxr);
3748
3749	if (!((staterr) & E1000_RXD_STAT_DD))
3750		return FALSE;
3751
3752	return TRUE;
3753}
3754
3755#ifndef __NO_STRICT_ALIGNMENT
3756/*
3757 * When jumbo frames are enabled we should realign the entire payload on
3758 * architectures with strict alignment. This is a serious design mistake of
3759 * the 8254x as it nullifies the benefit of DMA. The 8254x only allows the RX
3760 * buffer size to be 2048/4096/8192/16384. What we really want is 2048 -
3761 * ETHER_ALIGN so the payload is aligned. On architectures without strict
3762 * alignment restrictions the 8254x still performs unaligned memory accesses,
3763 * which reduce performance as well. To avoid copying an entire frame just to
3764 * realign it, we allocate a new mbuf, copy the Ethernet header into it, and
3765 * prepend the new mbuf to the existing mbuf chain.
3766 *
3767 * Be aware, best performance of the 8254x is achieved only when jumbo frames
3768 * are not used at all on architectures with strict alignment.
3769 */
3770static int
3771igb_fixup_rx(struct rx_ring *rxr)
3772{
3773	struct adapter *adapter = rxr->adapter;
3774	struct mbuf *m, *n;
3775	int error;
3776
3777	error = 0;
3778	m = rxr->fmp;
3779	if (m->m_len <= (MCLBYTES - ETHER_HDR_LEN)) {
3780		bcopy(m->m_data, m->m_data + ETHER_HDR_LEN, m->m_len);
3781		m->m_data += ETHER_HDR_LEN;
3782	} else {
3783		MGETHDR(n, M_DONTWAIT, MT_DATA);
3784		if (n != NULL) {
3785			bcopy(m->m_data, n->m_data, ETHER_HDR_LEN);
3786			m->m_data += ETHER_HDR_LEN;
3787			m->m_len -= ETHER_HDR_LEN;
3788			n->m_len = ETHER_HDR_LEN;
3789			M_MOVE_PKTHDR(n, m);
3790			n->m_next = m;
3791			rxr->fmp = n;
3792		} else {
3793			adapter->dropped_pkts++;
3794			m_freem(rxr->fmp);
3795			rxr->fmp = NULL;
3796			error = ENOMEM;
3797		}
3798	}
3799
3800	return (error);
3801}
3802#endif
3803
3804/*********************************************************************
3805 *
3806 *  Verify that the hardware indicated that the checksum is valid.
3807 *  Inform the stack about the status of the checksum so that
3808 *  the stack doesn't spend time verifying it again.
3809 *
3810 *********************************************************************/
3811static void
3812igb_rx_checksum(u32 staterr, struct mbuf *mp)
3813{
3814	u16 status = (u16)staterr;
3815	u8  errors = (u8) (staterr >> 24);
3816
3817	/* If the Ignore Checksum Indication (IXSM) bit is set, report nothing */
3818	if (status & E1000_RXD_STAT_IXSM) {
3819		mp->m_pkthdr.csum_flags = 0;
3820		return;
3821	}
3822
3823	if (status & E1000_RXD_STAT_IPCS) {
3824		/* Did it pass? */
3825		if (!(errors & E1000_RXD_ERR_IPE)) {
3826			/* IP Checksum Good */
3827			mp->m_pkthdr.csum_flags = CSUM_IP_CHECKED;
3828			mp->m_pkthdr.csum_flags |= CSUM_IP_VALID;
3829
3830		} else
3831			mp->m_pkthdr.csum_flags = 0;
3832	}
3833
3834	if (status & E1000_RXD_STAT_TCPCS) {
3835		/* Did it pass? */
3836		if (!(errors & E1000_RXD_ERR_TCPE)) {
3837			mp->m_pkthdr.csum_flags |=
3838			(CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
3839			mp->m_pkthdr.csum_data = htons(0xffff);
3840		}
3841	}
3842	return;
3843}
3844
3845/*
3846 * This turns on the hardware offload of the VLAN
3847 * tag insertion and strip
3848 */
3849static void
3850igb_enable_hw_vlans(struct adapter *adapter)
3851{
3852	uint32_t ctrl;
3853
3854	ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
3855	ctrl |= E1000_CTRL_VME;
3856	E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
3857}
3858
3859static void
3860igb_enable_intr(struct adapter *adapter)
3861{
3862	/* With MSIX/RSS, set up which interrupt causes to auto clear */
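	/*
	 * EIAC selects which MSIX interrupt causes auto-clear, EIMS
	 * unmasks the queue vectors, and IMS keeps link status change
	 * (LSC) interrupts enabled via the legacy mask register.
	 */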
3863	if (adapter->msix_mem) {
3864		E1000_WRITE_REG(&adapter->hw, E1000_EIAC,
3865		    adapter->eims_mask);
3866		E1000_WRITE_REG(&adapter->hw, E1000_EIMS,
3867		    adapter->eims_mask);
3868		E1000_WRITE_REG(&adapter->hw, E1000_IMS,
3869		    E1000_IMS_LSC);
3870	} else {
3871		E1000_WRITE_REG(&adapter->hw, E1000_IMS,
3872		    IMS_ENABLE_MASK);
3873	}
3874	E1000_WRITE_FLUSH(&adapter->hw);
3875
3876	return;
3877}
3878
3879static void
3880igb_disable_intr(struct adapter *adapter)
3881{
3882	if (adapter->msix_mem) {
3883		E1000_WRITE_REG(&adapter->hw, E1000_EIMC, ~0);
3884		E1000_WRITE_REG(&adapter->hw, E1000_EIAC, 0);
3885	}
3886	E1000_WRITE_REG(&adapter->hw, E1000_IMC, ~0);
3887	E1000_WRITE_FLUSH(&adapter->hw);
3888	return;
3889}
3890
3891/*
3892 * Bit of a misnomer: what this really means is
3893 * to enable OS management of the system, i.e. to
3894 * disable the special hardware management features.
3895 */
3896static void
3897igb_init_manageability(struct adapter *adapter)
3898{
3899	/* A shared code workaround */
3900#define E1000_82542_MANC2H E1000_MANC2H
3901	if (adapter->has_manage) {
3902		int manc2h = E1000_READ_REG(&adapter->hw, E1000_MANC2H);
3903		int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
3904
3905		/* disable hardware interception of ARP */
3906		manc &= ~(E1000_MANC_ARP_EN);
3907
3908		/* Enable receiving management packets to the host */
3909		manc |= E1000_MANC_EN_MNG2HOST;
3910#define E1000_MNG2HOST_PORT_623 (1 << 5)
3911#define E1000_MNG2HOST_PORT_664 (1 << 6)
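		/*
		 * Ports 623 and 664 are conventionally used for RMCP/ASF
		 * management traffic; forward packets on those ports to
		 * the host.
		 */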
3912		manc2h |= E1000_MNG2HOST_PORT_623;
3913		manc2h |= E1000_MNG2HOST_PORT_664;
3914		E1000_WRITE_REG(&adapter->hw, E1000_MANC2H, manc2h);
3915
3916		E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
3917	}
3918}
3919
3920/*
3921 * Give control back to hardware management
3922 * controller if there is one.
3923 */
3924static void
3925igb_release_manageability(struct adapter *adapter)
3926{
3927	if (adapter->has_manage) {
3928		int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
3929
3930		/* re-enable hardware interception of ARP */
3931		manc |= E1000_MANC_ARP_EN;
3932		manc &= ~E1000_MANC_EN_MNG2HOST;
3933
3934		E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
3935	}
3936}
3937
3938/*
3939 * igb_get_hw_control sets CTRL_EXT:DRV_LOAD bit.
3940 * For ASF and Pass Through versions of f/w this means that
3941 * the driver is loaded.
3942 *
3943 */
3944static void
3945igb_get_hw_control(struct adapter *adapter)
3946{
3947	u32 ctrl_ext;
3948
3949	/* Let firmware know the driver has taken over */
3950	ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
3951	E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
3952	    ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
3953}
3954
3955/*
3956 * igb_release_hw_control resets CTRL_EXT:DRV_LOAD bit.
3957 * For ASF and Pass Through versions of f/w this means that the
3958 * driver is no longer loaded.
3959 *
3960 */
3961static void
3962igb_release_hw_control(struct adapter *adapter)
3963{
3964	u32 ctrl_ext;
3965
3966	/* Let firmware take over control of h/w */
3967	ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
3968	E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
3969	    ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
3970}
3971
3972static int
3973igb_is_valid_ether_addr(uint8_t *addr)
3974{
3975	char zero_addr[6] = { 0, 0, 0, 0, 0, 0 };
3976
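	/* Reject multicast/broadcast (low bit of first octet set) and the all-zero address */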
3977	if ((addr[0] & 1) || (!bcmp(addr, zero_addr, ETHER_ADDR_LEN))) {
3978		return (FALSE);
3979	}
3980
3981	return (TRUE);
3982}
3983
3984/*
3985 * NOTE: the following routines use the e1000
3986 *	naming style because they are provided to the
3987 *	shared code, which expects that convention
3988 */
3989
3990void
3991e1000_write_pci_cfg(struct e1000_hw *hw, uint32_t reg, uint16_t *value)
3992{
3993	pci_write_config(((struct e1000_osdep *)hw->back)->dev, reg, *value, 2);
3994}
3995
3996void
3997e1000_read_pci_cfg(struct e1000_hw *hw, uint32_t reg, uint16_t *value)
3998{
3999	*value = pci_read_config(((struct e1000_osdep *)hw->back)->dev, reg, 2);
4000}
4001
4002void
4003e1000_pci_set_mwi(struct e1000_hw *hw)
4004{
4005	pci_write_config(((struct e1000_osdep *)hw->back)->dev, PCIR_COMMAND,
4006	    (hw->bus.pci_cmd_word | CMD_MEM_WRT_INVALIDATE), 2);
4007}
4008
4009void
4010e1000_pci_clear_mwi(struct e1000_hw *hw)
4011{
4012	pci_write_config(((struct e1000_osdep *)hw->back)->dev, PCIR_COMMAND,
4013	    (hw->bus.pci_cmd_word & ~CMD_MEM_WRT_INVALIDATE), 2);
4014}
4015
4016/*
4017 * Read the PCI Express capabilities
4018 */
4019int32_t
4020e1000_read_pcie_cap_reg(struct e1000_hw *hw, uint32_t reg, uint16_t *value)
4021{
4022	u32	result;
4023
4024	pci_find_extcap(((struct e1000_osdep *)hw->back)->dev,
4025	    reg, &result);
4026	*value = (u16)result;
4027	return (E1000_SUCCESS);
4028}
4029
4030int32_t
4031e1000_alloc_zeroed_dev_spec_struct(struct e1000_hw *hw, uint32_t size)
4032{
4033	int32_t error = 0;
4034
4035	hw->dev_spec = malloc(size, M_DEVBUF, M_NOWAIT | M_ZERO);
4036	if (hw->dev_spec == NULL)
4037		error = ENOMEM;
4038
4039	return (error);
4040}
4041
4042void
4043e1000_free_dev_spec_struct(struct e1000_hw *hw)
4044{
4045	if (hw->dev_spec != NULL)
4046		free(hw->dev_spec, M_DEVBUF);
4047	return;
4048}
4049
4050/*
4051 * Enable PCI Wake On Lan capability
4052 */
4053void
4054igb_enable_wakeup(device_t dev)
4055{
4056	u16     cap, status;
4057	u8      id;
4058
4059	/* First find the capabilities pointer */
4060	cap = pci_read_config(dev, PCIR_CAP_PTR, 2);
4061	/* Read the PM Capabilities */
4062	id = pci_read_config(dev, cap, 1);
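	/*
	 * Note: only the first capability in the list is examined; this
	 * assumes power management is the first entry.
	 */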
4063	if (id != PCIY_PMG)     /* Something wrong */
4064		return;
4065	/* OK, we have the power capabilities, so
4066	   now get the status register */
4067	cap += PCIR_POWER_STATUS;
4068	status = pci_read_config(dev, cap, 2);
4069	status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE;
4070	pci_write_config(dev, cap, status, 2);
4071	return;
4072}
4073
4074
4075/**********************************************************************
4076 *
4077 *  Update the board statistics counters.
4078 *
4079 **********************************************************************/
4080static void
4081igb_update_stats_counters(struct adapter *adapter)
4082{
4083	struct ifnet   *ifp;
4084
4085	if (adapter->hw.phy.media_type == e1000_media_type_copper ||
4086	   (E1000_READ_REG(&adapter->hw, E1000_STATUS) & E1000_STATUS_LU)) {
4087		adapter->stats.symerrs += E1000_READ_REG(&adapter->hw, E1000_SYMERRS);
4088		adapter->stats.sec += E1000_READ_REG(&adapter->hw, E1000_SEC);
4089	}
4090	adapter->stats.crcerrs += E1000_READ_REG(&adapter->hw, E1000_CRCERRS);
4091	adapter->stats.mpc += E1000_READ_REG(&adapter->hw, E1000_MPC);
4092	adapter->stats.scc += E1000_READ_REG(&adapter->hw, E1000_SCC);
4093	adapter->stats.ecol += E1000_READ_REG(&adapter->hw, E1000_ECOL);
4094
4095	adapter->stats.mcc += E1000_READ_REG(&adapter->hw, E1000_MCC);
4096	adapter->stats.latecol += E1000_READ_REG(&adapter->hw, E1000_LATECOL);
4097	adapter->stats.colc += E1000_READ_REG(&adapter->hw, E1000_COLC);
4098	adapter->stats.dc += E1000_READ_REG(&adapter->hw, E1000_DC);
4099	adapter->stats.rlec += E1000_READ_REG(&adapter->hw, E1000_RLEC);
4100	adapter->stats.xonrxc += E1000_READ_REG(&adapter->hw, E1000_XONRXC);
4101	adapter->stats.xontxc += E1000_READ_REG(&adapter->hw, E1000_XONTXC);
4102	adapter->stats.xoffrxc += E1000_READ_REG(&adapter->hw, E1000_XOFFRXC);
4103	adapter->stats.xofftxc += E1000_READ_REG(&adapter->hw, E1000_XOFFTXC);
4104	adapter->stats.fcruc += E1000_READ_REG(&adapter->hw, E1000_FCRUC);
4105	adapter->stats.prc64 += E1000_READ_REG(&adapter->hw, E1000_PRC64);
4106	adapter->stats.prc127 += E1000_READ_REG(&adapter->hw, E1000_PRC127);
4107	adapter->stats.prc255 += E1000_READ_REG(&adapter->hw, E1000_PRC255);
4108	adapter->stats.prc511 += E1000_READ_REG(&adapter->hw, E1000_PRC511);
4109	adapter->stats.prc1023 += E1000_READ_REG(&adapter->hw, E1000_PRC1023);
4110	adapter->stats.prc1522 += E1000_READ_REG(&adapter->hw, E1000_PRC1522);
4111	adapter->stats.gprc += E1000_READ_REG(&adapter->hw, E1000_GPRC);
4112	adapter->stats.bprc += E1000_READ_REG(&adapter->hw, E1000_BPRC);
4113	adapter->stats.mprc += E1000_READ_REG(&adapter->hw, E1000_MPRC);
4114	adapter->stats.gptc += E1000_READ_REG(&adapter->hw, E1000_GPTC);
4115
4116	/* For the 64-bit byte counters the low dword must be read first. */
4117	/* Both registers clear on the read of the high dword */
4118
4119	adapter->stats.gorc += E1000_READ_REG(&adapter->hw, E1000_GORCH);
4120	adapter->stats.gotc += E1000_READ_REG(&adapter->hw, E1000_GOTCH);
4121
4122	adapter->stats.rnbc += E1000_READ_REG(&adapter->hw, E1000_RNBC);
4123	adapter->stats.ruc += E1000_READ_REG(&adapter->hw, E1000_RUC);
4124	adapter->stats.rfc += E1000_READ_REG(&adapter->hw, E1000_RFC);
4125	adapter->stats.roc += E1000_READ_REG(&adapter->hw, E1000_ROC);
4126	adapter->stats.rjc += E1000_READ_REG(&adapter->hw, E1000_RJC);
4127
4128	adapter->stats.tor += E1000_READ_REG(&adapter->hw, E1000_TORH);
4129	adapter->stats.tot += E1000_READ_REG(&adapter->hw, E1000_TOTH);
4130
4131	adapter->stats.tpr += E1000_READ_REG(&adapter->hw, E1000_TPR);
4132	adapter->stats.tpt += E1000_READ_REG(&adapter->hw, E1000_TPT);
4133	adapter->stats.ptc64 += E1000_READ_REG(&adapter->hw, E1000_PTC64);
4134	adapter->stats.ptc127 += E1000_READ_REG(&adapter->hw, E1000_PTC127);
4135	adapter->stats.ptc255 += E1000_READ_REG(&adapter->hw, E1000_PTC255);
4136	adapter->stats.ptc511 += E1000_READ_REG(&adapter->hw, E1000_PTC511);
4137	adapter->stats.ptc1023 += E1000_READ_REG(&adapter->hw, E1000_PTC1023);
4138	adapter->stats.ptc1522 += E1000_READ_REG(&adapter->hw, E1000_PTC1522);
4139	adapter->stats.mptc += E1000_READ_REG(&adapter->hw, E1000_MPTC);
4140	adapter->stats.bptc += E1000_READ_REG(&adapter->hw, E1000_BPTC);
4141
4142	adapter->stats.algnerrc +=
4143		E1000_READ_REG(&adapter->hw, E1000_ALGNERRC);
4144	adapter->stats.rxerrc +=
4145		E1000_READ_REG(&adapter->hw, E1000_RXERRC);
4146	adapter->stats.tncrs +=
4147		E1000_READ_REG(&adapter->hw, E1000_TNCRS);
4148	adapter->stats.cexterr +=
4149		E1000_READ_REG(&adapter->hw, E1000_CEXTERR);
4150	adapter->stats.tsctc +=
4151		E1000_READ_REG(&adapter->hw, E1000_TSCTC);
4152	adapter->stats.tsctfc +=
4153		E1000_READ_REG(&adapter->hw, E1000_TSCTFC);
4154	ifp = adapter->ifp;
4155
4156	ifp->if_collisions = adapter->stats.colc;
4157
4158	/* Rx Errors */
4159	ifp->if_ierrors = adapter->dropped_pkts + adapter->stats.rxerrc +
4160	    adapter->stats.crcerrs + adapter->stats.algnerrc +
4161	    adapter->stats.ruc + adapter->stats.roc +
4162	    adapter->stats.mpc + adapter->stats.cexterr;
4163
4164	/* Tx Errors */
4165	ifp->if_oerrors = adapter->stats.ecol +
4166	    adapter->stats.latecol + adapter->watchdog_events;
4167}
4168
4169
4170/**********************************************************************
4171 *
4172 *  This routine is called only when igb_display_debug_stats is enabled.
4173 *  This routine provides a way to take a look at important statistics
4174 *  maintained by the driver and hardware.
4175 *
4176 **********************************************************************/
4177static void
4178igb_print_debug_info(struct adapter *adapter)
4179{
4180	device_t dev = adapter->dev;
4181	struct rx_ring *rxr = adapter->rx_rings;
4182	struct tx_ring *txr = adapter->tx_rings;
4183	uint8_t *hw_addr = adapter->hw.hw_addr;
4184
4185	device_printf(dev, "Adapter hardware address = %p \n", hw_addr);
4186	device_printf(dev, "CTRL = 0x%x RCTL = 0x%x \n",
4187	    E1000_READ_REG(&adapter->hw, E1000_CTRL),
4188	    E1000_READ_REG(&adapter->hw, E1000_RCTL));
4189	device_printf(dev, "IMS = 0x%x EIMS = 0x%x \n",
4190	    E1000_READ_REG(&adapter->hw, E1000_IMS),
4191	    E1000_READ_REG(&adapter->hw, E1000_EIMS));
4192	device_printf(dev, "Packet buffer = Tx=%dk Rx=%dk \n",
4193	    ((E1000_READ_REG(&adapter->hw, E1000_PBA) & 0xffff0000) >> 16),
4194	    (E1000_READ_REG(&adapter->hw, E1000_PBA) & 0xffff));
4195	device_printf(dev, "Flow control watermarks high = %d low = %d\n",
4196	    adapter->hw.fc.high_water,
4197	    adapter->hw.fc.low_water);
4198	device_printf(dev, "tx_int_delay = %d, tx_abs_int_delay = %d\n",
4199	    E1000_READ_REG(&adapter->hw, E1000_TIDV),
4200	    E1000_READ_REG(&adapter->hw, E1000_TADV));
4201	device_printf(dev, "rx_int_delay = %d, rx_abs_int_delay = %d\n",
4202	    E1000_READ_REG(&adapter->hw, E1000_RDTR),
4203	    E1000_READ_REG(&adapter->hw, E1000_RADV));
4204
4205	for (int i = 0; i < adapter->num_tx_queues; i++, txr++) {
4206		device_printf(dev, "Queue(%d) tdh = %d, tdt = %d\n", i,
4207		    E1000_READ_REG(&adapter->hw, E1000_TDH(i)),
4208		    E1000_READ_REG(&adapter->hw, E1000_TDT(i)));
4209		device_printf(dev, "no descriptors avail event = %lu\n",
4210		    txr->no_desc_avail);
4211		device_printf(dev, "TX(%d) IRQ Handled = %lu\n", txr->me,
4212		    txr->tx_irq);
4213		device_printf(dev, "TX(%d) Packets sent = %lu\n", txr->me,
4214		    txr->tx_packets);
4215	}
4216
4217	for (int i = 0; i < adapter->num_rx_queues; i++, rxr++) {
4218		device_printf(dev, "Queue(%d) rdh = %d, rdt = %d\n", i,
4219		    E1000_READ_REG(&adapter->hw, E1000_RDH(i)),
4220		    E1000_READ_REG(&adapter->hw, E1000_RDT(i)));
4221		device_printf(dev, "RX(%d) Packets received = %lu\n", rxr->me,
4222		    rxr->rx_packets);
4223		device_printf(dev, "RX(%d) Byte count = %lu\n", rxr->me,
4224		    rxr->rx_bytes);
4225		device_printf(dev, "RX(%d) IRQ Handled = %lu\n", rxr->me,
4226		    rxr->rx_irq);
4227	}
4228	device_printf(dev, "LINK IRQ Handled = %u\n", adapter->link_irq);
4229
4230	device_printf(dev, "Std mbuf failed = %ld\n",
4231	    adapter->mbuf_alloc_failed);
4232	device_printf(dev, "Std mbuf cluster failed = %ld\n",
4233	    adapter->mbuf_cluster_failed);
4234	device_printf(dev, "Driver dropped packets = %ld\n",
4235	    adapter->dropped_pkts);
4236	device_printf(dev, "Driver tx dma failure in xmit = %ld\n",
4237		adapter->no_tx_dma_setup);
4238}
4239
4240static void
4241igb_print_hw_stats(struct adapter *adapter)
4242{
4243	device_t dev = adapter->dev;
4244
4245	device_printf(dev, "Excessive collisions = %lld\n",
4246	    (long long)adapter->stats.ecol);
4247#if	(DEBUG_HW > 0)  /* Don't output these errors normally */
4248	device_printf(dev, "Symbol errors = %lld\n",
4249	    (long long)adapter->stats.symerrs);
4250#endif
4251	device_printf(dev, "Sequence errors = %lld\n",
4252	    (long long)adapter->stats.sec);
4253	device_printf(dev, "Defer count = %lld\n",
4254	    (long long)adapter->stats.dc);
4255	device_printf(dev, "Missed Packets = %lld\n",
4256	    (long long)adapter->stats.mpc);
4257	device_printf(dev, "Receive No Buffers = %lld\n",
4258	    (long long)adapter->stats.rnbc);
4259	/* RLEC is inaccurate on some hardware, calculate our own. */
4260	device_printf(dev, "Receive Length Errors = %lld\n",
4261	    ((long long)adapter->stats.roc + (long long)adapter->stats.ruc));
4262	device_printf(dev, "Receive errors = %lld\n",
4263	    (long long)adapter->stats.rxerrc);
4264	device_printf(dev, "Crc errors = %lld\n",
4265	    (long long)adapter->stats.crcerrs);
4266	device_printf(dev, "Alignment errors = %lld\n",
4267	    (long long)adapter->stats.algnerrc);
4268	/* On 82575 these are collision counts */
4269	device_printf(dev, "Collision/Carrier extension errors = %lld\n",
4270	    (long long)adapter->stats.cexterr);
4271	device_printf(dev, "RX overruns = %ld\n", adapter->rx_overruns);
4272	device_printf(dev, "watchdog timeouts = %ld\n",
4273	    adapter->watchdog_events);
4274	device_printf(dev, "XON Rcvd = %lld\n",
4275	    (long long)adapter->stats.xonrxc);
4276	device_printf(dev, "XON Xmtd = %lld\n",
4277	    (long long)adapter->stats.xontxc);
4278	device_printf(dev, "XOFF Rcvd = %lld\n",
4279	    (long long)adapter->stats.xoffrxc);
4280	device_printf(dev, "XOFF Xmtd = %lld\n",
4281	    (long long)adapter->stats.xofftxc);
4282	device_printf(dev, "Good Packets Rcvd = %lld\n",
4283	    (long long)adapter->stats.gprc);
4284	device_printf(dev, "Good Packets Xmtd = %lld\n",
4285	    (long long)adapter->stats.gptc);
4286	device_printf(dev, "TSO Contexts Xmtd = %lld\n",
4287	    (long long)adapter->stats.tsctc);
4288	device_printf(dev, "TSO Contexts Failed = %lld\n",
4289	    (long long)adapter->stats.tsctfc);
4290}
4291
4292/**********************************************************************
4293 *
4294 *  This routine provides a way to dump out the adapter eeprom,
4295 *  often a useful debug/service tool. This only dumps the first
4296 *  32 words; the data that matters is within that range.
4297 *
4298 **********************************************************************/
4299static void
4300igb_print_nvm_info(struct adapter *adapter)
4301{
4302	u16	eeprom_data;
4303	int	i, j, row = 0;
4304
4305	/* It's a bit crude, but it gets the job done */
4306	printf("\nInterface EEPROM Dump:\n");
4307	printf("Offset\n0x0000  ");
4308	for (i = 0, j = 0; i < 32; i++, j++) {
4309		if (j == 8) { /* Make the offset block */
4310			j = 0; ++row;
4311			printf("\n0x00%x0  ",row);
4312		}
4313		e1000_read_nvm(&adapter->hw, i, 1, &eeprom_data);
4314		printf("%04x ", eeprom_data);
4315	}
4316	printf("\n");
4317}
4318
4319static int
4320igb_sysctl_debug_info(SYSCTL_HANDLER_ARGS)
4321{
4322	struct adapter *adapter;
4323	int error;
4324	int result;
4325
4326	result = -1;
4327	error = sysctl_handle_int(oidp, &result, 0, req);
4328
4329	if (error || !req->newptr)
4330		return (error);
4331
4332	if (result == 1) {
4333		adapter = (struct adapter *)arg1;
4334		igb_print_debug_info(adapter);
4335	}
4336	/*
4337	 * This value will cause a hex dump of the
4338	 * first 32 16-bit words of the EEPROM to
4339	 * the screen.
4340	 */
4341	if (result == 2) {
4342		adapter = (struct adapter *)arg1;
4343		igb_print_nvm_info(adapter);
4344	}
4345
4346	return (error);
4347}
4348
4349
4350static int
4351igb_sysctl_stats(SYSCTL_HANDLER_ARGS)
4352{
4353	struct adapter *adapter;
4354	int error;
4355	int result;
4356
4357	result = -1;
4358	error = sysctl_handle_int(oidp, &result, 0, req);
4359
4360	if (error || !req->newptr)
4361		return (error);
4362
4363	if (result == 1) {
4364		adapter = (struct adapter *)arg1;
4365		igb_print_hw_stats(adapter);
4366	}
4367
4368	return (error);
4369}
4370
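/*
 * Sysctl handler for the interrupt delay values: converts the
 * user-supplied microsecond value to hardware ticks and writes it
 * into the register offset associated with this entry.
 */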
4371static int
4372igb_sysctl_int_delay(SYSCTL_HANDLER_ARGS)
4373{
4374	struct igb_int_delay_info *info;
4375	struct adapter *adapter;
4376	uint32_t regval;
4377	int error;
4378	int usecs;
4379	int ticks;
4380
4381	info = (struct igb_int_delay_info *)arg1;
4382	usecs = info->value;
4383	error = sysctl_handle_int(oidp, &usecs, 0, req);
4384	if (error != 0 || req->newptr == NULL)
4385		return (error);
4386	if (usecs < 0 || usecs > IGB_TICKS_TO_USECS(65535))
4387		return (EINVAL);
4388	info->value = usecs;
4389	ticks = IGB_USECS_TO_TICKS(usecs);
4390
4391	adapter = info->adapter;
4392
4393	IGB_CORE_LOCK(adapter);
4394	regval = E1000_READ_OFFSET(&adapter->hw, info->offset);
4395	regval = (regval & ~0xffff) | (ticks & 0xffff);
4396	/* Handle a few special cases. */
4397	switch (info->offset) {
4398	case E1000_RDTR:
4399		break;
4400	case E1000_TIDV:
4401		if (ticks == 0) {
4402			adapter->txd_cmd &= ~E1000_TXD_CMD_IDE;
4403			/* Don't write 0 into the TIDV register. */
4404			regval++;
4405		} else
4406			if (adapter->hw.mac.type < e1000_82575)
4407				adapter->txd_cmd |= E1000_TXD_CMD_IDE;
4408		break;
4409	}
4410	E1000_WRITE_OFFSET(&adapter->hw, info->offset, regval);
4411	IGB_CORE_UNLOCK(adapter);
4412	return (0);
4413}
4414
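/*
 * Register a read/write sysctl node for an interrupt delay register;
 * updates are handled by igb_sysctl_int_delay() above.
 */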
4415static void
4416igb_add_int_delay_sysctl(struct adapter *adapter, const char *name,
4417	const char *description, struct igb_int_delay_info *info,
4418	int offset, int value)
4419{
4420	info->adapter = adapter;
4421	info->offset = offset;
4422	info->value = value;
4423	SYSCTL_ADD_PROC(device_get_sysctl_ctx(adapter->dev),
4424	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
4425	    OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW,
4426	    info, 0, igb_sysctl_int_delay, "I", description);
4427}
4428
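/*
 * Expose the RX processing limit (how many packets igb_rxeof may
 * clean per call) as a read/write sysctl.
 */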
4429static void
4430igb_add_rx_process_limit(struct adapter *adapter, const char *name,
4431	const char *description, int *limit, int value)
4432{
4433	*limit = value;
4434	SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
4435	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
4436	    OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW, limit, value, description);
4437}
4438
4439
4440