if_igb.c revision 206023
1/******************************************************************************
2
3  Copyright (c) 2001-2010, Intel Corporation
4  All rights reserved.
5
6  Redistribution and use in source and binary forms, with or without
7  modification, are permitted provided that the following conditions are met:
8
9   1. Redistributions of source code must retain the above copyright notice,
10      this list of conditions and the following disclaimer.
11
12   2. Redistributions in binary form must reproduce the above copyright
13      notice, this list of conditions and the following disclaimer in the
14      documentation and/or other materials provided with the distribution.
15
16   3. Neither the name of the Intel Corporation nor the names of its
17      contributors may be used to endorse or promote products derived from
18      this software without specific prior written permission.
19
20  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
24  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30  POSSIBILITY OF SUCH DAMAGE.
31
32******************************************************************************/
33/*$FreeBSD: head/sys/dev/e1000/if_igb.c 206023 2010-03-31 23:24:42Z jfv $*/
34
35
36#ifdef HAVE_KERNEL_OPTION_HEADERS
37#include "opt_device_polling.h"
38#include "opt_inet.h"
39#include "opt_altq.h"
40#endif
41
42#include <sys/param.h>
43#include <sys/systm.h>
44#if __FreeBSD_version >= 800000
45#include <sys/buf_ring.h>
46#endif
47#include <sys/bus.h>
48#include <sys/endian.h>
49#include <sys/kernel.h>
50#include <sys/kthread.h>
51#include <sys/malloc.h>
52#include <sys/mbuf.h>
53#include <sys/module.h>
54#include <sys/rman.h>
55#include <sys/socket.h>
56#include <sys/sockio.h>
57#include <sys/sysctl.h>
58#include <sys/taskqueue.h>
59#include <sys/eventhandler.h>
60#include <sys/pcpu.h>
61#include <sys/smp.h>
62#include <machine/smp.h>
63#include <machine/bus.h>
64#include <machine/resource.h>
65
66#include <net/bpf.h>
67#include <net/ethernet.h>
68#include <net/if.h>
69#include <net/if_arp.h>
70#include <net/if_dl.h>
71#include <net/if_media.h>
72
73#include <net/if_types.h>
74#include <net/if_vlan_var.h>
75
76#include <netinet/in_systm.h>
77#include <netinet/in.h>
78#include <netinet/if_ether.h>
79#include <netinet/ip.h>
80#include <netinet/ip6.h>
81#include <netinet/tcp.h>
82#include <netinet/tcp_lro.h>
83#include <netinet/udp.h>
84
85#include <machine/in_cksum.h>
86#include <dev/led/led.h>
87#include <dev/pci/pcivar.h>
88#include <dev/pci/pcireg.h>
89
90#include "e1000_api.h"
91#include "e1000_82575.h"
92#include "if_igb.h"
93
94/*********************************************************************
95 *  Set this to one to display debug statistics
96 *********************************************************************/
97int	igb_display_debug_stats = 0;
98
99/*********************************************************************
100 *  Driver version:
101 *********************************************************************/
102char igb_driver_version[] = "version - 1.9.3";
103
104
105/*********************************************************************
106 *  PCI Device ID Table
107 *
108 *  Used by probe to select devices to load on
109 *  Last field stores an index into e1000_strings
110 *  Last entry must be all 0s
111 *
112 *  { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
113 *********************************************************************/
114
115static igb_vendor_info_t igb_vendor_info_array[] =
116{
117	{ 0x8086, E1000_DEV_ID_82575EB_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
118	{ 0x8086, E1000_DEV_ID_82575EB_FIBER_SERDES,
119						PCI_ANY_ID, PCI_ANY_ID, 0},
120	{ 0x8086, E1000_DEV_ID_82575GB_QUAD_COPPER,
121						PCI_ANY_ID, PCI_ANY_ID, 0},
122	{ 0x8086, E1000_DEV_ID_82576,		PCI_ANY_ID, PCI_ANY_ID, 0},
123	{ 0x8086, E1000_DEV_ID_82576_NS,	PCI_ANY_ID, PCI_ANY_ID, 0},
124	{ 0x8086, E1000_DEV_ID_82576_NS_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
125	{ 0x8086, E1000_DEV_ID_82576_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
126	{ 0x8086, E1000_DEV_ID_82576_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
127	{ 0x8086, E1000_DEV_ID_82576_SERDES_QUAD,
128						PCI_ANY_ID, PCI_ANY_ID, 0},
129	{ 0x8086, E1000_DEV_ID_82576_QUAD_COPPER,
130						PCI_ANY_ID, PCI_ANY_ID, 0},
131	{ 0x8086, E1000_DEV_ID_82580_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
132	{ 0x8086, E1000_DEV_ID_82580_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
133	{ 0x8086, E1000_DEV_ID_82580_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
134	{ 0x8086, E1000_DEV_ID_82580_SGMII,	PCI_ANY_ID, PCI_ANY_ID, 0},
135	{ 0x8086, E1000_DEV_ID_82580_COPPER_DUAL,
136						PCI_ANY_ID, PCI_ANY_ID, 0},
137	/* required last entry */
138	{ 0, 0, 0, 0, 0}
139};
140
141/*********************************************************************
142 *  Table of branding strings for all supported NICs.
143 *********************************************************************/
144
145static char *igb_strings[] = {
146	"Intel(R) PRO/1000 Network Connection"
147};
148
149/*********************************************************************
150 *  Function prototypes
151 *********************************************************************/
152static int	igb_probe(device_t);
153static int	igb_attach(device_t);
154static int	igb_detach(device_t);
155static int	igb_shutdown(device_t);
156static int	igb_suspend(device_t);
157static int	igb_resume(device_t);
158static void	igb_start(struct ifnet *);
159static void	igb_start_locked(struct tx_ring *, struct ifnet *ifp);
160#if __FreeBSD_version >= 800000
161static int	igb_mq_start(struct ifnet *, struct mbuf *);
162static int	igb_mq_start_locked(struct ifnet *,
163		    struct tx_ring *, struct mbuf *);
164static void	igb_qflush(struct ifnet *);
165#endif
166static int	igb_ioctl(struct ifnet *, u_long, caddr_t);
167static void	igb_init(void *);
168static void	igb_init_locked(struct adapter *);
169static void	igb_stop(void *);
170static void	igb_media_status(struct ifnet *, struct ifmediareq *);
171static int	igb_media_change(struct ifnet *);
172static void	igb_identify_hardware(struct adapter *);
173static int	igb_allocate_pci_resources(struct adapter *);
174static int	igb_allocate_msix(struct adapter *);
175static int	igb_allocate_legacy(struct adapter *);
176static int	igb_setup_msix(struct adapter *);
177static void	igb_free_pci_resources(struct adapter *);
178static void	igb_local_timer(void *);
179static void	igb_reset(struct adapter *);
180static void	igb_setup_interface(device_t, struct adapter *);
181static int	igb_allocate_queues(struct adapter *);
182static void	igb_configure_queues(struct adapter *);
183
184static int	igb_allocate_transmit_buffers(struct tx_ring *);
185static void	igb_setup_transmit_structures(struct adapter *);
186static void	igb_setup_transmit_ring(struct tx_ring *);
187static void	igb_initialize_transmit_units(struct adapter *);
188static void	igb_free_transmit_structures(struct adapter *);
189static void	igb_free_transmit_buffers(struct tx_ring *);
190
191static int	igb_allocate_receive_buffers(struct rx_ring *);
192static int	igb_setup_receive_structures(struct adapter *);
193static int	igb_setup_receive_ring(struct rx_ring *);
194static void	igb_initialize_receive_units(struct adapter *);
195static void	igb_free_receive_structures(struct adapter *);
196static void	igb_free_receive_buffers(struct rx_ring *);
197static void	igb_free_receive_ring(struct rx_ring *);
198
199static void	igb_enable_intr(struct adapter *);
200static void	igb_disable_intr(struct adapter *);
201static void	igb_update_stats_counters(struct adapter *);
202static bool	igb_txeof(struct tx_ring *);
203
204static __inline	void igb_rx_discard(struct rx_ring *, int);
205static __inline void igb_rx_input(struct rx_ring *,
206		    struct ifnet *, struct mbuf *, u32);
207
208static bool	igb_rxeof(struct igb_queue *, int);
209static void	igb_rx_checksum(u32, struct mbuf *, u32);
210static int	igb_tx_ctx_setup(struct tx_ring *, struct mbuf *);
211static bool	igb_tso_setup(struct tx_ring *, struct mbuf *, u32 *);
212static void	igb_set_promisc(struct adapter *);
213static void	igb_disable_promisc(struct adapter *);
214static void	igb_set_multi(struct adapter *);
215static void	igb_print_hw_stats(struct adapter *);
216static void	igb_update_link_status(struct adapter *);
217static void	igb_refresh_mbufs(struct rx_ring *, int);
218
219static void	igb_register_vlan(void *, struct ifnet *, u16);
220static void	igb_unregister_vlan(void *, struct ifnet *, u16);
221static void	igb_setup_vlan_hw_support(struct adapter *);
222
223static int	igb_xmit(struct tx_ring *, struct mbuf **);
224static int	igb_dma_malloc(struct adapter *, bus_size_t,
225		    struct igb_dma_alloc *, int);
226static void	igb_dma_free(struct adapter *, struct igb_dma_alloc *);
227static void	igb_print_debug_info(struct adapter *);
228static void	igb_print_nvm_info(struct adapter *);
229static int 	igb_is_valid_ether_addr(u8 *);
230static int	igb_sysctl_stats(SYSCTL_HANDLER_ARGS);
231static int	igb_sysctl_debug_info(SYSCTL_HANDLER_ARGS);
232/* Management and WOL Support */
233static void	igb_init_manageability(struct adapter *);
234static void	igb_release_manageability(struct adapter *);
235static void     igb_get_hw_control(struct adapter *);
236static void     igb_release_hw_control(struct adapter *);
237static void     igb_enable_wakeup(device_t);
238static void     igb_led_func(void *, int);
239
240static int	igb_irq_fast(void *);
241static void	igb_add_rx_process_limit(struct adapter *, const char *,
242		    const char *, int *, int);
243static void	igb_handle_rxtx(void *context, int pending);
244static void	igb_handle_que(void *context, int pending);
245static void	igb_handle_link(void *context, int pending);
246
247/* These are MSIX only irq handlers */
248static void	igb_msix_que(void *);
249static void	igb_msix_link(void *);
250
251#ifdef DEVICE_POLLING
252static poll_handler_t igb_poll;
253#endif /* DEVICE_POLLING */
254
255/*********************************************************************
256 *  FreeBSD Device Interface Entry Points
257 *********************************************************************/
258
259static device_method_t igb_methods[] = {
260	/* Device interface */
261	DEVMETHOD(device_probe, igb_probe),
262	DEVMETHOD(device_attach, igb_attach),
263	DEVMETHOD(device_detach, igb_detach),
264	DEVMETHOD(device_shutdown, igb_shutdown),
265	DEVMETHOD(device_suspend, igb_suspend),
266	DEVMETHOD(device_resume, igb_resume),
267	{0, 0}
268};
269
270static driver_t igb_driver = {
271	"igb", igb_methods, sizeof(struct adapter),
272};
273
274static devclass_t igb_devclass;
275DRIVER_MODULE(igb, pci, igb_driver, igb_devclass, 0, 0);
276MODULE_DEPEND(igb, pci, 1, 1, 1);
277MODULE_DEPEND(igb, ether, 1, 1, 1);
278
279/*********************************************************************
280 *  Tunable default values.
281 *********************************************************************/
282
283/* Descriptor defaults */
284static int igb_rxd = IGB_DEFAULT_RXD;
285static int igb_txd = IGB_DEFAULT_TXD;
286TUNABLE_INT("hw.igb.rxd", &igb_rxd);
287TUNABLE_INT("hw.igb.txd", &igb_txd);
288
289/*
290** AIM: Adaptive Interrupt Moderation.
291** The interrupt rate is varied over time
292** based on the traffic seen on each
293** interrupt vector.
294*/
295static int igb_enable_aim = TRUE;
296TUNABLE_INT("hw.igb.enable_aim", &igb_enable_aim);
297
298/*
299 * MSIX should be the default for best performance,
300 * but this allows it to be forced off for testing.
301 */
302static int igb_enable_msix = 1;
303TUNABLE_INT("hw.igb.enable_msix", &igb_enable_msix);
304
305/*
306 * Header split has been beneficial in many of
307 * the circumstances tested; however, there have
308 * been some stability issues, so the default is
309 * off.
310 */
311static bool igb_header_split = FALSE;
312TUNABLE_INT("hw.igb.hdr_split", &igb_header_split);
313
314/*
315** This will autoconfigure based on
316** the number of CPUs if left at 0.
317*/
318static int igb_num_queues = 0;
319TUNABLE_INT("hw.igb.num_queues", &igb_num_queues);
320
321/* How many packets rxeof tries to clean at a time */
322static int igb_rx_process_limit = 100;
323TUNABLE_INT("hw.igb.rx_process_limit", &igb_rx_process_limit);
324
325/* Flow control setting - default to FULL */
326static int igb_fc_setting = e1000_fc_full;
327TUNABLE_INT("hw.igb.fc_setting", &igb_fc_setting);
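/*
** Illustrative /boot/loader.conf settings for the tunables above;
** the values shown are examples only, not recommendations:
**   hw.igb.rxd=2048
**   hw.igb.txd=2048
**   hw.igb.num_queues=0
**   hw.igb.rx_process_limit=200
*/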
328
329/*
330** Shadow VFTA table; this is needed because
331** the real filter table gets cleared during
332** a soft reset and the driver needs to be able
333** to repopulate it.
334*/
335static u32 igb_shadow_vfta[IGB_VFTA_SIZE];
336
337
338/*********************************************************************
339 *  Device identification routine
340 *
341 *  igb_probe determines whether the driver should be loaded on an
342 *  adapter based on the PCI vendor/device ID of the adapter.
343 *
344 *  return BUS_PROBE_DEFAULT on success, positive on failure
345 *********************************************************************/
346
347static int
348igb_probe(device_t dev)
349{
350	char		adapter_name[60];
351	uint16_t	pci_vendor_id = 0;
352	uint16_t	pci_device_id = 0;
353	uint16_t	pci_subvendor_id = 0;
354	uint16_t	pci_subdevice_id = 0;
355	igb_vendor_info_t *ent;
356
357	INIT_DEBUGOUT("igb_probe: begin");
358
359	pci_vendor_id = pci_get_vendor(dev);
360	if (pci_vendor_id != IGB_VENDOR_ID)
361		return (ENXIO);
362
363	pci_device_id = pci_get_device(dev);
364	pci_subvendor_id = pci_get_subvendor(dev);
365	pci_subdevice_id = pci_get_subdevice(dev);
366
367	ent = igb_vendor_info_array;
368	while (ent->vendor_id != 0) {
369		if ((pci_vendor_id == ent->vendor_id) &&
370		    (pci_device_id == ent->device_id) &&
371
372		    ((pci_subvendor_id == ent->subvendor_id) ||
373		    (ent->subvendor_id == PCI_ANY_ID)) &&
374
375		    ((pci_subdevice_id == ent->subdevice_id) ||
376		    (ent->subdevice_id == PCI_ANY_ID))) {
377			sprintf(adapter_name, "%s %s",
378				igb_strings[ent->index],
379				igb_driver_version);
380			device_set_desc_copy(dev, adapter_name);
381			return (BUS_PROBE_DEFAULT);
382		}
383		ent++;
384	}
385
386	return (ENXIO);
387}
388
389/*********************************************************************
390 *  Device initialization routine
391 *
392 *  The attach entry point is called when the driver is being loaded.
393 *  This routine identifies the type of hardware, allocates all resources
394 *  and initializes the hardware.
395 *
396 *  return 0 on success, positive on failure
397 *********************************************************************/
398
399static int
400igb_attach(device_t dev)
401{
402	struct adapter	*adapter;
403	int		error = 0;
404	u16		eeprom_data;
405
406	INIT_DEBUGOUT("igb_attach: begin");
407
408	adapter = device_get_softc(dev);
409	adapter->dev = adapter->osdep.dev = dev;
410	IGB_CORE_LOCK_INIT(adapter, device_get_nameunit(dev));
411
412	/* SYSCTL stuff */
413	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
414	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
415	    OID_AUTO, "debug", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
416	    igb_sysctl_debug_info, "I", "Debug Information");
417
418	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
419	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
420	    OID_AUTO, "stats", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
421	    igb_sysctl_stats, "I", "Statistics");
422
423	SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
424	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
425	    OID_AUTO, "flow_control", CTLTYPE_INT|CTLFLAG_RW,
426	    &igb_fc_setting, 0, "Flow Control");
427
428	SYSCTL_ADD_INT(device_get_sysctl_ctx(dev),
429	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
430	    OID_AUTO, "enable_aim", CTLTYPE_INT|CTLFLAG_RW,
431	    &igb_enable_aim, 1, "Interrupt Moderation");
432
433	callout_init_mtx(&adapter->timer, &adapter->core_mtx, 0);
434
435	/* Determine hardware and mac info */
436	igb_identify_hardware(adapter);
437
438	/* Setup PCI resources */
439	if (igb_allocate_pci_resources(adapter)) {
440		device_printf(dev, "Allocation of PCI resources failed\n");
441		error = ENXIO;
442		goto err_pci;
443	}
444
445	/* Do Shared Code initialization */
446	if (e1000_setup_init_funcs(&adapter->hw, TRUE)) {
447		device_printf(dev, "Setup of Shared code failed\n");
448		error = ENXIO;
449		goto err_pci;
450	}
451
452	e1000_get_bus_info(&adapter->hw);
453
454	/* Sysctls for limiting the amount of work done in the taskqueue */
455	igb_add_rx_process_limit(adapter, "rx_processing_limit",
456	    "max number of rx packets to process", &adapter->rx_process_limit,
457	    igb_rx_process_limit);
458
459	/*
460	 * Validate the number of transmit and receive descriptors. It
461	 * must not exceed the hardware maximum, and must be a multiple
462	 * of IGB_DBA_ALIGN.
463	 */
464	if (((igb_txd * sizeof(struct e1000_tx_desc)) % IGB_DBA_ALIGN) != 0 ||
465	    (igb_txd > IGB_MAX_TXD) || (igb_txd < IGB_MIN_TXD)) {
466		device_printf(dev, "Using %d TX descriptors instead of %d!\n",
467		    IGB_DEFAULT_TXD, igb_txd);
468		adapter->num_tx_desc = IGB_DEFAULT_TXD;
469	} else
470		adapter->num_tx_desc = igb_txd;
471	if (((igb_rxd * sizeof(struct e1000_rx_desc)) % IGB_DBA_ALIGN) != 0 ||
472	    (igb_rxd > IGB_MAX_RXD) || (igb_rxd < IGB_MIN_RXD)) {
473		device_printf(dev, "Using %d RX descriptors instead of %d!\n",
474		    IGB_DEFAULT_RXD, igb_rxd);
475		adapter->num_rx_desc = IGB_DEFAULT_RXD;
476	} else
477		adapter->num_rx_desc = igb_rxd;
478
479	adapter->hw.mac.autoneg = DO_AUTO_NEG;
480	adapter->hw.phy.autoneg_wait_to_complete = FALSE;
481	adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
482
483	/* Copper options */
484	if (adapter->hw.phy.media_type == e1000_media_type_copper) {
485		adapter->hw.phy.mdix = AUTO_ALL_MODES;
486		adapter->hw.phy.disable_polarity_correction = FALSE;
487		adapter->hw.phy.ms_type = IGB_MASTER_SLAVE;
488	}
489
490	/*
491	 * Set the frame limits assuming
492	 * standard ethernet sized frames.
493	 */
494	adapter->max_frame_size = ETHERMTU + ETHER_HDR_LEN + ETHERNET_FCS_SIZE;
495	adapter->min_frame_size = ETH_ZLEN + ETHERNET_FCS_SIZE;
496
497	/*
498	** Allocate and Setup Queues
499	*/
500	if (igb_allocate_queues(adapter)) {
501		error = ENOMEM;
502		goto err_pci;
503	}
504
505	/*
506	** Start from a known state; this is
507	** important when reading the NVM and
508	** MAC address from the hardware.
509	*/
510	e1000_reset_hw(&adapter->hw);
511
512	/* Make sure we have a good EEPROM before we read from it */
513	if (e1000_validate_nvm_checksum(&adapter->hw) < 0) {
514		/*
515		** Some PCI-E parts fail the first check due to
516		** the link being in a sleep state; call it again,
517		** and if it fails a second time it's a real issue.
518		*/
519		if (e1000_validate_nvm_checksum(&adapter->hw) < 0) {
520			device_printf(dev,
521			    "The EEPROM Checksum Is Not Valid\n");
522			error = EIO;
523			goto err_late;
524		}
525	}
526
527	/*
528	** Copy the permanent MAC address out of the EEPROM
529	*/
530	if (e1000_read_mac_addr(&adapter->hw) < 0) {
531		device_printf(dev, "EEPROM read error while reading MAC"
532		    " address\n");
533		error = EIO;
534		goto err_late;
535	}
536	/* Check its sanity */
537	if (!igb_is_valid_ether_addr(adapter->hw.mac.addr)) {
538		device_printf(dev, "Invalid MAC address\n");
539		error = EIO;
540		goto err_late;
541	}
542
543	/*
544	** Configure Interrupts
545	*/
546	if ((adapter->msix > 1) && (igb_enable_msix))
547		error = igb_allocate_msix(adapter);
548	else /* MSI or Legacy */
549		error = igb_allocate_legacy(adapter);
550	if (error)
551		goto err_late;
552
553	/* Setup OS specific network interface */
554	igb_setup_interface(dev, adapter);
555
556	/* Now get a good starting state */
557	igb_reset(adapter);
558
559	/* Initialize statistics */
560	igb_update_stats_counters(adapter);
561
562	adapter->hw.mac.get_link_status = 1;
563	igb_update_link_status(adapter);
564
565	/* Indicate SOL/IDER usage */
566	if (e1000_check_reset_block(&adapter->hw))
567		device_printf(dev,
568		    "PHY reset is blocked due to SOL/IDER session.\n");
569
570	/* Determine if we have to control management hardware */
571	adapter->has_manage = e1000_enable_mng_pass_thru(&adapter->hw);
572
573	/*
574	 * Setup Wake-on-Lan
575	 */
576	/* APME bit in EEPROM is mapped to WUC.APME */
577	eeprom_data = E1000_READ_REG(&adapter->hw, E1000_WUC) & E1000_WUC_APME;
578	if (eeprom_data)
579		adapter->wol = E1000_WUFC_MAG;
580
581	/* Register for VLAN events */
582	adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
583	     igb_register_vlan, adapter, EVENTHANDLER_PRI_FIRST);
584	adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
585	     igb_unregister_vlan, adapter, EVENTHANDLER_PRI_FIRST);
586
587	/* Tell the stack that the interface is not active */
588	adapter->ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
589
590	adapter->led_dev = led_create(igb_led_func, adapter,
591	    device_get_nameunit(dev));
592
593	INIT_DEBUGOUT("igb_attach: end");
594
595	return (0);
596
597err_late:
598	igb_free_transmit_structures(adapter);
599	igb_free_receive_structures(adapter);
600	igb_release_hw_control(adapter);
601err_pci:
602	igb_free_pci_resources(adapter);
603	IGB_CORE_LOCK_DESTROY(adapter);
604
605	return (error);
606}
607
608/*********************************************************************
609 *  Device removal routine
610 *
611 *  The detach entry point is called when the driver is being removed.
612 *  This routine stops the adapter and deallocates all the resources
613 *  that were allocated for driver operation.
614 *
615 *  return 0 on success, positive on failure
616 *********************************************************************/
617
618static int
619igb_detach(device_t dev)
620{
621	struct adapter	*adapter = device_get_softc(dev);
622	struct ifnet	*ifp = adapter->ifp;
623
624	INIT_DEBUGOUT("igb_detach: begin");
625
626	/* Make sure VLANS are not using driver */
627	if (adapter->ifp->if_vlantrunk != NULL) {
628		device_printf(dev,"Vlan in use, detach first\n");
629		return (EBUSY);
630	}
631
632	if (adapter->led_dev != NULL)
633		led_destroy(adapter->led_dev);
634
635#ifdef DEVICE_POLLING
636	if (ifp->if_capenable & IFCAP_POLLING)
637		ether_poll_deregister(ifp);
638#endif
639
640	IGB_CORE_LOCK(adapter);
641	adapter->in_detach = 1;
642	igb_stop(adapter);
643	IGB_CORE_UNLOCK(adapter);
644
645	e1000_phy_hw_reset(&adapter->hw);
646
647	/* Give control back to firmware */
648	igb_release_manageability(adapter);
649	igb_release_hw_control(adapter);
650
651	if (adapter->wol) {
652		E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
653		E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
654		igb_enable_wakeup(dev);
655	}
656
657	/* Unregister VLAN events */
658	if (adapter->vlan_attach != NULL)
659		EVENTHANDLER_DEREGISTER(vlan_config, adapter->vlan_attach);
660	if (adapter->vlan_detach != NULL)
661		EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach);
662
663	ether_ifdetach(adapter->ifp);
664
665	callout_drain(&adapter->timer);
666
667	igb_free_pci_resources(adapter);
668	bus_generic_detach(dev);
669	if_free(ifp);
670
671	igb_free_transmit_structures(adapter);
672	igb_free_receive_structures(adapter);
673
674	IGB_CORE_LOCK_DESTROY(adapter);
675
676	return (0);
677}
678
679/*********************************************************************
680 *
681 *  Shutdown entry point
682 *
683 **********************************************************************/
684
685static int
686igb_shutdown(device_t dev)
687{
688	return igb_suspend(dev);
689}
690
691/*
692 * Suspend/resume device methods.
693 */
694static int
695igb_suspend(device_t dev)
696{
697	struct adapter *adapter = device_get_softc(dev);
698
699	IGB_CORE_LOCK(adapter);
700
701	igb_stop(adapter);
702
703        igb_release_manageability(adapter);
704	igb_release_hw_control(adapter);
705
706        if (adapter->wol) {
707                E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
708                E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
709                igb_enable_wakeup(dev);
710        }
711
712	IGB_CORE_UNLOCK(adapter);
713
714	return bus_generic_suspend(dev);
715}
716
717static int
718igb_resume(device_t dev)
719{
720	struct adapter *adapter = device_get_softc(dev);
721	struct ifnet *ifp = adapter->ifp;
722
723	IGB_CORE_LOCK(adapter);
724	igb_init_locked(adapter);
725	igb_init_manageability(adapter);
726
727	if ((ifp->if_flags & IFF_UP) &&
728	    (ifp->if_drv_flags & IFF_DRV_RUNNING))
729		igb_start(ifp);
730
731	IGB_CORE_UNLOCK(adapter);
732
733	return bus_generic_resume(dev);
734}
735
736
737/*********************************************************************
738 *  Transmit entry point
739 *
740 *  igb_start is called by the stack to initiate a transmit.
741 *  The driver will remain in this routine as long as there are
742 *  packets to transmit and transmit resources are available.
743 *  In case resources are not available, the stack is notified and
744 *  the packet is requeued.
745 **********************************************************************/
746
747static void
748igb_start_locked(struct tx_ring *txr, struct ifnet *ifp)
749{
750	struct adapter	*adapter = ifp->if_softc;
751	struct mbuf	*m_head;
752
753	IGB_TX_LOCK_ASSERT(txr);
754
755	if ((ifp->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
756	    IFF_DRV_RUNNING)
757		return;
758	if (!adapter->link_active)
759		return;
760
761	while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
762
763		IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
764		if (m_head == NULL)
765			break;
766		/*
767		 *  Encapsulation can modify our pointer, and/or make it
768		 *  NULL on failure.  In that event, we can't requeue.
769		 */
770		if (igb_xmit(txr, &m_head)) {
771			if (m_head == NULL)
772				break;
773			ifp->if_drv_flags |= IFF_DRV_OACTIVE;
774			IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
775			break;
776		}
777
778		/* Send a copy of the frame to the BPF listener */
779		ETHER_BPF_MTAP(ifp, m_head);
780
781		/* Set watchdog on */
782		txr->watchdog_check = TRUE;
783	}
784}
785
786/*
787 * Legacy TX driver routine, called from the
788 * stack, always uses tx[0], and spins for it.
789 * Should not be used with multiqueue tx
790 */
791static void
792igb_start(struct ifnet *ifp)
793{
794	struct adapter	*adapter = ifp->if_softc;
795	struct tx_ring	*txr = adapter->tx_rings;
796
797	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
798		IGB_TX_LOCK(txr);
799		igb_start_locked(txr, ifp);
800		IGB_TX_UNLOCK(txr);
801	}
802	return;
803}
804
805#if __FreeBSD_version >= 800000
806/*
807** Multiqueue Transmit driver
808**
809*/
810static int
811igb_mq_start(struct ifnet *ifp, struct mbuf *m)
812{
813	struct adapter	*adapter = ifp->if_softc;
814	struct tx_ring	*txr;
815	int 		i = 0, err = 0;
816
817	/* Which queue to use */
818	if ((m->m_flags & M_FLOWID) != 0)
819		i = m->m_pkthdr.flowid % adapter->num_queues;
820	else
821		i = curcpu % adapter->num_queues;
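	/*
	 * Note: when the stack supplies a flow id (M_FLOWID) the same flow
	 * always maps to the same TX ring; otherwise the ring is chosen
	 * from the current CPU, in both cases modulo the number of queues.
	 */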
822
823	txr = &adapter->tx_rings[i];
824
825	if (IGB_TX_TRYLOCK(txr)) {
826		err = igb_mq_start_locked(ifp, txr, m);
827		IGB_TX_UNLOCK(txr);
828	} else
829		err = drbr_enqueue(ifp, txr->br, m);
830
831	return (err);
832}
833
834static int
835igb_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr, struct mbuf *m)
836{
837	struct adapter  *adapter = txr->adapter;
838        struct mbuf     *next;
839        int             err = 0, enq;
840
841	IGB_TX_LOCK_ASSERT(txr);
842
843	if ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) !=
844	    IFF_DRV_RUNNING || adapter->link_active == 0) {
845		if (m != NULL)
846			err = drbr_enqueue(ifp, txr->br, m);
847		return (err);
848	}
849
850	enq = 0;
851	if (m == NULL) {
852		next = drbr_dequeue(ifp, txr->br);
853	} else if (drbr_needs_enqueue(ifp, txr->br)) {
854		if ((err = drbr_enqueue(ifp, txr->br, m)) != 0)
855			return (err);
856		next = drbr_dequeue(ifp, txr->br);
857	} else
858		next = m;
859	/* Process the queue */
860	while (next != NULL) {
861		if ((err = igb_xmit(txr, &next)) != 0) {
862			if (next != NULL)
863				err = drbr_enqueue(ifp, txr->br, next);
864			break;
865		}
866		enq++;
867		drbr_stats_update(ifp, next->m_pkthdr.len, next->m_flags);
868		ETHER_BPF_MTAP(ifp, next);
869		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
870			break;
871		if (txr->tx_avail <= IGB_TX_OP_THRESHOLD) {
872			ifp->if_drv_flags |= IFF_DRV_OACTIVE;
873			break;
874		}
875		next = drbr_dequeue(ifp, txr->br);
876	}
877	if (enq > 0) {
878		/* Set the watchdog */
879		txr->watchdog_check = TRUE;
880	}
881	return (err);
882}
883
884/*
885** Flush all ring buffers
886*/
887static void
888igb_qflush(struct ifnet *ifp)
889{
890	struct adapter	*adapter = ifp->if_softc;
891	struct tx_ring	*txr = adapter->tx_rings;
892	struct mbuf	*m;
893
894	for (int i = 0; i < adapter->num_queues; i++, txr++) {
895		IGB_TX_LOCK(txr);
896		while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
897			m_freem(m);
898		IGB_TX_UNLOCK(txr);
899	}
900	if_qflush(ifp);
901}
902#endif /* __FreeBSD_version >= 800000 */
903
904/*********************************************************************
905 *  Ioctl entry point
906 *
907 *  igb_ioctl is called when the user wants to configure the
908 *  interface.
909 *
910 *  return 0 on success, positive on failure
911 **********************************************************************/
912
913static int
914igb_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
915{
916	struct adapter	*adapter = ifp->if_softc;
917	struct ifreq *ifr = (struct ifreq *)data;
918#ifdef INET
919	struct ifaddr *ifa = (struct ifaddr *)data;
920#endif
921	int error = 0;
922
923	if (adapter->in_detach)
924		return (error);
925
926	switch (command) {
927	case SIOCSIFADDR:
928#ifdef INET
929		if (ifa->ifa_addr->sa_family == AF_INET) {
930			/*
931			 * XXX
932			 * Since resetting hardware takes a very long time
933			 * and results in link renegotiation, we only
934			 * initialize the hardware when it is absolutely
935			 * required.
936			 */
937			ifp->if_flags |= IFF_UP;
938			if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) {
939				IGB_CORE_LOCK(adapter);
940				igb_init_locked(adapter);
941				IGB_CORE_UNLOCK(adapter);
942			}
943			if (!(ifp->if_flags & IFF_NOARP))
944				arp_ifinit(ifp, ifa);
945		} else
946#endif
947			error = ether_ioctl(ifp, command, data);
948		break;
949	case SIOCSIFMTU:
950	    {
951		int max_frame_size;
952
953		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)");
954
955		IGB_CORE_LOCK(adapter);
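		/*
		 * 9234 bytes is the largest frame (jumbo payload plus
		 * Ethernet header and CRC) this driver accepts when
		 * validating the requested MTU below.
		 */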
956		max_frame_size = 9234;
957		if (ifr->ifr_mtu > max_frame_size - ETHER_HDR_LEN -
958		    ETHER_CRC_LEN) {
959			IGB_CORE_UNLOCK(adapter);
960			error = EINVAL;
961			break;
962		}
963
964		ifp->if_mtu = ifr->ifr_mtu;
965		adapter->max_frame_size =
966		    ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
967		igb_init_locked(adapter);
968		IGB_CORE_UNLOCK(adapter);
969		break;
970	    }
971	case SIOCSIFFLAGS:
972		IOCTL_DEBUGOUT("ioctl rcv'd:\
973		    SIOCSIFFLAGS (Set Interface Flags)");
974		IGB_CORE_LOCK(adapter);
975		if (ifp->if_flags & IFF_UP) {
976			if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) {
977				if ((ifp->if_flags ^ adapter->if_flags) &
978				    (IFF_PROMISC | IFF_ALLMULTI)) {
979					igb_disable_promisc(adapter);
980					igb_set_promisc(adapter);
981				}
982			} else
983				igb_init_locked(adapter);
984		} else
985			if (ifp->if_drv_flags & IFF_DRV_RUNNING)
986				igb_stop(adapter);
987		adapter->if_flags = ifp->if_flags;
988		IGB_CORE_UNLOCK(adapter);
989		break;
990	case SIOCADDMULTI:
991	case SIOCDELMULTI:
992		IOCTL_DEBUGOUT("ioctl rcv'd: SIOC(ADD|DEL)MULTI");
993		if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
994			IGB_CORE_LOCK(adapter);
995			igb_disable_intr(adapter);
996			igb_set_multi(adapter);
997#ifdef DEVICE_POLLING
998			if (!(ifp->if_capenable & IFCAP_POLLING))
999#endif
1000				igb_enable_intr(adapter);
1001			IGB_CORE_UNLOCK(adapter);
1002		}
1003		break;
1004	case SIOCSIFMEDIA:
1005		/* Check SOL/IDER usage */
1006		IGB_CORE_LOCK(adapter);
1007		if (e1000_check_reset_block(&adapter->hw)) {
1008			IGB_CORE_UNLOCK(adapter);
1009			device_printf(adapter->dev, "Media change is"
1010			    " blocked due to SOL/IDER session.\n");
1011			break;
1012		}
1013		IGB_CORE_UNLOCK(adapter);
1014	case SIOCGIFMEDIA:
1015		IOCTL_DEBUGOUT("ioctl rcv'd: \
1016		    SIOCxIFMEDIA (Get/Set Interface Media)");
1017		error = ifmedia_ioctl(ifp, ifr, &adapter->media, command);
1018		break;
1019	case SIOCSIFCAP:
1020	    {
1021		int mask, reinit;
1022
1023		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFCAP (Set Capabilities)");
1024		reinit = 0;
1025		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
1026#ifdef DEVICE_POLLING
1027		if (mask & IFCAP_POLLING) {
1028			if (ifr->ifr_reqcap & IFCAP_POLLING) {
1029				error = ether_poll_register(igb_poll, ifp);
1030				if (error)
1031					return (error);
1032				IGB_CORE_LOCK(adapter);
1033				igb_disable_intr(adapter);
1034				ifp->if_capenable |= IFCAP_POLLING;
1035				IGB_CORE_UNLOCK(adapter);
1036			} else {
1037				error = ether_poll_deregister(ifp);
1038				/* Enable interrupt even in error case */
1039				IGB_CORE_LOCK(adapter);
1040				igb_enable_intr(adapter);
1041				ifp->if_capenable &= ~IFCAP_POLLING;
1042				IGB_CORE_UNLOCK(adapter);
1043			}
1044		}
1045#endif
1046		if (mask & IFCAP_HWCSUM) {
1047			ifp->if_capenable ^= IFCAP_HWCSUM;
1048			reinit = 1;
1049		}
1050		if (mask & IFCAP_TSO4) {
1051			ifp->if_capenable ^= IFCAP_TSO4;
1052			reinit = 1;
1053		}
1054		if (mask & IFCAP_VLAN_HWTAGGING) {
1055			ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
1056			reinit = 1;
1057		}
1058		if (mask & IFCAP_LRO) {
1059			ifp->if_capenable ^= IFCAP_LRO;
1060			reinit = 1;
1061		}
1062		if (reinit && (ifp->if_drv_flags & IFF_DRV_RUNNING))
1063			igb_init(adapter);
1064		VLAN_CAPABILITIES(ifp);
1065		break;
1066	    }
1067
1068	default:
1069		error = ether_ioctl(ifp, command, data);
1070		break;
1071	}
1072
1073	return (error);
1074}
1075
1076
1077/*********************************************************************
1078 *  Init entry point
1079 *
1080 *  This routine is used in two ways. It is used by the stack as the
1081 *  init entry point in the network interface structure. It is also used
1082 *  by the driver as a hw/sw initialization routine to get to a
1083 *  consistent state.
1084 *
1085 *  return 0 on success, positive on failure
1086 **********************************************************************/
1087
1088static void
1089igb_init_locked(struct adapter *adapter)
1090{
1091	struct ifnet	*ifp = adapter->ifp;
1092	device_t	dev = adapter->dev;
1093
1094	INIT_DEBUGOUT("igb_init: begin");
1095
1096	IGB_CORE_LOCK_ASSERT(adapter);
1097
1098	igb_disable_intr(adapter);
1099	callout_stop(&adapter->timer);
1100
1101	/* Get the latest mac address; the user can use a LAA */
1102        bcopy(IF_LLADDR(adapter->ifp), adapter->hw.mac.addr,
1103              ETHER_ADDR_LEN);
1104
1105	/* Put the address into the Receive Address Array */
1106	e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);
1107
1108	igb_reset(adapter);
1109	igb_update_link_status(adapter);
1110
1111	E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);
1112
1113	/* Set hardware offload abilities */
1114	ifp->if_hwassist = 0;
1115	if (ifp->if_capenable & IFCAP_TXCSUM) {
1116		ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
1117#if __FreeBSD_version >= 800000
1118		if (adapter->hw.mac.type == e1000_82576)
1119			ifp->if_hwassist |= CSUM_SCTP;
1120#endif
1121	}
1122
1123	if (ifp->if_capenable & IFCAP_TSO4)
1124		ifp->if_hwassist |= CSUM_TSO;
1125
1126	/* Configure for OS presence */
1127	igb_init_manageability(adapter);
1128
1129	/* Prepare transmit descriptors and buffers */
1130	igb_setup_transmit_structures(adapter);
1131	igb_initialize_transmit_units(adapter);
1132
1133	/* Setup Multicast table */
1134	igb_set_multi(adapter);
1135
1136	/*
1137	** Figure out the desired mbuf pool
1138	** for doing jumbo/packetsplit
1139	*/
1140	if (ifp->if_mtu > ETHERMTU)
1141		adapter->rx_mbuf_sz = MJUMPAGESIZE;
1142	else
1143		adapter->rx_mbuf_sz = MCLBYTES;
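	/*
	 * MCLBYTES is the standard 2K mbuf cluster; MJUMPAGESIZE is the
	 * page-sized jumbo cluster used once the MTU exceeds a standard
	 * Ethernet frame.
	 */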
1144
1145	/* Prepare receive descriptors and buffers */
1146	if (igb_setup_receive_structures(adapter)) {
1147		device_printf(dev, "Could not setup receive structures\n");
1148		return;
1149	}
1150	igb_initialize_receive_units(adapter);
1151
1152	/* Don't lose promiscuous settings */
1153	igb_set_promisc(adapter);
1154
1155	ifp->if_drv_flags |= IFF_DRV_RUNNING;
1156	ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
1157
1158	callout_reset(&adapter->timer, hz, igb_local_timer, adapter);
1159	e1000_clear_hw_cntrs_base_generic(&adapter->hw);
1160
1161	if (adapter->msix > 1) /* Set up queue routing */
1162		igb_configure_queues(adapter);
1163
1164	/* Set up VLAN tag offload and filter */
1165	igb_setup_vlan_hw_support(adapter);
1166
1167	/* this clears any pending interrupts */
1168	E1000_READ_REG(&adapter->hw, E1000_ICR);
1169#ifdef DEVICE_POLLING
1170	/*
1171	 * Only enable interrupts if we are not polling; make sure
1172	 * they are off otherwise.
1173	 */
1174	if (ifp->if_capenable & IFCAP_POLLING)
1175		igb_disable_intr(adapter);
1176	else
1177#endif /* DEVICE_POLLING */
1178	{
1179	igb_enable_intr(adapter);
1180	E1000_WRITE_REG(&adapter->hw, E1000_ICS, E1000_ICS_LSC);
1181	}
1182
1183	/* Don't reset the phy next time init gets called */
1184	adapter->hw.phy.reset_disable = TRUE;
1185}
1186
1187static void
1188igb_init(void *arg)
1189{
1190	struct adapter *adapter = arg;
1191
1192	IGB_CORE_LOCK(adapter);
1193	igb_init_locked(adapter);
1194	IGB_CORE_UNLOCK(adapter);
1195}
1196
1197
1198static void
1199igb_handle_rxtx(void *context, int pending)
1200{
1201	struct igb_queue	*que = context;
1202	struct adapter		*adapter = que->adapter;
1203	struct tx_ring		*txr = adapter->tx_rings;
1204	struct ifnet		*ifp;
1205
1206	ifp = adapter->ifp;
1207
1208	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1209		if (igb_rxeof(que, adapter->rx_process_limit))
1210			taskqueue_enqueue(adapter->tq, &adapter->rxtx_task);
1211		IGB_TX_LOCK(txr);
1212		igb_txeof(txr);
1213
1214#if __FreeBSD_version >= 800000
1215		if (!drbr_empty(ifp, txr->br))
1216			igb_mq_start_locked(ifp, txr, NULL);
1217#else
1218		if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1219			igb_start_locked(txr, ifp);
1220#endif
1221		IGB_TX_UNLOCK(txr);
1222	}
1223
1224	igb_enable_intr(adapter);
1225}
1226
1227static void
1228igb_handle_que(void *context, int pending)
1229{
1230	struct igb_queue *que = context;
1231	struct adapter *adapter = que->adapter;
1232	struct tx_ring *txr = que->txr;
1233	struct ifnet	*ifp = adapter->ifp;
1234	u32		loop = IGB_MAX_LOOP;
1235	bool		more;
1236
1237	/* RX first */
1238	do {
1239		more = igb_rxeof(que, -1);
1240	} while (loop-- && more);
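	/* The loop counter bounds the RX cleanup to IGB_MAX_LOOP passes. */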
1241
1242	if (IGB_TX_TRYLOCK(txr)) {
1243		loop = IGB_MAX_LOOP;
1244		do {
1245			more = igb_txeof(txr);
1246		} while (loop-- && more);
1247#if __FreeBSD_version >= 800000
1248		igb_mq_start_locked(ifp, txr, NULL);
1249#else
1250		if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1251			igb_start_locked(txr, ifp);
1252#endif
1253		IGB_TX_UNLOCK(txr);
1254	}
1255
1256	/* Reenable this interrupt */
1257#ifdef DEVICE_POLLING
1258	if (!(ifp->if_capenable & IFCAP_POLLING))
1259#endif
1260	E1000_WRITE_REG(&adapter->hw, E1000_EIMS, que->eims);
1261}
1262
1263/* Deal with link in a sleepable context */
1264static void
1265igb_handle_link(void *context, int pending)
1266{
1267	struct adapter *adapter = context;
1268
1269	adapter->hw.mac.get_link_status = 1;
1270	igb_update_link_status(adapter);
1271}
1272
1273/*********************************************************************
1274 *
1275 *  MSI/Legacy Deferred
1276 *  Interrupt Service routine
1277 *
1278 *********************************************************************/
1279static int
1280igb_irq_fast(void *arg)
1281{
1282	struct adapter	*adapter = arg;
1283	uint32_t	reg_icr;
1284
1285
1286	reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1287
1288	/* Hot eject?  */
1289	if (reg_icr == 0xffffffff)
1290		return FILTER_STRAY;
1291
1292	/* Definitely not our interrupt.  */
1293	if (reg_icr == 0x0)
1294		return FILTER_STRAY;
1295
1296	if ((reg_icr & E1000_ICR_INT_ASSERTED) == 0)
1297		return FILTER_STRAY;
1298
1299	/*
1300	 * Mask interrupts until the taskqueue is finished running.  This is
1301	 * cheap, just assume that it is needed.  This also works around the
1302	 * MSI message reordering errata on certain systems.
1303	 */
1304	igb_disable_intr(adapter);
1305	taskqueue_enqueue(adapter->tq, &adapter->rxtx_task);
1306
1307	/* Link status change */
1308	if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC))
1309		taskqueue_enqueue(adapter->tq, &adapter->link_task);
1310
1311	if (reg_icr & E1000_ICR_RXO)
1312		adapter->rx_overruns++;
1313	return FILTER_HANDLED;
1314}
1315
1316#ifdef DEVICE_POLLING
1317/*********************************************************************
1318 *
1319 *  Legacy polling routine: if using this code you MUST be sure that
1320 *  multiqueue is not defined, i.e., set igb_num_queues to 1.
1321 *
1322 *********************************************************************/
1323#if __FreeBSD_version >= 800000
1324#define POLL_RETURN_COUNT(a) (a)
1325static int
1326#else
1327#define POLL_RETURN_COUNT(a)
1328static void
1329#endif
1330igb_poll(struct ifnet *ifp, enum poll_cmd cmd, int count)
1331{
1332	struct adapter		*adapter = ifp->if_softc;
1333	struct igb_queue	*que = adapter->queues;
1334	struct tx_ring		*txr = adapter->tx_rings;
1335	u32			reg_icr, rx_done = 0;
1336	u32			loop = IGB_MAX_LOOP;
1337	bool			more;
1338
1339	IGB_CORE_LOCK(adapter);
1340	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
1341		IGB_CORE_UNLOCK(adapter);
1342		return POLL_RETURN_COUNT(rx_done);
1343	}
1344
1345	if (cmd == POLL_AND_CHECK_STATUS) {
1346		reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1347		/* Link status change */
1348		if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC))
1349			taskqueue_enqueue(adapter->tq, &adapter->link_task);
1350
1351		if (reg_icr & E1000_ICR_RXO)
1352			adapter->rx_overruns++;
1353	}
1354	IGB_CORE_UNLOCK(adapter);
1355
1356	/* TODO: rx_count */
1357	rx_done = igb_rxeof(que, count) ? 1 : 0;
1358
1359	IGB_TX_LOCK(txr);
1360	do {
1361		more = igb_txeof(txr);
1362	} while (loop-- && more);
1363#if __FreeBSD_version >= 800000
1364	if (!drbr_empty(ifp, txr->br))
1365		igb_mq_start_locked(ifp, txr, NULL);
1366#else
1367	if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1368		igb_start_locked(txr, ifp);
1369#endif
1370	IGB_TX_UNLOCK(txr);
1371	return POLL_RETURN_COUNT(rx_done);
1372}
1373#endif /* DEVICE_POLLING */
1374
1375/*********************************************************************
1376 *
1377 *  MSIX TX Interrupt Service routine
1378 *
1379 **********************************************************************/
1380static void
1381igb_msix_que(void *arg)
1382{
1383	struct igb_queue *que = arg;
1384	struct adapter *adapter = que->adapter;
1385	struct tx_ring *txr = que->txr;
1386	struct rx_ring *rxr = que->rxr;
1387	u32		newitr = 0;
1388	bool		more_tx, more_rx;
1389
1390	E1000_WRITE_REG(&adapter->hw, E1000_EIMC, que->eims);
1391	++que->irqs;
1392
1393	IGB_TX_LOCK(txr);
1394	more_tx = igb_txeof(txr);
1395	IGB_TX_UNLOCK(txr);
1396
1397	more_rx = igb_rxeof(que, adapter->rx_process_limit);
1398
1399	if (igb_enable_aim == FALSE)
1400		goto no_calc;
1401	/*
1402	** Do Adaptive Interrupt Moderation:
1403        **  - Write out last calculated setting
1404	**  - Calculate based on average size over
1405	**    the last interval.
1406	*/
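	/*
	 * Sub-gigabit links simply use half the default ITR; otherwise the
	 * value is derived from the average bytes per packet seen on this
	 * queue since the last interrupt, clamped and scaled below before
	 * being masked into the EITR register format.
	 */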
1407        if (que->eitr_setting)
1408                E1000_WRITE_REG(&adapter->hw,
1409                    E1000_EITR(que->msix), que->eitr_setting);
1410
1411        que->eitr_setting = 0;
1412
1413        /* Idle, do nothing */
1414        if ((txr->bytes == 0) && (rxr->bytes == 0))
1415                goto no_calc;
1416
1417        /* Use half the default if sub-gig */
1418        if (adapter->link_speed != 1000)
1419                newitr = IGB_DEFAULT_ITR / 2;
1420        else {
1421		if ((txr->bytes) && (txr->packets))
1422                	newitr = txr->bytes/txr->packets;
1423		if ((rxr->bytes) && (rxr->packets))
1424			newitr = max(newitr,
1425			    (rxr->bytes / rxr->packets));
1426                newitr += 24; /* account for hardware frame, crc */
1427		/* set an upper boundary */
1428		newitr = min(newitr, 3000);
1429		/* Be nice to the mid range */
1430                if ((newitr > 300) && (newitr < 1200))
1431                        newitr = (newitr / 3);
1432                else
1433                        newitr = (newitr / 2);
1434        }
1435        newitr &= 0x7FFC;  /* Mask invalid bits */
1436        if (adapter->hw.mac.type == e1000_82575)
1437                newitr |= newitr << 16;
1438        else
1439                newitr |= 0x8000000;
1440
1441        /* save for next interrupt */
1442        que->eitr_setting = newitr;
1443
1444        /* Reset state */
1445        txr->bytes = 0;
1446        txr->packets = 0;
1447        rxr->bytes = 0;
1448        rxr->packets = 0;
1449
1450no_calc:
1451	/* Schedule a clean task if needed */
1452	if (more_tx || more_rx)
1453		taskqueue_enqueue(que->tq, &que->que_task);
1454	else
1455		/* Reenable this interrupt */
1456		E1000_WRITE_REG(&adapter->hw, E1000_EIMS, que->eims);
1457	return;
1458}
1459
1460
1461/*********************************************************************
1462 *
1463 *  MSIX Link Interrupt Service routine
1464 *
1465 **********************************************************************/
1466
1467static void
1468igb_msix_link(void *arg)
1469{
1470	struct adapter	*adapter = arg;
1471	u32       	icr;
1472
1473	++adapter->link_irq;
1474	icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
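	/*
	 * Only a link status change (LSC) is serviced on this vector;
	 * anything else is treated as spurious, though the interrupt
	 * masks are rearmed either way.
	 */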
1475	if (!(icr & E1000_ICR_LSC))
1476		goto spurious;
1477	taskqueue_enqueue(adapter->tq, &adapter->link_task);
1478
1479spurious:
1480	/* Rearm */
1481	E1000_WRITE_REG(&adapter->hw, E1000_IMS, E1000_IMS_LSC);
1482	E1000_WRITE_REG(&adapter->hw, E1000_EIMS, adapter->link_mask);
1483	return;
1484}
1485
1486
1487/*********************************************************************
1488 *
1489 *  Media Ioctl callback
1490 *
1491 *  This routine is called whenever the user queries the status of
1492 *  the interface using ifconfig.
1493 *
1494 **********************************************************************/
1495static void
1496igb_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
1497{
1498	struct adapter *adapter = ifp->if_softc;
1499	u_char fiber_type = IFM_1000_SX;
1500
1501	INIT_DEBUGOUT("igb_media_status: begin");
1502
1503	IGB_CORE_LOCK(adapter);
1504	igb_update_link_status(adapter);
1505
1506	ifmr->ifm_status = IFM_AVALID;
1507	ifmr->ifm_active = IFM_ETHER;
1508
1509	if (!adapter->link_active) {
1510		IGB_CORE_UNLOCK(adapter);
1511		return;
1512	}
1513
1514	ifmr->ifm_status |= IFM_ACTIVE;
1515
1516	if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
1517	    (adapter->hw.phy.media_type == e1000_media_type_internal_serdes))
1518		ifmr->ifm_active |= fiber_type | IFM_FDX;
1519	else {
1520		switch (adapter->link_speed) {
1521		case 10:
1522			ifmr->ifm_active |= IFM_10_T;
1523			break;
1524		case 100:
1525			ifmr->ifm_active |= IFM_100_TX;
1526			break;
1527		case 1000:
1528			ifmr->ifm_active |= IFM_1000_T;
1529			break;
1530		}
1531		if (adapter->link_duplex == FULL_DUPLEX)
1532			ifmr->ifm_active |= IFM_FDX;
1533		else
1534			ifmr->ifm_active |= IFM_HDX;
1535	}
1536	IGB_CORE_UNLOCK(adapter);
1537}
1538
1539/*********************************************************************
1540 *
1541 *  Media Ioctl callback
1542 *
1543 *  This routine is called when the user changes speed/duplex using
1544 *  media/mediaopt option with ifconfig.
1545 *
1546 **********************************************************************/
1547static int
1548igb_media_change(struct ifnet *ifp)
1549{
1550	struct adapter *adapter = ifp->if_softc;
1551	struct ifmedia  *ifm = &adapter->media;
1552
1553	INIT_DEBUGOUT("igb_media_change: begin");
1554
1555	if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
1556		return (EINVAL);
1557
1558	IGB_CORE_LOCK(adapter);
1559	switch (IFM_SUBTYPE(ifm->ifm_media)) {
1560	case IFM_AUTO:
1561		adapter->hw.mac.autoneg = DO_AUTO_NEG;
1562		adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
1563		break;
1564	case IFM_1000_LX:
1565	case IFM_1000_SX:
1566	case IFM_1000_T:
1567		adapter->hw.mac.autoneg = DO_AUTO_NEG;
1568		adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
1569		break;
1570	case IFM_100_TX:
1571		adapter->hw.mac.autoneg = FALSE;
1572		adapter->hw.phy.autoneg_advertised = 0;
1573		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1574			adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_FULL;
1575		else
1576			adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_HALF;
1577		break;
1578	case IFM_10_T:
1579		adapter->hw.mac.autoneg = FALSE;
1580		adapter->hw.phy.autoneg_advertised = 0;
1581		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1582			adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_FULL;
1583		else
1584			adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_HALF;
1585		break;
1586	default:
1587		device_printf(adapter->dev, "Unsupported media type\n");
1588	}
1589
1590	/* As the speed/duplex settings may have changed we need to
1591	 * reset the PHY.
1592	 */
1593	adapter->hw.phy.reset_disable = FALSE;
1594
1595	igb_init_locked(adapter);
1596	IGB_CORE_UNLOCK(adapter);
1597
1598	return (0);
1599}
1600
1601
1602/*********************************************************************
1603 *
1604 *  This routine maps the mbufs to Advanced TX descriptors,
1605 *  as used by the 82575-class adapters.
1606 *
1607 **********************************************************************/
1608
1609static int
1610igb_xmit(struct tx_ring *txr, struct mbuf **m_headp)
1611{
1612	struct adapter		*adapter = txr->adapter;
1613	bus_dma_segment_t	segs[IGB_MAX_SCATTER];
1614	bus_dmamap_t		map;
1615	struct igb_tx_buffer	*tx_buffer, *tx_buffer_mapped;
1616	union e1000_adv_tx_desc	*txd = NULL;
1617	struct mbuf		*m_head;
1618	u32			olinfo_status = 0, cmd_type_len = 0;
1619	int			nsegs, i, j, error, first, last = 0;
1620	u32			hdrlen = 0;
1621
1622	m_head = *m_headp;
1623
1624
1625	/* Set basic descriptor constants */
1626	cmd_type_len |= E1000_ADVTXD_DTYP_DATA;
1627	cmd_type_len |= E1000_ADVTXD_DCMD_IFCS | E1000_ADVTXD_DCMD_DEXT;
1628	if (m_head->m_flags & M_VLANTAG)
1629		cmd_type_len |= E1000_ADVTXD_DCMD_VLE;
1630
1631        /*
1632         * Force a cleanup if the number of TX descriptors
1633         * available hits the threshold
1634         */
1635	if (txr->tx_avail <= IGB_TX_CLEANUP_THRESHOLD) {
1636		igb_txeof(txr);
1637		/* Do we now at least have the minimum? */
1638		if (txr->tx_avail <= IGB_TX_OP_THRESHOLD) {
1639			txr->no_desc_avail++;
1640			return (ENOBUFS);
1641		}
1642	}
1643
1644	/*
1645         * Map the packet for DMA.
1646	 *
1647	 * Capture the first descriptor index;
1648	 * this descriptor will have the index
1649	 * of the EOP, which is the only one that
1650	 * now gets a DONE bit writeback.
1651	 */
1652	first = txr->next_avail_desc;
1653	tx_buffer = &txr->tx_buffers[first];
1654	tx_buffer_mapped = tx_buffer;
1655	map = tx_buffer->map;
1656
1657	error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
1658	    *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
1659
1660	if (error == EFBIG) {
1661		struct mbuf *m;
1662
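		/*
		 * EFBIG means the mbuf chain has more segments than the DMA
		 * tag allows; m_defrag() copies it into fewer, larger
		 * clusters so the mapping can be retried once.
		 */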
1663		m = m_defrag(*m_headp, M_DONTWAIT);
1664		if (m == NULL) {
1665			adapter->mbuf_defrag_failed++;
1666			m_freem(*m_headp);
1667			*m_headp = NULL;
1668			return (ENOBUFS);
1669		}
1670		*m_headp = m;
1671
1672		/* Try it again */
1673		error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
1674		    *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
1675
1676		if (error == ENOMEM) {
1677			adapter->no_tx_dma_setup++;
1678			return (error);
1679		} else if (error != 0) {
1680			adapter->no_tx_dma_setup++;
1681			m_freem(*m_headp);
1682			*m_headp = NULL;
1683			return (error);
1684		}
1685	} else if (error == ENOMEM) {
1686		adapter->no_tx_dma_setup++;
1687		return (error);
1688	} else if (error != 0) {
1689		adapter->no_tx_dma_setup++;
1690		m_freem(*m_headp);
1691		*m_headp = NULL;
1692		return (error);
1693	}
1694
1695	/* Check again to be sure we have enough descriptors */
1696        if (nsegs > (txr->tx_avail - 2)) {
1697                txr->no_desc_avail++;
1698		bus_dmamap_unload(txr->txtag, map);
1699		return (ENOBUFS);
1700        }
1701	m_head = *m_headp;
1702
1703        /*
1704         * Set up the context descriptor:
1705         * used when any hardware offload is done.
1706	 * This includes CSUM, VLAN, and TSO. It
1707	 * will use the first descriptor.
1708         */
1709        if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
1710		if (igb_tso_setup(txr, m_head, &hdrlen)) {
1711			cmd_type_len |= E1000_ADVTXD_DCMD_TSE;
1712			olinfo_status |= E1000_TXD_POPTS_IXSM << 8;
1713			olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
1714		} else
1715			return (ENXIO);
1716	} else if (igb_tx_ctx_setup(txr, m_head))
1717		olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
1718
1719	/* Calculate payload length */
1720	olinfo_status |= ((m_head->m_pkthdr.len - hdrlen)
1721	    << E1000_ADVTXD_PAYLEN_SHIFT);
1722
1723	/* 82575 needs the queue index added */
1724	if (adapter->hw.mac.type == e1000_82575)
1725		olinfo_status |= txr->me << 4;
1726
1727	/* Set up our transmit descriptors */
1728	i = txr->next_avail_desc;
1729	for (j = 0; j < nsegs; j++) {
1730		bus_size_t seg_len;
1731		bus_addr_t seg_addr;
1732
1733		tx_buffer = &txr->tx_buffers[i];
1734		txd = (union e1000_adv_tx_desc *)&txr->tx_base[i];
1735		seg_addr = segs[j].ds_addr;
1736		seg_len  = segs[j].ds_len;
1737
1738		txd->read.buffer_addr = htole64(seg_addr);
1739		txd->read.cmd_type_len = htole32(cmd_type_len | seg_len);
1740		txd->read.olinfo_status = htole32(olinfo_status);
1741		last = i;
1742		if (++i == adapter->num_tx_desc)
1743			i = 0;
1744		tx_buffer->m_head = NULL;
1745		tx_buffer->next_eop = -1;
1746	}
1747
1748	txr->next_avail_desc = i;
1749	txr->tx_avail -= nsegs;
1750
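	/*
	 * The loaded DMA map travels with the mbuf: the last buffer keeps
	 * the map that was actually loaded, and the first buffer takes over
	 * the last buffer's unused map in exchange.
	 */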
1751        tx_buffer->m_head = m_head;
1752	tx_buffer_mapped->map = tx_buffer->map;
1753	tx_buffer->map = map;
1754        bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);
1755
1756        /*
1757         * Last Descriptor of Packet
1758	 * needs End Of Packet (EOP)
1759	 * and Report Status (RS)
1760         */
1761        txd->read.cmd_type_len |=
1762	    htole32(E1000_ADVTXD_DCMD_EOP | E1000_ADVTXD_DCMD_RS);
1763	/*
1764	 * Keep track in the first buffer which
1765	 * descriptor will be written back
1766	 */
1767	tx_buffer = &txr->tx_buffers[first];
1768	tx_buffer->next_eop = last;
1769	txr->watchdog_time = ticks;
1770
1771	/*
1772	 * Advance the Transmit Descriptor Tail (TDT); this tells the E1000
1773	 * that this frame is available to transmit.
1774	 */
1775	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
1776	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1777	E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), i);
1778	++txr->tx_packets;
1779
1780	return (0);
1781
1782}
1783
1784static void
1785igb_set_promisc(struct adapter *adapter)
1786{
1787	struct ifnet	*ifp = adapter->ifp;
1788	uint32_t	reg_rctl;
1789
1790	reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
1791
1792	if (ifp->if_flags & IFF_PROMISC) {
1793		reg_rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
1794		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
1795	} else if (ifp->if_flags & IFF_ALLMULTI) {
1796		reg_rctl |= E1000_RCTL_MPE;
1797		reg_rctl &= ~E1000_RCTL_UPE;
1798		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
1799	}
1800}
1801
1802static void
1803igb_disable_promisc(struct adapter *adapter)
1804{
1805	uint32_t	reg_rctl;
1806
1807	reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
1808
1809	reg_rctl &=  (~E1000_RCTL_UPE);
1810	reg_rctl &=  (~E1000_RCTL_MPE);
1811	E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
1812}
1813
1814
1815/*********************************************************************
1816 *  Multicast Update
1817 *
1818 *  This routine is called whenever the multicast address list is updated.
1819 *
1820 **********************************************************************/
1821
1822static void
1823igb_set_multi(struct adapter *adapter)
1824{
1825	struct ifnet	*ifp = adapter->ifp;
1826	struct ifmultiaddr *ifma;
1827	u32 reg_rctl = 0;
1828	u8  mta[MAX_NUM_MULTICAST_ADDRESSES * ETH_ADDR_LEN];
1829
1830	int mcnt = 0;
1831
1832	IOCTL_DEBUGOUT("igb_set_multi: begin");
1833
1834#if __FreeBSD_version < 800000
1835	IF_ADDR_LOCK(ifp);
1836#else
1837	if_maddr_rlock(ifp);
1838#endif
1839	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
1840		if (ifma->ifma_addr->sa_family != AF_LINK)
1841			continue;
1842
1843		if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
1844			break;
1845
1846		bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
1847		    &mta[mcnt * ETH_ADDR_LEN], ETH_ADDR_LEN);
1848		mcnt++;
1849	}
1850#if __FreeBSD_version < 800000
1851	IF_ADDR_UNLOCK(ifp);
1852#else
1853	if_maddr_runlock(ifp);
1854#endif
1855
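	/*
	** If the walk stopped because the list exceeds the hardware
	** filter table (MAX_NUM_MULTICAST_ADDRESSES entries), fall back
	** to multicast promiscuous mode rather than program a partial list.
	*/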
1856	if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) {
1857		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
1858		reg_rctl |= E1000_RCTL_MPE;
1859		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
1860	} else
1861		e1000_update_mc_addr_list(&adapter->hw, mta, mcnt);
1862}
1863
1864
1865/*********************************************************************
1866 *  Timer routine:
1867 *  	This routine checks for link status,
1868 *	updates statistics, and does the watchdog.
1869 *
1870 **********************************************************************/
1871
1872static void
1873igb_local_timer(void *arg)
1874{
1875	struct adapter		*adapter = arg;
1876	struct ifnet		*ifp = adapter->ifp;
1877	device_t		dev = adapter->dev;
1878	struct tx_ring		*txr = adapter->tx_rings;
1879
1880
1881	IGB_CORE_LOCK_ASSERT(adapter);
1882
1883	igb_update_link_status(adapter);
1884	igb_update_stats_counters(adapter);
1885
1886	if (igb_display_debug_stats && (ifp->if_drv_flags & IFF_DRV_RUNNING))
1887		igb_print_hw_stats(adapter);
1888
1889        /*
1890        ** Watchdog: check for time since any descriptor was cleaned
1891        */
1892	for (int i = 0; i < adapter->num_queues; i++, txr++) {
1893		if (txr->watchdog_check == FALSE)
1894			continue;
1895		if ((ticks - txr->watchdog_time) > IGB_WATCHDOG)
1896			goto timeout;
1897	}
1898
1899	/* Trigger an RX interrupt on all queues */
1900#ifdef DEVICE_POLLING
1901	if (!(ifp->if_capenable & IFCAP_POLLING))
1902#endif
1903	E1000_WRITE_REG(&adapter->hw, E1000_EICS, adapter->rx_mask);
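	/*
	** The EICS write sets those cause bits in software, firing the
	** RX queue vectors so the rings are serviced even if a hardware
	** interrupt was missed.
	*/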
1904	callout_reset(&adapter->timer, hz, igb_local_timer, adapter);
1905	return;
1906
1907timeout:
1908	device_printf(adapter->dev, "Watchdog timeout -- resetting\n");
1909	device_printf(dev,"Queue(%d) tdh = %d, hw tdt = %d\n", txr->me,
1910            E1000_READ_REG(&adapter->hw, E1000_TDH(txr->me)),
1911            E1000_READ_REG(&adapter->hw, E1000_TDT(txr->me)));
1912	device_printf(dev,"TX(%d) desc avail = %d,"
1913            "Next TX to Clean = %d\n",
1914            txr->me, txr->tx_avail, txr->next_to_clean);
1915	adapter->ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
1916	adapter->watchdog_events++;
1917	igb_init_locked(adapter);
1918}
1919
1920static void
1921igb_update_link_status(struct adapter *adapter)
1922{
1923	struct e1000_hw *hw = &adapter->hw;
1924	struct ifnet *ifp = adapter->ifp;
1925	device_t dev = adapter->dev;
1926	struct tx_ring *txr = adapter->tx_rings;
1927	u32 link_check = 0;
1928
1929	/* Get the cached link value or read for real */
1930        switch (hw->phy.media_type) {
1931        case e1000_media_type_copper:
1932                if (hw->mac.get_link_status) {
1933			/* Do the work to read phy */
1934                        e1000_check_for_link(hw);
1935                        link_check = !hw->mac.get_link_status;
1936                } else
1937                        link_check = TRUE;
1938                break;
1939        case e1000_media_type_fiber:
1940                e1000_check_for_link(hw);
1941                link_check = (E1000_READ_REG(hw, E1000_STATUS) &
1942                                 E1000_STATUS_LU);
1943                break;
1944        case e1000_media_type_internal_serdes:
1945                e1000_check_for_link(hw);
1946                link_check = adapter->hw.mac.serdes_has_link;
1947                break;
1948        default:
1949        case e1000_media_type_unknown:
1950                break;
1951        }
1952
1953	/* Now we check if a transition has happened */
1954	if (link_check && (adapter->link_active == 0)) {
1955		e1000_get_speed_and_duplex(&adapter->hw,
1956		    &adapter->link_speed, &adapter->link_duplex);
1957		if (bootverbose)
1958			device_printf(dev, "Link is up %d Mbps %s\n",
1959			    adapter->link_speed,
1960			    ((adapter->link_duplex == FULL_DUPLEX) ?
1961			    "Full Duplex" : "Half Duplex"));
1962		adapter->link_active = 1;
1963		ifp->if_baudrate = adapter->link_speed * 1000000;
1964		/* This can sleep */
1965		if_link_state_change(ifp, LINK_STATE_UP);
1966	} else if (!link_check && (adapter->link_active == 1)) {
1967		ifp->if_baudrate = adapter->link_speed = 0;
1968		adapter->link_duplex = 0;
1969		if (bootverbose)
1970			device_printf(dev, "Link is Down\n");
1971		adapter->link_active = 0;
1972		/* This can sleep */
1973		if_link_state_change(ifp, LINK_STATE_DOWN);
1974		/* Turn off watchdogs */
1975		for (int i = 0; i < adapter->num_queues; i++, txr++)
1976			txr->watchdog_check = FALSE;
1977	}
1978}
1979
1980/*********************************************************************
1981 *
1982 *  This routine disables all traffic on the adapter by issuing a
1983 *  global reset on the MAC and deallocates TX/RX buffers.
1984 *
1985 **********************************************************************/
1986
1987static void
1988igb_stop(void *arg)
1989{
1990	struct adapter	*adapter = arg;
1991	struct ifnet	*ifp = adapter->ifp;
1992	struct tx_ring *txr = adapter->tx_rings;
1993
1994	IGB_CORE_LOCK_ASSERT(adapter);
1995
1996	INIT_DEBUGOUT("igb_stop: begin");
1997
1998	igb_disable_intr(adapter);
1999
2000	callout_stop(&adapter->timer);
2001
2002	/* Tell the stack that the interface is no longer active */
2003	ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
2004
2005	/* Unarm watchdog timer. */
2006	for (int i = 0; i < adapter->num_queues; i++, txr++) {
2007		IGB_TX_LOCK(txr);
2008		txr->watchdog_check = FALSE;
2009		IGB_TX_UNLOCK(txr);
2010	}
2011
2012	e1000_reset_hw(&adapter->hw);
2013	E1000_WRITE_REG(&adapter->hw, E1000_WUC, 0);
2014
2015	e1000_led_off(&adapter->hw);
2016	e1000_cleanup_led(&adapter->hw);
2017}
2018
2019
2020/*********************************************************************
2021 *
2022 *  Determine hardware revision.
2023 *
2024 **********************************************************************/
2025static void
2026igb_identify_hardware(struct adapter *adapter)
2027{
2028	device_t dev = adapter->dev;
2029
2030	/* Make sure our PCI config space has the necessary stuff set */
2031	adapter->hw.bus.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2);
2032	if (!((adapter->hw.bus.pci_cmd_word & PCIM_CMD_BUSMASTEREN) &&
2033	    (adapter->hw.bus.pci_cmd_word & PCIM_CMD_MEMEN))) {
2034		device_printf(dev, "Memory Access and/or Bus Master bits "
2035		    "were not set!\n");
2036		adapter->hw.bus.pci_cmd_word |=
2037		(PCIM_CMD_BUSMASTEREN | PCIM_CMD_MEMEN);
2038		pci_write_config(dev, PCIR_COMMAND,
2039		    adapter->hw.bus.pci_cmd_word, 2);
2040	}
2041
2042	/* Save off the information about this board */
2043	adapter->hw.vendor_id = pci_get_vendor(dev);
2044	adapter->hw.device_id = pci_get_device(dev);
2045	adapter->hw.revision_id = pci_read_config(dev, PCIR_REVID, 1);
2046	adapter->hw.subsystem_vendor_id =
2047	    pci_read_config(dev, PCIR_SUBVEND_0, 2);
2048	adapter->hw.subsystem_device_id =
2049	    pci_read_config(dev, PCIR_SUBDEV_0, 2);
2050
2051	/* Do Shared Code Init and Setup */
2052	if (e1000_set_mac_type(&adapter->hw)) {
2053		device_printf(dev, "Setup init failure\n");
2054		return;
2055	}
2056}
2057
2058static int
2059igb_allocate_pci_resources(struct adapter *adapter)
2060{
2061	device_t	dev = adapter->dev;
2062	int		rid;
2063
2064	rid = PCIR_BAR(0);
2065	adapter->pci_mem = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
2066	    &rid, RF_ACTIVE);
2067	if (adapter->pci_mem == NULL) {
2068		device_printf(dev, "Unable to allocate bus resource: memory\n");
2069		return (ENXIO);
2070	}
2071	adapter->osdep.mem_bus_space_tag =
2072	    rman_get_bustag(adapter->pci_mem);
2073	adapter->osdep.mem_bus_space_handle =
2074	    rman_get_bushandle(adapter->pci_mem);
2075	adapter->hw.hw_addr = (u8 *)&adapter->osdep.mem_bus_space_handle;
2076
2077	adapter->num_queues = 1; /* Defaults for Legacy or MSI */
2078
2079	/* This will set up either MSI/X or MSI */
2080	adapter->msix = igb_setup_msix(adapter);
2081	adapter->hw.back = &adapter->osdep;
2082
2083	return (0);
2084}
2085
2086/*********************************************************************
2087 *
2088 *  Setup the Legacy or MSI Interrupt handler
2089 *
2090 **********************************************************************/
2091static int
2092igb_allocate_legacy(struct adapter *adapter)
2093{
2094	device_t		dev = adapter->dev;
2095	struct igb_queue	*que = adapter->queues;
2096	int			error, rid = 0;
2097
2098	/* Turn off all interrupts */
2099	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2100
2101	/* MSI RID is 1 */
2102	if (adapter->msix == 1)
2103		rid = 1;
2104
2105	/* We allocate a single interrupt resource */
2106	adapter->res = bus_alloc_resource_any(dev,
2107	    SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2108	if (adapter->res == NULL) {
2109		device_printf(dev, "Unable to allocate bus resource: "
2110		    "interrupt\n");
2111		return (ENXIO);
2112	}
2113
2114	/*
2115	 * Try allocating a fast interrupt and the associated deferred
2116	 * processing contexts.
2117	 */
2118	TASK_INIT(&adapter->rxtx_task, 0, igb_handle_rxtx, que);
2119	/* Make tasklet for deferred link handling */
2120	TASK_INIT(&adapter->link_task, 0, igb_handle_link, adapter);
2121	adapter->tq = taskqueue_create_fast("igb_taskq", M_NOWAIT,
2122	    taskqueue_thread_enqueue, &adapter->tq);
2123	taskqueue_start_threads(&adapter->tq, 1, PI_NET, "%s taskq",
2124	    device_get_nameunit(adapter->dev));
2125	if ((error = bus_setup_intr(dev, adapter->res,
2126	    INTR_TYPE_NET | INTR_MPSAFE, igb_irq_fast, NULL,
2127	    adapter, &adapter->tag)) != 0) {
2128		device_printf(dev, "Failed to register fast interrupt "
2129			    "handler: %d\n", error);
2130		taskqueue_free(adapter->tq);
2131		adapter->tq = NULL;
2132		return (error);
2133	}
2134
2135	return (0);
2136}
2137
2138
2139/*********************************************************************
2140 *
2141 *  Set up the MSIX Queue Interrupt handlers:
2142 *
2143 **********************************************************************/
2144static int
2145igb_allocate_msix(struct adapter *adapter)
2146{
2147	device_t		dev = adapter->dev;
2148	struct igb_queue	*que = adapter->queues;
2149	int			error, rid, vector = 0;
2150
2151
2152	for (int i = 0; i < adapter->num_queues; i++, vector++, que++) {
2153		rid = vector +1;
2154		que->res = bus_alloc_resource_any(dev,
2155		    SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2156		if (que->res == NULL) {
2157			device_printf(dev,
2158			    "Unable to allocate bus resource: "
2159			    "MSIX Queue Interrupt\n");
2160			return (ENXIO);
2161		}
2162		error = bus_setup_intr(dev, que->res,
2163	    	    INTR_TYPE_NET | INTR_MPSAFE, NULL,
2164		    igb_msix_que, que, &que->tag);
2165		if (error) {
2166			que->res = NULL;
2167			device_printf(dev, "Failed to register Queue handler");
2168			return (error);
2169		}
2170		que->msix = vector;
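		/*
		** que->eims records the EICR/EIMS bits owned by this
		** queue's vector: the 82575 uses fixed per-queue cause
		** bits, later MACs use one bit per MSI-X vector.
		*/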
2171		if (adapter->hw.mac.type == e1000_82575)
2172			que->eims = E1000_EICR_TX_QUEUE0 << i;
2173		else
2174			que->eims = 1 << vector;
2175		/*
2176		** Bind the msix vector, and thus the
2177		** rings to the corresponding cpu.
2178		*/
2179		if (adapter->num_queues > 1)
2180			bus_bind_intr(dev, que->res, i);
2181		/* Make tasklet for deferred handling */
2182		TASK_INIT(&que->que_task, 0, igb_handle_que, que);
2183		que->tq = taskqueue_create_fast("igb_que", M_NOWAIT,
2184		    taskqueue_thread_enqueue, &que->tq);
2185		taskqueue_start_threads(&que->tq, 1, PI_NET, "%s que",
2186		    device_get_nameunit(adapter->dev));
2187	}
2188
2189	/* And Link */
2190	rid = vector + 1;
2191	adapter->res = bus_alloc_resource_any(dev,
2192	    SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2193	if (adapter->res == NULL) {
2194		device_printf(dev,
2195		    "Unable to allocate bus resource: "
2196		    "MSIX Link Interrupt\n");
2197		return (ENXIO);
2198	}
2199	if ((error = bus_setup_intr(dev, adapter->res,
2200	    INTR_TYPE_NET | INTR_MPSAFE, NULL,
2201	    igb_msix_link, adapter, &adapter->tag)) != 0) {
2202		device_printf(dev, "Failed to register Link handler");
2203		return (error);
2204	}
2205	adapter->linkvec = vector;
2206
2207	/* Make tasklet for deferred handling */
2208	TASK_INIT(&adapter->link_task, 0, igb_handle_link, adapter);
2209	adapter->tq = taskqueue_create_fast("igb_link", M_NOWAIT,
2210	    taskqueue_thread_enqueue, &adapter->tq);
2211	taskqueue_start_threads(&adapter->tq, 1, PI_NET, "%s link",
2212	    device_get_nameunit(adapter->dev));
2213
2214	return (0);
2215}
2216
2217
2218static void
2219igb_configure_queues(struct adapter *adapter)
2220{
2221	struct	e1000_hw	*hw = &adapter->hw;
2222	struct	igb_queue	*que;
2223	u32			tmp, ivar = 0;
2224	u32			newitr = IGB_DEFAULT_ITR;
2225
2226	/* First turn on RSS capability */
2227	if (adapter->hw.mac.type > e1000_82575)
2228		E1000_WRITE_REG(hw, E1000_GPIE,
2229		    E1000_GPIE_MSIX_MODE | E1000_GPIE_EIAME |
2230		    E1000_GPIE_PBA | E1000_GPIE_NSICR);
2231
2232	/* Turn on MSIX */
2233	switch (adapter->hw.mac.type) {
2234	case e1000_82580:
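		/*
		** Each 32-bit IVAR register holds four byte-wide entries
		** (RX and TX for an even/odd queue pair); an entry is the
		** MSI-X vector number OR'd with E1000_IVAR_VALID.  The
		** masks below select the byte belonging to this queue.
		*/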
2235		/* RX entries */
2236		for (int i = 0; i < adapter->num_queues; i++) {
2237			u32 index = i >> 1;
2238			ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2239			que = &adapter->queues[i];
2240			if (i & 1) {
2241				ivar &= 0xFF00FFFF;
2242				ivar |= (que->msix | E1000_IVAR_VALID) << 16;
2243			} else {
2244				ivar &= 0xFFFFFF00;
2245				ivar |= que->msix | E1000_IVAR_VALID;
2246			}
2247			E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2248		}
2249		/* TX entries */
2250		for (int i = 0; i < adapter->num_queues; i++) {
2251			u32 index = i >> 1;
2252			ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2253			que = &adapter->queues[i];
2254			if (i & 1) {
2255				ivar &= 0x00FFFFFF;
2256				ivar |= (que->msix | E1000_IVAR_VALID) << 24;
2257			} else {
2258				ivar &= 0xFFFF00FF;
2259				ivar |= (que->msix | E1000_IVAR_VALID) << 8;
2260			}
2261			E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2262			adapter->eims_mask |= que->eims;
2263		}
2264
2265		/* And for the link interrupt */
2266		ivar = (adapter->linkvec | E1000_IVAR_VALID) << 8;
2267		adapter->link_mask = 1 << adapter->linkvec;
2268		adapter->eims_mask |= adapter->link_mask;
2269		E1000_WRITE_REG(hw, E1000_IVAR_MISC, ivar);
2270		break;
2271	case e1000_82576:
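		/*
		** The 82576 uses the same byte-wide IVAR entries but
		** indexes them by (queue & 0x7): queues 0-7 use the low
		** RX/TX bytes, queues 8-15 the high ones.
		*/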
2272		/* RX entries */
2273		for (int i = 0; i < adapter->num_queues; i++) {
2274			u32 index = i & 0x7; /* Each IVAR has two entries */
2275			ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2276			que = &adapter->queues[i];
2277			if (i < 8) {
2278				ivar &= 0xFFFFFF00;
2279				ivar |= que->msix | E1000_IVAR_VALID;
2280			} else {
2281				ivar &= 0xFF00FFFF;
2282				ivar |= (que->msix | E1000_IVAR_VALID) << 16;
2283			}
2284			E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2285			adapter->eims_mask |= que->eims;
2286		}
2287		/* TX entries */
2288		for (int i = 0; i < adapter->num_queues; i++) {
2289			u32 index = i & 0x7; /* Each IVAR has two entries */
2290			ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2291			que = &adapter->queues[i];
2292			if (i < 8) {
2293				ivar &= 0xFFFF00FF;
2294				ivar |= (que->msix | E1000_IVAR_VALID) << 8;
2295			} else {
2296				ivar &= 0x00FFFFFF;
2297				ivar |= (que->msix | E1000_IVAR_VALID) << 24;
2298			}
2299			E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2300			adapter->eims_mask |= que->eims;
2301		}
2302
2303		/* And for the link interrupt */
2304		ivar = (adapter->linkvec | E1000_IVAR_VALID) << 8;
2305		adapter->link_mask = 1 << adapter->linkvec;
2306		adapter->eims_mask |= adapter->link_mask;
2307		E1000_WRITE_REG(hw, E1000_IVAR_MISC, ivar);
2308		break;
2309
2310	case e1000_82575:
2311                /* Enable MSI-X support */
2312		tmp = E1000_READ_REG(hw, E1000_CTRL_EXT);
2313                tmp |= E1000_CTRL_EXT_PBA_CLR;
2314                /* Auto-Mask interrupts upon ICR read. */
2315                tmp |= E1000_CTRL_EXT_EIAME;
2316                tmp |= E1000_CTRL_EXT_IRCA;
2317                E1000_WRITE_REG(hw, E1000_CTRL_EXT, tmp);
2318
2319		/* Queues */
2320		for (int i = 0; i < adapter->num_queues; i++) {
2321			que = &adapter->queues[i];
2322			tmp = E1000_EICR_RX_QUEUE0 << i;
2323			tmp |= E1000_EICR_TX_QUEUE0 << i;
2324			que->eims = tmp;
2325			E1000_WRITE_REG_ARRAY(hw, E1000_MSIXBM(0),
2326			    i, que->eims);
2327			adapter->eims_mask |= que->eims;
2328		}
2329
2330		/* Link */
2331		E1000_WRITE_REG(hw, E1000_MSIXBM(adapter->linkvec),
2332		    E1000_EIMS_OTHER);
2333		adapter->link_mask |= E1000_EIMS_OTHER;
2334		adapter->eims_mask |= adapter->link_mask;
2335	default:
2336		break;
2337	}
2338
2339	/* Set the starting interrupt rate */
2340        if (hw->mac.type == e1000_82575)
2341                newitr |= newitr << 16;
2342        else
2343                newitr |= 0x8000000;
2344
2345	for (int i = 0; i < adapter->num_queues; i++) {
2346		que = &adapter->queues[i];
2347		E1000_WRITE_REG(hw, E1000_EITR(que->msix), newitr);
2348	}
2349
2350	return;
2351}
2352
2353
2354static void
2355igb_free_pci_resources(struct adapter *adapter)
2356{
2357	struct		igb_queue *que = adapter->queues;
2358	device_t	dev = adapter->dev;
2359	int		rid;
2360
2361	/*
2362	** There is a slight possibility of a failure mode
2363	** in attach that will result in entering this function
2364	** before interrupt resources have been initialized, and
2365	** in that case we do not want to execute the loops below.
2366	** We can detect this reliably by the state of the adapter
2367	** res pointer.
2368	*/
2369	if (adapter->res == NULL)
2370		goto mem;
2371
2372	/*
2373	 * First release all the interrupt resources:
2374	 */
2375	for (int i = 0; i < adapter->num_queues; i++, que++) {
2376		rid = que->msix + 1;
2377		if (que->tag != NULL) {
2378			bus_teardown_intr(dev, que->res, que->tag);
2379			que->tag = NULL;
2380		}
2381		if (que->res != NULL)
2382			bus_release_resource(dev,
2383			    SYS_RES_IRQ, rid, que->res);
2384	}
2385
2386	/* Clean the Legacy or Link interrupt last */
2387	if (adapter->linkvec) /* we are doing MSIX */
2388		rid = adapter->linkvec + 1;
2389	else
2390		rid = (adapter->msix != 0) ? 1 : 0;
2391
2392	if (adapter->tag != NULL) {
2393		bus_teardown_intr(dev, adapter->res, adapter->tag);
2394		adapter->tag = NULL;
2395	}
2396	if (adapter->res != NULL)
2397		bus_release_resource(dev, SYS_RES_IRQ, rid, adapter->res);
2398
2399mem:
2400	if (adapter->msix)
2401		pci_release_msi(dev);
2402
2403	if (adapter->msix_mem != NULL)
2404		bus_release_resource(dev, SYS_RES_MEMORY,
2405		    PCIR_BAR(IGB_MSIX_BAR), adapter->msix_mem);
2406
2407	if (adapter->pci_mem != NULL)
2408		bus_release_resource(dev, SYS_RES_MEMORY,
2409		    PCIR_BAR(0), adapter->pci_mem);
2410
2411}
2412
2413/*
2414 * Set up either MSI/X or MSI
2415 */
2416static int
2417igb_setup_msix(struct adapter *adapter)
2418{
2419	device_t dev = adapter->dev;
2420	int rid, want, queues, msgs;
2421
2422	/* tuneable override */
2423	if (igb_enable_msix == 0)
2424		goto msi;
2425
2426	/* First try MSI/X */
2427	rid = PCIR_BAR(IGB_MSIX_BAR);
2428	adapter->msix_mem = bus_alloc_resource_any(dev,
2429	    SYS_RES_MEMORY, &rid, RF_ACTIVE);
2430       	if (!adapter->msix_mem) {
2431		/* May not be enabled */
2432		device_printf(adapter->dev,
2433		    "Unable to map MSIX table \n");
2434		goto msi;
2435	}
2436
2437	msgs = pci_msix_count(dev);
2438	if (msgs == 0) { /* system has msix disabled */
2439		bus_release_resource(dev, SYS_RES_MEMORY,
2440		    PCIR_BAR(IGB_MSIX_BAR), adapter->msix_mem);
2441		adapter->msix_mem = NULL;
2442		goto msi;
2443	}
2444
2445	/* Figure out a reasonable auto config value */
2446	queues = (mp_ncpus > (msgs-1)) ? (msgs-1) : mp_ncpus;
2447
2448	/* Manual override */
2449	if (igb_num_queues != 0)
2450		queues = igb_num_queues;
2451
2452	/* Can have max of 4 queues on 82575 */
2453	if ((adapter->hw.mac.type == e1000_82575) && (queues > 4))
2454		queues = 4;
2455
2456	/*
2457	** One vector (RX/TX pair) per queue
2458	** plus an additional for Link interrupt
2459	*/
2460	want = queues + 1;
2461	if (msgs >= want)
2462		msgs = want;
2463	else {
2464               	device_printf(adapter->dev,
2465		    "MSIX Configuration Problem, "
2466		    "%d vectors configured, but %d queues wanted!\n",
2467		    msgs, want);
2468		return (ENXIO);
2469	}
2470	if ((msgs) && pci_alloc_msix(dev, &msgs) == 0) {
2471               	device_printf(adapter->dev,
2472		    "Using MSIX interrupts with %d vectors\n", msgs);
2473		adapter->num_queues = queues;
2474		return (msgs);
2475	}
2476msi:
2477       	msgs = pci_msi_count(dev);
2478       	if (msgs == 1 && pci_alloc_msi(dev, &msgs) == 0)
2479               	device_printf(adapter->dev,"Using MSI interrupt\n");
2480	return (msgs);
2481}
2482
2483/*********************************************************************
2484 *
2485 *  Set up a fresh starting state
2486 *
2487 **********************************************************************/
2488static void
2489igb_reset(struct adapter *adapter)
2490{
2491	device_t	dev = adapter->dev;
2492	struct e1000_hw *hw = &adapter->hw;
2493	struct e1000_fc_info *fc = &hw->fc;
2494	struct ifnet	*ifp = adapter->ifp;
2495	u32		pba = 0;
2496	u16		hwm;
2497
2498	INIT_DEBUGOUT("igb_reset: begin");
2499
2500	/* Let the firmware know the OS is in control */
2501	igb_get_hw_control(adapter);
2502
2503	/*
2504	 * Packet Buffer Allocation (PBA)
2505	 * Writing PBA sets the receive portion of the buffer;
2506	 * the remainder is used for the transmit buffer.
2507	 */
2508	switch (hw->mac.type) {
2509	case e1000_82575:
2510		pba = E1000_PBA_32K;
2511		break;
2512	case e1000_82576:
2513		pba = E1000_PBA_64K;
2514		break;
2515	case e1000_82580:
2516		pba = E1000_PBA_35K;
2517	default:
2518		break;
2519	}
2520
2521	/* Special needs in case of Jumbo frames */
2522	if ((hw->mac.type == e1000_82575) && (ifp->if_mtu > ETHERMTU)) {
2523		u32 tx_space, min_tx, min_rx;
2524		pba = E1000_READ_REG(hw, E1000_PBA);
2525		tx_space = pba >> 16;
2526		pba &= 0xffff;
2527		min_tx = (adapter->max_frame_size +
2528		    sizeof(struct e1000_tx_desc) - ETHERNET_FCS_SIZE) * 2;
2529		min_tx = roundup2(min_tx, 1024);
2530		min_tx >>= 10;
2531                min_rx = adapter->max_frame_size;
2532                min_rx = roundup2(min_rx, 1024);
2533                min_rx >>= 10;
2534		if (tx_space < min_tx &&
2535		    ((min_tx - tx_space) < pba)) {
2536			pba = pba - (min_tx - tx_space);
2537			/*
2538                         * if short on rx space, rx wins
2539                         * and must trump tx adjustment
2540			 */
2541                        if (pba < min_rx)
2542                                pba = min_rx;
2543		}
2544		E1000_WRITE_REG(hw, E1000_PBA, pba);
2545	}
2546
2547	INIT_DEBUGOUT1("igb_init: pba=%dK",pba);
2548
2549	/*
2550	 * These parameters control the automatic generation (Tx) and
2551	 * response (Rx) to Ethernet PAUSE frames.
2552	 * - High water mark should allow for at least two frames to be
2553	 *   received after sending an XOFF.
2554	 * - Low water mark works best when it is very near the high water mark.
2555	 *   This allows the receiver to restart by sending XON when it has
2556	 *   drained a bit.
2557	 */
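	/*
	** pba is in kilobytes, hence the << 10: hwm is the smaller of
	** 90% of the RX packet buffer and the buffer size less two
	** maximum-sized frames.
	*/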
2558	hwm = min(((pba << 10) * 9 / 10),
2559	    ((pba << 10) - 2 * adapter->max_frame_size));
2560
2561	if (hw->mac.type < e1000_82576) {
2562		fc->high_water = hwm & 0xFFF8;  /* 8-byte granularity */
2563		fc->low_water = fc->high_water - 8;
2564	} else {
2565		fc->high_water = hwm & 0xFFF0;  /* 16-byte granularity */
2566		fc->low_water = fc->high_water - 16;
2567	}
2568
2569	fc->pause_time = IGB_FC_PAUSE_TIME;
2570	fc->send_xon = TRUE;
2571
2572	/* Set Flow control, use the tunable location if sane */
2573	if ((igb_fc_setting >= 0) && (igb_fc_setting < 4))
2574		fc->requested_mode = igb_fc_setting;
2575	else
2576		fc->requested_mode = e1000_fc_none;
2577
2578	fc->current_mode = fc->requested_mode;
2579
2580	/* Issue a global reset */
2581	e1000_reset_hw(hw);
2582	E1000_WRITE_REG(hw, E1000_WUC, 0);
2583
2584	if (e1000_init_hw(hw) < 0)
2585		device_printf(dev, "Hardware Initialization Failed\n");
2586
2587	if (hw->mac.type == e1000_82580) {
2588		u32 reg;
2589
2590		hwm = (pba << 10) - (2 * adapter->max_frame_size);
2591		/*
2592		 * 0x80000000 - enable DMA COAL
2593		 * 0x10000000 - use L0s as low power
2594		 * 0x20000000 - use L1 as low power
2595		 * X << 16 - exit dma coal when rx data exceeds X kB
2596		 * Y - upper limit to stay in dma coal in units of 32usecs
2597		 */
2598		E1000_WRITE_REG(hw, E1000_DMACR,
2599		    0xA0000006 | ((hwm << 6) & 0x00FF0000));
2600
2601		/* set hwm to PBA -  2 * max frame size */
2602		E1000_WRITE_REG(hw, E1000_FCRTC, hwm);
2603		/*
2604		 * This sets the time to wait before requesting transition to
2605		 * low power state to number of usecs needed to receive 1 512
2606		 * byte frame at gigabit line rate
2607		 */
2608		E1000_WRITE_REG(hw, E1000_DMCTLX, 4);
2609
2610		/* free space in tx packet buffer to wake from DMA coal */
2611		E1000_WRITE_REG(hw, E1000_DMCTXTH,
2612		    (20480 - (2 * adapter->max_frame_size)) >> 6);
2613
2614		/* make low power state decision controlled by DMA coal */
2615		reg = E1000_READ_REG(hw, E1000_PCIEMISC);
2616		E1000_WRITE_REG(hw, E1000_PCIEMISC,
2617		    reg | E1000_PCIEMISC_LX_DECISION);
2618	}
2619
2620	E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);
2621	e1000_get_phy_info(hw);
2622	e1000_check_for_link(hw);
2623	return;
2624}
2625
2626/*********************************************************************
2627 *
2628 *  Set up the networking device structure and register an interface.
2629 *
2630 **********************************************************************/
2631static void
2632igb_setup_interface(device_t dev, struct adapter *adapter)
2633{
2634	struct ifnet   *ifp;
2635
2636	INIT_DEBUGOUT("igb_setup_interface: begin");
2637
2638	ifp = adapter->ifp = if_alloc(IFT_ETHER);
2639	if (ifp == NULL)
2640		panic("%s: can not if_alloc()", device_get_nameunit(dev));
2641	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
2642	ifp->if_mtu = ETHERMTU;
2643	ifp->if_init =  igb_init;
2644	ifp->if_softc = adapter;
2645	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
2646	ifp->if_ioctl = igb_ioctl;
2647	ifp->if_start = igb_start;
2648#if __FreeBSD_version >= 800000
2649	ifp->if_transmit = igb_mq_start;
2650	ifp->if_qflush = igb_qflush;
2651#endif
2652	IFQ_SET_MAXLEN(&ifp->if_snd, adapter->num_tx_desc - 1);
2653	ifp->if_snd.ifq_drv_maxlen = adapter->num_tx_desc - 1;
2654	IFQ_SET_READY(&ifp->if_snd);
2655
2656	ether_ifattach(ifp, adapter->hw.mac.addr);
2657
2658	ifp->if_capabilities = ifp->if_capenable = 0;
2659
2660	ifp->if_capabilities = IFCAP_HWCSUM | IFCAP_VLAN_MTU;
2661	ifp->if_capabilities |= IFCAP_TSO4;
2662	ifp->if_capabilities |= IFCAP_JUMBO_MTU;
2663	if (igb_header_split)
2664		ifp->if_capabilities |= IFCAP_LRO;
2665
2666	ifp->if_capenable = ifp->if_capabilities;
2667#ifdef DEVICE_POLLING
2668	ifp->if_capabilities |= IFCAP_POLLING;
2669#endif
2670
2671	/*
2672	 * Tell the upper layer(s) we support long frames.
2673	 */
2674	ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);
2675	ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU;
2676	ifp->if_capenable |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU;
2677
2678	/*
2679	 * Specify the media types supported by this adapter and register
2680	 * callbacks to update media and link information
2681	 */
2682	ifmedia_init(&adapter->media, IFM_IMASK,
2683	    igb_media_change, igb_media_status);
2684	if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
2685	    (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
2686		ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_SX | IFM_FDX,
2687			    0, NULL);
2688		ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_SX, 0, NULL);
2689	} else {
2690		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T, 0, NULL);
2691		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T | IFM_FDX,
2692			    0, NULL);
2693		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX,
2694			    0, NULL);
2695		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
2696			    0, NULL);
2697		if (adapter->hw.phy.type != e1000_phy_ife) {
2698			ifmedia_add(&adapter->media,
2699				IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
2700			ifmedia_add(&adapter->media,
2701				IFM_ETHER | IFM_1000_T, 0, NULL);
2702		}
2703	}
2704	ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
2705	ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
2706}
2707
2708
2709/*
2710 * Manage DMA'able memory.
2711 */
2712static void
2713igb_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
2714{
2715	if (error)
2716		return;
2717	*(bus_addr_t *) arg = segs[0].ds_addr;
2718}
2719
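/*********************************************************************
 *
 *  igb_dma_malloc: create a DMA tag and allocate/map one contiguous,
 *  IGB_DBA_ALIGN-aligned buffer (used for the descriptor rings),
 *  returning its kernel virtual and bus addresses in 'dma'.
 *
 **********************************************************************/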
2720static int
2721igb_dma_malloc(struct adapter *adapter, bus_size_t size,
2722        struct igb_dma_alloc *dma, int mapflags)
2723{
2724	int error;
2725
2726	error = bus_dma_tag_create(bus_get_dma_tag(adapter->dev), /* parent */
2727				IGB_DBA_ALIGN, 0,	/* alignment, bounds */
2728				BUS_SPACE_MAXADDR,	/* lowaddr */
2729				BUS_SPACE_MAXADDR,	/* highaddr */
2730				NULL, NULL,		/* filter, filterarg */
2731				size,			/* maxsize */
2732				1,			/* nsegments */
2733				size,			/* maxsegsize */
2734				0,			/* flags */
2735				NULL,			/* lockfunc */
2736				NULL,			/* lockarg */
2737				&dma->dma_tag);
2738	if (error) {
2739		device_printf(adapter->dev,
2740		    "%s: bus_dma_tag_create failed: %d\n",
2741		    __func__, error);
2742		goto fail_0;
2743	}
2744
2745	error = bus_dmamem_alloc(dma->dma_tag, (void**) &dma->dma_vaddr,
2746	    BUS_DMA_NOWAIT, &dma->dma_map);
2747	if (error) {
2748		device_printf(adapter->dev,
2749		    "%s: bus_dmamem_alloc(%ju) failed: %d\n",
2750		    __func__, (uintmax_t)size, error);
2751		goto fail_2;
2752	}
2753
2754	dma->dma_paddr = 0;
2755	error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
2756	    size, igb_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT);
2757	if (error || dma->dma_paddr == 0) {
2758		device_printf(adapter->dev,
2759		    "%s: bus_dmamap_load failed: %d\n",
2760		    __func__, error);
2761		goto fail_3;
2762	}
2763
2764	return (0);
2765
2766fail_3:
2767	bus_dmamap_unload(dma->dma_tag, dma->dma_map);
2768fail_2:
2769	bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
2770	bus_dma_tag_destroy(dma->dma_tag);
2771fail_0:
2772	dma->dma_map = NULL;
2773	dma->dma_tag = NULL;
2774
2775	return (error);
2776}
2777
2778static void
2779igb_dma_free(struct adapter *adapter, struct igb_dma_alloc *dma)
2780{
2781	if (dma->dma_tag == NULL)
2782		return;
2783	if (dma->dma_map != NULL) {
2784		bus_dmamap_sync(dma->dma_tag, dma->dma_map,
2785		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
2786		bus_dmamap_unload(dma->dma_tag, dma->dma_map);
2787		bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
2788		dma->dma_map = NULL;
2789	}
2790	bus_dma_tag_destroy(dma->dma_tag);
2791	dma->dma_tag = NULL;
2792}
2793
2794
2795/*********************************************************************
2796 *
2797 *  Allocate memory for the transmit and receive rings, and then
2798 *  the descriptors associated with each; called only once at attach.
2799 *
2800 **********************************************************************/
2801static int
2802igb_allocate_queues(struct adapter *adapter)
2803{
2804	device_t dev = adapter->dev;
2805	struct igb_queue	*que = NULL;
2806	struct tx_ring		*txr = NULL;
2807	struct rx_ring		*rxr = NULL;
2808	int rsize, tsize, error = E1000_SUCCESS;
2809	int txconf = 0, rxconf = 0;
2810
2811	/* First allocate the top level queue structs */
2812	if (!(adapter->queues =
2813	    (struct igb_queue *) malloc(sizeof(struct igb_queue) *
2814	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2815		device_printf(dev, "Unable to allocate queue memory\n");
2816		error = ENOMEM;
2817		goto fail;
2818	}
2819
2820	/* Next allocate the TX ring struct memory */
2821	if (!(adapter->tx_rings =
2822	    (struct tx_ring *) malloc(sizeof(struct tx_ring) *
2823	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2824		device_printf(dev, "Unable to allocate TX ring memory\n");
2825		error = ENOMEM;
2826		goto tx_fail;
2827	}
2828
2829	/* Now allocate the RX */
2830	if (!(adapter->rx_rings =
2831	    (struct rx_ring *) malloc(sizeof(struct rx_ring) *
2832	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2833		device_printf(dev, "Unable to allocate RX ring memory\n");
2834		error = ENOMEM;
2835		goto rx_fail;
2836	}
2837
2838	tsize = roundup2(adapter->num_tx_desc *
2839	    sizeof(union e1000_adv_tx_desc), IGB_DBA_ALIGN);
2840	/*
2841	 * Now set up the TX queues, txconf is needed to handle the
2842	 * possibility that things fail midcourse and we need to
2843	 * undo memory gracefully
2844	 */
2845	for (int i = 0; i < adapter->num_queues; i++, txconf++) {
2846		/* Set up some basics */
2847		txr = &adapter->tx_rings[i];
2848		txr->adapter = adapter;
2849		txr->me = i;
2850
2851		/* Initialize the TX lock */
2852		snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
2853		    device_get_nameunit(dev), txr->me);
2854		mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF);
2855
2856		if (igb_dma_malloc(adapter, tsize,
2857			&txr->txdma, BUS_DMA_NOWAIT)) {
2858			device_printf(dev,
2859			    "Unable to allocate TX Descriptor memory\n");
2860			error = ENOMEM;
2861			goto err_tx_desc;
2862		}
2863		txr->tx_base = (struct e1000_tx_desc *)txr->txdma.dma_vaddr;
2864		bzero((void *)txr->tx_base, tsize);
2865
2866        	/* Now allocate transmit buffers for the ring */
2867        	if (igb_allocate_transmit_buffers(txr)) {
2868			device_printf(dev,
2869			    "Critical Failure setting up transmit buffers\n");
2870			error = ENOMEM;
2871			goto err_tx_desc;
2872        	}
2873#if __FreeBSD_version >= 800000
2874		/* Allocate a buf ring */
2875		txr->br = buf_ring_alloc(IGB_BR_SIZE, M_DEVBUF,
2876		    M_WAITOK, &txr->tx_mtx);
2877#endif
2878	}
2879
2880	/*
2881	 * Next the RX queues...
2882	 */
2883	rsize = roundup2(adapter->num_rx_desc *
2884	    sizeof(union e1000_adv_rx_desc), IGB_DBA_ALIGN);
2885	for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
2886		rxr = &adapter->rx_rings[i];
2887		rxr->adapter = adapter;
2888		rxr->me = i;
2889
2890		/* Initialize the RX lock */
2891		snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
2892		    device_get_nameunit(dev), rxr->me);
2893		mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF);
2894
2895		if (igb_dma_malloc(adapter, rsize,
2896			&rxr->rxdma, BUS_DMA_NOWAIT)) {
2897			device_printf(dev,
2898			    "Unable to allocate RxDescriptor memory\n");
2899			error = ENOMEM;
2900			goto err_rx_desc;
2901		}
2902		rxr->rx_base = (union e1000_adv_rx_desc *)rxr->rxdma.dma_vaddr;
2903		bzero((void *)rxr->rx_base, rsize);
2904
2905        	/* Allocate receive buffers for the ring*/
2906		if (igb_allocate_receive_buffers(rxr)) {
2907			device_printf(dev,
2908			    "Critical Failure setting up receive buffers\n");
2909			error = ENOMEM;
2910			goto err_rx_desc;
2911		}
2912	}
2913
2914	/*
2915	** Finally set up the queue holding structs
2916	*/
2917	for (int i = 0; i < adapter->num_queues; i++) {
2918		que = &adapter->queues[i];
2919		que->adapter = adapter;
2920		que->txr = &adapter->tx_rings[i];
2921		que->rxr = &adapter->rx_rings[i];
2922	}
2923
2924	return (0);
2925
2926err_rx_desc:
2927	for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
2928		igb_dma_free(adapter, &rxr->rxdma);
2929err_tx_desc:
2930	for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
2931		igb_dma_free(adapter, &txr->txdma);
2932	free(adapter->rx_rings, M_DEVBUF);
2933rx_fail:
#if __FreeBSD_version >= 800000
2934	buf_ring_free(txr->br, M_DEVBUF);
#endif
2935	free(adapter->tx_rings, M_DEVBUF);
2936tx_fail:
2937	free(adapter->queues, M_DEVBUF);
2938fail:
2939	return (error);
2940}
2941
2942/*********************************************************************
2943 *
2944 *  Allocate memory for tx_buffer structures. The tx_buffer stores all
2945 *  the information needed to transmit a packet on the wire. This is
2946 *  called only once at attach; setup is done on every reset.
2947 *
2948 **********************************************************************/
2949static int
2950igb_allocate_transmit_buffers(struct tx_ring *txr)
2951{
2952	struct adapter *adapter = txr->adapter;
2953	device_t dev = adapter->dev;
2954	struct igb_tx_buffer *txbuf;
2955	int error, i;
2956
2957	/*
2958	 * Setup DMA descriptor areas.
2959	 */
2960	if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
2961			       1, 0,			/* alignment, bounds */
2962			       BUS_SPACE_MAXADDR,	/* lowaddr */
2963			       BUS_SPACE_MAXADDR,	/* highaddr */
2964			       NULL, NULL,		/* filter, filterarg */
2965			       IGB_TSO_SIZE,		/* maxsize */
2966			       IGB_MAX_SCATTER,		/* nsegments */
2967			       PAGE_SIZE,		/* maxsegsize */
2968			       0,			/* flags */
2969			       NULL,			/* lockfunc */
2970			       NULL,			/* lockfuncarg */
2971			       &txr->txtag))) {
2972		device_printf(dev,"Unable to allocate TX DMA tag\n");
2973		goto fail;
2974	}
2975
2976	if (!(txr->tx_buffers =
2977	    (struct igb_tx_buffer *) malloc(sizeof(struct igb_tx_buffer) *
2978	    adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2979		device_printf(dev, "Unable to allocate tx_buffer memory\n");
2980		error = ENOMEM;
2981		goto fail;
2982	}
2983
2984        /* Create the descriptor buffer dma maps */
2985	txbuf = txr->tx_buffers;
2986	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
2987		error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
2988		if (error != 0) {
2989			device_printf(dev, "Unable to create TX DMA map\n");
2990			goto fail;
2991		}
2992	}
2993
2994	return 0;
2995fail:
2996	/* We free all, it handles case where we are in the middle */
2997	igb_free_transmit_structures(adapter);
2998	return (error);
2999}
3000
3001/*********************************************************************
3002 *
3003 *  Initialize a transmit ring.
3004 *
3005 **********************************************************************/
3006static void
3007igb_setup_transmit_ring(struct tx_ring *txr)
3008{
3009	struct adapter *adapter = txr->adapter;
3010	struct igb_tx_buffer *txbuf;
3011	int i;
3012
3013	/* Clear the old descriptor contents */
3014	IGB_TX_LOCK(txr);
3015	bzero((void *)txr->tx_base,
3016	      (sizeof(union e1000_adv_tx_desc)) * adapter->num_tx_desc);
3017	/* Reset indices */
3018	txr->next_avail_desc = 0;
3019	txr->next_to_clean = 0;
3020
3021	/* Free any existing tx buffers. */
3022        txbuf = txr->tx_buffers;
3023	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3024		if (txbuf->m_head != NULL) {
3025			bus_dmamap_sync(txr->txtag, txbuf->map,
3026			    BUS_DMASYNC_POSTWRITE);
3027			bus_dmamap_unload(txr->txtag, txbuf->map);
3028			m_freem(txbuf->m_head);
3029			txbuf->m_head = NULL;
3030		}
3031		/* clear the watch index */
3032		txbuf->next_eop = -1;
3033        }
3034
3035	/* Set number of descriptors available */
3036	txr->tx_avail = adapter->num_tx_desc;
3037
3038	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3039	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3040	IGB_TX_UNLOCK(txr);
3041}
3042
3043/*********************************************************************
3044 *
3045 *  Initialize all transmit rings.
3046 *
3047 **********************************************************************/
3048static void
3049igb_setup_transmit_structures(struct adapter *adapter)
3050{
3051	struct tx_ring *txr = adapter->tx_rings;
3052
3053	for (int i = 0; i < adapter->num_queues; i++, txr++)
3054		igb_setup_transmit_ring(txr);
3055
3056	return;
3057}
3058
3059/*********************************************************************
3060 *
3061 *  Enable transmit unit.
3062 *
3063 **********************************************************************/
3064static void
3065igb_initialize_transmit_units(struct adapter *adapter)
3066{
3067	struct tx_ring	*txr = adapter->tx_rings;
3068	struct e1000_hw *hw = &adapter->hw;
3069	u32		tctl, txdctl;
3070
3071	 INIT_DEBUGOUT("igb_initialize_transmit_units: begin");
3072
3073	/* Setup the Tx Descriptor Rings */
3074	for (int i = 0; i < adapter->num_queues; i++, txr++) {
3075		u64 bus_addr = txr->txdma.dma_paddr;
3076
3077		E1000_WRITE_REG(hw, E1000_TDLEN(i),
3078		    adapter->num_tx_desc * sizeof(struct e1000_tx_desc));
3079		E1000_WRITE_REG(hw, E1000_TDBAH(i),
3080		    (uint32_t)(bus_addr >> 32));
3081		E1000_WRITE_REG(hw, E1000_TDBAL(i),
3082		    (uint32_t)bus_addr);
3083
3084		/* Setup the HW Tx Head and Tail descriptor pointers */
3085		E1000_WRITE_REG(hw, E1000_TDT(i), 0);
3086		E1000_WRITE_REG(hw, E1000_TDH(i), 0);
3087
3088		HW_DEBUGOUT2("Base = %x, Length = %x\n",
3089		    E1000_READ_REG(hw, E1000_TDBAL(i)),
3090		    E1000_READ_REG(hw, E1000_TDLEN(i)));
3091
3092		txr->watchdog_check = FALSE;
3093
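		/*
		** TXDCTL packs the prefetch (PTHRESH), host (HTHRESH) and
		** write-back (WTHRESH) thresholds into successive byte
		** fields, plus the queue enable bit.
		*/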
3094		txdctl = E1000_READ_REG(hw, E1000_TXDCTL(i));
3095		txdctl |= IGB_TX_PTHRESH;
3096		txdctl |= IGB_TX_HTHRESH << 8;
3097		txdctl |= IGB_TX_WTHRESH << 16;
3098		txdctl |= E1000_TXDCTL_QUEUE_ENABLE;
3099		E1000_WRITE_REG(hw, E1000_TXDCTL(i), txdctl);
3100	}
3101
3102	/* Program the Transmit Control Register */
3103	tctl = E1000_READ_REG(hw, E1000_TCTL);
3104	tctl &= ~E1000_TCTL_CT;
3105	tctl |= (E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN |
3106		   (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT));
3107
3108	e1000_config_collision_dist(hw);
3109
3110	/* This write will effectively turn on the transmit unit. */
3111	E1000_WRITE_REG(hw, E1000_TCTL, tctl);
3112}
3113
3114/*********************************************************************
3115 *
3116 *  Free all transmit rings.
3117 *
3118 **********************************************************************/
3119static void
3120igb_free_transmit_structures(struct adapter *adapter)
3121{
3122	struct tx_ring *txr = adapter->tx_rings;
3123
3124	for (int i = 0; i < adapter->num_queues; i++, txr++) {
3125		IGB_TX_LOCK(txr);
3126		igb_free_transmit_buffers(txr);
3127		igb_dma_free(adapter, &txr->txdma);
3128		IGB_TX_UNLOCK(txr);
3129		IGB_TX_LOCK_DESTROY(txr);
3130	}
3131	free(adapter->tx_rings, M_DEVBUF);
3132}
3133
3134/*********************************************************************
3135 *
3136 *  Free transmit ring related data structures.
3137 *
3138 **********************************************************************/
3139static void
3140igb_free_transmit_buffers(struct tx_ring *txr)
3141{
3142	struct adapter *adapter = txr->adapter;
3143	struct igb_tx_buffer *tx_buffer;
3144	int             i;
3145
3146	INIT_DEBUGOUT("free_transmit_ring: begin");
3147
3148	if (txr->tx_buffers == NULL)
3149		return;
3150
3151	tx_buffer = txr->tx_buffers;
3152	for (i = 0; i < adapter->num_tx_desc; i++, tx_buffer++) {
3153		if (tx_buffer->m_head != NULL) {
3154			bus_dmamap_sync(txr->txtag, tx_buffer->map,
3155			    BUS_DMASYNC_POSTWRITE);
3156			bus_dmamap_unload(txr->txtag,
3157			    tx_buffer->map);
3158			m_freem(tx_buffer->m_head);
3159			tx_buffer->m_head = NULL;
3160			if (tx_buffer->map != NULL) {
3161				bus_dmamap_destroy(txr->txtag,
3162				    tx_buffer->map);
3163				tx_buffer->map = NULL;
3164			}
3165		} else if (tx_buffer->map != NULL) {
3166			bus_dmamap_unload(txr->txtag,
3167			    tx_buffer->map);
3168			bus_dmamap_destroy(txr->txtag,
3169			    tx_buffer->map);
3170			tx_buffer->map = NULL;
3171		}
3172	}
3173#if __FreeBSD_version >= 800000
3174	if (txr->br != NULL)
3175		buf_ring_free(txr->br, M_DEVBUF);
3176#endif
3177	if (txr->tx_buffers != NULL) {
3178		free(txr->tx_buffers, M_DEVBUF);
3179		txr->tx_buffers = NULL;
3180	}
3181	if (txr->txtag != NULL) {
3182		bus_dma_tag_destroy(txr->txtag);
3183		txr->txtag = NULL;
3184	}
3185	return;
3186}
3187
3188/**********************************************************************
3189 *
3190 *  Setup work for hardware segmentation offload (TSO)
3191 *
3192 **********************************************************************/
3193static boolean_t
3194igb_tso_setup(struct tx_ring *txr, struct mbuf *mp, u32 *hdrlen)
3195{
3196	struct adapter *adapter = txr->adapter;
3197	struct e1000_adv_tx_context_desc *TXD;
3198	struct igb_tx_buffer        *tx_buffer;
3199	u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0;
3200	u32 mss_l4len_idx = 0;
3201	u16 vtag = 0;
3202	int ctxd, ehdrlen, ip_hlen, tcp_hlen;
3203	struct ether_vlan_header *eh;
3204	struct ip *ip;
3205	struct tcphdr *th;
3206
3207
3208	/*
3209	 * Determine where frame payload starts.
3210	 * Jump over vlan headers if already present
3211	 */
3212	eh = mtod(mp, struct ether_vlan_header *);
3213	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN))
3214		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3215	else
3216		ehdrlen = ETHER_HDR_LEN;
3217
3218	/* Ensure we have at least the IP+TCP header in the first mbuf. */
3219	if (mp->m_len < ehdrlen + sizeof(struct ip) + sizeof(struct tcphdr))
3220		return FALSE;
3221
3222	/* Only supports IPV4 for now */
3223	ctxd = txr->next_avail_desc;
3224	tx_buffer = &txr->tx_buffers[ctxd];
3225	TXD = (struct e1000_adv_tx_context_desc *) &txr->tx_base[ctxd];
3226
3227	ip = (struct ip *)(mp->m_data + ehdrlen);
3228	if (ip->ip_p != IPPROTO_TCP)
3229                return FALSE;   /* 0 */
3230	ip->ip_sum = 0;
3231	ip_hlen = ip->ip_hl << 2;
3232	th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
3233	th->th_sum = in_pseudo(ip->ip_src.s_addr,
3234	    ip->ip_dst.s_addr, htons(IPPROTO_TCP));
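	/*
	** For TSO the TCP checksum is seeded with only the pseudo-header
	** sum (addresses and protocol); the hardware fills in the
	** per-segment length and final checksum.
	*/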
3235	tcp_hlen = th->th_off << 2;
3236	/*
3237	 * Calculate header length, this is used
3238	 * in the transmit desc in igb_xmit
3239	 */
3240	*hdrlen = ehdrlen + ip_hlen + tcp_hlen;
3241
3242	/* VLAN MACLEN IPLEN */
3243	if (mp->m_flags & M_VLANTAG) {
3244		vtag = htole16(mp->m_pkthdr.ether_vtag);
3245		vlan_macip_lens |= (vtag << E1000_ADVTXD_VLAN_SHIFT);
3246	}
3247
3248	vlan_macip_lens |= (ehdrlen << E1000_ADVTXD_MACLEN_SHIFT);
3249	vlan_macip_lens |= ip_hlen;
3250	TXD->vlan_macip_lens |= htole32(vlan_macip_lens);
3251
3252	/* ADV DTYPE TUCMD */
3253	type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
3254	type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP;
3255	type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4;
3256	TXD->type_tucmd_mlhl |= htole32(type_tucmd_mlhl);
3257
3258	/* MSS L4LEN IDX */
3259	mss_l4len_idx |= (mp->m_pkthdr.tso_segsz << E1000_ADVTXD_MSS_SHIFT);
3260	mss_l4len_idx |= (tcp_hlen << E1000_ADVTXD_L4LEN_SHIFT);
3261	/* 82575 needs the queue index added */
3262	if (adapter->hw.mac.type == e1000_82575)
3263		mss_l4len_idx |= txr->me << 4;
3264	TXD->mss_l4len_idx = htole32(mss_l4len_idx);
3265
3266	TXD->seqnum_seed = htole32(0);
3267	tx_buffer->m_head = NULL;
3268	tx_buffer->next_eop = -1;
3269
3270	if (++ctxd == adapter->num_tx_desc)
3271		ctxd = 0;
3272
3273	txr->tx_avail--;
3274	txr->next_avail_desc = ctxd;
3275	return TRUE;
3276}
3277
3278
3279/*********************************************************************
3280 *
3281 *  Context Descriptor setup for VLAN or CSUM
3282 *
3283 **********************************************************************/
3284
3285static bool
3286igb_tx_ctx_setup(struct tx_ring *txr, struct mbuf *mp)
3287{
3288	struct adapter *adapter = txr->adapter;
3289	struct e1000_adv_tx_context_desc *TXD;
3290	struct igb_tx_buffer        *tx_buffer;
3291	u32 vlan_macip_lens, type_tucmd_mlhl, mss_l4len_idx;
3292	struct ether_vlan_header *eh;
3293	struct ip *ip = NULL;
3294	struct ip6_hdr *ip6;
3295	int  ehdrlen, ctxd, ip_hlen = 0;
3296	u16	etype, vtag = 0;
3297	u8	ipproto = 0;
3298	bool	offload = TRUE;
3299
3300	if ((mp->m_pkthdr.csum_flags & CSUM_OFFLOAD) == 0)
3301		offload = FALSE;
3302
3303	vlan_macip_lens = type_tucmd_mlhl = mss_l4len_idx = 0;
3304	ctxd = txr->next_avail_desc;
3305	tx_buffer = &txr->tx_buffers[ctxd];
3306	TXD = (struct e1000_adv_tx_context_desc *) &txr->tx_base[ctxd];
3307
3308	/*
3309	** In advanced descriptors the vlan tag must
3310	** be placed into the context descriptor, thus
3311	** we need to be here just for that setup.
3312	*/
3313	if (mp->m_flags & M_VLANTAG) {
3314		vtag = htole16(mp->m_pkthdr.ether_vtag);
3315		vlan_macip_lens |= (vtag << E1000_ADVTXD_VLAN_SHIFT);
3316	} else if (offload == FALSE)
3317		return FALSE;
3318
3319	/*
3320	 * Determine where frame payload starts.
3321	 * Jump over vlan headers if already present,
3322	 * helpful for QinQ too.
3323	 */
3324	eh = mtod(mp, struct ether_vlan_header *);
3325	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
3326		etype = ntohs(eh->evl_proto);
3327		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3328	} else {
3329		etype = ntohs(eh->evl_encap_proto);
3330		ehdrlen = ETHER_HDR_LEN;
3331	}
3332
3333	/* Set the ether header length */
3334	vlan_macip_lens |= ehdrlen << E1000_ADVTXD_MACLEN_SHIFT;
3335
3336	switch (etype) {
3337		case ETHERTYPE_IP:
3338			ip = (struct ip *)(mp->m_data + ehdrlen);
3339			ip_hlen = ip->ip_hl << 2;
3340			if (mp->m_len < ehdrlen + ip_hlen) {
3341				offload = FALSE;
3342				break;
3343			}
3344			ipproto = ip->ip_p;
3345			type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4;
3346			break;
3347		case ETHERTYPE_IPV6:
3348			ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
3349			ip_hlen = sizeof(struct ip6_hdr);
3350			if (mp->m_len < ehdrlen + ip_hlen)
3351				return (FALSE);
3352			ipproto = ip6->ip6_nxt;
3353			type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV6;
3354			break;
3355		default:
3356			offload = FALSE;
3357			break;
3358	}
3359
3360	vlan_macip_lens |= ip_hlen;
3361	type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
3362
3363	switch (ipproto) {
3364		case IPPROTO_TCP:
3365			if (mp->m_pkthdr.csum_flags & CSUM_TCP)
3366				type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP;
3367			break;
3368		case IPPROTO_UDP:
3369			if (mp->m_pkthdr.csum_flags & CSUM_UDP)
3370				type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_UDP;
3371			break;
3372#if __FreeBSD_version >= 800000
3373		case IPPROTO_SCTP:
3374			if (mp->m_pkthdr.csum_flags & CSUM_SCTP)
3375				type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_SCTP;
3376			break;
3377#endif
3378		default:
3379			offload = FALSE;
3380			break;
3381	}
3382
3383	/* 82575 needs the queue index added */
3384	if (adapter->hw.mac.type == e1000_82575)
3385		mss_l4len_idx = txr->me << 4;
3386
3387	/* Now copy bits into descriptor */
3388	TXD->vlan_macip_lens |= htole32(vlan_macip_lens);
3389	TXD->type_tucmd_mlhl |= htole32(type_tucmd_mlhl);
3390	TXD->seqnum_seed = htole32(0);
3391	TXD->mss_l4len_idx = htole32(mss_l4len_idx);
3392
3393	tx_buffer->m_head = NULL;
3394	tx_buffer->next_eop = -1;
3395
3396	/* We've consumed the first desc, adjust counters */
3397	if (++ctxd == adapter->num_tx_desc)
3398		ctxd = 0;
3399	txr->next_avail_desc = ctxd;
3400	--txr->tx_avail;
3401
3402        return (offload);
3403}
3404
3405
3406/**********************************************************************
3407 *
3408 *  Examine each tx_buffer in the used queue. If the hardware is done
3409 *  processing the packet then free associated resources. The
3410 *  tx_buffer is put back on the free queue.
3411 *
3412 *  A TRUE return means there's work in the ring to clean; FALSE means it's empty.
3413 **********************************************************************/
3414static bool
3415igb_txeof(struct tx_ring *txr)
3416{
3417	struct adapter	*adapter = txr->adapter;
3418        int first, last, done;
3419        struct igb_tx_buffer *tx_buffer;
3420        struct e1000_tx_desc   *tx_desc, *eop_desc;
3421	struct ifnet   *ifp = adapter->ifp;
3422
3423	IGB_TX_LOCK_ASSERT(txr);
3424
3425        if (txr->tx_avail == adapter->num_tx_desc)
3426                return FALSE;
3427
3428        first = txr->next_to_clean;
3429        tx_desc = &txr->tx_base[first];
3430        tx_buffer = &txr->tx_buffers[first];
3431	last = tx_buffer->next_eop;
3432        eop_desc = &txr->tx_base[last];
3433
3434	/*
3435	 * What this does is get the index of the
3436	 * first descriptor AFTER the EOP of the
3437	 * first packet, that way we can do the
3438	 * simple comparison on the inner while loop.
3439	 */
3440	if (++last == adapter->num_tx_desc)
3441 		last = 0;
3442	done = last;
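	/*
	** Packets are reclaimed whole: the outer loop waits for DD in
	** each packet's EOP descriptor, the inner loop then frees every
	** descriptor up to 'done'.
	*/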
3443
3444        bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3445            BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
3446
3447        while (eop_desc->upper.fields.status & E1000_TXD_STAT_DD) {
3448		/* We clean the range of the packet */
3449		while (first != done) {
3450                	tx_desc->upper.data = 0;
3451                	tx_desc->lower.data = 0;
3452                	tx_desc->buffer_addr = 0;
3453                	++txr->tx_avail;
3454
3455			if (tx_buffer->m_head) {
3456				txr->bytes +=
3457				    tx_buffer->m_head->m_pkthdr.len;
3458				bus_dmamap_sync(txr->txtag,
3459				    tx_buffer->map,
3460				    BUS_DMASYNC_POSTWRITE);
3461				bus_dmamap_unload(txr->txtag,
3462				    tx_buffer->map);
3463
3464                        	m_freem(tx_buffer->m_head);
3465                        	tx_buffer->m_head = NULL;
3466                	}
3467			tx_buffer->next_eop = -1;
3468			txr->watchdog_time = ticks;
3469
3470	                if (++first == adapter->num_tx_desc)
3471				first = 0;
3472
3473	                tx_buffer = &txr->tx_buffers[first];
3474			tx_desc = &txr->tx_base[first];
3475		}
3476		++txr->packets;
3477		++ifp->if_opackets;
3478		/* See if we can continue to the next packet */
3479		last = tx_buffer->next_eop;
3480		if (last != -1) {
3481        		eop_desc = &txr->tx_base[last];
3482			/* Get new done point */
3483			if (++last == adapter->num_tx_desc) last = 0;
3484			done = last;
3485		} else
3486			break;
3487        }
3488        bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3489            BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3490
3491        txr->next_to_clean = first;
3492
3493        /*
3494         * If we have enough room, clear IFF_DRV_OACTIVE
3495         * to tell the stack that it is OK to send packets.
3496         */
3497        if (txr->tx_avail > IGB_TX_CLEANUP_THRESHOLD) {
3498                ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
3499		/* All clean, turn off the watchdog */
3500                if (txr->tx_avail == adapter->num_tx_desc) {
3501			txr->watchdog_check = FALSE;
3502			return FALSE;
3503		}
3504        }
3505
3506	return (TRUE);
3507}
3508
3509
3510/*********************************************************************
3511 *
3512 *  Refresh mbuf buffers for RX descriptor rings
3513 *   - now keeps its own state so discards due to resource
3514 *     exhaustion are unnecessary; if an mbuf cannot be obtained
3515 *     it just returns, keeping its placeholder, so it can simply
3516 *     be called again to retry.
3517 *
3518 **********************************************************************/
3519static void
3520igb_refresh_mbufs(struct rx_ring *rxr, int limit)
3521{
3522	struct adapter		*adapter = rxr->adapter;
3523	bus_dma_segment_t	hseg[1];
3524	bus_dma_segment_t	pseg[1];
3525	struct igb_rx_buf	*rxbuf;
3526	struct mbuf		*mh, *mp;
3527	int			i, nsegs, error, cleaned;
3528
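	/*
	** Each descriptor is backed by a split pair of buffers: a small
	** header mbuf mapped through htag into hdr_addr and a cluster
	** mapped through ptag into pkt_addr; only missing halves are
	** replaced here.
	*/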
3529	i = rxr->next_to_refresh;
3530	cleaned = -1; /* Signify no completions */
3531	while (i != limit) {
3532		rxbuf = &rxr->rx_buffers[i];
3533		if (rxbuf->m_head == NULL) {
3534			mh = m_gethdr(M_DONTWAIT, MT_DATA);
3535			if (mh == NULL)
3536				goto update;
3537			mh->m_pkthdr.len = mh->m_len = MHLEN;
3538			mh->m_len = MHLEN;
3539			mh->m_flags |= M_PKTHDR;
3540			m_adj(mh, ETHER_ALIGN);
3541			/* Get the memory mapping */
3542			error = bus_dmamap_load_mbuf_sg(rxr->htag,
3543			    rxbuf->hmap, mh, hseg, &nsegs, BUS_DMA_NOWAIT);
3544			if (error != 0) {
3545				printf("GET BUF: dmamap load"
3546				    " failure - %d\n", error);
3547				m_free(mh);
3548				goto update;
3549			}
3550			rxbuf->m_head = mh;
3551			bus_dmamap_sync(rxr->htag, rxbuf->hmap,
3552			    BUS_DMASYNC_PREREAD);
3553			rxr->rx_base[i].read.hdr_addr =
3554			    htole64(hseg[0].ds_addr);
3555		}
3556
3557		if (rxbuf->m_pack == NULL) {
3558			mp = m_getjcl(M_DONTWAIT, MT_DATA,
3559			    M_PKTHDR, adapter->rx_mbuf_sz);
3560			if (mp == NULL)
3561				goto update;
3562			mp->m_pkthdr.len = mp->m_len = adapter->rx_mbuf_sz;
3563			/* Get the memory mapping */
3564			error = bus_dmamap_load_mbuf_sg(rxr->ptag,
3565			    rxbuf->pmap, mp, pseg, &nsegs, BUS_DMA_NOWAIT);
3566			if (error != 0) {
3567				printf("GET BUF: dmamap load"
3568				    " failure - %d\n", error);
3569				m_free(mp);
3570				goto update;
3571			}
3572			rxbuf->m_pack = mp;
3573			bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
3574			    BUS_DMASYNC_PREREAD);
3575			rxr->rx_base[i].read.pkt_addr =
3576			    htole64(pseg[0].ds_addr);
3577		}
3578
3579		cleaned = i;
3580		/* Calculate next index */
3581		if (++i == adapter->num_rx_desc)
3582			i = 0;
3583		/* This is the work marker for refresh */
3584		rxr->next_to_refresh = i;
3585	}
3586update:
3587	if (cleaned != -1) /* If we refreshed some, bump tail */
3588		E1000_WRITE_REG(&adapter->hw,
3589		    E1000_RDT(rxr->me), cleaned);
3590	return;
3591}
3592
3593
3594/*********************************************************************
3595 *
3596 *  Allocate memory for rx_buffer structures. Since we use one
3597 *  rx_buffer per received packet, the maximum number of rx_buffers
3598 *  that we'll need is equal to the number of receive descriptors
3599 *  that we've allocated.
3600 *
3601 **********************************************************************/
3602static int
3603igb_allocate_receive_buffers(struct rx_ring *rxr)
3604{
3605	struct	adapter 	*adapter = rxr->adapter;
3606	device_t 		dev = adapter->dev;
3607	struct igb_rx_buf	*rxbuf;
3608	int             	i, bsize, error;
3609
3610	bsize = sizeof(struct igb_rx_buf) * adapter->num_rx_desc;
3611	if (!(rxr->rx_buffers =
3612	    (struct igb_rx_buf *) malloc(bsize,
3613	    M_DEVBUF, M_NOWAIT | M_ZERO))) {
3614		device_printf(dev, "Unable to allocate rx_buffer memory\n");
3615		error = ENOMEM;
3616		goto fail;
3617	}
3618
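	/*
	** Two DMA tags back the header-split scheme: a small tag
	** (MSIZE) for the header mbufs and a cluster-sized tag
	** (MJUMPAGESIZE) for the payload buffers; each rx_buffer
	** below gets one map from each.
	*/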
3619	if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
3620				   1, 0,		/* alignment, bounds */
3621				   BUS_SPACE_MAXADDR,	/* lowaddr */
3622				   BUS_SPACE_MAXADDR,	/* highaddr */
3623				   NULL, NULL,		/* filter, filterarg */
3624				   MSIZE,		/* maxsize */
3625				   1,			/* nsegments */
3626				   MSIZE,		/* maxsegsize */
3627				   0,			/* flags */
3628				   NULL,		/* lockfunc */
3629				   NULL,		/* lockfuncarg */
3630				   &rxr->htag))) {
3631		device_printf(dev, "Unable to create RX DMA tag\n");
3632		goto fail;
3633	}
3634
3635	if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
3636				   1, 0,		/* alignment, bounds */
3637				   BUS_SPACE_MAXADDR,	/* lowaddr */
3638				   BUS_SPACE_MAXADDR,	/* highaddr */
3639				   NULL, NULL,		/* filter, filterarg */
3640				   MJUMPAGESIZE,	/* maxsize */
3641				   1,			/* nsegments */
3642				   MJUMPAGESIZE,	/* maxsegsize */
3643				   0,			/* flags */
3644				   NULL,		/* lockfunc */
3645				   NULL,		/* lockfuncarg */
3646				   &rxr->ptag))) {
3647		device_printf(dev, "Unable to create RX payload DMA tag\n");
3648		goto fail;
3649	}
3650
3651	for (i = 0; i < adapter->num_rx_desc; i++) {
3652		rxbuf = &rxr->rx_buffers[i];
3653		error = bus_dmamap_create(rxr->htag,
3654		    BUS_DMA_NOWAIT, &rxbuf->hmap);
3655		if (error) {
3656			device_printf(dev,
3657			    "Unable to create RX head DMA maps\n");
3658			goto fail;
3659		}
3660		error = bus_dmamap_create(rxr->ptag,
3661		    BUS_DMA_NOWAIT, &rxbuf->pmap);
3662		if (error) {
3663			device_printf(dev,
3664			    "Unable to create RX packet DMA maps\n");
3665			goto fail;
3666		}
3667	}
3668
3669	return (0);
3670
3671fail:
3672	/* Frees all, but can handle partial completion */
3673	igb_free_receive_structures(adapter);
3674	return (error);
3675}
3676
3677
3678static void
3679igb_free_receive_ring(struct rx_ring *rxr)
3680{
3681	struct	adapter		*adapter;
3682	struct igb_rx_buf	*rxbuf;
3683	int i;
3684
3685	adapter = rxr->adapter;
3686	for (i = 0; i < adapter->num_rx_desc; i++) {
3687		rxbuf = &rxr->rx_buffers[i];
3688		if (rxbuf->m_head != NULL) {
3689			bus_dmamap_sync(rxr->htag, rxbuf->hmap,
3690			    BUS_DMASYNC_POSTREAD);
3691			bus_dmamap_unload(rxr->htag, rxbuf->hmap);
3692			rxbuf->m_head->m_flags |= M_PKTHDR;
3693			m_freem(rxbuf->m_head);
3694		}
3695		if (rxbuf->m_pack != NULL) {
3696			bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
3697			    BUS_DMASYNC_POSTREAD);
3698			bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
3699			rxbuf->m_pack->m_flags |= M_PKTHDR;
3700			m_freem(rxbuf->m_pack);
3701		}
3702		rxbuf->m_head = NULL;
3703		rxbuf->m_pack = NULL;
3704	}
3705}
3706
3707
3708/*********************************************************************
3709 *
3710 *  Initialize a receive ring and its buffers.
3711 *
3712 **********************************************************************/
3713static int
3714igb_setup_receive_ring(struct rx_ring *rxr)
3715{
3716	struct	adapter		*adapter;
3717	struct  ifnet		*ifp;
3718	device_t		dev;
3719	struct igb_rx_buf	*rxbuf;
3720	bus_dma_segment_t	pseg[1], hseg[1];
3721	struct lro_ctrl		*lro = &rxr->lro;
3722	int			rsize, nsegs, error = 0;
3723
3724	adapter = rxr->adapter;
3725	dev = adapter->dev;
3726	ifp = adapter->ifp;
3727
3728	/* Clear the ring contents */
3729	IGB_RX_LOCK(rxr);
3730	rsize = roundup2(adapter->num_rx_desc *
3731	    sizeof(union e1000_adv_rx_desc), IGB_DBA_ALIGN);
3732	bzero((void *)rxr->rx_base, rsize);
3733
3734	/*
3735	** Free current RX buffer structures and their mbufs
3736	*/
3737	igb_free_receive_ring(rxr);
3738
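	/*
	** Each descriptor is backed by two buffers: a small header
	** mbuf and a payload cluster; their bus addresses are written
	** into the hdr_addr and pkt_addr fields of the advanced
	** descriptor's read format below.
	*/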
3739        /* Now replenish the ring mbufs */
3740	for (int j = 0; j != adapter->num_rx_desc; ++j) {
3741		struct mbuf	*mh, *mp;
3742
3743		rxbuf = &rxr->rx_buffers[j];
3744
3745		/* First the header */
3746		rxbuf->m_head = m_gethdr(M_DONTWAIT, MT_DATA);
3747		if (rxbuf->m_head == NULL)
3748                        goto fail;
3749		m_adj(rxbuf->m_head, ETHER_ALIGN);
3750		mh = rxbuf->m_head;
3751		mh->m_len = mh->m_pkthdr.len = MHLEN;
3752		mh->m_flags |= M_PKTHDR;
3753		/* Get the memory mapping */
3754		error = bus_dmamap_load_mbuf_sg(rxr->htag,
3755		    rxbuf->hmap, rxbuf->m_head, hseg,
3756		    &nsegs, BUS_DMA_NOWAIT);
3757		if (error != 0) /* Nothing elegant to do here */
3758                        goto fail;
3759		bus_dmamap_sync(rxr->htag,
3760		    rxbuf->hmap, BUS_DMASYNC_PREREAD);
3761		/* Update descriptor */
3762		rxr->rx_base[j].read.hdr_addr = htole64(hseg[0].ds_addr);
3763
3764		/* Now the payload cluster */
3765		rxbuf->m_pack = m_getjcl(M_DONTWAIT, MT_DATA,
3766		    M_PKTHDR, adapter->rx_mbuf_sz);
3767		if (rxbuf->m_pack == NULL)
3768                        goto fail;
3769		mp = rxbuf->m_pack;
3770		mp->m_pkthdr.len = mp->m_len = adapter->rx_mbuf_sz;
3771		/* Get the memory mapping */
3772		error = bus_dmamap_load_mbuf_sg(rxr->ptag,
3773		    rxbuf->pmap, mp, pseg,
3774		    &nsegs, BUS_DMA_NOWAIT);
3775		if (error != 0)
3776                        goto fail;
3777		bus_dmamap_sync(rxr->ptag,
3778		    rxbuf->pmap, BUS_DMASYNC_PREREAD);
3779		/* Update descriptor */
3780		rxr->rx_base[j].read.pkt_addr = htole64(pseg[0].ds_addr);
3781        }
3782	rxr->next_to_refresh = 0;
3783	rxr->lro_enabled = FALSE;
3784
3785	if (igb_header_split)
3786		rxr->hdr_split = TRUE;
3787	else
3788		ifp->if_capabilities &= ~IFCAP_LRO;
3789
3790	rxr->fmp = NULL;
3791	rxr->lmp = NULL;
3792	rxr->discard = FALSE;
3793
3794	bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
3795	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3796
3797	/*
3798	** Now set up the LRO interface; note that
3799	** LRO is only offered when header split
3800	** is enabled, the capability is cleared
3801	** above when it is not.
3802	*/
3803	if (ifp->if_capenable & IFCAP_LRO) {
3804		int err = tcp_lro_init(lro);
3805		if (err) {
3806			device_printf(dev, "LRO Initialization failed!\n");
3807			goto fail;
3808		}
3809		INIT_DEBUGOUT("RX LRO Initialized\n");
3810		rxr->lro_enabled = TRUE;
3811		lro->ifp = adapter->ifp;
3812	}
3813
3814	IGB_RX_UNLOCK(rxr);
3815	return (0);
3816
3817fail:
3818	igb_free_receive_ring(rxr);
3819	IGB_RX_UNLOCK(rxr);
3820	return (error);
3821}
3822
3823/*********************************************************************
3824 *
3825 *  Initialize all receive rings.
3826 *
3827 **********************************************************************/
3828static int
3829igb_setup_receive_structures(struct adapter *adapter)
3830{
3831	struct rx_ring *rxr = adapter->rx_rings;
3832	int i, j;
3833
3834	for (i = 0; i < adapter->num_queues; i++, rxr++)
3835		if (igb_setup_receive_ring(rxr))
3836			goto fail;
3837
3838	return (0);
3839fail:
3840	/*
3841	 * Free the RX buffers of the rings that completed; the
3842	 * failing ring has already cleaned up after itself. The
3843	 * value of 'i' is the index of the failed ring, so free
3844	 * the rings before it.
3845	 */
3846	rxr = adapter->rx_rings;
3847	for (j = 0; j < i; j++, rxr++)
3848		igb_free_receive_ring(rxr);
3851
3852	return (ENOBUFS);
3853}
3854
3855/*********************************************************************
3856 *
3857 *  Enable receive unit.
3858 *
3859 **********************************************************************/
3860static void
3861igb_initialize_receive_units(struct adapter *adapter)
3862{
3863	struct rx_ring	*rxr = adapter->rx_rings;
3864	struct ifnet	*ifp = adapter->ifp;
3865	struct e1000_hw *hw = &adapter->hw;
3866	u32		rctl, rxcsum, psize, srrctl = 0;
3867
3868	INIT_DEBUGOUT("igb_initialize_receive_unit: begin");
3869
3870	/*
3871	 * Make sure receives are disabled while setting
3872	 * up the descriptor ring
3873	 */
3874	rctl = E1000_READ_REG(hw, E1000_RCTL);
3875	E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
3876
3877	/*
3878	** Set up for header split
3879	*/
3880	if (rxr->hdr_split) {
3881		/* Use a standard mbuf for the header */
3882		srrctl |= IGB_HDR_BUF << E1000_SRRCTL_BSIZEHDRSIZE_SHIFT;
3883		srrctl |= E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
3884	} else
3885		srrctl |= E1000_SRRCTL_DESCTYPE_ADV_ONEBUF;
3886
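	/*
	** The SRRCTL packet buffer size written below is expressed in
	** 1KB units: the jumbo path selects 4KB receive buffers, the
	** standard path 2KB.
	*/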
3887	/*
3888	** Set up for jumbo frames
3889	*/
3890	if (ifp->if_mtu > ETHERMTU) {
3891		rctl |= E1000_RCTL_LPE;
3892		srrctl |= 4096 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
3893		rctl |= E1000_RCTL_SZ_4096 | E1000_RCTL_BSEX;
3894
3895		/* Set maximum packet len */
3896		psize = adapter->max_frame_size;
3897		/* are we on a vlan? */
3898		if (adapter->ifp->if_vlantrunk != NULL)
3899			psize += VLAN_TAG_SIZE;
3900		E1000_WRITE_REG(&adapter->hw, E1000_RLPML, psize);
3901	} else {
3902		rctl &= ~E1000_RCTL_LPE;
3903		srrctl |= 2048 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
3904		rctl |= E1000_RCTL_SZ_2048;
3905	}
3906
3907	/* Setup the Base and Length of the Rx Descriptor Rings */
3908	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
3909		u64 bus_addr = rxr->rxdma.dma_paddr;
3910		u32 rxdctl;
3911
3912		E1000_WRITE_REG(hw, E1000_RDLEN(i),
3913		    adapter->num_rx_desc * sizeof(struct e1000_rx_desc));
3914		E1000_WRITE_REG(hw, E1000_RDBAH(i),
3915		    (uint32_t)(bus_addr >> 32));
3916		E1000_WRITE_REG(hw, E1000_RDBAL(i),
3917		    (uint32_t)bus_addr);
3918		E1000_WRITE_REG(hw, E1000_SRRCTL(i), srrctl);
3919		/* Enable this Queue */
3920		rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(i));
3921		rxdctl |= E1000_RXDCTL_QUEUE_ENABLE;
3922		rxdctl &= 0xFFF00000;
3923		rxdctl |= IGB_RX_PTHRESH;
3924		rxdctl |= IGB_RX_HTHRESH << 8;
3925		rxdctl |= IGB_RX_WTHRESH << 16;
3926		E1000_WRITE_REG(hw, E1000_RXDCTL(i), rxdctl);
3927	}
3928
3929	/*
3930	** Setup for RX MultiQueue
3931	*/
3932	rxcsum = E1000_READ_REG(hw, E1000_RXCSUM);
3933	if (adapter->num_queues > 1) {
3934		u32 random[10], mrqc, shift = 0;
3935		union igb_reta {
3936			u32 dword;
3937			u8  bytes[4];
3938		} reta;
3939
3940		arc4rand(&random, sizeof(random), 0);
3941		if (adapter->hw.mac.type == e1000_82575)
3942			shift = 6;
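		/*
		** The 128-entry redirection table is written four
		** entries at a time: each byte selects the queue that
		** handles one hash bucket (shifted into the upper bits
		** of the byte on the 82575).
		*/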
3943		/* Warning FM follows */
3944		for (int i = 0; i < 128; i++) {
3945			reta.bytes[i & 3] =
3946			    (i % adapter->num_queues) << shift;
3947			if ((i & 3) == 3)
3948				E1000_WRITE_REG(hw,
3949				    E1000_RETA(i >> 2), reta.dword);
3950		}
3951		/* Enable 4-queue RSS and load the random hash key */
3952		mrqc = E1000_MRQC_ENABLE_RSS_4Q;
3953		for (int i = 0; i < 10; i++)
3954			E1000_WRITE_REG_ARRAY(hw,
3955			    E1000_RSSRK(0), i, random[i]);
3956
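		/* Hash on the IPv4/IPv6 addresses and TCP/UDP ports */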
3957		mrqc |= (E1000_MRQC_RSS_FIELD_IPV4 |
3958		    E1000_MRQC_RSS_FIELD_IPV4_TCP);
3959		mrqc |= (E1000_MRQC_RSS_FIELD_IPV6 |
3960		    E1000_MRQC_RSS_FIELD_IPV6_TCP);
3961		mrqc |= (E1000_MRQC_RSS_FIELD_IPV4_UDP |
3962		    E1000_MRQC_RSS_FIELD_IPV6_UDP);
3963		mrqc |= (E1000_MRQC_RSS_FIELD_IPV6_UDP_EX |
3964		    E1000_MRQC_RSS_FIELD_IPV6_TCP_EX);
3965
3966		E1000_WRITE_REG(hw, E1000_MRQC, mrqc);
3967
3968		/*
3969		** NOTE: Receive Full-Packet Checksum Offload
3970		** is mutually exclusive with Multiqueue. However,
3971		** this is not the same as the TCP/IP checksum
3972		** offloads, which still work.
3973		*/
3974		rxcsum |= E1000_RXCSUM_PCSD;
3975#if __FreeBSD_version >= 800000
3976		/* For SCTP Offload */
3977		if ((hw->mac.type == e1000_82576)
3978		    && (ifp->if_capenable & IFCAP_RXCSUM))
3979			rxcsum |= E1000_RXCSUM_CRCOFL;
3980#endif
3981	} else {
3982		/* Non RSS setup */
3983		if (ifp->if_capenable & IFCAP_RXCSUM) {
3984			rxcsum |= E1000_RXCSUM_IPPCSE;
3985#if __FreeBSD_version >= 800000
3986			if (adapter->hw.mac.type == e1000_82576)
3987				rxcsum |= E1000_RXCSUM_CRCOFL;
3988#endif
3989		} else
3990			rxcsum &= ~E1000_RXCSUM_TUOFL;
3991	}
3992	E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum);
3993
3994	/* Setup the Receive Control Register */
3995	rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
3996	rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_LBM_NO |
3997		   E1000_RCTL_RDMTS_HALF |
3998		   (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
3999	/* Strip CRC bytes. */
4000	rctl |= E1000_RCTL_SECRC;
4001	/* Make sure VLAN Filters are off */
4002	rctl &= ~E1000_RCTL_VFE;
4003	/* Don't store bad packets */
4004	rctl &= ~E1000_RCTL_SBP;
4005
4006	/* Enable Receives */
4007	E1000_WRITE_REG(hw, E1000_RCTL, rctl);
4008
4009	/*
4010	 * Setup the HW Rx Head and Tail Descriptor Pointers
4011	 *   - needs to be after enable
4012	 */
4013	for (int i = 0; i < adapter->num_queues; i++) {
4014		E1000_WRITE_REG(hw, E1000_RDH(i), 0);
4015		E1000_WRITE_REG(hw, E1000_RDT(i),
4016		     adapter->num_rx_desc - 1);
4017	}
4018	return;
4019}
4020
4021/*********************************************************************
4022 *
4023 *  Free receive rings.
4024 *
4025 **********************************************************************/
4026static void
4027igb_free_receive_structures(struct adapter *adapter)
4028{
4029	struct rx_ring *rxr = adapter->rx_rings;
4030
4031	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4032		struct lro_ctrl	*lro = &rxr->lro;
4033		igb_free_receive_buffers(rxr);
4034		tcp_lro_free(lro);
4035		igb_dma_free(adapter, &rxr->rxdma);
4036	}
4037
4038	free(adapter->rx_rings, M_DEVBUF);
4039}
4040
4041/*********************************************************************
4042 *
4043 *  Free receive ring data structures.
4044 *
4045 **********************************************************************/
4046static void
4047igb_free_receive_buffers(struct rx_ring *rxr)
4048{
4049	struct adapter		*adapter = rxr->adapter;
4050	struct igb_rx_buf	*rxbuf;
4051	int i;
4052
4053	INIT_DEBUGOUT("free_receive_structures: begin");
4054
4055	/* Cleanup any existing buffers */
4056	if (rxr->rx_buffers != NULL) {
4057		for (i = 0; i < adapter->num_rx_desc; i++) {
4058			rxbuf = &rxr->rx_buffers[i];
4059			if (rxbuf->m_head != NULL) {
4060				bus_dmamap_sync(rxr->htag, rxbuf->hmap,
4061				    BUS_DMASYNC_POSTREAD);
4062				bus_dmamap_unload(rxr->htag, rxbuf->hmap);
4063				rxbuf->m_head->m_flags |= M_PKTHDR;
4064				m_freem(rxbuf->m_head);
4065			}
4066			if (rxbuf->m_pack != NULL) {
4067				bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
4068				    BUS_DMASYNC_POSTREAD);
4069				bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
4070				rxbuf->m_pack->m_flags |= M_PKTHDR;
4071				m_freem(rxbuf->m_pack);
4072			}
4073			rxbuf->m_head = NULL;
4074			rxbuf->m_pack = NULL;
4075			if (rxbuf->hmap != NULL) {
4076				bus_dmamap_destroy(rxr->htag, rxbuf->hmap);
4077				rxbuf->hmap = NULL;
4078			}
4079			if (rxbuf->pmap != NULL) {
4080				bus_dmamap_destroy(rxr->ptag, rxbuf->pmap);
4081				rxbuf->pmap = NULL;
4082			}
4083		}
4084		if (rxr->rx_buffers != NULL) {
4085			free(rxr->rx_buffers, M_DEVBUF);
4086			rxr->rx_buffers = NULL;
4087		}
4088	}
4089
4090	if (rxr->htag != NULL) {
4091		bus_dma_tag_destroy(rxr->htag);
4092		rxr->htag = NULL;
4093	}
4094	if (rxr->ptag != NULL) {
4095		bus_dma_tag_destroy(rxr->ptag);
4096		rxr->ptag = NULL;
4097	}
4098}
4099
4100static __inline void
4101igb_rx_discard(struct rx_ring *rxr, int i)
4102{
4103	struct adapter		*adapter = rxr->adapter;
4104	struct igb_rx_buf	*rbuf;
4105	struct mbuf             *mh, *mp;
4106
4107	rbuf = &rxr->rx_buffers[i];
4108	if (rxr->fmp != NULL) {
4109		rxr->fmp->m_flags |= M_PKTHDR;
4110		m_freem(rxr->fmp);
4111		rxr->fmp = NULL;
4112		rxr->lmp = NULL;
4113	}
4114
4115	mh = rbuf->m_head;
4116	mp = rbuf->m_pack;
4117
4118	/* Reuse loaded DMA map and just update mbuf chain */
4119	mh->m_len = MHLEN;
4120	mh->m_flags |= M_PKTHDR;
4121	mh->m_next = NULL;
4122
4123	mp->m_len = mp->m_pkthdr.len = adapter->rx_mbuf_sz;
4124	mp->m_data = mp->m_ext.ext_buf;
4125	mp->m_next = NULL;
4126	return;
4127}
4128
4129static __inline void
4130igb_rx_input(struct rx_ring *rxr, struct ifnet *ifp, struct mbuf *m, u32 ptype)
4131{
4132
4133	/*
4134	 * At the moment LRO is only used for IPv4/TCP packets whose TCP
4135	 * checksum has been verified by hardware, and which carry no VLAN
4136	 * tag in the ethernet header.
4137	 */
4138	if (rxr->lro_enabled &&
4139	    (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 &&
4140	    (ptype & E1000_RXDADV_PKTTYPE_ETQF) == 0 &&
4141	    (ptype & (E1000_RXDADV_PKTTYPE_IPV4 | E1000_RXDADV_PKTTYPE_TCP)) ==
4142	    (E1000_RXDADV_PKTTYPE_IPV4 | E1000_RXDADV_PKTTYPE_TCP) &&
4143	    (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) ==
4144	    (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) {
4145		/*
4146		 * Send to the stack if:
4147		 *  - LRO not enabled, or
4148		 *  - no LRO resources, or
4149		 *  - lro enqueue fails
4150		 */
4151		if (rxr->lro.lro_cnt != 0)
4152			if (tcp_lro_rx(&rxr->lro, m, 0) == 0)
4153				return;
4154	}
4155	(*ifp->if_input)(ifp, m);
4156}
4157
4158/*********************************************************************
4159 *
4160 *  This routine executes in interrupt context. It replenishes
4161 *  the mbufs in the descriptor ring and passes data that has
4162 *  been DMA'd into host memory up to the stack.
4163 *
4164 *  We loop at most count times if count is > 0, or until done if
4165 *  count < 0.
4166 *
4167 *  Return TRUE if more to clean, FALSE otherwise
4168 *********************************************************************/
4169static bool
4170igb_rxeof(struct igb_queue *que, int count)
4171{
4172	struct adapter		*adapter = que->adapter;
4173	struct rx_ring		*rxr = que->rxr;
4174	struct ifnet		*ifp = adapter->ifp;
4175	struct lro_ctrl		*lro = &rxr->lro;
4176	struct lro_entry	*queued;
4177	int			i, processed = 0;
4178	u32			ptype, staterr = 0;
4179	union e1000_adv_rx_desc	*cur;
4180
4181	IGB_RX_LOCK(rxr);
4182	/* Sync the ring. */
4183	bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4184	    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
4185
4186	/* Main clean loop */
4187	for (i = rxr->next_to_check; count != 0;) {
4188		struct mbuf		*sendmp, *mh, *mp;
4189		struct igb_rx_buf	*rxbuf;
4190		u16			hlen, plen, hdr, vtag;
4191		bool			eop = FALSE;
4192
4193		cur = &rxr->rx_base[i];
4194		staterr = le32toh(cur->wb.upper.status_error);
4195		if ((staterr & E1000_RXD_STAT_DD) == 0)
4196			break;
4197		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
4198			break;
4199		count--;
4200		sendmp = mh = mp = NULL;
4201		cur->wb.upper.status_error = 0;
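		/*
		** Pull the fields of interest out of the descriptor
		** write-back: payload length, packet type, VLAN tag
		** and the header-split info word.
		*/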
4202		rxbuf = &rxr->rx_buffers[i];
4203		plen = le16toh(cur->wb.upper.length);
4204		ptype = le32toh(cur->wb.lower.lo_dword.data) & IGB_PKTTYPE_MASK;
4205		vtag = le16toh(cur->wb.upper.vlan);
4206		hdr = le16toh(cur->wb.lower.lo_dword.hs_rss.hdr_info);
4207		eop = ((staterr & E1000_RXD_STAT_EOP) == E1000_RXD_STAT_EOP);
4208
4209		/* Make sure all segments of a bad packet are discarded */
4210		if (((staterr & E1000_RXDEXT_ERR_FRAME_ERR_MASK) != 0) ||
4211		    (rxr->discard)) {
4212			ifp->if_ierrors++;
4213			++rxr->rx_discarded;
4214			if (!eop) /* Catch subsequent segs */
4215				rxr->discard = TRUE;
4216			else
4217				rxr->discard = FALSE;
4218			igb_rx_discard(rxr, i);
4219			goto next_desc;
4220		}
4221
4222		/*
4223		** The way the hardware is configured to
4224		** split, it will ONLY use the header buffer
4225		** when header split is enabled, otherwise we
4226		** get normal behavior, ie, both header and
4227		** payload are DMA'd into the payload buffer.
4228		**
4229		** The fmp test is to catch the case where a
4230		** packet spans multiple descriptors, in that
4231		** case only the first header is valid.
4232		*/
4233		if (rxr->hdr_split && rxr->fmp == NULL) {
4234			hlen = (hdr & E1000_RXDADV_HDRBUFLEN_MASK) >>
4235			    E1000_RXDADV_HDRBUFLEN_SHIFT;
4236			if (hlen > IGB_HDR_BUF)
4237				hlen = IGB_HDR_BUF;
4238			/* Handle the header mbuf */
4239			mh = rxr->rx_buffers[i].m_head;
4240			mh->m_len = hlen;
4241			/* clear buf info for refresh */
4242			rxbuf->m_head = NULL;
4243			/*
4244			** Get the payload length, this
4245			** could be zero if its a small
4246			** packet.
4247			*/
4248			if (plen > 0) {
4249				mp = rxr->rx_buffers[i].m_pack;
4250				mp->m_len = plen;
4251				mh->m_next = mp;
4252				/* clear buf info for refresh */
4253				rxbuf->m_pack = NULL;
4254				rxr->rx_split_packets++;
4255			}
4256		} else {
4257			/*
4258			** Either no header split, or a
4259			** secondary piece of a fragmented
4260			** split packet.
4261			*/
4262			mh = rxr->rx_buffers[i].m_pack;
4263			mh->m_len = plen;
4264			/* clear buf info for refresh */
4265			rxbuf->m_pack = NULL;
4266		}
4267
4268		++processed; /* So we know when to refresh */
4269
4270		/* Initial frame - setup */
4271		if (rxr->fmp == NULL) {
4272			mh->m_pkthdr.len = mh->m_len;
4273			/* Store the first mbuf */
4274			rxr->fmp = mh;
4275			rxr->lmp = mh;
4276			if (mp != NULL) {
4277				/* Add payload if split */
4278				mh->m_pkthdr.len += mp->m_len;
4279				rxr->lmp = mh->m_next;
4280			}
4281		} else {
4282			/* Chain mbuf's together */
4283			rxr->lmp->m_next = mh;
4284			rxr->lmp = rxr->lmp->m_next;
4285			rxr->fmp->m_pkthdr.len += mh->m_len;
4286		}
4287
4288		if (eop) {
4289			rxr->fmp->m_pkthdr.rcvif = ifp;
4290			ifp->if_ipackets++;
4291			rxr->rx_packets++;
4292			/* capture data for AIM */
4293			rxr->packets++;
4294			rxr->bytes += rxr->fmp->m_pkthdr.len;
4295			rxr->rx_bytes += rxr->fmp->m_pkthdr.len;
4296
4297			if ((ifp->if_capenable & IFCAP_RXCSUM) != 0)
4298				igb_rx_checksum(staterr, rxr->fmp, ptype);
4299
4300			if ((ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 &&
4301			    (staterr & E1000_RXD_STAT_VP) != 0) {
4302				rxr->fmp->m_pkthdr.ether_vtag = vtag;
4303				rxr->fmp->m_flags |= M_VLANTAG;
4304			}
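			/*
			** Stamp the packet with this queue's MSIX index
			** as its flowid so the stack can keep packets
			** from the same receive queue together.
			*/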
4305#if __FreeBSD_version >= 800000
4306			rxr->fmp->m_pkthdr.flowid = que->msix;
4307			rxr->fmp->m_flags |= M_FLOWID;
4308#endif
4309			sendmp = rxr->fmp;
4310			/* Make sure to set M_PKTHDR. */
4311			sendmp->m_flags |= M_PKTHDR;
4312			rxr->fmp = NULL;
4313			rxr->lmp = NULL;
4314		}
4315
4316next_desc:
4317		bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4318		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4319
4320		/* Advance our pointers to the next descriptor. */
4321		if (++i == adapter->num_rx_desc)
4322			i = 0;
4323		/*
4324		** Send to the stack or LRO
4325		*/
4326		if (sendmp != NULL)
4327			igb_rx_input(rxr, ifp, sendmp, ptype);
4328
4329		/* Every 8 descriptors we go to refresh mbufs */
4330		if (processed == 8) {
4331                        igb_refresh_mbufs(rxr, i);
4332                        processed = 0;
4333		}
4334	}
4335
4336	/* Catch any remainders */
4337	if (processed != 0) {
4338		igb_refresh_mbufs(rxr, i);
4339		processed = 0;
4340	}
4341
4342	rxr->next_to_check = i;
4343
4344	/*
4345	 * Flush any outstanding LRO work
4346	 */
4347	while ((queued = SLIST_FIRST(&lro->lro_active)) != NULL) {
4348		SLIST_REMOVE_HEAD(&lro->lro_active, next);
4349		tcp_lro_flush(lro, queued);
4350	}
4351
4352	IGB_RX_UNLOCK(rxr);
4353
4354	/*
4355	** We still have cleaning to do?
4356	** Schedule another interrupt if so.
4357	*/
4358	if ((staterr & E1000_RXD_STAT_DD) != 0)
4359		return (TRUE);
4360
4361	return (FALSE);
4362}
4363
4364/*********************************************************************
4365 *
4366 *  Verify that the hardware indicated that the checksum is valid.
4367 *  Inform the stack of the checksum status so that the stack
4368 *  doesn't spend time verifying it.
4369 *
4370 *********************************************************************/
4371static void
4372igb_rx_checksum(u32 staterr, struct mbuf *mp, u32 ptype)
4373{
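	/*
	** The staterr word packs the descriptor status bits in its
	** low word and the error bits in its top byte; split them so
	** the E1000_RXD_STAT_* and E1000_RXD_ERR_* masks below can be
	** tested directly.
	*/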
4374	u16 status = (u16)staterr;
4375	u8  errors = (u8) (staterr >> 24);
4376	int sctp;
4377
4378	/* Ignore Checksum bit is set */
4379	if (status & E1000_RXD_STAT_IXSM) {
4380		mp->m_pkthdr.csum_flags = 0;
4381		return;
4382	}
4383
4384	if ((ptype & E1000_RXDADV_PKTTYPE_ETQF) == 0 &&
4385	    (ptype & E1000_RXDADV_PKTTYPE_SCTP) != 0)
4386		sctp = 1;
4387	else
4388		sctp = 0;
4389	if (status & E1000_RXD_STAT_IPCS) {
4390		/* Did it pass? */
4391		if (!(errors & E1000_RXD_ERR_IPE)) {
4392			/* IP Checksum Good */
4393			mp->m_pkthdr.csum_flags = CSUM_IP_CHECKED;
4394			mp->m_pkthdr.csum_flags |= CSUM_IP_VALID;
4395		} else
4396			mp->m_pkthdr.csum_flags = 0;
4397	}
4398
4399	if (status & (E1000_RXD_STAT_TCPCS | E1000_RXD_STAT_UDPCS)) {
4400		u16 type = (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
4401#if __FreeBSD_version >= 800000
4402		if (sctp) /* reassign */
4403			type = CSUM_SCTP_VALID;
4404#endif
4405		/* Did it pass? */
4406		if (!(errors & E1000_RXD_ERR_TCPE)) {
4407			mp->m_pkthdr.csum_flags |= type;
4408			if (sctp == 0)
4409				mp->m_pkthdr.csum_data = htons(0xffff);
4410		}
4411	}
4412	return;
4413}
4414
4415/*
4416 * This routine is run via a vlan
4417 * config EVENT
4418 */
4419static void
4420igb_register_vlan(void *arg, struct ifnet *ifp, u16 vtag)
4421{
4422	struct adapter	*adapter = ifp->if_softc;
4423	u32		index, bit;
4424
4425	if (ifp->if_softc !=  arg)   /* Not our event */
4426		return;
4427
4428	if ((vtag == 0) || (vtag > 4095))       /* Invalid */
4429                return;
4430
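	/*
	** The VLAN filter table is 128 32-bit words, one bit per
	** possible VLAN id: bits 5-11 of the tag pick the word and
	** bits 0-4 pick the bit within it.
	*/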
4431	index = (vtag >> 5) & 0x7F;
4432	bit = vtag & 0x1F;
4433	igb_shadow_vfta[index] |= (1 << bit);
4434	++adapter->num_vlans;
4435	/* Re-init to load the changes */
4436	igb_init(adapter);
4437}
4438
4439/*
4440 * This routine is run via a vlan
4441 * unconfig EVENT
4442 */
4443static void
4444igb_unregister_vlan(void *arg, struct ifnet *ifp, u16 vtag)
4445{
4446	struct adapter	*adapter = ifp->if_softc;
4447	u32		index, bit;
4448
4449	if (ifp->if_softc !=  arg)
4450		return;
4451
4452	if ((vtag == 0) || (vtag > 4095))       /* Invalid */
4453                return;
4454
4455	index = (vtag >> 5) & 0x7F;
4456	bit = vtag & 0x1F;
4457	igb_shadow_vfta[index] &= ~(1 << bit);
4458	--adapter->num_vlans;
4459	/* Re-init to load the changes */
4460	igb_init(adapter);
4461}
4462
4463static void
4464igb_setup_vlan_hw_support(struct adapter *adapter)
4465{
4466	struct e1000_hw *hw = &adapter->hw;
4467	u32             reg;
4468
4469	/*
4470	** We get here through init_locked, meaning
4471	** a soft reset; this has already cleared
4472	** the VFTA and other state, so if no vlans
4473	** have been registered there is nothing to do.
4474	*/
4475	if (adapter->num_vlans == 0)
4476                return;
4477
4478	/*
4479	** A soft reset zeroes out the VFTA, so
4480	** we need to repopulate it now.
4481	*/
4482	for (int i = 0; i < IGB_VFTA_SIZE; i++)
4483                if (igb_shadow_vfta[i] != 0)
4484			E1000_WRITE_REG_ARRAY(hw, E1000_VFTA,
4485                            i, igb_shadow_vfta[i]);
4486
4487	reg = E1000_READ_REG(hw, E1000_CTRL);
4488	reg |= E1000_CTRL_VME;
4489	E1000_WRITE_REG(hw, E1000_CTRL, reg);
4490
4491	/* Enable the Filter Table */
4492	reg = E1000_READ_REG(hw, E1000_RCTL);
4493	reg &= ~E1000_RCTL_CFIEN;
4494	reg |= E1000_RCTL_VFE;
4495	E1000_WRITE_REG(hw, E1000_RCTL, reg);
4496
4497	/* Update the frame size */
4498	E1000_WRITE_REG(&adapter->hw, E1000_RLPML,
4499	    adapter->max_frame_size + VLAN_TAG_SIZE);
4500}
4501
4502static void
4503igb_enable_intr(struct adapter *adapter)
4504{
4505	/* With MSIX/RSS, set up which interrupt causes auto-clear */
4506	if (adapter->msix_mem) {
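		/*
		** The queue causes in eims_mask are written to the
		** auto-clear (EIAC) and auto-mask (EIAM) registers and
		** then unmasked via EIMS; the legacy IMS is left
		** carrying only the link status change interrupt.
		*/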
4507		E1000_WRITE_REG(&adapter->hw, E1000_EIAC,
4508		    adapter->eims_mask);
4509		E1000_WRITE_REG(&adapter->hw, E1000_EIAM,
4510		    adapter->eims_mask);
4511		E1000_WRITE_REG(&adapter->hw, E1000_EIMS,
4512		    adapter->eims_mask);
4513		E1000_WRITE_REG(&adapter->hw, E1000_IMS,
4514		    E1000_IMS_LSC);
4515	} else {
4516		E1000_WRITE_REG(&adapter->hw, E1000_IMS,
4517		    IMS_ENABLE_MASK);
4518	}
4519	E1000_WRITE_FLUSH(&adapter->hw);
4520
4521	return;
4522}
4523
4524static void
4525igb_disable_intr(struct adapter *adapter)
4526{
4527	if (adapter->msix_mem) {
4528		E1000_WRITE_REG(&adapter->hw, E1000_EIMC, ~0);
4529		E1000_WRITE_REG(&adapter->hw, E1000_EIAC, 0);
4530	}
4531	E1000_WRITE_REG(&adapter->hw, E1000_IMC, ~0);
4532	E1000_WRITE_FLUSH(&adapter->hw);
4533	return;
4534}
4535
4536/*
4537 * Bit of a misnomer: what this really means is
4538 * to enable OS management of the system, i.e.,
4539 * to disable the special hardware management features.
4540 */
4541static void
4542igb_init_manageability(struct adapter *adapter)
4543{
4544	if (adapter->has_manage) {
4545		int manc2h = E1000_READ_REG(&adapter->hw, E1000_MANC2H);
4546		int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
4547
4548		/* disable hardware interception of ARP */
4549		manc &= ~(E1000_MANC_ARP_EN);
4550
4551                /* enable receiving management packets to the host */
4552		manc |= E1000_MANC_EN_MNG2HOST;
4553		manc2h |= 1 << 5;  /* Mng Port 623 */
4554		manc2h |= 1 << 6;  /* Mng Port 664 */
4555		E1000_WRITE_REG(&adapter->hw, E1000_MANC2H, manc2h);
4556		E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
4557	}
4558}
4559
4560/*
4561 * Give control back to hardware management
4562 * controller if there is one.
4563 */
4564static void
4565igb_release_manageability(struct adapter *adapter)
4566{
4567	if (adapter->has_manage) {
4568		int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
4569
4570		/* re-enable hardware interception of ARP */
4571		manc |= E1000_MANC_ARP_EN;
4572		manc &= ~E1000_MANC_EN_MNG2HOST;
4573
4574		E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
4575	}
4576}
4577
4578/*
4579 * igb_get_hw_control sets CTRL_EXT:DRV_LOAD bit.
4580 * For ASF and Pass Through versions of f/w this means that
4581 * the driver is loaded.
4582 *
4583 */
4584static void
4585igb_get_hw_control(struct adapter *adapter)
4586{
4587	u32 ctrl_ext;
4588
4589	/* Let firmware know the driver has taken over */
4590	ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4591	E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
4592	    ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
4593}
4594
4595/*
4596 * igb_release_hw_control resets CTRL_EXT:DRV_LOAD bit.
4597 * For ASF and Pass Through versions of f/w this means that the
4598 * driver is no longer loaded.
4599 *
4600 */
4601static void
4602igb_release_hw_control(struct adapter *adapter)
4603{
4604	u32 ctrl_ext;
4605
4606	/* Let firmware take over control of the h/w */
4607	ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4608	E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
4609	    ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
4610}
4611
4612static int
4613igb_is_valid_ether_addr(uint8_t *addr)
4614{
4615	char zero_addr[6] = { 0, 0, 0, 0, 0, 0 };
4616
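	/* Reject group (multicast/broadcast) and all-zero addresses */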
4617	if ((addr[0] & 1) || (!bcmp(addr, zero_addr, ETHER_ADDR_LEN))) {
4618		return (FALSE);
4619	}
4620
4621	return (TRUE);
4622}
4623
4624
4625/*
4626 * Enable PCI Wake On Lan capability
4627 */
4628static void
4629igb_enable_wakeup(device_t dev)
4630{
4631	u16     cap, status;
4632	u8      id;
4633
4634	/* First find the capabilities pointer*/
4635	cap = pci_read_config(dev, PCIR_CAP_PTR, 2);
4636	/* Read the PM Capabilities */
4637	id = pci_read_config(dev, cap, 1);
4638	if (id != PCIY_PMG)     /* Something wrong */
4639		return;
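	/*
	** Note: only the first entry in the capability list is
	** examined here; it is expected to be the PM capability.
	*/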
4640	/* OK, we have the power capabilities, so
4641	   now get the status register */
4642	cap += PCIR_POWER_STATUS;
4643	status = pci_read_config(dev, cap, 2);
4644	status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE;
4645	pci_write_config(dev, cap, status, 2);
4646	return;
4647}
4648
4649static void
4650igb_led_func(void *arg, int onoff)
4651{
4652	struct adapter	*adapter = arg;
4653
4654	IGB_CORE_LOCK(adapter);
4655	if (onoff) {
4656		e1000_setup_led(&adapter->hw);
4657		e1000_led_on(&adapter->hw);
4658	} else {
4659		e1000_led_off(&adapter->hw);
4660		e1000_cleanup_led(&adapter->hw);
4661	}
4662	IGB_CORE_UNLOCK(adapter);
4663}
4664
4665/**********************************************************************
4666 *
4667 *  Update the board statistics counters.
4668 *
4669 **********************************************************************/
4670static void
4671igb_update_stats_counters(struct adapter *adapter)
4672{
4673	struct ifnet   *ifp;
4674
4675	if(adapter->hw.phy.media_type == e1000_media_type_copper ||
4676	   (E1000_READ_REG(&adapter->hw, E1000_STATUS) & E1000_STATUS_LU)) {
4677		adapter->stats.symerrs += E1000_READ_REG(&adapter->hw, E1000_SYMERRS);
4678		adapter->stats.sec += E1000_READ_REG(&adapter->hw, E1000_SEC);
4679	}
4680	adapter->stats.crcerrs += E1000_READ_REG(&adapter->hw, E1000_CRCERRS);
4681	adapter->stats.mpc += E1000_READ_REG(&adapter->hw, E1000_MPC);
4682	adapter->stats.scc += E1000_READ_REG(&adapter->hw, E1000_SCC);
4683	adapter->stats.ecol += E1000_READ_REG(&adapter->hw, E1000_ECOL);
4684
4685	adapter->stats.mcc += E1000_READ_REG(&adapter->hw, E1000_MCC);
4686	adapter->stats.latecol += E1000_READ_REG(&adapter->hw, E1000_LATECOL);
4687	adapter->stats.colc += E1000_READ_REG(&adapter->hw, E1000_COLC);
4688	adapter->stats.dc += E1000_READ_REG(&adapter->hw, E1000_DC);
4689	adapter->stats.rlec += E1000_READ_REG(&adapter->hw, E1000_RLEC);
4690	adapter->stats.xonrxc += E1000_READ_REG(&adapter->hw, E1000_XONRXC);
4691	adapter->stats.xontxc += E1000_READ_REG(&adapter->hw, E1000_XONTXC);
4692	adapter->stats.xoffrxc += E1000_READ_REG(&adapter->hw, E1000_XOFFRXC);
4693	adapter->stats.xofftxc += E1000_READ_REG(&adapter->hw, E1000_XOFFTXC);
4694	adapter->stats.fcruc += E1000_READ_REG(&adapter->hw, E1000_FCRUC);
4695	adapter->stats.prc64 += E1000_READ_REG(&adapter->hw, E1000_PRC64);
4696	adapter->stats.prc127 += E1000_READ_REG(&adapter->hw, E1000_PRC127);
4697	adapter->stats.prc255 += E1000_READ_REG(&adapter->hw, E1000_PRC255);
4698	adapter->stats.prc511 += E1000_READ_REG(&adapter->hw, E1000_PRC511);
4699	adapter->stats.prc1023 += E1000_READ_REG(&adapter->hw, E1000_PRC1023);
4700	adapter->stats.prc1522 += E1000_READ_REG(&adapter->hw, E1000_PRC1522);
4701	adapter->stats.gprc += E1000_READ_REG(&adapter->hw, E1000_GPRC);
4702	adapter->stats.bprc += E1000_READ_REG(&adapter->hw, E1000_BPRC);
4703	adapter->stats.mprc += E1000_READ_REG(&adapter->hw, E1000_MPRC);
4704	adapter->stats.gptc += E1000_READ_REG(&adapter->hw, E1000_GPTC);
4705
4706	/* For the 64-bit byte counters the low dword must be read first. */
4707	/* Both registers clear on the read of the high dword */
4708
4709	adapter->stats.gorc += E1000_READ_REG(&adapter->hw, E1000_GORCH);
4710	adapter->stats.gotc += E1000_READ_REG(&adapter->hw, E1000_GOTCH);
4711
4712	adapter->stats.rnbc += E1000_READ_REG(&adapter->hw, E1000_RNBC);
4713	adapter->stats.ruc += E1000_READ_REG(&adapter->hw, E1000_RUC);
4714	adapter->stats.rfc += E1000_READ_REG(&adapter->hw, E1000_RFC);
4715	adapter->stats.roc += E1000_READ_REG(&adapter->hw, E1000_ROC);
4716	adapter->stats.rjc += E1000_READ_REG(&adapter->hw, E1000_RJC);
4717
4718	adapter->stats.tor += E1000_READ_REG(&adapter->hw, E1000_TORH);
4719	adapter->stats.tot += E1000_READ_REG(&adapter->hw, E1000_TOTH);
4720
4721	adapter->stats.tpr += E1000_READ_REG(&adapter->hw, E1000_TPR);
4722	adapter->stats.tpt += E1000_READ_REG(&adapter->hw, E1000_TPT);
4723	adapter->stats.ptc64 += E1000_READ_REG(&adapter->hw, E1000_PTC64);
4724	adapter->stats.ptc127 += E1000_READ_REG(&adapter->hw, E1000_PTC127);
4725	adapter->stats.ptc255 += E1000_READ_REG(&adapter->hw, E1000_PTC255);
4726	adapter->stats.ptc511 += E1000_READ_REG(&adapter->hw, E1000_PTC511);
4727	adapter->stats.ptc1023 += E1000_READ_REG(&adapter->hw, E1000_PTC1023);
4728	adapter->stats.ptc1522 += E1000_READ_REG(&adapter->hw, E1000_PTC1522);
4729	adapter->stats.mptc += E1000_READ_REG(&adapter->hw, E1000_MPTC);
4730	adapter->stats.bptc += E1000_READ_REG(&adapter->hw, E1000_BPTC);
4731
4732	adapter->stats.algnerrc +=
4733		E1000_READ_REG(&adapter->hw, E1000_ALGNERRC);
4734	adapter->stats.rxerrc +=
4735		E1000_READ_REG(&adapter->hw, E1000_RXERRC);
4736	adapter->stats.tncrs +=
4737		E1000_READ_REG(&adapter->hw, E1000_TNCRS);
4738	adapter->stats.cexterr +=
4739		E1000_READ_REG(&adapter->hw, E1000_CEXTERR);
4740	adapter->stats.tsctc +=
4741		E1000_READ_REG(&adapter->hw, E1000_TSCTC);
4742	adapter->stats.tsctfc +=
4743		E1000_READ_REG(&adapter->hw, E1000_TSCTFC);
4744	ifp = adapter->ifp;
4745
4746	ifp->if_collisions = adapter->stats.colc;
4747
4748	/* Rx Errors */
4749	ifp->if_ierrors = adapter->dropped_pkts + adapter->stats.rxerrc +
4750	    adapter->stats.crcerrs + adapter->stats.algnerrc +
4751	    adapter->stats.ruc + adapter->stats.roc +
4752	    adapter->stats.mpc + adapter->stats.cexterr;
4753
4754	/* Tx Errors */
4755	ifp->if_oerrors = adapter->stats.ecol +
4756	    adapter->stats.latecol + adapter->watchdog_events;
4757}
4758
4759
4760/**********************************************************************
4761 *
4762 *  This routine is called only when igb_display_debug_stats is enabled.
4763 *  This routine provides a way to take a look at important statistics
4764 *  maintained by the driver and hardware.
4765 *
4766 **********************************************************************/
4767static void
4768igb_print_debug_info(struct adapter *adapter)
4769{
4770	device_t dev = adapter->dev;
4771	struct igb_queue *que = adapter->queues;
4772	struct rx_ring *rxr = adapter->rx_rings;
4773	struct tx_ring *txr = adapter->tx_rings;
4774	uint8_t *hw_addr = adapter->hw.hw_addr;
4775
4776	device_printf(dev, "Adapter hardware address = %p \n", hw_addr);
4777	device_printf(dev, "CTRL = 0x%x RCTL = 0x%x \n",
4778	    E1000_READ_REG(&adapter->hw, E1000_CTRL),
4779	    E1000_READ_REG(&adapter->hw, E1000_RCTL));
4780
4781#if	(DEBUG_HW > 0)  /* Dont output these errors normally */
4782	device_printf(dev, "IMS = 0x%x EIMS = 0x%x \n",
4783	    E1000_READ_REG(&adapter->hw, E1000_IMS),
4784	    E1000_READ_REG(&adapter->hw, E1000_EIMS));
4785#endif
4786
4787	device_printf(dev, "Packet buffer = Tx=%dk Rx=%dk \n",
4788	    ((E1000_READ_REG(&adapter->hw, E1000_PBA) & 0xffff0000) >> 16),
4789	    (E1000_READ_REG(&adapter->hw, E1000_PBA) & 0xffff) );
4790	device_printf(dev, "Flow control watermarks high = %d low = %d\n",
4791	    adapter->hw.fc.high_water,
4792	    adapter->hw.fc.low_water);
4793
4794	for (int i = 0; i < adapter->num_queues; i++, rxr++, txr++) {
4795		device_printf(dev, "Queue(%d) tdh = %d, tdt = %d  ", i,
4796		    E1000_READ_REG(&adapter->hw, E1000_TDH(i)),
4797		    E1000_READ_REG(&adapter->hw, E1000_TDT(i)));
4798		device_printf(dev, "rdh = %d, rdt = %d\n",
4799		    E1000_READ_REG(&adapter->hw, E1000_RDH(i)),
4800		    E1000_READ_REG(&adapter->hw, E1000_RDT(i)));
4801		device_printf(dev, "TX(%d) no descriptors avail event = %lld\n",
4802		    txr->me, (long long)txr->no_desc_avail);
4803		device_printf(dev, "TX(%d) Packets sent = %lld\n",
4804		    txr->me, (long long)txr->tx_packets);
4805		device_printf(dev, "RX(%d) Packets received = %lld  ",
4806		    rxr->me, (long long)rxr->rx_packets);
4807	}
4808
	rxr = adapter->rx_rings;	/* Rewind; the loop above advanced it */
4809	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4810		struct lro_ctrl *lro = &rxr->lro;
4811		device_printf(dev, "Queue(%d) rdh = %d, rdt = %d\n", i,
4812		    E1000_READ_REG(&adapter->hw, E1000_RDH(i)),
4813		    E1000_READ_REG(&adapter->hw, E1000_RDT(i)));
4814		device_printf(dev, "RX(%d) Packets received = %lld\n", rxr->me,
4815		    (long long)rxr->rx_packets);
4816		device_printf(dev, " Split Packets = %lld ",
4817		    (long long)rxr->rx_split_packets);
4818		device_printf(dev, " Byte count = %lld\n",
4819		    (long long)rxr->rx_bytes);
4820		device_printf(dev,"RX(%d) LRO Queued= %d  ",
4821		    i, lro->lro_queued);
4822		device_printf(dev,"LRO Flushed= %d\n",lro->lro_flushed);
4823	}
4824
4825	for (int i = 0; i < adapter->num_queues; i++, que++)
4826		device_printf(dev,"QUE(%d) IRQs = %llx\n",
4827		    i, (long long)que->irqs);
4828
4829	device_printf(dev, "LINK MSIX IRQ Handled = %u\n", adapter->link_irq);
4830	device_printf(dev, "Mbuf defrag failed = %ld\n",
4831	    adapter->mbuf_defrag_failed);
4832	device_printf(dev, "Std mbuf header failed = %ld\n",
4833	    adapter->mbuf_header_failed);
4834	device_printf(dev, "Std mbuf packet failed = %ld\n",
4835	    adapter->mbuf_packet_failed);
4836	device_printf(dev, "Driver dropped packets = %ld\n",
4837	    adapter->dropped_pkts);
4838	device_printf(dev, "Driver tx dma failure in xmit = %ld\n",
4839		adapter->no_tx_dma_setup);
4840}
4841
4842static void
4843igb_print_hw_stats(struct adapter *adapter)
4844{
4845	device_t dev = adapter->dev;
4846
4847	device_printf(dev, "Excessive collisions = %lld\n",
4848	    (long long)adapter->stats.ecol);
4849#if	(DEBUG_HW > 0)  /* Don't output these errors normally */
4850	device_printf(dev, "Symbol errors = %lld\n",
4851	    (long long)adapter->stats.symerrs);
4852#endif
4853	device_printf(dev, "Sequence errors = %lld\n",
4854	    (long long)adapter->stats.sec);
4855	device_printf(dev, "Defer count = %lld\n",
4856	    (long long)adapter->stats.dc);
4857	device_printf(dev, "Missed Packets = %lld\n",
4858	    (long long)adapter->stats.mpc);
4859	device_printf(dev, "Receive No Buffers = %lld\n",
4860	    (long long)adapter->stats.rnbc);
4861	/* RLEC is inaccurate on some hardware, calculate our own. */
4862	device_printf(dev, "Receive Length Errors = %lld\n",
4863	    ((long long)adapter->stats.roc + (long long)adapter->stats.ruc));
4864	device_printf(dev, "Receive errors = %lld\n",
4865	    (long long)adapter->stats.rxerrc);
4866	device_printf(dev, "Crc errors = %lld\n",
4867	    (long long)adapter->stats.crcerrs);
4868	device_printf(dev, "Alignment errors = %lld\n",
4869	    (long long)adapter->stats.algnerrc);
4870	/* On 82575 these are collision counts */
4871	device_printf(dev, "Collision/Carrier extension errors = %lld\n",
4872	    (long long)adapter->stats.cexterr);
4873	device_printf(dev, "RX overruns = %ld\n", adapter->rx_overruns);
4874	device_printf(dev, "watchdog timeouts = %ld\n",
4875	    adapter->watchdog_events);
4876	device_printf(dev, "XON Rcvd = %lld\n",
4877	    (long long)adapter->stats.xonrxc);
4878	device_printf(dev, "XON Xmtd = %lld\n",
4879	    (long long)adapter->stats.xontxc);
4880	device_printf(dev, "XOFF Rcvd = %lld\n",
4881	    (long long)adapter->stats.xoffrxc);
4882	device_printf(dev, "XOFF Xmtd = %lld\n",
4883	    (long long)adapter->stats.xofftxc);
4884	device_printf(dev, "Good Packets Rcvd = %lld\n",
4885	    (long long)adapter->stats.gprc);
4886	device_printf(dev, "Good Packets Xmtd = %lld\n",
4887	    (long long)adapter->stats.gptc);
4888	device_printf(dev, "TSO Contexts Xmtd = %lld\n",
4889	    (long long)adapter->stats.tsctc);
4890	device_printf(dev, "TSO Contexts Failed = %lld\n",
4891	    (long long)adapter->stats.tsctfc);
4892}
4893
4894/**********************************************************************
4895 *
4896 *  This routine provides a way to dump out the adapter eeprom,
4897 *  often a useful debug/service tool. This only dumps the first
4898 *  32 words; the data that matters lies within that range.
4899 *
4900 **********************************************************************/
4901static void
4902igb_print_nvm_info(struct adapter *adapter)
4903{
4904	u16	eeprom_data;
4905	int	i, j, row = 0;
4906
4907	/* It's a bit crude, but it gets the job done */
4908	printf("\nInterface EEPROM Dump:\n");
4909	printf("Offset\n0x0000  ");
4910	for (i = 0, j = 0; i < 32; i++, j++) {
4911		if (j == 8) { /* Make the offset block */
4912			j = 0; ++row;
4913			printf("\n0x00%x0  ",row);
4914		}
4915		e1000_read_nvm(&adapter->hw, i, 1, &eeprom_data);
4916		printf("%04x ", eeprom_data);
4917	}
4918	printf("\n");
4919}
4920
4921static int
4922igb_sysctl_debug_info(SYSCTL_HANDLER_ARGS)
4923{
4924	struct adapter *adapter;
4925	int error;
4926	int result;
4927
4928	result = -1;
4929	error = sysctl_handle_int(oidp, &result, 0, req);
4930
4931	if (error || !req->newptr)
4932		return (error);
4933
4934	if (result == 1) {
4935		adapter = (struct adapter *)arg1;
4936		igb_print_debug_info(adapter);
4937	}
4938	/*
4939	 * This value will cause a hex dump of the
4940	 * first 32 16-bit words of the EEPROM to
4941	 * the screen.
4942	 */
4943	if (result == 2) {
4944		adapter = (struct adapter *)arg1;
4945		igb_print_nvm_info(adapter);
4946        }
4947
4948	return (error);
4949}
4950
4951
4952static int
4953igb_sysctl_stats(SYSCTL_HANDLER_ARGS)
4954{
4955	struct adapter *adapter;
4956	int error;
4957	int result;
4958
4959	result = -1;
4960	error = sysctl_handle_int(oidp, &result, 0, req);
4961
4962	if (error || !req->newptr)
4963		return (error);
4964
4965	if (result == 1) {
4966		adapter = (struct adapter *)arg1;
4967		igb_print_hw_stats(adapter);
4968	}
4969
4970	return (error);
4971}
4972
4973static void
4974igb_add_rx_process_limit(struct adapter *adapter, const char *name,
4975	const char *description, int *limit, int value)
4976{
4977	*limit = value;
4978	SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
4979	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
4980	    OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW, limit, value, description);
4981}
4982