/******************************************************************************

  Copyright (c) 2001-2010, Intel Corporation
  All rights reserved.

  Redistribution and use in source and binary forms, with or without
  modification, are permitted provided that the following conditions are met:

   1. Redistributions of source code must retain the above copyright notice,
      this list of conditions and the following disclaimer.

   2. Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.

   3. Neither the name of the Intel Corporation nor the names of its
      contributors may be used to endorse or promote products derived from
      this software without specific prior written permission.

  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  POSSIBILITY OF SUCH DAMAGE.

******************************************************************************/
/*$FreeBSD: head/sys/dev/e1000/if_igb.c 206001 2010-03-31 20:43:24Z marius $*/


#ifdef HAVE_KERNEL_OPTION_HEADERS
#include "opt_device_polling.h"
#include "opt_inet.h"
#include "opt_altq.h"
#endif

#include <sys/param.h>
#include <sys/systm.h>
#if __FreeBSD_version >= 800000
#include <sys/buf_ring.h>
#endif
#include <sys/bus.h>
#include <sys/endian.h>
#include <sys/kernel.h>
#include <sys/kthread.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/module.h>
#include <sys/rman.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/sysctl.h>
#include <sys/taskqueue.h>
#include <sys/eventhandler.h>
#include <sys/pcpu.h>
#include <sys/smp.h>
#include <machine/smp.h>
#include <machine/bus.h>
#include <machine/resource.h>

#include <net/bpf.h>
#include <net/ethernet.h>
#include <net/if.h>
#include <net/if_arp.h>
#include <net/if_dl.h>
#include <net/if_media.h>

#include <net/if_types.h>
#include <net/if_vlan_var.h>

#include <netinet/in_systm.h>
#include <netinet/in.h>
#include <netinet/if_ether.h>
#include <netinet/ip.h>
#include <netinet/ip6.h>
#include <netinet/tcp.h>
#include <netinet/tcp_lro.h>
#include <netinet/udp.h>

#include <machine/in_cksum.h>
#include <dev/led/led.h>
#include <dev/pci/pcivar.h>
#include <dev/pci/pcireg.h>

#include "e1000_api.h"
#include "e1000_82575.h"
#include "if_igb.h"

/*********************************************************************
 *  Set this to one to display debug statistics
 *********************************************************************/
int	igb_display_debug_stats = 0;

/*********************************************************************
 *  Driver version:
 *********************************************************************/
char igb_driver_version[] = "version - 1.9.3";


/*********************************************************************
 *  PCI Device ID Table
 *
 *  Used by probe to select which devices the driver loads on.
 *  The last field stores an index into igb_strings.
 *  The last entry must be all 0s.
 *
 *  { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
 *********************************************************************/

static igb_vendor_info_t igb_vendor_info_array[] =
{
	{ 0x8086, E1000_DEV_ID_82575EB_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82575EB_FIBER_SERDES,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82575GB_QUAD_COPPER,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82576,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82576_NS,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82576_NS_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82576_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82576_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82576_SERDES_QUAD,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82576_QUAD_COPPER,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82580_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82580_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82580_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82580_SGMII,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82580_COPPER_DUAL,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	/* required last entry */
	{ 0, 0, 0, 0, 0}
};

/*********************************************************************
 *  Table of branding strings for all supported NICs.
 *********************************************************************/

static char *igb_strings[] = {
	"Intel(R) PRO/1000 Network Connection"
};

/*********************************************************************
 *  Function prototypes
 *********************************************************************/
static int	igb_probe(device_t);
static int	igb_attach(device_t);
static int	igb_detach(device_t);
static int	igb_shutdown(device_t);
static int	igb_suspend(device_t);
static int	igb_resume(device_t);
static void	igb_start(struct ifnet *);
static void	igb_start_locked(struct tx_ring *, struct ifnet *ifp);
#if __FreeBSD_version >= 800000
static int	igb_mq_start(struct ifnet *, struct mbuf *);
static int	igb_mq_start_locked(struct ifnet *,
		    struct tx_ring *, struct mbuf *);
static void	igb_qflush(struct ifnet *);
#endif
static int	igb_ioctl(struct ifnet *, u_long, caddr_t);
static void	igb_init(void *);
static void	igb_init_locked(struct adapter *);
static void	igb_stop(void *);
static void	igb_media_status(struct ifnet *, struct ifmediareq *);
static int	igb_media_change(struct ifnet *);
static void	igb_identify_hardware(struct adapter *);
static int	igb_allocate_pci_resources(struct adapter *);
static int	igb_allocate_msix(struct adapter *);
static int	igb_allocate_legacy(struct adapter *);
static int	igb_setup_msix(struct adapter *);
static void	igb_free_pci_resources(struct adapter *);
static void	igb_local_timer(void *);
static void	igb_reset(struct adapter *);
static void	igb_setup_interface(device_t, struct adapter *);
static int	igb_allocate_queues(struct adapter *);
static void	igb_configure_queues(struct adapter *);

static int	igb_allocate_transmit_buffers(struct tx_ring *);
static void	igb_setup_transmit_structures(struct adapter *);
static void	igb_setup_transmit_ring(struct tx_ring *);
static void	igb_initialize_transmit_units(struct adapter *);
static void	igb_free_transmit_structures(struct adapter *);
static void	igb_free_transmit_buffers(struct tx_ring *);

static int	igb_allocate_receive_buffers(struct rx_ring *);
static int	igb_setup_receive_structures(struct adapter *);
static int	igb_setup_receive_ring(struct rx_ring *);
static void	igb_initialize_receive_units(struct adapter *);
static void	igb_free_receive_structures(struct adapter *);
static void	igb_free_receive_buffers(struct rx_ring *);
static void	igb_free_receive_ring(struct rx_ring *);

static void	igb_enable_intr(struct adapter *);
static void	igb_disable_intr(struct adapter *);
static void	igb_update_stats_counters(struct adapter *);
static bool	igb_txeof(struct tx_ring *);

static __inline	void igb_rx_discard(struct rx_ring *, int);
static __inline void igb_rx_input(struct rx_ring *,
		    struct ifnet *, struct mbuf *, u32);

static bool	igb_rxeof(struct igb_queue *, int);
static void	igb_rx_checksum(u32, struct mbuf *, u32);
static int	igb_tx_ctx_setup(struct tx_ring *, struct mbuf *);
static bool	igb_tso_setup(struct tx_ring *, struct mbuf *, u32 *);
static void	igb_set_promisc(struct adapter *);
static void	igb_disable_promisc(struct adapter *);
static void	igb_set_multi(struct adapter *);
static void	igb_print_hw_stats(struct adapter *);
static void	igb_update_link_status(struct adapter *);
static void	igb_refresh_mbufs(struct rx_ring *, int);

static void	igb_register_vlan(void *, struct ifnet *, u16);
static void	igb_unregister_vlan(void *, struct ifnet *, u16);
static void	igb_setup_vlan_hw_support(struct adapter *);

static int	igb_xmit(struct tx_ring *, struct mbuf **);
static int	igb_dma_malloc(struct adapter *, bus_size_t,
		    struct igb_dma_alloc *, int);
static void	igb_dma_free(struct adapter *, struct igb_dma_alloc *);
static void	igb_print_debug_info(struct adapter *);
static void	igb_print_nvm_info(struct adapter *);
static int 	igb_is_valid_ether_addr(u8 *);
static int	igb_sysctl_stats(SYSCTL_HANDLER_ARGS);
static int	igb_sysctl_debug_info(SYSCTL_HANDLER_ARGS);
/* Management and WOL Support */
static void	igb_init_manageability(struct adapter *);
static void	igb_release_manageability(struct adapter *);
static void     igb_get_hw_control(struct adapter *);
static void     igb_release_hw_control(struct adapter *);
static void     igb_enable_wakeup(device_t);
static void     igb_led_func(void *, int);

static int	igb_irq_fast(void *);
static void	igb_add_rx_process_limit(struct adapter *, const char *,
		    const char *, int *, int);
static void	igb_handle_rxtx(void *context, int pending);
static void	igb_handle_que(void *context, int pending);
static void	igb_handle_link(void *context, int pending);

/* These are MSIX only irq handlers */
static void	igb_msix_que(void *);
static void	igb_msix_link(void *);

#ifdef DEVICE_POLLING
static poll_handler_t igb_poll;
#endif /* DEVICE_POLLING */

/*********************************************************************
 *  FreeBSD Device Interface Entry Points
 *********************************************************************/

static device_method_t igb_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe, igb_probe),
	DEVMETHOD(device_attach, igb_attach),
	DEVMETHOD(device_detach, igb_detach),
	DEVMETHOD(device_shutdown, igb_shutdown),
	DEVMETHOD(device_suspend, igb_suspend),
	DEVMETHOD(device_resume, igb_resume),
	{0, 0}
};

static driver_t igb_driver = {
	"igb", igb_methods, sizeof(struct adapter),
};

static devclass_t igb_devclass;
DRIVER_MODULE(igb, pci, igb_driver, igb_devclass, 0, 0);
MODULE_DEPEND(igb, pci, 1, 1, 1);
MODULE_DEPEND(igb, ether, 1, 1, 1);

/*********************************************************************
 *  Tunable default values.
 *********************************************************************/

/* Descriptor defaults */
static int igb_rxd = IGB_DEFAULT_RXD;
static int igb_txd = IGB_DEFAULT_TXD;
TUNABLE_INT("hw.igb.rxd", &igb_rxd);
TUNABLE_INT("hw.igb.txd", &igb_txd);

/*
** AIM: Adaptive Interrupt Moderation
** which means that the interrupt rate
** is varied over time based on the
** traffic for that interrupt vector
*/
static int igb_enable_aim = TRUE;
TUNABLE_INT("hw.igb.enable_aim", &igb_enable_aim);

/*
 * MSIX should be the default for best performance,
 * but this allows it to be forced off for testing.
 */
static int igb_enable_msix = 1;
TUNABLE_INT("hw.igb.enable_msix", &igb_enable_msix);

/*
 * Header split has been beneficial in many of the
 * circumstances tested, but there have been some
 * stability issues, so the default is off.
 */
static bool igb_header_split = FALSE;
TUNABLE_INT("hw.igb.hdr_split", &igb_header_split);

/*
** This will autoconfigure based on
** the number of CPUs if left at 0.
*/
static int igb_num_queues = 0;
TUNABLE_INT("hw.igb.num_queues", &igb_num_queues);

/* How many packets rxeof tries to clean at a time */
static int igb_rx_process_limit = 100;
TUNABLE_INT("hw.igb.rx_process_limit", &igb_rx_process_limit);

/* Flow control setting - default to FULL */
static int igb_fc_setting = e1000_fc_full;
TUNABLE_INT("hw.igb.fc_setting", &igb_fc_setting);
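
/*
** Example loader.conf(5) settings for the tunables above.  The values
** below are illustrative only; descriptor counts are still subject to
** the IGB_MIN/IGB_MAX and IGB_DBA_ALIGN checks in igb_attach():
**
**	hw.igb.rxd="2048"
**	hw.igb.txd="2048"
**	hw.igb.enable_aim="1"
**	hw.igb.enable_msix="1"
**	hw.igb.num_queues="0"		(0 = autoconfigure from CPU count)
**	hw.igb.rx_process_limit="100"
**	hw.igb.fc_setting="3"		(3 = e1000_fc_full)
*/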

/*
** Shadow VFTA table; this is needed because
** the real filter table gets cleared during
** a soft reset and the driver needs to be able
** to repopulate it.
*/
static u32 igb_shadow_vfta[IGB_VFTA_SIZE];


/*********************************************************************
 *  Device identification routine
 *
 *  igb_probe determines whether the driver should be loaded on
 *  an adapter based on the PCI vendor/device ID of the adapter.
 *
 *  return BUS_PROBE_DEFAULT on success, positive on failure
 *********************************************************************/

static int
igb_probe(device_t dev)
{
	char		adapter_name[60];
	uint16_t	pci_vendor_id = 0;
	uint16_t	pci_device_id = 0;
	uint16_t	pci_subvendor_id = 0;
	uint16_t	pci_subdevice_id = 0;
	igb_vendor_info_t *ent;

	INIT_DEBUGOUT("igb_probe: begin");

	pci_vendor_id = pci_get_vendor(dev);
	if (pci_vendor_id != IGB_VENDOR_ID)
		return (ENXIO);

	pci_device_id = pci_get_device(dev);
	pci_subvendor_id = pci_get_subvendor(dev);
	pci_subdevice_id = pci_get_subdevice(dev);

	ent = igb_vendor_info_array;
	while (ent->vendor_id != 0) {
		if ((pci_vendor_id == ent->vendor_id) &&
		    (pci_device_id == ent->device_id) &&

		    ((pci_subvendor_id == ent->subvendor_id) ||
		    (ent->subvendor_id == PCI_ANY_ID)) &&

		    ((pci_subdevice_id == ent->subdevice_id) ||
		    (ent->subdevice_id == PCI_ANY_ID))) {
			sprintf(adapter_name, "%s %s",
				igb_strings[ent->index],
				igb_driver_version);
			device_set_desc_copy(dev, adapter_name);
			return (BUS_PROBE_DEFAULT);
		}
		ent++;
	}

	return (ENXIO);
}

/*********************************************************************
 *  Device initialization routine
 *
 *  The attach entry point is called when the driver is being loaded.
 *  This routine identifies the type of hardware, allocates all resources
 *  and initializes the hardware.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/

static int
igb_attach(device_t dev)
{
	struct adapter	*adapter;
	int		error = 0;
	u16		eeprom_data;

	INIT_DEBUGOUT("igb_attach: begin");

	adapter = device_get_softc(dev);
	adapter->dev = adapter->osdep.dev = dev;
	IGB_CORE_LOCK_INIT(adapter, device_get_nameunit(dev));

	/* SYSCTL stuff */
	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
	    OID_AUTO, "debug", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
	    igb_sysctl_debug_info, "I", "Debug Information");

	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
	    OID_AUTO, "stats", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
	    igb_sysctl_stats, "I", "Statistics");

	SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
	    OID_AUTO, "flow_control", CTLTYPE_INT|CTLFLAG_RW,
	    &igb_fc_setting, 0, "Flow Control");

	SYSCTL_ADD_INT(device_get_sysctl_ctx(dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
	    OID_AUTO, "enable_aim", CTLTYPE_INT|CTLFLAG_RW,
	    &igb_enable_aim, 1, "Interrupt Moderation");

	callout_init_mtx(&adapter->timer, &adapter->core_mtx, 0);

	/* Determine hardware and mac info */
	igb_identify_hardware(adapter);

	/* Setup PCI resources */
	if (igb_allocate_pci_resources(adapter)) {
		device_printf(dev, "Allocation of PCI resources failed\n");
		error = ENXIO;
		goto err_pci;
	}

	/* Do Shared Code initialization */
	if (e1000_setup_init_funcs(&adapter->hw, TRUE)) {
		device_printf(dev, "Setup of Shared code failed\n");
		error = ENXIO;
		goto err_pci;
	}

	e1000_get_bus_info(&adapter->hw);

	/* Sysctls for limiting the amount of work done in the taskqueue */
	igb_add_rx_process_limit(adapter, "rx_processing_limit",
	    "max number of rx packets to process", &adapter->rx_process_limit,
	    igb_rx_process_limit);

	/*
	 * Validate the number of transmit and receive descriptors. They
	 * must not exceed the hardware maximum and must be a multiple
	 * of IGB_DBA_ALIGN.
	 */
	if (((igb_txd * sizeof(struct e1000_tx_desc)) % IGB_DBA_ALIGN) != 0 ||
	    (igb_txd > IGB_MAX_TXD) || (igb_txd < IGB_MIN_TXD)) {
		device_printf(dev, "Using %d TX descriptors instead of %d!\n",
		    IGB_DEFAULT_TXD, igb_txd);
		adapter->num_tx_desc = IGB_DEFAULT_TXD;
	} else
		adapter->num_tx_desc = igb_txd;
	if (((igb_rxd * sizeof(struct e1000_rx_desc)) % IGB_DBA_ALIGN) != 0 ||
	    (igb_rxd > IGB_MAX_RXD) || (igb_rxd < IGB_MIN_RXD)) {
		device_printf(dev, "Using %d RX descriptors instead of %d!\n",
		    IGB_DEFAULT_RXD, igb_rxd);
		adapter->num_rx_desc = IGB_DEFAULT_RXD;
	} else
		adapter->num_rx_desc = igb_rxd;

	adapter->hw.mac.autoneg = DO_AUTO_NEG;
	adapter->hw.phy.autoneg_wait_to_complete = FALSE;
	adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;

	/* Copper options */
	if (adapter->hw.phy.media_type == e1000_media_type_copper) {
		adapter->hw.phy.mdix = AUTO_ALL_MODES;
		adapter->hw.phy.disable_polarity_correction = FALSE;
		adapter->hw.phy.ms_type = IGB_MASTER_SLAVE;
	}

	/*
	 * Set the frame limits assuming
	 * standard ethernet sized frames.
	 */
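	/*
	 * (With the standard defines this works out to a 1518-byte
	 *  maximum and a 64-byte minimum frame, both including the
	 *  4-byte FCS; the SIOCSIFMTU handler below recomputes
	 *  max_frame_size when the MTU changes.)
	 */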
	adapter->max_frame_size = ETHERMTU + ETHER_HDR_LEN + ETHERNET_FCS_SIZE;
	adapter->min_frame_size = ETH_ZLEN + ETHERNET_FCS_SIZE;

	/*
	** Allocate and Setup Queues
	*/
	if (igb_allocate_queues(adapter)) {
		error = ENOMEM;
		goto err_pci;
	}

	/*
	** Start from a known state; this is
	** important for reading the NVM and
	** MAC address.
	*/
	e1000_reset_hw(&adapter->hw);

	/* Make sure we have a good EEPROM before we read from it */
	if (e1000_validate_nvm_checksum(&adapter->hw) < 0) {
		/*
		** Some PCI-E parts fail the first check due to
		** the link being in a sleep state; call it again.
		** If it fails a second time, it's a real issue.
		*/
		if (e1000_validate_nvm_checksum(&adapter->hw) < 0) {
			device_printf(dev,
			    "The EEPROM Checksum Is Not Valid\n");
			error = EIO;
			goto err_late;
		}
	}

	/*
	** Copy the permanent MAC address out of the EEPROM
	*/
	if (e1000_read_mac_addr(&adapter->hw) < 0) {
		device_printf(dev, "EEPROM read error while reading MAC"
		    " address\n");
		error = EIO;
		goto err_late;
	}
	/* Check its sanity */
	if (!igb_is_valid_ether_addr(adapter->hw.mac.addr)) {
		device_printf(dev, "Invalid MAC address\n");
		error = EIO;
		goto err_late;
	}

	/*
	** Configure Interrupts
	*/
	if ((adapter->msix > 1) && (igb_enable_msix))
		error = igb_allocate_msix(adapter);
	else /* MSI or Legacy */
		error = igb_allocate_legacy(adapter);
	if (error)
		goto err_late;

	/* Setup OS specific network interface */
	igb_setup_interface(dev, adapter);

	/* Now get a good starting state */
	igb_reset(adapter);

	/* Initialize statistics */
	igb_update_stats_counters(adapter);

	adapter->hw.mac.get_link_status = 1;
	igb_update_link_status(adapter);

	/* Indicate SOL/IDER usage */
	if (e1000_check_reset_block(&adapter->hw))
		device_printf(dev,
		    "PHY reset is blocked due to SOL/IDER session.\n");

	/* Determine if we have to control management hardware */
	adapter->has_manage = e1000_enable_mng_pass_thru(&adapter->hw);

	/*
	 * Setup Wake-on-Lan
	 */
	/* APME bit in EEPROM is mapped to WUC.APME */
	eeprom_data = E1000_READ_REG(&adapter->hw, E1000_WUC) & E1000_WUC_APME;
	if (eeprom_data)
		adapter->wol = E1000_WUFC_MAG;

	/* Register for VLAN events */
	adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
	     igb_register_vlan, adapter, EVENTHANDLER_PRI_FIRST);
	adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
	     igb_unregister_vlan, adapter, EVENTHANDLER_PRI_FIRST);

	/* Tell the stack that the interface is not active */
	adapter->ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);

	adapter->led_dev = led_create(igb_led_func, adapter,
	    device_get_nameunit(dev));

	INIT_DEBUGOUT("igb_attach: end");

	return (0);

err_late:
	igb_free_transmit_structures(adapter);
	igb_free_receive_structures(adapter);
	igb_release_hw_control(adapter);
err_pci:
	igb_free_pci_resources(adapter);
	IGB_CORE_LOCK_DESTROY(adapter);

	return (error);
}

/*********************************************************************
 *  Device removal routine
 *
 *  The detach entry point is called when the driver is being removed.
 *  This routine stops the adapter and deallocates all the resources
 *  that were allocated for driver operation.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/

static int
igb_detach(device_t dev)
{
	struct adapter	*adapter = device_get_softc(dev);
	struct ifnet	*ifp = adapter->ifp;

	INIT_DEBUGOUT("igb_detach: begin");

	/* Make sure VLANS are not using driver */
	if (adapter->ifp->if_vlantrunk != NULL) {
		device_printf(dev,"Vlan in use, detach first\n");
		return (EBUSY);
	}

	if (adapter->led_dev != NULL)
		led_destroy(adapter->led_dev);

#ifdef DEVICE_POLLING
	if (ifp->if_capenable & IFCAP_POLLING)
		ether_poll_deregister(ifp);
#endif

	IGB_CORE_LOCK(adapter);
	adapter->in_detach = 1;
	igb_stop(adapter);
	IGB_CORE_UNLOCK(adapter);

	e1000_phy_hw_reset(&adapter->hw);

	/* Give control back to firmware */
	igb_release_manageability(adapter);
	igb_release_hw_control(adapter);

	if (adapter->wol) {
		E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
		E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
		igb_enable_wakeup(dev);
	}

	/* Unregister VLAN events */
	if (adapter->vlan_attach != NULL)
		EVENTHANDLER_DEREGISTER(vlan_config, adapter->vlan_attach);
	if (adapter->vlan_detach != NULL)
		EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach);

	ether_ifdetach(adapter->ifp);

	callout_drain(&adapter->timer);

	igb_free_pci_resources(adapter);
	bus_generic_detach(dev);
	if_free(ifp);

	igb_free_transmit_structures(adapter);
	igb_free_receive_structures(adapter);

	IGB_CORE_LOCK_DESTROY(adapter);

	return (0);
}

/*********************************************************************
 *
 *  Shutdown entry point
 *
 **********************************************************************/

static int
igb_shutdown(device_t dev)
{
	return igb_suspend(dev);
}

/*
 * Suspend/resume device methods.
 */
static int
igb_suspend(device_t dev)
{
	struct adapter *adapter = device_get_softc(dev);

	IGB_CORE_LOCK(adapter);

	igb_stop(adapter);

        igb_release_manageability(adapter);
	igb_release_hw_control(adapter);

        if (adapter->wol) {
                E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
                E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
                igb_enable_wakeup(dev);
        }

	IGB_CORE_UNLOCK(adapter);

	return bus_generic_suspend(dev);
}

static int
igb_resume(device_t dev)
{
	struct adapter *adapter = device_get_softc(dev);
	struct ifnet *ifp = adapter->ifp;

	IGB_CORE_LOCK(adapter);
	igb_init_locked(adapter);
	igb_init_manageability(adapter);

	if ((ifp->if_flags & IFF_UP) &&
	    (ifp->if_drv_flags & IFF_DRV_RUNNING))
		igb_start(ifp);

	IGB_CORE_UNLOCK(adapter);

	return bus_generic_resume(dev);
}


/*********************************************************************
 *  Transmit entry point
 *
 *  igb_start is called by the stack to initiate a transmit.
 *  The driver will remain in this routine as long as there are
 *  packets to transmit and transmit resources are available.
 *  If resources are not available, the stack is notified and
 *  the packet is requeued.
 **********************************************************************/

static void
igb_start_locked(struct tx_ring *txr, struct ifnet *ifp)
{
	struct adapter	*adapter = ifp->if_softc;
	struct mbuf	*m_head;

	IGB_TX_LOCK_ASSERT(txr);

	if ((ifp->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
	    IFF_DRV_RUNNING)
		return;
	if (!adapter->link_active)
		return;

	while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {

		IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
		if (m_head == NULL)
			break;
		/*
		 *  Encapsulation can modify our pointer, and/or make it
		 *  NULL on failure.  In that event, we can't requeue.
		 */
		if (igb_xmit(txr, &m_head)) {
			if (m_head == NULL)
				break;
			ifp->if_drv_flags |= IFF_DRV_OACTIVE;
			IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
			break;
		}

		/* Send a copy of the frame to the BPF listener */
		ETHER_BPF_MTAP(ifp, m_head);

		/* Set watchdog on */
		txr->watchdog_check = TRUE;
	}
}

/*
 * Legacy TX driver routine, called from the
 * stack, always uses tx[0], and spins for it.
 * Should not be used with multiqueue tx
 */
static void
igb_start(struct ifnet *ifp)
{
	struct adapter	*adapter = ifp->if_softc;
	struct tx_ring	*txr = adapter->tx_rings;

	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
		IGB_TX_LOCK(txr);
		igb_start_locked(txr, ifp);
		IGB_TX_UNLOCK(txr);
	}
	return;
}

#if __FreeBSD_version >= 800000
/*
** Multiqueue Transmit driver
**
*/
static int
igb_mq_start(struct ifnet *ifp, struct mbuf *m)
{
	struct adapter	*adapter = ifp->if_softc;
	struct tx_ring	*txr;
	int 		i = 0, err = 0;

	/* Which queue to use */
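	/*
	 * (A flow id, when present, is typically an RSS-style hash, so
	 *  using it keeps all packets of a flow on the same TX ring;
	 *  without one, a ring is picked based on the current CPU.)
	 */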
	if ((m->m_flags & M_FLOWID) != 0)
		i = m->m_pkthdr.flowid % adapter->num_queues;
	else
		i = curcpu % adapter->num_queues;

	txr = &adapter->tx_rings[i];

	if (IGB_TX_TRYLOCK(txr)) {
		err = igb_mq_start_locked(ifp, txr, m);
		IGB_TX_UNLOCK(txr);
	} else
		err = drbr_enqueue(ifp, txr->br, m);

	return (err);
}

static int
igb_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr, struct mbuf *m)
{
	struct adapter  *adapter = txr->adapter;
        struct mbuf     *next;
        int             err = 0, enq;

	IGB_TX_LOCK_ASSERT(txr);

	if ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) !=
	    IFF_DRV_RUNNING || adapter->link_active == 0) {
		if (m != NULL)
			err = drbr_enqueue(ifp, txr->br, m);
		return (err);
	}

	enq = 0;
	if (m == NULL) {
		next = drbr_dequeue(ifp, txr->br);
	} else if (drbr_needs_enqueue(ifp, txr->br)) {
		if ((err = drbr_enqueue(ifp, txr->br, m)) != 0)
			return (err);
		next = drbr_dequeue(ifp, txr->br);
	} else
		next = m;
	/* Process the queue */
	while (next != NULL) {
		if ((err = igb_xmit(txr, &next)) != 0) {
			if (next != NULL)
				err = drbr_enqueue(ifp, txr->br, next);
			break;
		}
		enq++;
		drbr_stats_update(ifp, next->m_pkthdr.len, next->m_flags);
		ETHER_BPF_MTAP(ifp, next);
		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
			break;
		if (txr->tx_avail <= IGB_TX_OP_THRESHOLD) {
			ifp->if_drv_flags |= IFF_DRV_OACTIVE;
			break;
		}
		next = drbr_dequeue(ifp, txr->br);
	}
	if (enq > 0) {
		/* Set the watchdog */
		txr->watchdog_check = TRUE;
	}
	return (err);
}

/*
** Flush all ring buffers
*/
static void
igb_qflush(struct ifnet *ifp)
{
	struct adapter	*adapter = ifp->if_softc;
	struct tx_ring	*txr = adapter->tx_rings;
	struct mbuf	*m;

	for (int i = 0; i < adapter->num_queues; i++, txr++) {
		IGB_TX_LOCK(txr);
		while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
			m_freem(m);
		IGB_TX_UNLOCK(txr);
	}
	if_qflush(ifp);
}
#endif /* __FreeBSD_version >= 800000 */

/*********************************************************************
 *  Ioctl entry point
 *
 *  igb_ioctl is called when the user wants to configure the
 *  interface.
 *
 *  return 0 on success, positive on failure
 **********************************************************************/

static int
igb_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
{
	struct adapter	*adapter = ifp->if_softc;
	struct ifreq *ifr = (struct ifreq *)data;
#ifdef INET
	struct ifaddr *ifa = (struct ifaddr *)data;
#endif
	int error = 0;

	if (adapter->in_detach)
		return (error);

	switch (command) {
	case SIOCSIFADDR:
#ifdef INET
		if (ifa->ifa_addr->sa_family == AF_INET) {
			/*
			 * XXX
			 * Since resetting hardware takes a very long time
			 * and results in link renegotiation, we only
			 * initialize the hardware when it is absolutely
			 * required.
			 */
			ifp->if_flags |= IFF_UP;
			if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) {
				IGB_CORE_LOCK(adapter);
				igb_init_locked(adapter);
				IGB_CORE_UNLOCK(adapter);
			}
			if (!(ifp->if_flags & IFF_NOARP))
				arp_ifinit(ifp, ifa);
		} else
#endif
			error = ether_ioctl(ifp, command, data);
		break;
	case SIOCSIFMTU:
	    {
		int max_frame_size;

		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)");

		IGB_CORE_LOCK(adapter);
		max_frame_size = 9234;
		if (ifr->ifr_mtu > max_frame_size - ETHER_HDR_LEN -
		    ETHER_CRC_LEN) {
			IGB_CORE_UNLOCK(adapter);
			error = EINVAL;
			break;
		}

		ifp->if_mtu = ifr->ifr_mtu;
		adapter->max_frame_size =
		    ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
		igb_init_locked(adapter);
		IGB_CORE_UNLOCK(adapter);
		break;
	    }
	case SIOCSIFFLAGS:
		IOCTL_DEBUGOUT("ioctl rcv'd:\
		    SIOCSIFFLAGS (Set Interface Flags)");
		IGB_CORE_LOCK(adapter);
		if (ifp->if_flags & IFF_UP) {
			if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) {
				if ((ifp->if_flags ^ adapter->if_flags) &
				    (IFF_PROMISC | IFF_ALLMULTI)) {
					igb_disable_promisc(adapter);
					igb_set_promisc(adapter);
				}
			} else
				igb_init_locked(adapter);
		} else
			if (ifp->if_drv_flags & IFF_DRV_RUNNING)
				igb_stop(adapter);
		adapter->if_flags = ifp->if_flags;
		IGB_CORE_UNLOCK(adapter);
		break;
	case SIOCADDMULTI:
	case SIOCDELMULTI:
		IOCTL_DEBUGOUT("ioctl rcv'd: SIOC(ADD|DEL)MULTI");
		if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
			IGB_CORE_LOCK(adapter);
			igb_disable_intr(adapter);
			igb_set_multi(adapter);
#ifdef DEVICE_POLLING
			if (!(ifp->if_capenable & IFCAP_POLLING))
#endif
				igb_enable_intr(adapter);
			IGB_CORE_UNLOCK(adapter);
		}
		break;
	case SIOCSIFMEDIA:
		/* Check SOL/IDER usage */
		IGB_CORE_LOCK(adapter);
		if (e1000_check_reset_block(&adapter->hw)) {
			IGB_CORE_UNLOCK(adapter);
			device_printf(adapter->dev, "Media change is"
			    " blocked due to SOL/IDER session.\n");
			break;
		}
		IGB_CORE_UNLOCK(adapter);
	case SIOCGIFMEDIA:
		IOCTL_DEBUGOUT("ioctl rcv'd: \
		    SIOCxIFMEDIA (Get/Set Interface Media)");
		error = ifmedia_ioctl(ifp, ifr, &adapter->media, command);
		break;
	case SIOCSIFCAP:
	    {
		int mask, reinit;

		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFCAP (Set Capabilities)");
		reinit = 0;
		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
#ifdef DEVICE_POLLING
		if (mask & IFCAP_POLLING) {
			if (ifr->ifr_reqcap & IFCAP_POLLING) {
				error = ether_poll_register(igb_poll, ifp);
				if (error)
					return (error);
				IGB_CORE_LOCK(adapter);
				igb_disable_intr(adapter);
				ifp->if_capenable |= IFCAP_POLLING;
				IGB_CORE_UNLOCK(adapter);
			} else {
				error = ether_poll_deregister(ifp);
				/* Enable interrupt even in error case */
				IGB_CORE_LOCK(adapter);
				igb_enable_intr(adapter);
				ifp->if_capenable &= ~IFCAP_POLLING;
				IGB_CORE_UNLOCK(adapter);
			}
		}
#endif
		if (mask & IFCAP_HWCSUM) {
			ifp->if_capenable ^= IFCAP_HWCSUM;
			reinit = 1;
		}
		if (mask & IFCAP_TSO4) {
			ifp->if_capenable ^= IFCAP_TSO4;
			reinit = 1;
		}
		if (mask & IFCAP_VLAN_HWTAGGING) {
			ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
			reinit = 1;
		}
		if (mask & IFCAP_LRO) {
			ifp->if_capenable ^= IFCAP_LRO;
			reinit = 1;
		}
		if (reinit && (ifp->if_drv_flags & IFF_DRV_RUNNING))
			igb_init(adapter);
		VLAN_CAPABILITIES(ifp);
		break;
	    }

	default:
		error = ether_ioctl(ifp, command, data);
		break;
	}

	return (error);
}


/*********************************************************************
 *  Init entry point
 *
 *  This routine is used in two ways. It is used by the stack as
 *  init entry point in network interface structure. It is also used
 *  by the driver as a hw/sw initialization routine to get to a
 *  consistent state.
 *
 *  return 0 on success, positive on failure
 **********************************************************************/

static void
igb_init_locked(struct adapter *adapter)
{
	struct ifnet	*ifp = adapter->ifp;
	device_t	dev = adapter->dev;

	INIT_DEBUGOUT("igb_init: begin");

	IGB_CORE_LOCK_ASSERT(adapter);

	igb_disable_intr(adapter);
	callout_stop(&adapter->timer);

	/* Get the latest mac address, User can use a LAA */
        bcopy(IF_LLADDR(adapter->ifp), adapter->hw.mac.addr,
              ETHER_ADDR_LEN);

	/* Put the address into the Receive Address Array */
	e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);

	igb_reset(adapter);
	igb_update_link_status(adapter);

	E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);

	/* Set hardware offload abilities */
	ifp->if_hwassist = 0;
	if (ifp->if_capenable & IFCAP_TXCSUM) {
		ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
#if __FreeBSD_version >= 800000
		if (adapter->hw.mac.type == e1000_82576)
			ifp->if_hwassist |= CSUM_SCTP;
#endif
	}

	if (ifp->if_capenable & IFCAP_TSO4)
		ifp->if_hwassist |= CSUM_TSO;

	/* Configure for OS presence */
	igb_init_manageability(adapter);

	/* Prepare transmit descriptors and buffers */
	igb_setup_transmit_structures(adapter);
	igb_initialize_transmit_units(adapter);

	/* Setup Multicast table */
	igb_set_multi(adapter);

	/*
	** Figure out the desired mbuf pool
	** for doing jumbo/packetsplit
	*/
	if (ifp->if_mtu > ETHERMTU)
		adapter->rx_mbuf_sz = MJUMPAGESIZE;
	else
		adapter->rx_mbuf_sz = MCLBYTES;

	/* Prepare receive descriptors and buffers */
	if (igb_setup_receive_structures(adapter)) {
		device_printf(dev, "Could not setup receive structures\n");
		return;
	}
	igb_initialize_receive_units(adapter);

	/* Don't lose promiscuous settings */
	igb_set_promisc(adapter);

	ifp->if_drv_flags |= IFF_DRV_RUNNING;
	ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;

	callout_reset(&adapter->timer, hz, igb_local_timer, adapter);
	e1000_clear_hw_cntrs_base_generic(&adapter->hw);

	if (adapter->msix > 1) /* Set up queue routing */
		igb_configure_queues(adapter);

	/* Set up VLAN tag offload and filter */
	igb_setup_vlan_hw_support(adapter);

	/* this clears any pending interrupts */
	E1000_READ_REG(&adapter->hw, E1000_ICR);
#ifdef DEVICE_POLLING
	/*
	 * Only enable interrupts if we are not polling, make sure
	 * they are off otherwise.
	 */
	if (ifp->if_capenable & IFCAP_POLLING)
		igb_disable_intr(adapter);
	else
#endif /* DEVICE_POLLING */
	{
	igb_enable_intr(adapter);
	E1000_WRITE_REG(&adapter->hw, E1000_ICS, E1000_ICS_LSC);
	}

	/* Don't reset the phy next time init gets called */
	adapter->hw.phy.reset_disable = TRUE;
}

static void
igb_init(void *arg)
{
	struct adapter *adapter = arg;

	IGB_CORE_LOCK(adapter);
	igb_init_locked(adapter);
	IGB_CORE_UNLOCK(adapter);
}


static void
igb_handle_rxtx(void *context, int pending)
{
	struct igb_queue	*que = context;
	struct adapter		*adapter = que->adapter;
	struct tx_ring		*txr = adapter->tx_rings;
	struct ifnet		*ifp;

	ifp = adapter->ifp;

	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
		if (igb_rxeof(que, adapter->rx_process_limit))
			taskqueue_enqueue(adapter->tq, &adapter->rxtx_task);
		IGB_TX_LOCK(txr);
		igb_txeof(txr);

#if __FreeBSD_version >= 800000
		if (!drbr_empty(ifp, txr->br))
			igb_mq_start_locked(ifp, txr, NULL);
#else
		if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
			igb_start_locked(txr, ifp);
#endif
		IGB_TX_UNLOCK(txr);
	}

	igb_enable_intr(adapter);
}

static void
igb_handle_que(void *context, int pending)
{
	struct igb_queue *que = context;
	struct adapter *adapter = que->adapter;
	struct tx_ring *txr = que->txr;
	struct ifnet	*ifp = adapter->ifp;
	u32		loop = IGB_MAX_LOOP;
	bool		more;

	/* RX first */
	do {
		more = igb_rxeof(que, -1);
	} while (loop-- && more);

	if (IGB_TX_TRYLOCK(txr)) {
		loop = IGB_MAX_LOOP;
		do {
			more = igb_txeof(txr);
		} while (loop-- && more);
#if __FreeBSD_version >= 800000
		igb_mq_start_locked(ifp, txr, NULL);
#else
		if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
			igb_start_locked(txr, ifp);
#endif
		IGB_TX_UNLOCK(txr);
	}

	/* Reenable this interrupt */
#ifdef DEVICE_POLLING
	if (!(ifp->if_capenable & IFCAP_POLLING))
#endif
	E1000_WRITE_REG(&adapter->hw, E1000_EIMS, que->eims);
}

/* Deal with link in a sleepable context */
static void
igb_handle_link(void *context, int pending)
{
	struct adapter *adapter = context;

	adapter->hw.mac.get_link_status = 1;
	igb_update_link_status(adapter);
}

/*********************************************************************
 *
 *  MSI/Legacy Deferred
 *  Interrupt Service routine
 *
 *********************************************************************/
static int
igb_irq_fast(void *arg)
{
	struct adapter	*adapter = arg;
	uint32_t	reg_icr;


	reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);

	/* Hot eject?  */
	if (reg_icr == 0xffffffff)
		return FILTER_STRAY;

	/* Definitely not our interrupt.  */
	if (reg_icr == 0x0)
		return FILTER_STRAY;

	if ((reg_icr & E1000_ICR_INT_ASSERTED) == 0)
		return FILTER_STRAY;

	/*
	 * Mask interrupts until the taskqueue is finished running.  This is
	 * cheap, just assume that it is needed.  This also works around the
	 * MSI message reordering errata on certain systems.
	 */
	igb_disable_intr(adapter);
	taskqueue_enqueue(adapter->tq, &adapter->rxtx_task);

	/* Link status change */
	if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC))
		taskqueue_enqueue(adapter->tq, &adapter->link_task);

	if (reg_icr & E1000_ICR_RXO)
		adapter->rx_overruns++;
	return FILTER_HANDLED;
}

#ifdef DEVICE_POLLING
/*********************************************************************
 *
 *  Legacy polling routine
 *
 *********************************************************************/
#if __FreeBSD_version >= 800000
#define POLL_RETURN_COUNT(a) (a)
static int
#else
#define POLL_RETURN_COUNT(a)
static void
#endif
igb_poll(struct ifnet *ifp, enum poll_cmd cmd, int count)
{
	struct adapter *adapter = ifp->if_softc;
	struct rx_ring	*rxr = adapter->rx_rings;
	struct tx_ring	*txr = adapter->tx_rings;
	u32		reg_icr, rx_done = 0;
	u32		loop = IGB_MAX_LOOP;
	bool		more;

	IGB_CORE_LOCK(adapter);
	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
		IGB_CORE_UNLOCK(adapter);
		return POLL_RETURN_COUNT(rx_done);
	}

	if (cmd == POLL_AND_CHECK_STATUS) {
		reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
		/* Link status change */
		if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC))
			taskqueue_enqueue(adapter->tq, &adapter->link_task);

		if (reg_icr & E1000_ICR_RXO)
			adapter->rx_overruns++;
	}
	IGB_CORE_UNLOCK(adapter);

	/* TODO: rx_count */
	rx_done = igb_rxeof(rxr, count) ? 1 : 0;

	IGB_TX_LOCK(txr);
	do {
		more = igb_txeof(txr);
	} while (loop-- && more);
#if __FreeBSD_version >= 800000
	if (!drbr_empty(ifp, txr->br))
		igb_mq_start_locked(ifp, txr, NULL);
#else
	if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
		igb_start_locked(txr, ifp);
#endif
	IGB_TX_UNLOCK(txr);
	return POLL_RETURN_COUNT(rx_done);
}
#endif /* DEVICE_POLLING */

/*********************************************************************
 *
 *  MSIX Queue Interrupt Service routine
 *
 **********************************************************************/
static void
igb_msix_que(void *arg)
{
	struct igb_queue *que = arg;
	struct adapter *adapter = que->adapter;
	struct tx_ring *txr = que->txr;
	struct rx_ring *rxr = que->rxr;
	u32		newitr = 0;
	bool		more_tx, more_rx;

	E1000_WRITE_REG(&adapter->hw, E1000_EIMC, que->eims);
	++que->irqs;

	IGB_TX_LOCK(txr);
	more_tx = igb_txeof(txr);
	IGB_TX_UNLOCK(txr);

	more_rx = igb_rxeof(que, adapter->rx_process_limit);

	if (igb_enable_aim == FALSE)
		goto no_calc;
	/*
	** Do Adaptive Interrupt Moderation:
        **  - Write out last calculated setting
	**  - Calculate based on average size over
	**    the last interval.
	*/
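	/*
	** (The heuristic below bases the new EITR value on the average
	** packet size seen since the last interrupt: larger average
	** frames yield a larger interval, i.e. fewer interrupts.  Links
	** below 1Gbps simply use half the default.  The result is then
	** smoothed for the mid range, masked to the valid bits and
	** formatted for the specific MAC type, and written out on the
	** next pass through this handler.)
	*/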
        if (que->eitr_setting)
                E1000_WRITE_REG(&adapter->hw,
                    E1000_EITR(que->msix), que->eitr_setting);

        que->eitr_setting = 0;

        /* Idle, do nothing */
        if ((txr->bytes == 0) && (rxr->bytes == 0))
                goto no_calc;

        /* Use half the default if below gigabit speed */
        if (adapter->link_speed != 1000)
                newitr = IGB_DEFAULT_ITR / 2;
        else {
		if ((txr->bytes) && (txr->packets))
                	newitr = txr->bytes/txr->packets;
		if ((rxr->bytes) && (rxr->packets))
			newitr = max(newitr,
			    (rxr->bytes / rxr->packets));
                newitr += 24; /* account for hardware frame, crc */
		/* set an upper boundary */
		newitr = min(newitr, 3000);
		/* Be nice to the mid range */
                if ((newitr > 300) && (newitr < 1200))
                        newitr = (newitr / 3);
                else
                        newitr = (newitr / 2);
        }
        newitr &= 0x7FFC;  /* Mask invalid bits */
        if (adapter->hw.mac.type == e1000_82575)
                newitr |= newitr << 16;
        else
                newitr |= 0x8000000;

        /* save for next interrupt */
        que->eitr_setting = newitr;

        /* Reset state */
        txr->bytes = 0;
        txr->packets = 0;
        rxr->bytes = 0;
        rxr->packets = 0;

no_calc:
	/* Schedule a clean task if needed */
	if (more_tx || more_rx)
		taskqueue_enqueue(que->tq, &que->que_task);
	else
		/* Reenable this interrupt */
		E1000_WRITE_REG(&adapter->hw, E1000_EIMS, que->eims);
	return;
}


/*********************************************************************
 *
 *  MSIX Link Interrupt Service routine
 *
 **********************************************************************/

static void
igb_msix_link(void *arg)
{
	struct adapter	*adapter = arg;
	u32       	icr;

	++adapter->link_irq;
	icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
	if (!(icr & E1000_ICR_LSC))
		goto spurious;
	taskqueue_enqueue(adapter->tq, &adapter->link_task);

spurious:
	/* Rearm */
	E1000_WRITE_REG(&adapter->hw, E1000_IMS, E1000_IMS_LSC);
	E1000_WRITE_REG(&adapter->hw, E1000_EIMS, adapter->link_mask);
	return;
}


/*********************************************************************
 *
 *  Media Ioctl callback
 *
 *  This routine is called whenever the user queries the status of
 *  the interface using ifconfig.
 *
 **********************************************************************/
static void
igb_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
{
	struct adapter *adapter = ifp->if_softc;
	u_char fiber_type = IFM_1000_SX;

	INIT_DEBUGOUT("igb_media_status: begin");

	IGB_CORE_LOCK(adapter);
	igb_update_link_status(adapter);

	ifmr->ifm_status = IFM_AVALID;
	ifmr->ifm_active = IFM_ETHER;

	if (!adapter->link_active) {
		IGB_CORE_UNLOCK(adapter);
		return;
	}

	ifmr->ifm_status |= IFM_ACTIVE;

	if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
	    (adapter->hw.phy.media_type == e1000_media_type_internal_serdes))
		ifmr->ifm_active |= fiber_type | IFM_FDX;
	else {
		switch (adapter->link_speed) {
		case 10:
			ifmr->ifm_active |= IFM_10_T;
			break;
		case 100:
			ifmr->ifm_active |= IFM_100_TX;
			break;
		case 1000:
			ifmr->ifm_active |= IFM_1000_T;
			break;
		}
		if (adapter->link_duplex == FULL_DUPLEX)
			ifmr->ifm_active |= IFM_FDX;
		else
			ifmr->ifm_active |= IFM_HDX;
	}
	IGB_CORE_UNLOCK(adapter);
}

/*********************************************************************
 *
 *  Media Ioctl callback
 *
 *  This routine is called when the user changes speed/duplex using
 *  the media/mediaopt options with ifconfig.
 *
 **********************************************************************/
static int
igb_media_change(struct ifnet *ifp)
{
	struct adapter *adapter = ifp->if_softc;
	struct ifmedia  *ifm = &adapter->media;

	INIT_DEBUGOUT("igb_media_change: begin");

	if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
		return (EINVAL);

	IGB_CORE_LOCK(adapter);
	switch (IFM_SUBTYPE(ifm->ifm_media)) {
	case IFM_AUTO:
		adapter->hw.mac.autoneg = DO_AUTO_NEG;
		adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
		break;
	case IFM_1000_LX:
	case IFM_1000_SX:
	case IFM_1000_T:
		adapter->hw.mac.autoneg = DO_AUTO_NEG;
		adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
		break;
	case IFM_100_TX:
		adapter->hw.mac.autoneg = FALSE;
		adapter->hw.phy.autoneg_advertised = 0;
		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
			adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_FULL;
		else
			adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_HALF;
		break;
	case IFM_10_T:
		adapter->hw.mac.autoneg = FALSE;
		adapter->hw.phy.autoneg_advertised = 0;
		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
			adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_FULL;
		else
			adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_HALF;
		break;
	default:
		device_printf(adapter->dev, "Unsupported media type\n");
	}

	/* As the speed/duplex settings may have changed, we need
	 * to reset the PHY.
	 */
	adapter->hw.phy.reset_disable = FALSE;

	igb_init_locked(adapter);
	IGB_CORE_UNLOCK(adapter);

	return (0);
}


/*********************************************************************
 *
 *  This routine maps the mbufs to Advanced TX descriptors
 *  as used by the 82575 family of adapters.
 *
 **********************************************************************/

static int
igb_xmit(struct tx_ring *txr, struct mbuf **m_headp)
{
	struct adapter		*adapter = txr->adapter;
	bus_dma_segment_t	segs[IGB_MAX_SCATTER];
	bus_dmamap_t		map;
	struct igb_tx_buffer	*tx_buffer, *tx_buffer_mapped;
	union e1000_adv_tx_desc	*txd = NULL;
	struct mbuf		*m_head;
	u32			olinfo_status = 0, cmd_type_len = 0;
	int			nsegs, i, j, error, first, last = 0;
	u32			hdrlen = 0;

	m_head = *m_headp;


	/* Set basic descriptor constants */
	cmd_type_len |= E1000_ADVTXD_DTYP_DATA;
	cmd_type_len |= E1000_ADVTXD_DCMD_IFCS | E1000_ADVTXD_DCMD_DEXT;
	if (m_head->m_flags & M_VLANTAG)
		cmd_type_len |= E1000_ADVTXD_DCMD_VLE;

        /*
         * Force a cleanup if number of TX descriptors
         * available hits the threshold
         */
	if (txr->tx_avail <= IGB_TX_CLEANUP_THRESHOLD) {
		igb_txeof(txr);
		/* Do we now have at least the minimum? */
		if (txr->tx_avail <= IGB_TX_OP_THRESHOLD) {
			txr->no_desc_avail++;
			return (ENOBUFS);
		}
	}

	/*
         * Map the packet for DMA.
	 *
	 * Capture the first descriptor index,
	 * this descriptor will have the index
	 * of the EOP which is the only one that
	 * now gets a DONE bit writeback.
	 */
	first = txr->next_avail_desc;
	tx_buffer = &txr->tx_buffers[first];
	tx_buffer_mapped = tx_buffer;
	map = tx_buffer->map;

	error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
	    *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);

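	/*
	 * (EFBIG means the mbuf chain has more segments than the DMA
	 *  tag allows; m_defrag() below collapses the chain into a
	 *  smaller one and the mapping is retried once.)
	 */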
	if (error == EFBIG) {
		struct mbuf *m;

		m = m_defrag(*m_headp, M_DONTWAIT);
		if (m == NULL) {
			adapter->mbuf_defrag_failed++;
			m_freem(*m_headp);
			*m_headp = NULL;
			return (ENOBUFS);
		}
		*m_headp = m;

		/* Try it again */
		error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
		    *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);

		if (error == ENOMEM) {
			adapter->no_tx_dma_setup++;
			return (error);
		} else if (error != 0) {
			adapter->no_tx_dma_setup++;
			m_freem(*m_headp);
			*m_headp = NULL;
			return (error);
		}
	} else if (error == ENOMEM) {
		adapter->no_tx_dma_setup++;
		return (error);
	} else if (error != 0) {
		adapter->no_tx_dma_setup++;
		m_freem(*m_headp);
		*m_headp = NULL;
		return (error);
	}

	/* Check again to be sure we have enough descriptors */
        if (nsegs > (txr->tx_avail - 2)) {
                txr->no_desc_avail++;
		bus_dmamap_unload(txr->txtag, map);
		return (ENOBUFS);
        }
	m_head = *m_headp;

        /*
         * Set up the context descriptor:
         * used when any hardware offload is done.
	 * This includes CSUM, VLAN, and TSO. It
	 * will use the first descriptor.
         */
        if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
		if (igb_tso_setup(txr, m_head, &hdrlen)) {
			cmd_type_len |= E1000_ADVTXD_DCMD_TSE;
			olinfo_status |= E1000_TXD_POPTS_IXSM << 8;
			olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
		} else
			return (ENXIO);
	} else if (igb_tx_ctx_setup(txr, m_head))
		olinfo_status |= E1000_TXD_POPTS_TXSM << 8;

	/* Calculate payload length */
	olinfo_status |= ((m_head->m_pkthdr.len - hdrlen)
	    << E1000_ADVTXD_PAYLEN_SHIFT);

	/* 82575 needs the queue index added */
	if (adapter->hw.mac.type == e1000_82575)
		olinfo_status |= txr->me << 4;

	/* Set up our transmit descriptors */
	i = txr->next_avail_desc;
	for (j = 0; j < nsegs; j++) {
		bus_size_t seg_len;
		bus_addr_t seg_addr;

		tx_buffer = &txr->tx_buffers[i];
		txd = (union e1000_adv_tx_desc *)&txr->tx_base[i];
		seg_addr = segs[j].ds_addr;
		seg_len  = segs[j].ds_len;

		txd->read.buffer_addr = htole64(seg_addr);
		txd->read.cmd_type_len = htole32(cmd_type_len | seg_len);
		txd->read.olinfo_status = htole32(olinfo_status);
		last = i;
		if (++i == adapter->num_tx_desc)
			i = 0;
		tx_buffer->m_head = NULL;
		tx_buffer->next_eop = -1;
	}

	txr->next_avail_desc = i;
	txr->tx_avail -= nsegs;

        tx_buffer->m_head = m_head;
	tx_buffer_mapped->map = tx_buffer->map;
	tx_buffer->map = map;
        bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);

        /*
         * Last Descriptor of Packet
	 * needs End Of Packet (EOP)
	 * and Report Status (RS)
         */
        txd->read.cmd_type_len |=
	    htole32(E1000_ADVTXD_DCMD_EOP | E1000_ADVTXD_DCMD_RS);
	/*
	 * Keep track in the first buffer which
	 * descriptor will be written back
	 */
	tx_buffer = &txr->tx_buffers[first];
	tx_buffer->next_eop = last;
	txr->watchdog_time = ticks;

	/*
	 * Advance the Transmit Descriptor Tail (TDT), this tells the E1000
	 * that this frame is available to transmit.
	 */
	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
	E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), i);
	++txr->tx_packets;

	return (0);

}

static void
igb_set_promisc(struct adapter *adapter)
{
	struct ifnet	*ifp = adapter->ifp;
	uint32_t	reg_rctl;

	reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);

	if (ifp->if_flags & IFF_PROMISC) {
		reg_rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
	} else if (ifp->if_flags & IFF_ALLMULTI) {
		reg_rctl |= E1000_RCTL_MPE;
		reg_rctl &= ~E1000_RCTL_UPE;
		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
	}
}

static void
igb_disable_promisc(struct adapter *adapter)
{
	uint32_t	reg_rctl;

	reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);

	reg_rctl &=  (~E1000_RCTL_UPE);
	reg_rctl &=  (~E1000_RCTL_MPE);
	E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
}


/*********************************************************************
 *  Multicast Update
 *
 *  This routine is called whenever the multicast address list is updated.
1818 *
1819 **********************************************************************/
1820
1821static void
1822igb_set_multi(struct adapter *adapter)
1823{
1824	struct ifnet	*ifp = adapter->ifp;
1825	struct ifmultiaddr *ifma;
1826	u32 reg_rctl = 0;
1827	u8  mta[MAX_NUM_MULTICAST_ADDRESSES * ETH_ADDR_LEN];
1828
1829	int mcnt = 0;
1830
1831	IOCTL_DEBUGOUT("igb_set_multi: begin");
1832
1833#if __FreeBSD_version < 800000
1834	IF_ADDR_LOCK(ifp);
1835#else
1836	if_maddr_rlock(ifp);
1837#endif
1838	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
1839		if (ifma->ifma_addr->sa_family != AF_LINK)
1840			continue;
1841
1842		if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
1843			break;
1844
1845		bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
1846		    &mta[mcnt * ETH_ADDR_LEN], ETH_ADDR_LEN);
1847		mcnt++;
1848	}
1849#if __FreeBSD_version < 800000
1850	IF_ADDR_UNLOCK(ifp);
1851#else
1852	if_maddr_runlock(ifp);
1853#endif
1854
1855	if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) {
1856		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
1857		reg_rctl |= E1000_RCTL_MPE;
1858		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
1859	} else
1860		e1000_update_mc_addr_list(&adapter->hw, mta, mcnt);
1861}
1862
1863
1864/*********************************************************************
1865 *  Timer routine:
1866 *  	This routine checks for link status,
1867 *	updates statistics, and does the watchdog.
1868 *
1869 **********************************************************************/
1870
1871static void
1872igb_local_timer(void *arg)
1873{
1874	struct adapter		*adapter = arg;
1875	struct ifnet		*ifp = adapter->ifp;
1876	device_t		dev = adapter->dev;
1877	struct tx_ring		*txr = adapter->tx_rings;
1878
1879
1880	IGB_CORE_LOCK_ASSERT(adapter);
1881
1882	igb_update_link_status(adapter);
1883	igb_update_stats_counters(adapter);
1884
1885	if (igb_display_debug_stats && (ifp->if_drv_flags & IFF_DRV_RUNNING))
1886		igb_print_hw_stats(adapter);
1887
1888        /*
1889        ** Watchdog: check for time since any descriptor was cleaned
1890        */
1891	for (int i = 0; i < adapter->num_queues; i++, txr++) {
1892		if (txr->watchdog_check == FALSE)
1893			continue;
1894		if ((ticks - txr->watchdog_time) > IGB_WATCHDOG)
1895			goto timeout;
1896	}
1897
1898	/* Trigger an RX interrupt on all queues */
1899#ifdef DEVICE_POLLING
1900	if (!(ifp->if_capenable & IFCAP_POLLING))
1901#endif
1902	E1000_WRITE_REG(&adapter->hw, E1000_EICS, adapter->rx_mask);
1903	callout_reset(&adapter->timer, hz, igb_local_timer, adapter);
1904	return;
1905
1906timeout:
1907	device_printf(adapter->dev, "Watchdog timeout -- resetting\n");
1908	device_printf(dev,"Queue(%d) tdh = %d, hw tdt = %d\n", txr->me,
1909            E1000_READ_REG(&adapter->hw, E1000_TDH(txr->me)),
1910            E1000_READ_REG(&adapter->hw, E1000_TDT(txr->me)));
1911	device_printf(dev, "TX(%d) desc avail = %d, "
1912            "Next TX to Clean = %d\n",
1913            txr->me, txr->tx_avail, txr->next_to_clean);
1914	adapter->ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
1915	adapter->watchdog_events++;
1916	igb_init_locked(adapter);
1917}
1918
1919static void
1920igb_update_link_status(struct adapter *adapter)
1921{
1922	struct e1000_hw *hw = &adapter->hw;
1923	struct ifnet *ifp = adapter->ifp;
1924	device_t dev = adapter->dev;
1925	struct tx_ring *txr = adapter->tx_rings;
1926	u32 link_check = 0;
1927
1928	/* Get the cached link value or read for real */
1929        switch (hw->phy.media_type) {
1930        case e1000_media_type_copper:
1931                if (hw->mac.get_link_status) {
1932			/* Do the work to read phy */
1933                        e1000_check_for_link(hw);
1934                        link_check = !hw->mac.get_link_status;
1935                } else
1936                        link_check = TRUE;
1937                break;
1938        case e1000_media_type_fiber:
1939                e1000_check_for_link(hw);
1940                link_check = (E1000_READ_REG(hw, E1000_STATUS) &
1941                                 E1000_STATUS_LU);
1942                break;
1943        case e1000_media_type_internal_serdes:
1944                e1000_check_for_link(hw);
1945                link_check = adapter->hw.mac.serdes_has_link;
1946                break;
1947        default:
1948        case e1000_media_type_unknown:
1949                break;
1950        }
1951
1952	/* Now we check if a transition has happened */
1953	if (link_check && (adapter->link_active == 0)) {
1954		e1000_get_speed_and_duplex(&adapter->hw,
1955		    &adapter->link_speed, &adapter->link_duplex);
1956		if (bootverbose)
1957			device_printf(dev, "Link is up %d Mbps %s\n",
1958			    adapter->link_speed,
1959			    ((adapter->link_duplex == FULL_DUPLEX) ?
1960			    "Full Duplex" : "Half Duplex"));
1961		adapter->link_active = 1;
1962		ifp->if_baudrate = adapter->link_speed * 1000000;
1963		/* This can sleep */
1964		if_link_state_change(ifp, LINK_STATE_UP);
1965	} else if (!link_check && (adapter->link_active == 1)) {
1966		ifp->if_baudrate = adapter->link_speed = 0;
1967		adapter->link_duplex = 0;
1968		if (bootverbose)
1969			device_printf(dev, "Link is Down\n");
1970		adapter->link_active = 0;
1971		/* This can sleep */
1972		if_link_state_change(ifp, LINK_STATE_DOWN);
1973		/* Turn off watchdogs */
1974		for (int i = 0; i < adapter->num_queues; i++, txr++)
1975			txr->watchdog_check = FALSE;
1976	}
1977}
1978
1979/*********************************************************************
1980 *
1981 *  This routine disables all traffic on the adapter by issuing a
1982 *  global reset on the MAC and deallocates TX/RX buffers.
1983 *
1984 **********************************************************************/
1985
1986static void
1987igb_stop(void *arg)
1988{
1989	struct adapter	*adapter = arg;
1990	struct ifnet	*ifp = adapter->ifp;
1991	struct tx_ring *txr = adapter->tx_rings;
1992
1993	IGB_CORE_LOCK_ASSERT(adapter);
1994
1995	INIT_DEBUGOUT("igb_stop: begin");
1996
1997	igb_disable_intr(adapter);
1998
1999	callout_stop(&adapter->timer);
2000
2001	/* Tell the stack that the interface is no longer active */
2002	ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
2003
2004	/* Unarm watchdog timer. */
2005	for (int i = 0; i < adapter->num_queues; i++, txr++) {
2006		IGB_TX_LOCK(txr);
2007		txr->watchdog_check = FALSE;
2008		IGB_TX_UNLOCK(txr);
2009	}
2010
2011	e1000_reset_hw(&adapter->hw);
2012	E1000_WRITE_REG(&adapter->hw, E1000_WUC, 0);
2013
2014	e1000_led_off(&adapter->hw);
2015	e1000_cleanup_led(&adapter->hw);
2016}
2017
2018
2019/*********************************************************************
2020 *
2021 *  Determine hardware revision.
2022 *
2023 **********************************************************************/
2024static void
2025igb_identify_hardware(struct adapter *adapter)
2026{
2027	device_t dev = adapter->dev;
2028
2029	/* Make sure our PCI config space has the necessary stuff set */
2030	adapter->hw.bus.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2);
2031	if (!((adapter->hw.bus.pci_cmd_word & PCIM_CMD_BUSMASTEREN) &&
2032	    (adapter->hw.bus.pci_cmd_word & PCIM_CMD_MEMEN))) {
2033		device_printf(dev, "Memory Access and/or Bus Master bits "
2034		    "were not set!\n");
2035		adapter->hw.bus.pci_cmd_word |=
2036		(PCIM_CMD_BUSMASTEREN | PCIM_CMD_MEMEN);
2037		pci_write_config(dev, PCIR_COMMAND,
2038		    adapter->hw.bus.pci_cmd_word, 2);
2039	}
2040
2041	/* Save off the information about this board */
2042	adapter->hw.vendor_id = pci_get_vendor(dev);
2043	adapter->hw.device_id = pci_get_device(dev);
2044	adapter->hw.revision_id = pci_read_config(dev, PCIR_REVID, 1);
2045	adapter->hw.subsystem_vendor_id =
2046	    pci_read_config(dev, PCIR_SUBVEND_0, 2);
2047	adapter->hw.subsystem_device_id =
2048	    pci_read_config(dev, PCIR_SUBDEV_0, 2);
2049
2050	/* Do Shared Code Init and Setup */
2051	if (e1000_set_mac_type(&adapter->hw)) {
2052		device_printf(dev, "Setup init failure\n");
2053		return;
2054	}
2055}
2056
2057static int
2058igb_allocate_pci_resources(struct adapter *adapter)
2059{
2060	device_t	dev = adapter->dev;
2061	int		rid;
2062
2063	rid = PCIR_BAR(0);
2064	adapter->pci_mem = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
2065	    &rid, RF_ACTIVE);
2066	if (adapter->pci_mem == NULL) {
2067		device_printf(dev, "Unable to allocate bus resource: memory\n");
2068		return (ENXIO);
2069	}
2070	adapter->osdep.mem_bus_space_tag =
2071	    rman_get_bustag(adapter->pci_mem);
2072	adapter->osdep.mem_bus_space_handle =
2073	    rman_get_bushandle(adapter->pci_mem);
2074	adapter->hw.hw_addr = (u8 *)&adapter->osdep.mem_bus_space_handle;
2075
2076	adapter->num_queues = 1; /* Defaults for Legacy or MSI */
2077
2078	/* This will setup either MSI/X or MSI */
2079	adapter->msix = igb_setup_msix(adapter);
2080	adapter->hw.back = &adapter->osdep;
2081
2082	return (0);
2083}
2084
2085/*********************************************************************
2086 *
2087 *  Setup the Legacy or MSI Interrupt handler
2088 *
2089 **********************************************************************/
2090static int
2091igb_allocate_legacy(struct adapter *adapter)
2092{
2093	device_t		dev = adapter->dev;
2094	struct igb_queue	*que = adapter->queues;
2095	int			error, rid = 0;
2096
2097	/* Turn off all interrupts */
2098	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2099
2100	/* MSI RID is 1 */
2101	if (adapter->msix == 1)
2102		rid = 1;
2103
2104	/* We allocate a single interrupt resource */
2105	adapter->res = bus_alloc_resource_any(dev,
2106	    SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2107	if (adapter->res == NULL) {
2108		device_printf(dev, "Unable to allocate bus resource: "
2109		    "interrupt\n");
2110		return (ENXIO);
2111	}
2112
2113	/*
2114	 * Try allocating a fast interrupt and the associated deferred
2115	 * processing contexts.
2116	 */
2117	TASK_INIT(&adapter->rxtx_task, 0, igb_handle_rxtx, que);
2118	/* Make tasklet for deferred link handling */
2119	TASK_INIT(&adapter->link_task, 0, igb_handle_link, adapter);
2120	adapter->tq = taskqueue_create_fast("igb_taskq", M_NOWAIT,
2121	    taskqueue_thread_enqueue, &adapter->tq);
2122	taskqueue_start_threads(&adapter->tq, 1, PI_NET, "%s taskq",
2123	    device_get_nameunit(adapter->dev));
2124	if ((error = bus_setup_intr(dev, adapter->res,
2125	    INTR_TYPE_NET | INTR_MPSAFE, igb_irq_fast, NULL,
2126	    adapter, &adapter->tag)) != 0) {
2127		device_printf(dev, "Failed to register fast interrupt "
2128			    "handler: %d\n", error);
2129		taskqueue_free(adapter->tq);
2130		adapter->tq = NULL;
2131		return (error);
2132	}
2133
2134	return (0);
2135}
2136
2137
2138/*********************************************************************
2139 *
2140 *  Setup the MSIX Queue Interrupt handlers:
2141 *
2142 **********************************************************************/
2143static int
2144igb_allocate_msix(struct adapter *adapter)
2145{
2146	device_t		dev = adapter->dev;
2147	struct igb_queue	*que = adapter->queues;
2148	int			error, rid, vector = 0;
2149
2150
2151	for (int i = 0; i < adapter->num_queues; i++, vector++, que++) {
2152		rid = vector + 1;
2153		que->res = bus_alloc_resource_any(dev,
2154		    SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2155		if (que->res == NULL) {
2156			device_printf(dev,
2157			    "Unable to allocate bus resource: "
2158			    "MSIX Queue Interrupt\n");
2159			return (ENXIO);
2160		}
2161		error = bus_setup_intr(dev, que->res,
2162	    	    INTR_TYPE_NET | INTR_MPSAFE, NULL,
2163		    igb_msix_que, que, &que->tag);
2164		if (error) {
2165			que->res = NULL;
2166			device_printf(dev, "Failed to register Queue handler\n");
2167			return (error);
2168		}
2169		que->msix = vector;
2170		if (adapter->hw.mac.type == e1000_82575)
2171			que->eims = E1000_EICR_TX_QUEUE0 << i;
2172		else
2173			que->eims = 1 << vector;
2174		/*
2175		** Bind the msix vector, and thus the
2176		** rings to the corresponding cpu.
2177		*/
2178		if (adapter->num_queues > 1)
2179			bus_bind_intr(dev, que->res, i);
2180		/* Make tasklet for deferred handling */
2181		TASK_INIT(&que->que_task, 0, igb_handle_que, que);
2182		que->tq = taskqueue_create_fast("igb_que", M_NOWAIT,
2183		    taskqueue_thread_enqueue, &que->tq);
2184		taskqueue_start_threads(&que->tq, 1, PI_NET, "%s que",
2185		    device_get_nameunit(adapter->dev));
2186	}
2187
2188	/* And Link */
2189	rid = vector + 1;
2190	adapter->res = bus_alloc_resource_any(dev,
2191	    SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2192	if (adapter->res == NULL) {
2193		device_printf(dev,
2194		    "Unable to allocate bus resource: "
2195		    "MSIX Link Interrupt\n");
2196		return (ENXIO);
2197	}
2198	if ((error = bus_setup_intr(dev, adapter->res,
2199	    INTR_TYPE_NET | INTR_MPSAFE, NULL,
2200	    igb_msix_link, adapter, &adapter->tag)) != 0) {
2201		device_printf(dev, "Failed to register Link handler\n");
2202		return (error);
2203	}
2204	adapter->linkvec = vector;
2205
2206	/* Make tasklet for deferred handling */
2207	TASK_INIT(&adapter->link_task, 0, igb_handle_link, adapter);
2208	adapter->tq = taskqueue_create_fast("igb_link", M_NOWAIT,
2209	    taskqueue_thread_enqueue, &adapter->tq);
2210	taskqueue_start_threads(&adapter->tq, 1, PI_NET, "%s link",
2211	    device_get_nameunit(adapter->dev));
2212
2213	return (0);
2214}
2215
2216
2217static void
2218igb_configure_queues(struct adapter *adapter)
2219{
2220	struct	e1000_hw	*hw = &adapter->hw;
2221	struct	igb_queue	*que;
2222	u32			tmp, ivar = 0;
2223	u32			newitr = IGB_DEFAULT_ITR;
2224
2225	/* First turn on RSS capability */
2226	if (adapter->hw.mac.type > e1000_82575)
2227		E1000_WRITE_REG(hw, E1000_GPIE,
2228		    E1000_GPIE_MSIX_MODE | E1000_GPIE_EIAME |
2229		    E1000_GPIE_PBA | E1000_GPIE_NSICR);
2230
2231	/* Turn on MSIX */
2232	switch (adapter->hw.mac.type) {
2233	case e1000_82580:
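		/*
		 * Each IVAR register covers a pair of queues: queue (2*index)
		 * uses byte 0 for RX and byte 1 for TX, queue (2*index + 1)
		 * uses bytes 2 and 3, which is what the masks and shifts
		 * below implement.
		 */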
2234		/* RX entries */
2235		for (int i = 0; i < adapter->num_queues; i++) {
2236			u32 index = i >> 1;
2237			ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2238			que = &adapter->queues[i];
2239			if (i & 1) {
2240				ivar &= 0xFF00FFFF;
2241				ivar |= (que->msix | E1000_IVAR_VALID) << 16;
2242			} else {
2243				ivar &= 0xFFFFFF00;
2244				ivar |= que->msix | E1000_IVAR_VALID;
2245			}
2246			E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2247		}
2248		/* TX entries */
2249		for (int i = 0; i < adapter->num_queues; i++) {
2250			u32 index = i >> 1;
2251			ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2252			que = &adapter->queues[i];
2253			if (i & 1) {
2254				ivar &= 0x00FFFFFF;
2255				ivar |= (que->msix | E1000_IVAR_VALID) << 24;
2256			} else {
2257				ivar &= 0xFFFF00FF;
2258				ivar |= (que->msix | E1000_IVAR_VALID) << 8;
2259			}
2260			E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2261			adapter->eims_mask |= que->eims;
2262		}
2263
2264		/* And for the link interrupt */
2265		ivar = (adapter->linkvec | E1000_IVAR_VALID) << 8;
2266		adapter->link_mask = 1 << adapter->linkvec;
2267		adapter->eims_mask |= adapter->link_mask;
2268		E1000_WRITE_REG(hw, E1000_IVAR_MISC, ivar);
2269		break;
2270	case e1000_82576:
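		/*
		 * Here the IVAR index wraps at 8 (i & 0x7): queues 0-7 use
		 * the low half of each register and queues 8 and up use the
		 * high half, again with RX and TX in separate bytes.
		 */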
2271		/* RX entries */
2272		for (int i = 0; i < adapter->num_queues; i++) {
2273			u32 index = i & 0x7; /* Each IVAR has two entries */
2274			ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2275			que = &adapter->queues[i];
2276			if (i < 8) {
2277				ivar &= 0xFFFFFF00;
2278				ivar |= que->msix | E1000_IVAR_VALID;
2279			} else {
2280				ivar &= 0xFF00FFFF;
2281				ivar |= (que->msix | E1000_IVAR_VALID) << 16;
2282			}
2283			E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2284			adapter->eims_mask |= que->eims;
2285		}
2286		/* TX entries */
2287		for (int i = 0; i < adapter->num_queues; i++) {
2288			u32 index = i & 0x7; /* Each IVAR has two entries */
2289			ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2290			que = &adapter->queues[i];
2291			if (i < 8) {
2292				ivar &= 0xFFFF00FF;
2293				ivar |= (que->msix | E1000_IVAR_VALID) << 8;
2294			} else {
2295				ivar &= 0x00FFFFFF;
2296				ivar |= (que->msix | E1000_IVAR_VALID) << 24;
2297			}
2298			E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2299			adapter->eims_mask |= que->eims;
2300		}
2301
2302		/* And for the link interrupt */
2303		ivar = (adapter->linkvec | E1000_IVAR_VALID) << 8;
2304		adapter->link_mask = 1 << adapter->linkvec;
2305		adapter->eims_mask |= adapter->link_mask;
2306		E1000_WRITE_REG(hw, E1000_IVAR_MISC, ivar);
2307		break;
2308
2309	case e1000_82575:
2310                /* Enable MSI-X support */
2311		tmp = E1000_READ_REG(hw, E1000_CTRL_EXT);
2312                tmp |= E1000_CTRL_EXT_PBA_CLR;
2313                /* Auto-Mask interrupts upon ICR read. */
2314                tmp |= E1000_CTRL_EXT_EIAME;
2315                tmp |= E1000_CTRL_EXT_IRCA;
2316                E1000_WRITE_REG(hw, E1000_CTRL_EXT, tmp);
2317
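		/*
		 * The 82575 is programmed through per-vector EICR bitmask
		 * (MSIXBM) registers rather than the IVAR table used above:
		 * each vector is given the RX/TX queue cause bits that
		 * should trigger it.
		 */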
2318		/* Queues */
2319		for (int i = 0; i < adapter->num_queues; i++) {
2320			que = &adapter->queues[i];
2321			tmp = E1000_EICR_RX_QUEUE0 << i;
2322			tmp |= E1000_EICR_TX_QUEUE0 << i;
2323			que->eims = tmp;
2324			E1000_WRITE_REG_ARRAY(hw, E1000_MSIXBM(0),
2325			    i, que->eims);
2326			adapter->eims_mask |= que->eims;
2327		}
2328
2329		/* Link */
2330		E1000_WRITE_REG(hw, E1000_MSIXBM(adapter->linkvec),
2331		    E1000_EIMS_OTHER);
2332		adapter->link_mask |= E1000_EIMS_OTHER;
2333		adapter->eims_mask |= adapter->link_mask;
		break;
2334	default:
2335		break;
2336	}
2337
2338	/* Set the starting interrupt rate */
2339        if (hw->mac.type == e1000_82575)
2340                newitr |= newitr << 16;
2341        else
2342                newitr |= 0x8000000;
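	/*
	 * The two branches above reflect differing EITR layouts: the 82575
	 * takes the interval value in both halves of the register, while the
	 * newer MACs take a single interval plus the extra control bit
	 * (0x8000000) set here; the bit's exact meaning is hardware-specific
	 * and not spelled out in this driver.
	 */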
2343
2344	for (int i = 0; i < adapter->num_queues; i++) {
2345		que = &adapter->queues[i];
2346		E1000_WRITE_REG(hw, E1000_EITR(que->msix), newitr);
2347	}
2348
2349	return;
2350}
2351
2352
2353static void
2354igb_free_pci_resources(struct adapter *adapter)
2355{
2356	struct		igb_queue *que = adapter->queues;
2357	device_t	dev = adapter->dev;
2358	int		rid;
2359
2360	/*
2361	** There is a slight possibility of a failure mode
2362	** in attach that will result in entering this function
2363	** before interrupt resources have been initialized, and
2364	** in that case we do not want to execute the loops below.
2365	** We can detect this reliably by the state of the adapter's
2366	** res pointer.
2367	*/
2368	if (adapter->res == NULL)
2369		goto mem;
2370
2371	/*
2372	 * First release all the interrupt resources:
2373	 */
2374	for (int i = 0; i < adapter->num_queues; i++, que++) {
2375		rid = que->msix + 1;
2376		if (que->tag != NULL) {
2377			bus_teardown_intr(dev, que->res, que->tag);
2378			que->tag = NULL;
2379		}
2380		if (que->res != NULL)
2381			bus_release_resource(dev,
2382			    SYS_RES_IRQ, rid, que->res);
2383	}
2384
2385	/* Clean the Legacy or Link interrupt last */
2386	if (adapter->linkvec) /* we are doing MSIX */
2387		rid = adapter->linkvec + 1;
2388	else
2389		rid = (adapter->msix != 0) ? 1 : 0;
2390
2391	if (adapter->tag != NULL) {
2392		bus_teardown_intr(dev, adapter->res, adapter->tag);
2393		adapter->tag = NULL;
2394	}
2395	if (adapter->res != NULL)
2396		bus_release_resource(dev, SYS_RES_IRQ, rid, adapter->res);
2397
2398mem:
2399	if (adapter->msix)
2400		pci_release_msi(dev);
2401
2402	if (adapter->msix_mem != NULL)
2403		bus_release_resource(dev, SYS_RES_MEMORY,
2404		    PCIR_BAR(IGB_MSIX_BAR), adapter->msix_mem);
2405
2406	if (adapter->pci_mem != NULL)
2407		bus_release_resource(dev, SYS_RES_MEMORY,
2408		    PCIR_BAR(0), adapter->pci_mem);
2409
2410}
2411
2412/*
2413 * Setup Either MSI/X or MSI
2414 */
2415static int
2416igb_setup_msix(struct adapter *adapter)
2417{
2418	device_t dev = adapter->dev;
2419	int rid, want, queues, msgs;
2420
2421	/* tuneable override */
2422	if (igb_enable_msix == 0)
2423		goto msi;
2424
2425	/* First try MSI/X */
2426	rid = PCIR_BAR(IGB_MSIX_BAR);
2427	adapter->msix_mem = bus_alloc_resource_any(dev,
2428	    SYS_RES_MEMORY, &rid, RF_ACTIVE);
2429       	if (!adapter->msix_mem) {
2430		/* May not be enabled */
2431		device_printf(adapter->dev,
2432		    "Unable to map MSIX table\n");
2433		goto msi;
2434	}
2435
2436	msgs = pci_msix_count(dev);
2437	if (msgs == 0) { /* system has msix disabled */
2438		bus_release_resource(dev, SYS_RES_MEMORY,
2439		    PCIR_BAR(IGB_MSIX_BAR), adapter->msix_mem);
2440		adapter->msix_mem = NULL;
2441		goto msi;
2442	}
2443
2444	/* Figure out a reasonable auto config value */
2445	queues = (mp_ncpus > (msgs-1)) ? (msgs-1) : mp_ncpus;
2446
2447	/* Manual override */
2448	if (igb_num_queues != 0)
2449		queues = igb_num_queues;
2450
2451	/* Can have max of 4 queues on 82575 */
2452	if ((adapter->hw.mac.type == e1000_82575) && (queues > 4))
2453		queues = 4;
2454
2455	/*
2456	** One vector (RX/TX pair) per queue,
2457	** plus an additional one for the link interrupt.
2458	*/
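	/* For example, a 4 queue configuration asks for 5 MSIX vectors. */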
2459	want = queues + 1;
2460	if (msgs >= want)
2461		msgs = want;
2462	else {
2463               	device_printf(adapter->dev,
2464		    "MSIX Configuration Problem, "
2465		    "%d vectors configured, but %d queues wanted!\n",
2466		    msgs, want);
2467		return (ENXIO);
2468	}
2469	if ((msgs) && pci_alloc_msix(dev, &msgs) == 0) {
2470               	device_printf(adapter->dev,
2471		    "Using MSIX interrupts with %d vectors\n", msgs);
2472		adapter->num_queues = queues;
2473		return (msgs);
2474	}
2475msi:
2476       	msgs = pci_msi_count(dev);
2477       	if (msgs == 1 && pci_alloc_msi(dev, &msgs) == 0)
2478               	device_printf(adapter->dev,"Using MSI interrupt\n");
2479	return (msgs);
2480}
2481
2482/*********************************************************************
2483 *
2484	 *  Set up a fresh starting state
2485 *
2486 **********************************************************************/
2487static void
2488igb_reset(struct adapter *adapter)
2489{
2490	device_t	dev = adapter->dev;
2491	struct e1000_hw *hw = &adapter->hw;
2492	struct e1000_fc_info *fc = &hw->fc;
2493	struct ifnet	*ifp = adapter->ifp;
2494	u32		pba = 0;
2495	u16		hwm;
2496
2497	INIT_DEBUGOUT("igb_reset: begin");
2498
2499	/* Let the firmware know the OS is in control */
2500	igb_get_hw_control(adapter);
2501
2502	/*
2503	 * Packet Buffer Allocation (PBA)
2504	 * Writing PBA sets the receive portion of the buffer;
2505	 * the remainder is used for the transmit buffer.
2506	 */
2507	switch (hw->mac.type) {
2508	case e1000_82575:
2509		pba = E1000_PBA_32K;
2510		break;
2511	case e1000_82576:
2512		pba = E1000_PBA_64K;
2513		break;
2514	case e1000_82580:
2515		pba = E1000_PBA_35K;
		break;
2516	default:
2517		break;
2518	}
2519
2520	/* Special needs in case of Jumbo frames */
2521	if ((hw->mac.type == e1000_82575) && (ifp->if_mtu > ETHERMTU)) {
2522		u32 tx_space, min_tx, min_rx;
2523		pba = E1000_READ_REG(hw, E1000_PBA);
2524		tx_space = pba >> 16;
2525		pba &= 0xffff;
2526		min_tx = (adapter->max_frame_size +
2527		    sizeof(struct e1000_tx_desc) - ETHERNET_FCS_SIZE) * 2;
2528		min_tx = roundup2(min_tx, 1024);
2529		min_tx >>= 10;
2530                min_rx = adapter->max_frame_size;
2531                min_rx = roundup2(min_rx, 1024);
2532                min_rx >>= 10;
2533		if (tx_space < min_tx &&
2534		    ((min_tx - tx_space) < pba)) {
2535			pba = pba - (min_tx - tx_space);
2536			/*
2537                         * if short on rx space, rx wins
2538                         * and must trump tx adjustment
2539			 */
2540                        if (pba < min_rx)
2541                                pba = min_rx;
2542		}
2543		E1000_WRITE_REG(hw, E1000_PBA, pba);
2544	}
2545
2546	INIT_DEBUGOUT1("igb_init: pba=%dK",pba);
2547
2548	/*
2549	 * These parameters control the automatic generation (Tx) and
2550	 * response (Rx) to Ethernet PAUSE frames.
2551	 * - High water mark should allow for at least two frames to be
2552	 *   received after sending an XOFF.
2553	 * - Low water mark works best when it is very near the high water mark.
2554	 *   This allows the receiver to restart by sending XON when it has
2555	 *   drained a bit.
2556	 */
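	/*
	 * Illustrative example (assuming a 64 KB Rx PBA and a 1518 byte
	 * max frame): hwm = min(65536 * 9 / 10, 65536 - 2 * 1518)
	 *               = min(58982, 62500) = 58982, which is then rounded
	 * down to the 8 or 16 byte granularity below.
	 */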
2557	hwm = min(((pba << 10) * 9 / 10),
2558	    ((pba << 10) - 2 * adapter->max_frame_size));
2559
2560	if (hw->mac.type < e1000_82576) {
2561		fc->high_water = hwm & 0xFFF8;  /* 8-byte granularity */
2562		fc->low_water = fc->high_water - 8;
2563	} else {
2564		fc->high_water = hwm & 0xFFF0;  /* 16-byte granularity */
2565		fc->low_water = fc->high_water - 16;
2566	}
2567
2568	fc->pause_time = IGB_FC_PAUSE_TIME;
2569	fc->send_xon = TRUE;
2570
2571	/* Set Flow control, use the tunable location if sane */
2572	if ((igb_fc_setting >= 0) && (igb_fc_setting < 4))
2573		fc->requested_mode = igb_fc_setting;
2574	else
2575		fc->requested_mode = e1000_fc_none;
2576
2577	fc->current_mode = fc->requested_mode;
2578
2579	/* Issue a global reset */
2580	e1000_reset_hw(hw);
2581	E1000_WRITE_REG(hw, E1000_WUC, 0);
2582
2583	if (e1000_init_hw(hw) < 0)
2584		device_printf(dev, "Hardware Initialization Failed\n");
2585
2586	if (hw->mac.type == e1000_82580) {
2587		u32 reg;
2588
2589		hwm = (pba << 10) - (2 * adapter->max_frame_size);
2590		/*
2591		 * 0x80000000 - enable DMA COAL
2592		 * 0x10000000 - use L0s as low power
2593		 * 0x20000000 - use L1 as low power
2594		 * X << 16 - exit dma coal when rx data exceeds X kB
2595		 * Y - upper limit to stay in dma coal in units of 32usecs
2596		 */
2597		E1000_WRITE_REG(hw, E1000_DMACR,
2598		    0xA0000006 | ((hwm << 6) & 0x00FF0000));
2599
2600		/* set hwm to PBA - 2 * max frame size */
2601		E1000_WRITE_REG(hw, E1000_FCRTC, hwm);
2602		/*
2603		 * This sets the time to wait before requesting transition to
2604		 * low power state to number of usecs needed to receive 1 512
2605		 * byte frame at gigabit line rate
2606		 */
2607		E1000_WRITE_REG(hw, E1000_DMCTLX, 4);
2608
2609		/* free space in tx packet buffer to wake from DMA coal */
2610		E1000_WRITE_REG(hw, E1000_DMCTXTH,
2611		    (20480 - (2 * adapter->max_frame_size)) >> 6);
2612
2613		/* make low power state decision controlled by DMA coal */
2614		reg = E1000_READ_REG(hw, E1000_PCIEMISC);
2615		E1000_WRITE_REG(hw, E1000_PCIEMISC,
2616		    reg | E1000_PCIEMISC_LX_DECISION);
2617	}
2618
2619	E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);
2620	e1000_get_phy_info(hw);
2621	e1000_check_for_link(hw);
2622	return;
2623}
2624
2625/*********************************************************************
2626 *
2627 *  Setup networking device structure and register an interface.
2628 *
2629 **********************************************************************/
2630static void
2631igb_setup_interface(device_t dev, struct adapter *adapter)
2632{
2633	struct ifnet   *ifp;
2634
2635	INIT_DEBUGOUT("igb_setup_interface: begin");
2636
2637	ifp = adapter->ifp = if_alloc(IFT_ETHER);
2638	if (ifp == NULL)
2639		panic("%s: can not if_alloc()", device_get_nameunit(dev));
2640	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
2641	ifp->if_mtu = ETHERMTU;
2642	ifp->if_init =  igb_init;
2643	ifp->if_softc = adapter;
2644	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
2645	ifp->if_ioctl = igb_ioctl;
2646	ifp->if_start = igb_start;
2647#if __FreeBSD_version >= 800000
2648	ifp->if_transmit = igb_mq_start;
2649	ifp->if_qflush = igb_qflush;
2650#endif
2651	IFQ_SET_MAXLEN(&ifp->if_snd, adapter->num_tx_desc - 1);
2652	ifp->if_snd.ifq_drv_maxlen = adapter->num_tx_desc - 1;
2653	IFQ_SET_READY(&ifp->if_snd);
2654
2655	ether_ifattach(ifp, adapter->hw.mac.addr);
2656
2657	ifp->if_capabilities = ifp->if_capenable = 0;
2658
2659	ifp->if_capabilities = IFCAP_HWCSUM | IFCAP_VLAN_MTU;
2660	ifp->if_capabilities |= IFCAP_TSO4;
2661	ifp->if_capabilities |= IFCAP_JUMBO_MTU;
2662	if (igb_header_split)
2663		ifp->if_capabilities |= IFCAP_LRO;
2664
2665	ifp->if_capenable = ifp->if_capabilities;
2666#ifdef DEVICE_POLLING
2667	ifp->if_capabilities |= IFCAP_POLLING;
2668#endif
2669
2670	/*
2671	 * Tell the upper layer(s) we support long frames.
2672	 */
2673	ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);
2674	ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU;
2675	ifp->if_capenable |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU;
2676
2677	/*
2678	 * Specify the media types supported by this adapter and register
2679	 * callbacks to update media and link information
2680	 */
2681	ifmedia_init(&adapter->media, IFM_IMASK,
2682	    igb_media_change, igb_media_status);
2683	if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
2684	    (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
2685		ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_SX | IFM_FDX,
2686			    0, NULL);
2687		ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_SX, 0, NULL);
2688	} else {
2689		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T, 0, NULL);
2690		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T | IFM_FDX,
2691			    0, NULL);
2692		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX,
2693			    0, NULL);
2694		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
2695			    0, NULL);
2696		if (adapter->hw.phy.type != e1000_phy_ife) {
2697			ifmedia_add(&adapter->media,
2698				IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
2699			ifmedia_add(&adapter->media,
2700				IFM_ETHER | IFM_1000_T, 0, NULL);
2701		}
2702	}
2703	ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
2704	ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
2705}
2706
2707
2708/*
2709 * Manage DMA'able memory.
2710 */
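/*
 * igb_dmamap_cb is the bus_dmamap_load() callback; the descriptor areas
 * are created with a single segment, so it simply records that segment's
 * bus address for the caller.
 */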
2711static void
2712igb_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
2713{
2714	if (error)
2715		return;
2716	*(bus_addr_t *) arg = segs[0].ds_addr;
2717}
2718
2719static int
2720igb_dma_malloc(struct adapter *adapter, bus_size_t size,
2721        struct igb_dma_alloc *dma, int mapflags)
2722{
2723	int error;
2724
2725	error = bus_dma_tag_create(bus_get_dma_tag(adapter->dev), /* parent */
2726				IGB_DBA_ALIGN, 0,	/* alignment, bounds */
2727				BUS_SPACE_MAXADDR,	/* lowaddr */
2728				BUS_SPACE_MAXADDR,	/* highaddr */
2729				NULL, NULL,		/* filter, filterarg */
2730				size,			/* maxsize */
2731				1,			/* nsegments */
2732				size,			/* maxsegsize */
2733				0,			/* flags */
2734				NULL,			/* lockfunc */
2735				NULL,			/* lockarg */
2736				&dma->dma_tag);
2737	if (error) {
2738		device_printf(adapter->dev,
2739		    "%s: bus_dma_tag_create failed: %d\n",
2740		    __func__, error);
2741		goto fail_0;
2742	}
2743
2744	error = bus_dmamem_alloc(dma->dma_tag, (void**) &dma->dma_vaddr,
2745	    BUS_DMA_NOWAIT, &dma->dma_map);
2746	if (error) {
2747		device_printf(adapter->dev,
2748		    "%s: bus_dmamem_alloc(%ju) failed: %d\n",
2749		    __func__, (uintmax_t)size, error);
2750		goto fail_2;
2751	}
2752
2753	dma->dma_paddr = 0;
2754	error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
2755	    size, igb_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT);
2756	if (error || dma->dma_paddr == 0) {
2757		device_printf(adapter->dev,
2758		    "%s: bus_dmamap_load failed: %d\n",
2759		    __func__, error);
2760		goto fail_3;
2761	}
2762
2763	return (0);
2764
2765fail_3:
2766	bus_dmamap_unload(dma->dma_tag, dma->dma_map);
2767fail_2:
2768	bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
2769	bus_dma_tag_destroy(dma->dma_tag);
2770fail_0:
2771	dma->dma_map = NULL;
2772	dma->dma_tag = NULL;
2773
2774	return (error);
2775}
2776
2777static void
2778igb_dma_free(struct adapter *adapter, struct igb_dma_alloc *dma)
2779{
2780	if (dma->dma_tag == NULL)
2781		return;
2782	if (dma->dma_map != NULL) {
2783		bus_dmamap_sync(dma->dma_tag, dma->dma_map,
2784		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
2785		bus_dmamap_unload(dma->dma_tag, dma->dma_map);
2786		bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
2787		dma->dma_map = NULL;
2788	}
2789	bus_dma_tag_destroy(dma->dma_tag);
2790	dma->dma_tag = NULL;
2791}
2792
2793
2794/*********************************************************************
2795 *
2796 *  Allocate memory for the transmit and receive rings, and then
2797 *  the descriptors associated with each, called only once at attach.
2798 *
2799 **********************************************************************/
2800static int
2801igb_allocate_queues(struct adapter *adapter)
2802{
2803	device_t dev = adapter->dev;
2804	struct igb_queue	*que = NULL;
2805	struct tx_ring		*txr = NULL;
2806	struct rx_ring		*rxr = NULL;
2807	int rsize, tsize, error = E1000_SUCCESS;
2808	int txconf = 0, rxconf = 0;
2809
2810	/* First allocate the top level queue structs */
2811	if (!(adapter->queues =
2812	    (struct igb_queue *) malloc(sizeof(struct igb_queue) *
2813	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2814		device_printf(dev, "Unable to allocate queue memory\n");
2815		error = ENOMEM;
2816		goto fail;
2817	}
2818
2819	/* Next allocate the TX ring struct memory */
2820	if (!(adapter->tx_rings =
2821	    (struct tx_ring *) malloc(sizeof(struct tx_ring) *
2822	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2823		device_printf(dev, "Unable to allocate TX ring memory\n");
2824		error = ENOMEM;
2825		goto tx_fail;
2826	}
2827
2828	/* Now allocate the RX */
2829	if (!(adapter->rx_rings =
2830	    (struct rx_ring *) malloc(sizeof(struct rx_ring) *
2831	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2832		device_printf(dev, "Unable to allocate RX ring memory\n");
2833		error = ENOMEM;
2834		goto rx_fail;
2835	}
2836
2837	tsize = roundup2(adapter->num_tx_desc *
2838	    sizeof(union e1000_adv_tx_desc), IGB_DBA_ALIGN);
2839	/*
2840	 * Now set up the TX queues, txconf is needed to handle the
2841	 * possibility that things fail midcourse and we need to
2842	 * undo memory gracefully
2843	 */
2844	for (int i = 0; i < adapter->num_queues; i++, txconf++) {
2845		/* Set up some basics */
2846		txr = &adapter->tx_rings[i];
2847		txr->adapter = adapter;
2848		txr->me = i;
2849
2850		/* Initialize the TX lock */
2851		snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
2852		    device_get_nameunit(dev), txr->me);
2853		mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF);
2854
2855		if (igb_dma_malloc(adapter, tsize,
2856			&txr->txdma, BUS_DMA_NOWAIT)) {
2857			device_printf(dev,
2858			    "Unable to allocate TX Descriptor memory\n");
2859			error = ENOMEM;
2860			goto err_tx_desc;
2861		}
2862		txr->tx_base = (struct e1000_tx_desc *)txr->txdma.dma_vaddr;
2863		bzero((void *)txr->tx_base, tsize);
2864
2865        	/* Now allocate transmit buffers for the ring */
2866        	if (igb_allocate_transmit_buffers(txr)) {
2867			device_printf(dev,
2868			    "Critical Failure setting up transmit buffers\n");
2869			error = ENOMEM;
2870			goto err_tx_desc;
2871        	}
2872#if __FreeBSD_version >= 800000
2873		/* Allocate a buf ring */
2874		txr->br = buf_ring_alloc(IGB_BR_SIZE, M_DEVBUF,
2875		    M_WAITOK, &txr->tx_mtx);
2876#endif
2877	}
2878
2879	/*
2880	 * Next the RX queues...
2881	 */
2882	rsize = roundup2(adapter->num_rx_desc *
2883	    sizeof(union e1000_adv_rx_desc), IGB_DBA_ALIGN);
2884	for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
2885		rxr = &adapter->rx_rings[i];
2886		rxr->adapter = adapter;
2887		rxr->me = i;
2888
2889		/* Initialize the RX lock */
2890		snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
2891		    device_get_nameunit(dev), rxr->me);
2892		mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF);
2893
2894		if (igb_dma_malloc(adapter, rsize,
2895			&rxr->rxdma, BUS_DMA_NOWAIT)) {
2896			device_printf(dev,
2897			    "Unable to allocate RxDescriptor memory\n");
2898			error = ENOMEM;
2899			goto err_rx_desc;
2900		}
2901		rxr->rx_base = (union e1000_adv_rx_desc *)rxr->rxdma.dma_vaddr;
2902		bzero((void *)rxr->rx_base, rsize);
2903
2904        	/* Allocate receive buffers for the ring*/
2905		if (igb_allocate_receive_buffers(rxr)) {
2906			device_printf(dev,
2907			    "Critical Failure setting up receive buffers\n");
2908			error = ENOMEM;
2909			goto err_rx_desc;
2910		}
2911	}
2912
2913	/*
2914	** Finally set up the queue holding structs
2915	*/
2916	for (int i = 0; i < adapter->num_queues; i++) {
2917		que = &adapter->queues[i];
2918		que->adapter = adapter;
2919		que->txr = &adapter->tx_rings[i];
2920		que->rxr = &adapter->rx_rings[i];
2921	}
2922
2923	return (0);
2924
2925err_rx_desc:
2926	for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
2927		igb_dma_free(adapter, &rxr->rxdma);
2928err_tx_desc:
2929	for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--) {
2930		igb_dma_free(adapter, &txr->txdma);
#if __FreeBSD_version >= 800000
		if (txr->br != NULL)
			buf_ring_free(txr->br, M_DEVBUF);
#endif
	}
2931	free(adapter->rx_rings, M_DEVBUF);
2932rx_fail:
2934	free(adapter->tx_rings, M_DEVBUF);
2935tx_fail:
2936	free(adapter->queues, M_DEVBUF);
2937fail:
2938	return (error);
2939}
2940
2941/*********************************************************************
2942 *
2943 *  Allocate memory for tx_buffer structures. The tx_buffer stores all
2944 *  the information needed to transmit a packet on the wire. This is
2945	 *  called only once at attach; setup is done on every reset.
2946 *
2947 **********************************************************************/
2948static int
2949igb_allocate_transmit_buffers(struct tx_ring *txr)
2950{
2951	struct adapter *adapter = txr->adapter;
2952	device_t dev = adapter->dev;
2953	struct igb_tx_buffer *txbuf;
2954	int error, i;
2955
2956	/*
2957	 * Setup DMA descriptor areas.
2958	 */
2959	if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
2960			       1, 0,			/* alignment, bounds */
2961			       BUS_SPACE_MAXADDR,	/* lowaddr */
2962			       BUS_SPACE_MAXADDR,	/* highaddr */
2963			       NULL, NULL,		/* filter, filterarg */
2964			       IGB_TSO_SIZE,		/* maxsize */
2965			       IGB_MAX_SCATTER,		/* nsegments */
2966			       PAGE_SIZE,		/* maxsegsize */
2967			       0,			/* flags */
2968			       NULL,			/* lockfunc */
2969			       NULL,			/* lockfuncarg */
2970			       &txr->txtag))) {
2971		device_printf(dev,"Unable to allocate TX DMA tag\n");
2972		goto fail;
2973	}
2974
2975	if (!(txr->tx_buffers =
2976	    (struct igb_tx_buffer *) malloc(sizeof(struct igb_tx_buffer) *
2977	    adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2978		device_printf(dev, "Unable to allocate tx_buffer memory\n");
2979		error = ENOMEM;
2980		goto fail;
2981	}
2982
2983        /* Create the descriptor buffer dma maps */
2984	txbuf = txr->tx_buffers;
2985	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
2986		error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
2987		if (error != 0) {
2988			device_printf(dev, "Unable to create TX DMA map\n");
2989			goto fail;
2990		}
2991	}
2992
2993	return 0;
2994fail:
2995	/* We free all, it handles case where we are in the middle */
2996	igb_free_transmit_structures(adapter);
2997	return (error);
2998}
2999
3000/*********************************************************************
3001 *
3002 *  Initialize a transmit ring.
3003 *
3004 **********************************************************************/
3005static void
3006igb_setup_transmit_ring(struct tx_ring *txr)
3007{
3008	struct adapter *adapter = txr->adapter;
3009	struct igb_tx_buffer *txbuf;
3010	int i;
3011
3012	/* Clear the old descriptor contents */
3013	IGB_TX_LOCK(txr);
3014	bzero((void *)txr->tx_base,
3015	      (sizeof(union e1000_adv_tx_desc)) * adapter->num_tx_desc);
3016	/* Reset indices */
3017	txr->next_avail_desc = 0;
3018	txr->next_to_clean = 0;
3019
3020	/* Free any existing tx buffers. */
3021        txbuf = txr->tx_buffers;
3022	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3023		if (txbuf->m_head != NULL) {
3024			bus_dmamap_sync(txr->txtag, txbuf->map,
3025			    BUS_DMASYNC_POSTWRITE);
3026			bus_dmamap_unload(txr->txtag, txbuf->map);
3027			m_freem(txbuf->m_head);
3028			txbuf->m_head = NULL;
3029		}
3030		/* clear the watch index */
3031		txbuf->next_eop = -1;
3032        }
3033
3034	/* Set number of descriptors available */
3035	txr->tx_avail = adapter->num_tx_desc;
3036
3037	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3038	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3039	IGB_TX_UNLOCK(txr);
3040}
3041
3042/*********************************************************************
3043 *
3044 *  Initialize all transmit rings.
3045 *
3046 **********************************************************************/
3047static void
3048igb_setup_transmit_structures(struct adapter *adapter)
3049{
3050	struct tx_ring *txr = adapter->tx_rings;
3051
3052	for (int i = 0; i < adapter->num_queues; i++, txr++)
3053		igb_setup_transmit_ring(txr);
3054
3055	return;
3056}
3057
3058/*********************************************************************
3059 *
3060 *  Enable transmit unit.
3061 *
3062 **********************************************************************/
3063static void
3064igb_initialize_transmit_units(struct adapter *adapter)
3065{
3066	struct tx_ring	*txr = adapter->tx_rings;
3067	struct e1000_hw *hw = &adapter->hw;
3068	u32		tctl, txdctl;
3069
3070	 INIT_DEBUGOUT("igb_initialize_transmit_units: begin");
3071
3072	/* Setup the Tx Descriptor Rings */
3073	for (int i = 0; i < adapter->num_queues; i++, txr++) {
3074		u64 bus_addr = txr->txdma.dma_paddr;
3075
3076		E1000_WRITE_REG(hw, E1000_TDLEN(i),
3077		    adapter->num_tx_desc * sizeof(struct e1000_tx_desc));
3078		E1000_WRITE_REG(hw, E1000_TDBAH(i),
3079		    (uint32_t)(bus_addr >> 32));
3080		E1000_WRITE_REG(hw, E1000_TDBAL(i),
3081		    (uint32_t)bus_addr);
3082
3083		/* Setup the HW Tx Head and Tail descriptor pointers */
3084		E1000_WRITE_REG(hw, E1000_TDT(i), 0);
3085		E1000_WRITE_REG(hw, E1000_TDH(i), 0);
3086
3087		HW_DEBUGOUT2("Base = %x, Length = %x\n",
3088		    E1000_READ_REG(hw, E1000_TDBAL(i)),
3089		    E1000_READ_REG(hw, E1000_TDLEN(i)));
3090
3091		txr->watchdog_check = FALSE;
3092
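		/*
		 * TXDCTL packs the prefetch, host and write-back thresholds
		 * into bits 0, 8 and 16 respectively, along with the queue
		 * enable bit.
		 */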
3093		txdctl = E1000_READ_REG(hw, E1000_TXDCTL(i));
3094		txdctl |= IGB_TX_PTHRESH;
3095		txdctl |= IGB_TX_HTHRESH << 8;
3096		txdctl |= IGB_TX_WTHRESH << 16;
3097		txdctl |= E1000_TXDCTL_QUEUE_ENABLE;
3098		E1000_WRITE_REG(hw, E1000_TXDCTL(i), txdctl);
3099	}
3100
3101	/* Program the Transmit Control Register */
3102	tctl = E1000_READ_REG(hw, E1000_TCTL);
3103	tctl &= ~E1000_TCTL_CT;
3104	tctl |= (E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN |
3105		   (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT));
3106
3107	e1000_config_collision_dist(hw);
3108
3109	/* This write will effectively turn on the transmit unit. */
3110	E1000_WRITE_REG(hw, E1000_TCTL, tctl);
3111}
3112
3113/*********************************************************************
3114 *
3115 *  Free all transmit rings.
3116 *
3117 **********************************************************************/
3118static void
3119igb_free_transmit_structures(struct adapter *adapter)
3120{
3121	struct tx_ring *txr = adapter->tx_rings;
3122
3123	for (int i = 0; i < adapter->num_queues; i++, txr++) {
3124		IGB_TX_LOCK(txr);
3125		igb_free_transmit_buffers(txr);
3126		igb_dma_free(adapter, &txr->txdma);
3127		IGB_TX_UNLOCK(txr);
3128		IGB_TX_LOCK_DESTROY(txr);
3129	}
3130	free(adapter->tx_rings, M_DEVBUF);
3131}
3132
3133/*********************************************************************
3134 *
3135 *  Free transmit ring related data structures.
3136 *
3137 **********************************************************************/
3138static void
3139igb_free_transmit_buffers(struct tx_ring *txr)
3140{
3141	struct adapter *adapter = txr->adapter;
3142	struct igb_tx_buffer *tx_buffer;
3143	int             i;
3144
3145	INIT_DEBUGOUT("free_transmit_ring: begin");
3146
3147	if (txr->tx_buffers == NULL)
3148		return;
3149
3150	tx_buffer = txr->tx_buffers;
3151	for (i = 0; i < adapter->num_tx_desc; i++, tx_buffer++) {
3152		if (tx_buffer->m_head != NULL) {
3153			bus_dmamap_sync(txr->txtag, tx_buffer->map,
3154			    BUS_DMASYNC_POSTWRITE);
3155			bus_dmamap_unload(txr->txtag,
3156			    tx_buffer->map);
3157			m_freem(tx_buffer->m_head);
3158			tx_buffer->m_head = NULL;
3159			if (tx_buffer->map != NULL) {
3160				bus_dmamap_destroy(txr->txtag,
3161				    tx_buffer->map);
3162				tx_buffer->map = NULL;
3163			}
3164		} else if (tx_buffer->map != NULL) {
3165			bus_dmamap_unload(txr->txtag,
3166			    tx_buffer->map);
3167			bus_dmamap_destroy(txr->txtag,
3168			    tx_buffer->map);
3169			tx_buffer->map = NULL;
3170		}
3171	}
3172#if __FreeBSD_version >= 800000
3173	if (txr->br != NULL)
3174		buf_ring_free(txr->br, M_DEVBUF);
3175#endif
3176	if (txr->tx_buffers != NULL) {
3177		free(txr->tx_buffers, M_DEVBUF);
3178		txr->tx_buffers = NULL;
3179	}
3180	if (txr->txtag != NULL) {
3181		bus_dma_tag_destroy(txr->txtag);
3182		txr->txtag = NULL;
3183	}
3184	return;
3185}
3186
3187/**********************************************************************
3188 *
3189 *  Setup work for hardware segmentation offload (TSO)
3190 *
3191 **********************************************************************/
3192static boolean_t
3193igb_tso_setup(struct tx_ring *txr, struct mbuf *mp, u32 *hdrlen)
3194{
3195	struct adapter *adapter = txr->adapter;
3196	struct e1000_adv_tx_context_desc *TXD;
3197	struct igb_tx_buffer        *tx_buffer;
3198	u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0;
3199	u32 mss_l4len_idx = 0;
3200	u16 vtag = 0;
3201	int ctxd, ehdrlen, ip_hlen, tcp_hlen;
3202	struct ether_vlan_header *eh;
3203	struct ip *ip;
3204	struct tcphdr *th;
3205
3206
3207	/*
3208	 * Determine where frame payload starts.
3209	 * Jump over vlan headers if already present
3210	 */
3211	eh = mtod(mp, struct ether_vlan_header *);
3212	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN))
3213		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3214	else
3215		ehdrlen = ETHER_HDR_LEN;
3216
3217	/* Ensure we have at least the IP+TCP header in the first mbuf. */
3218	if (mp->m_len < ehdrlen + sizeof(struct ip) + sizeof(struct tcphdr))
3219		return FALSE;
3220
3221	/* Only supports IPV4 for now */
3222	ctxd = txr->next_avail_desc;
3223	tx_buffer = &txr->tx_buffers[ctxd];
3224	TXD = (struct e1000_adv_tx_context_desc *) &txr->tx_base[ctxd];
3225
3226	ip = (struct ip *)(mp->m_data + ehdrlen);
3227	if (ip->ip_p != IPPROTO_TCP)
3228                return FALSE;   /* 0 */
3229	ip->ip_sum = 0;
3230	ip_hlen = ip->ip_hl << 2;
3231	th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
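	/*
	 * Seed th_sum with the pseudo-header checksum over the addresses
	 * and protocol (length omitted), presumably so the hardware can
	 * fold in the per-segment length when it recomputes the TCP
	 * checksum for each TSO segment.
	 */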
3232	th->th_sum = in_pseudo(ip->ip_src.s_addr,
3233	    ip->ip_dst.s_addr, htons(IPPROTO_TCP));
3234	tcp_hlen = th->th_off << 2;
3235	/*
3236	 * Calculate header length, this is used
3237	 * in the transmit desc in igb_xmit
3238	 */
3239	*hdrlen = ehdrlen + ip_hlen + tcp_hlen;
3240
3241	/* VLAN MACLEN IPLEN */
3242	if (mp->m_flags & M_VLANTAG) {
3243		vtag = htole16(mp->m_pkthdr.ether_vtag);
3244		vlan_macip_lens |= (vtag << E1000_ADVTXD_VLAN_SHIFT);
3245	}
3246
3247	vlan_macip_lens |= (ehdrlen << E1000_ADVTXD_MACLEN_SHIFT);
3248	vlan_macip_lens |= ip_hlen;
3249	TXD->vlan_macip_lens |= htole32(vlan_macip_lens);
3250
3251	/* ADV DTYPE TUCMD */
3252	type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
3253	type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP;
3254	type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4;
3255	TXD->type_tucmd_mlhl |= htole32(type_tucmd_mlhl);
3256
3257	/* MSS L4LEN IDX */
3258	mss_l4len_idx |= (mp->m_pkthdr.tso_segsz << E1000_ADVTXD_MSS_SHIFT);
3259	mss_l4len_idx |= (tcp_hlen << E1000_ADVTXD_L4LEN_SHIFT);
3260	/* 82575 needs the queue index added */
3261	if (adapter->hw.mac.type == e1000_82575)
3262		mss_l4len_idx |= txr->me << 4;
3263	TXD->mss_l4len_idx = htole32(mss_l4len_idx);
3264
3265	TXD->seqnum_seed = htole32(0);
3266	tx_buffer->m_head = NULL;
3267	tx_buffer->next_eop = -1;
3268
3269	if (++ctxd == adapter->num_tx_desc)
3270		ctxd = 0;
3271
3272	txr->tx_avail--;
3273	txr->next_avail_desc = ctxd;
3274	return TRUE;
3275}
3276
3277
3278/*********************************************************************
3279 *
3280 *  Context Descriptor setup for VLAN or CSUM
3281 *
3282 **********************************************************************/
3283
3284static bool
3285igb_tx_ctx_setup(struct tx_ring *txr, struct mbuf *mp)
3286{
3287	struct adapter *adapter = txr->adapter;
3288	struct e1000_adv_tx_context_desc *TXD;
3289	struct igb_tx_buffer        *tx_buffer;
3290	u32 vlan_macip_lens, type_tucmd_mlhl, mss_l4len_idx;
3291	struct ether_vlan_header *eh;
3292	struct ip *ip = NULL;
3293	struct ip6_hdr *ip6;
3294	int  ehdrlen, ctxd, ip_hlen = 0;
3295	u16	etype, vtag = 0;
3296	u8	ipproto = 0;
3297	bool	offload = TRUE;
3298
3299	if ((mp->m_pkthdr.csum_flags & CSUM_OFFLOAD) == 0)
3300		offload = FALSE;
3301
3302	vlan_macip_lens = type_tucmd_mlhl = mss_l4len_idx = 0;
3303	ctxd = txr->next_avail_desc;
3304	tx_buffer = &txr->tx_buffers[ctxd];
3305	TXD = (struct e1000_adv_tx_context_desc *) &txr->tx_base[ctxd];
3306
3307	/*
3308	** In advanced descriptors the vlan tag must
3309	** be placed into the context descriptor, thus
3310	** we need to be here just for that setup.
3311	*/
3312	if (mp->m_flags & M_VLANTAG) {
3313		vtag = htole16(mp->m_pkthdr.ether_vtag);
3314		vlan_macip_lens |= (vtag << E1000_ADVTXD_VLAN_SHIFT);
3315	} else if (offload == FALSE)
3316		return FALSE;
3317
3318	/*
3319	 * Determine where frame payload starts.
3320	 * Jump over vlan headers if already present,
3321	 * helpful for QinQ too.
3322	 */
3323	eh = mtod(mp, struct ether_vlan_header *);
3324	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
3325		etype = ntohs(eh->evl_proto);
3326		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3327	} else {
3328		etype = ntohs(eh->evl_encap_proto);
3329		ehdrlen = ETHER_HDR_LEN;
3330	}
3331
3332	/* Set the ether header length */
3333	vlan_macip_lens |= ehdrlen << E1000_ADVTXD_MACLEN_SHIFT;
3334
3335	switch (etype) {
3336		case ETHERTYPE_IP:
3337			ip = (struct ip *)(mp->m_data + ehdrlen);
3338			ip_hlen = ip->ip_hl << 2;
3339			if (mp->m_len < ehdrlen + ip_hlen) {
3340				offload = FALSE;
3341				break;
3342			}
3343			ipproto = ip->ip_p;
3344			type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4;
3345			break;
3346		case ETHERTYPE_IPV6:
3347			ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
3348			ip_hlen = sizeof(struct ip6_hdr);
3349			if (mp->m_len < ehdrlen + ip_hlen)
3350				return (FALSE);
3351			ipproto = ip6->ip6_nxt;
3352			type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV6;
3353			break;
3354		default:
3355			offload = FALSE;
3356			break;
3357	}
3358
3359	vlan_macip_lens |= ip_hlen;
3360	type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
3361
3362	switch (ipproto) {
3363		case IPPROTO_TCP:
3364			if (mp->m_pkthdr.csum_flags & CSUM_TCP)
3365				type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP;
3366			break;
3367		case IPPROTO_UDP:
3368			if (mp->m_pkthdr.csum_flags & CSUM_UDP)
3369				type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_UDP;
3370			break;
3371#if __FreeBSD_version >= 800000
3372		case IPPROTO_SCTP:
3373			if (mp->m_pkthdr.csum_flags & CSUM_SCTP)
3374				type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_SCTP;
3375			break;
3376#endif
3377		default:
3378			offload = FALSE;
3379			break;
3380	}
3381
3382	/* 82575 needs the queue index added */
3383	if (adapter->hw.mac.type == e1000_82575)
3384		mss_l4len_idx = txr->me << 4;
3385
3386	/* Now copy bits into descriptor */
3387	TXD->vlan_macip_lens |= htole32(vlan_macip_lens);
3388	TXD->type_tucmd_mlhl |= htole32(type_tucmd_mlhl);
3389	TXD->seqnum_seed = htole32(0);
3390	TXD->mss_l4len_idx = htole32(mss_l4len_idx);
3391
3392	tx_buffer->m_head = NULL;
3393	tx_buffer->next_eop = -1;
3394
3395	/* We've consumed the first desc, adjust counters */
3396	if (++ctxd == adapter->num_tx_desc)
3397		ctxd = 0;
3398	txr->next_avail_desc = ctxd;
3399	--txr->tx_avail;
3400
3401        return (offload);
3402}
3403
3404
3405/**********************************************************************
3406 *
3407 *  Examine each tx_buffer in the used queue. If the hardware is done
3408 *  processing the packet then free associated resources. The
3409 *  tx_buffer is put back on the free queue.
3410 *
3411	 *  A TRUE return means there's work in the ring to clean; FALSE means it's empty.
3412 **********************************************************************/
3413static bool
3414igb_txeof(struct tx_ring *txr)
3415{
3416	struct adapter	*adapter = txr->adapter;
3417        int first, last, done;
3418        struct igb_tx_buffer *tx_buffer;
3419        struct e1000_tx_desc   *tx_desc, *eop_desc;
3420	struct ifnet   *ifp = adapter->ifp;
3421
3422	IGB_TX_LOCK_ASSERT(txr);
3423
3424        if (txr->tx_avail == adapter->num_tx_desc)
3425                return FALSE;
3426
3427        first = txr->next_to_clean;
3428        tx_desc = &txr->tx_base[first];
3429        tx_buffer = &txr->tx_buffers[first];
3430	last = tx_buffer->next_eop;
3431        eop_desc = &txr->tx_base[last];
3432
3433	/*
3434	 * What this does is get the index of the
3435	 * first descriptor AFTER the EOP of the
3436	 * first packet, that way we can do the
3437	 * simple comparison on the inner while loop.
3438	 */
3439	if (++last == adapter->num_tx_desc)
3440 		last = 0;
3441	done = last;
3442
3443        bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3444            BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
3445
3446        while (eop_desc->upper.fields.status & E1000_TXD_STAT_DD) {
3447		/* We clean the range of the packet */
3448		while (first != done) {
3449                	tx_desc->upper.data = 0;
3450                	tx_desc->lower.data = 0;
3451                	tx_desc->buffer_addr = 0;
3452                	++txr->tx_avail;
3453
3454			if (tx_buffer->m_head) {
3455				txr->bytes +=
3456				    tx_buffer->m_head->m_pkthdr.len;
3457				bus_dmamap_sync(txr->txtag,
3458				    tx_buffer->map,
3459				    BUS_DMASYNC_POSTWRITE);
3460				bus_dmamap_unload(txr->txtag,
3461				    tx_buffer->map);
3462
3463                        	m_freem(tx_buffer->m_head);
3464                        	tx_buffer->m_head = NULL;
3465                	}
3466			tx_buffer->next_eop = -1;
3467			txr->watchdog_time = ticks;
3468
3469	                if (++first == adapter->num_tx_desc)
3470				first = 0;
3471
3472	                tx_buffer = &txr->tx_buffers[first];
3473			tx_desc = &txr->tx_base[first];
3474		}
3475		++txr->packets;
3476		++ifp->if_opackets;
3477		/* See if we can continue to the next packet */
3478		last = tx_buffer->next_eop;
3479		if (last != -1) {
3480        		eop_desc = &txr->tx_base[last];
3481			/* Get new done point */
3482			if (++last == adapter->num_tx_desc) last = 0;
3483			done = last;
3484		} else
3485			break;
3486        }
3487        bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3488            BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3489
3490        txr->next_to_clean = first;
3491
3492        /*
3493         * If we have enough room, clear IFF_DRV_OACTIVE
3494         * to tell the stack that it is OK to send packets.
3495         */
3496        if (txr->tx_avail > IGB_TX_CLEANUP_THRESHOLD) {
3497                ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
3498		/* All clean, turn off the watchdog */
3499                if (txr->tx_avail == adapter->num_tx_desc) {
3500			txr->watchdog_check = FALSE;
3501			return FALSE;
3502		}
3503        }
3504
3505	return (TRUE);
3506}
3507
3508
3509/*********************************************************************
3510 *
3511 *  Refresh mbuf buffers for RX descriptor rings
3512 *   - now keeps its own state so discards due to resource
3513	 *     exhaustion are unnecessary; if an mbuf cannot be obtained
3514	 *     it just returns, keeping its placeholder, so it can simply
3515	 *     be called again later to retry.
3516 *
3517 **********************************************************************/
3518static void
3519igb_refresh_mbufs(struct rx_ring *rxr, int limit)
3520{
3521	struct adapter		*adapter = rxr->adapter;
3522	bus_dma_segment_t	hseg[1];
3523	bus_dma_segment_t	pseg[1];
3524	struct igb_rx_buf	*rxbuf;
3525	struct mbuf		*mh, *mp;
3526	int			i, nsegs, error, cleaned;
3527
3528	i = rxr->next_to_refresh;
3529	cleaned = -1; /* Signify no completions */
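	/*
	 * Each descriptor carries two buffers: a small header mbuf mapped
	 * via htag/hmap (hdr_addr) and a larger payload cluster mapped via
	 * ptag/pmap (pkt_addr); each is only replaced here if the receive
	 * path consumed it.
	 */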
3530	while (i != limit) {
3531		rxbuf = &rxr->rx_buffers[i];
3532		if (rxbuf->m_head == NULL) {
3533			mh = m_gethdr(M_DONTWAIT, MT_DATA);
3534			if (mh == NULL)
3535				goto update;
3536			mh->m_pkthdr.len = mh->m_len = MHLEN;
3538			mh->m_flags |= M_PKTHDR;
3539			m_adj(mh, ETHER_ALIGN);
3540			/* Get the memory mapping */
3541			error = bus_dmamap_load_mbuf_sg(rxr->htag,
3542			    rxbuf->hmap, mh, hseg, &nsegs, BUS_DMA_NOWAIT);
3543			if (error != 0) {
3544				printf("GET BUF: dmamap load"
3545				    " failure - %d\n", error);
3546				m_free(mh);
3547				goto update;
3548			}
3549			rxbuf->m_head = mh;
3550			bus_dmamap_sync(rxr->htag, rxbuf->hmap,
3551			    BUS_DMASYNC_PREREAD);
3552			rxr->rx_base[i].read.hdr_addr =
3553			    htole64(hseg[0].ds_addr);
3554		}
3555
3556		if (rxbuf->m_pack == NULL) {
3557			mp = m_getjcl(M_DONTWAIT, MT_DATA,
3558			    M_PKTHDR, adapter->rx_mbuf_sz);
3559			if (mp == NULL)
3560				goto update;
3561			mp->m_pkthdr.len = mp->m_len = adapter->rx_mbuf_sz;
3562			/* Get the memory mapping */
3563			error = bus_dmamap_load_mbuf_sg(rxr->ptag,
3564			    rxbuf->pmap, mp, pseg, &nsegs, BUS_DMA_NOWAIT);
3565			if (error != 0) {
3566				printf("GET BUF: dmamap load"
3567				    " failure - %d\n", error);
3568				m_free(mp);
3569				goto update;
3570			}
3571			rxbuf->m_pack = mp;
3572			bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
3573			    BUS_DMASYNC_PREREAD);
3574			rxr->rx_base[i].read.pkt_addr =
3575			    htole64(pseg[0].ds_addr);
3576		}
3577
3578		cleaned = i;
3579		/* Calculate next index */
3580		if (++i == adapter->num_rx_desc)
3581			i = 0;
3582		/* This is the work marker for refresh */
3583		rxr->next_to_refresh = i;
3584	}
3585update:
3586	if (cleaned != -1) /* If we refreshed some, bump tail */
3587		E1000_WRITE_REG(&adapter->hw,
3588		    E1000_RDT(rxr->me), cleaned);
3589	return;
3590}
3591
3592
3593/*********************************************************************
3594 *
3595 *  Allocate memory for rx_buffer structures. Since we use one
3596 *  rx_buffer per received packet, the maximum number of rx_buffers
3597 *  that we'll need is equal to the number of receive descriptors
3598 *  that we've allocated.
3599 *
3600 **********************************************************************/
3601static int
3602igb_allocate_receive_buffers(struct rx_ring *rxr)
3603{
3604	struct	adapter 	*adapter = rxr->adapter;
3605	device_t 		dev = adapter->dev;
3606	struct igb_rx_buf	*rxbuf;
3607	int             	i, bsize, error;
3608
3609	bsize = sizeof(struct igb_rx_buf) * adapter->num_rx_desc;
3610	if (!(rxr->rx_buffers =
3611	    (struct igb_rx_buf *) malloc(bsize,
3612	    M_DEVBUF, M_NOWAIT | M_ZERO))) {
3613		device_printf(dev, "Unable to allocate rx_buffer memory\n");
3614		error = ENOMEM;
3615		goto fail;
3616	}
3617
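	/*
	** Two DMA tags are created: a small (MSIZE) tag for the
	** header buffers used with header split, and a jumbo page
	** sized tag for the payload clusters.
	*/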
3618	if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
3619				   1, 0,		/* alignment, bounds */
3620				   BUS_SPACE_MAXADDR,	/* lowaddr */
3621				   BUS_SPACE_MAXADDR,	/* highaddr */
3622				   NULL, NULL,		/* filter, filterarg */
3623				   MSIZE,		/* maxsize */
3624				   1,			/* nsegments */
3625				   MSIZE,		/* maxsegsize */
3626				   0,			/* flags */
3627				   NULL,		/* lockfunc */
3628				   NULL,		/* lockfuncarg */
3629				   &rxr->htag))) {
3630		device_printf(dev, "Unable to create RX DMA tag\n");
3631		goto fail;
3632	}
3633
3634	if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
3635				   1, 0,		/* alignment, bounds */
3636				   BUS_SPACE_MAXADDR,	/* lowaddr */
3637				   BUS_SPACE_MAXADDR,	/* highaddr */
3638				   NULL, NULL,		/* filter, filterarg */
3639				   MJUMPAGESIZE,	/* maxsize */
3640				   1,			/* nsegments */
3641				   MJUMPAGESIZE,	/* maxsegsize */
3642				   0,			/* flags */
3643				   NULL,		/* lockfunc */
3644				   NULL,		/* lockfuncarg */
3645				   &rxr->ptag))) {
3646		device_printf(dev, "Unable to create RX payload DMA tag\n");
3647		goto fail;
3648	}
3649
3650	for (i = 0; i < adapter->num_rx_desc; i++) {
3651		rxbuf = &rxr->rx_buffers[i];
3652		error = bus_dmamap_create(rxr->htag,
3653		    BUS_DMA_NOWAIT, &rxbuf->hmap);
3654		if (error) {
3655			device_printf(dev,
3656			    "Unable to create RX head DMA maps\n");
3657			goto fail;
3658		}
3659		error = bus_dmamap_create(rxr->ptag,
3660		    BUS_DMA_NOWAIT, &rxbuf->pmap);
3661		if (error) {
3662			device_printf(dev,
3663			    "Unable to create RX packet DMA maps\n");
3664			goto fail;
3665		}
3666	}
3667
3668	return (0);
3669
3670fail:
3671	/* Frees all, but can handle partial completion */
3672	igb_free_receive_structures(adapter);
3673	return (error);
3674}
3675
3676
3677static void
3678igb_free_receive_ring(struct rx_ring *rxr)
3679{
3680	struct	adapter		*adapter;
3681	struct igb_rx_buf	*rxbuf;
3682	int i;
3683
3684	adapter = rxr->adapter;
3685	for (i = 0; i < adapter->num_rx_desc; i++) {
3686		rxbuf = &rxr->rx_buffers[i];
3687		if (rxbuf->m_head != NULL) {
3688			bus_dmamap_sync(rxr->htag, rxbuf->hmap,
3689			    BUS_DMASYNC_POSTREAD);
3690			bus_dmamap_unload(rxr->htag, rxbuf->hmap);
3691			rxbuf->m_head->m_flags |= M_PKTHDR;
3692			m_freem(rxbuf->m_head);
3693		}
3694		if (rxbuf->m_pack != NULL) {
3695			bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
3696			    BUS_DMASYNC_POSTREAD);
3697			bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
3698			rxbuf->m_pack->m_flags |= M_PKTHDR;
3699			m_freem(rxbuf->m_pack);
3700		}
3701		rxbuf->m_head = NULL;
3702		rxbuf->m_pack = NULL;
3703	}
3704}
3705
3706
3707/*********************************************************************
3708 *
3709 *  Initialize a receive ring and its buffers.
3710 *
3711 **********************************************************************/
3712static int
3713igb_setup_receive_ring(struct rx_ring *rxr)
3714{
3715	struct	adapter		*adapter;
3716	struct  ifnet		*ifp;
3717	device_t		dev;
3718	struct igb_rx_buf	*rxbuf;
3719	bus_dma_segment_t	pseg[1], hseg[1];
3720	struct lro_ctrl		*lro = &rxr->lro;
3721	int			rsize, nsegs, error = 0;
3722
3723	adapter = rxr->adapter;
3724	dev = adapter->dev;
3725	ifp = adapter->ifp;
3726
3727	/* Clear the ring contents */
3728	IGB_RX_LOCK(rxr);
3729	rsize = roundup2(adapter->num_rx_desc *
3730	    sizeof(union e1000_adv_rx_desc), IGB_DBA_ALIGN);
3731	bzero((void *)rxr->rx_base, rsize);
3732
3733	/*
3734	** Free current RX buffer structures and their mbufs
3735	*/
3736	igb_free_receive_ring(rxr);
3737
3738        /* Now replenish the ring mbufs */
3739	for (int j = 0; j != adapter->num_rx_desc; ++j) {
3740		struct mbuf	*mh, *mp;
3741
3742		rxbuf = &rxr->rx_buffers[j];
3743
3744		/* First the header */
3745		rxbuf->m_head = m_gethdr(M_DONTWAIT, MT_DATA);
3746		if (rxbuf->m_head == NULL)
3747                        goto fail;
3748		m_adj(rxbuf->m_head, ETHER_ALIGN);
3749		mh = rxbuf->m_head;
3750		mh->m_len = mh->m_pkthdr.len = MHLEN;
3751		mh->m_flags |= M_PKTHDR;
3752		/* Get the memory mapping */
3753		error = bus_dmamap_load_mbuf_sg(rxr->htag,
3754		    rxbuf->hmap, rxbuf->m_head, hseg,
3755		    &nsegs, BUS_DMA_NOWAIT);
3756		if (error != 0) /* Nothing elegant to do here */
3757                        goto fail;
3758		bus_dmamap_sync(rxr->htag,
3759		    rxbuf->hmap, BUS_DMASYNC_PREREAD);
3760		/* Update descriptor */
3761		rxr->rx_base[j].read.hdr_addr = htole64(hseg[0].ds_addr);
3762
3763		/* Now the payload cluster */
3764		rxbuf->m_pack = m_getjcl(M_DONTWAIT, MT_DATA,
3765		    M_PKTHDR, adapter->rx_mbuf_sz);
3766		if (rxbuf->m_pack == NULL)
3767                        goto fail;
3768		mp = rxbuf->m_pack;
3769		mp->m_pkthdr.len = mp->m_len = adapter->rx_mbuf_sz;
3770		/* Get the memory mapping */
3771		error = bus_dmamap_load_mbuf_sg(rxr->ptag,
3772		    rxbuf->pmap, mp, pseg,
3773		    &nsegs, BUS_DMA_NOWAIT);
3774		if (error != 0)
3775                        goto fail;
3776		bus_dmamap_sync(rxr->ptag,
3777		    rxbuf->pmap, BUS_DMASYNC_PREREAD);
3778		/* Update descriptor */
3779		rxr->rx_base[j].read.pkt_addr = htole64(pseg[0].ds_addr);
3780        }
3781	rxr->next_to_refresh = 0;
3782	rxr->lro_enabled = FALSE;
3783
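	/* LRO relies on header split; without it the capability is dropped. */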
3784	if (igb_header_split)
3785		rxr->hdr_split = TRUE;
3786	else
3787		ifp->if_capabilities &= ~IFCAP_LRO;
3788
3789	rxr->fmp = NULL;
3790	rxr->lmp = NULL;
3791	rxr->discard = FALSE;
3792
3793	bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
3794	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3795
3796	/*
3797	** Now set up the LRO interface; we also
3798	** only do header split when LRO is
3799	** enabled, since the two are so often
3800	** undesirable in the same setups.
3801	*/
3802	if (ifp->if_capenable & IFCAP_LRO) {
3803		int err = tcp_lro_init(lro);
3804		if (err) {
3805			device_printf(dev, "LRO Initialization failed!\n");
3806			goto fail;
3807		}
3808		INIT_DEBUGOUT("RX LRO Initialized\n");
3809		rxr->lro_enabled = TRUE;
3810		lro->ifp = adapter->ifp;
3811	}
3812
3813	IGB_RX_UNLOCK(rxr);
3814	return (0);
3815
3816fail:
3817	igb_free_receive_ring(rxr);
3818	IGB_RX_UNLOCK(rxr);
3819	return (error);
3820}
3821
3822/*********************************************************************
3823 *
3824 *  Initialize all receive rings.
3825 *
3826 **********************************************************************/
3827static int
3828igb_setup_receive_structures(struct adapter *adapter)
3829{
3830	struct rx_ring *rxr = adapter->rx_rings;
3831	int i, j;
3832
3833	for (i = 0; i < adapter->num_queues; i++, rxr++)
3834		if (igb_setup_receive_ring(rxr))
3835			goto fail;
3836
3837	return (0);
3838fail:
3839	/*
3840	 * Free RX buffers allocated so far, we will only handle
3841	 * the rings that completed, the failing case will have
3842	 * cleaned up for itself. The value of 'i' will be the
3843	 * failed ring, so we free only the rings before it.
3844	 */
3845	rxr = adapter->rx_rings;
3846	for (j = 0; j < i; j++) {
3847		igb_free_receive_ring(rxr);
3848		rxr++;
3849	}
3850
3851	return (ENOBUFS);
3852}
3853
3854/*********************************************************************
3855 *
3856 *  Enable receive unit.
3857 *
3858 **********************************************************************/
3859static void
3860igb_initialize_receive_units(struct adapter *adapter)
3861{
3862	struct rx_ring	*rxr = adapter->rx_rings;
3863	struct ifnet	*ifp = adapter->ifp;
3864	struct e1000_hw *hw = &adapter->hw;
3865	u32		rctl, rxcsum, psize, srrctl = 0;
3866
3867	INIT_DEBUGOUT("igb_initialize_receive_unit: begin");
3868
3869	/*
3870	 * Make sure receives are disabled while setting
3871	 * up the descriptor ring
3872	 */
3873	rctl = E1000_READ_REG(hw, E1000_RCTL);
3874	E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
3875
3876	/*
3877	** Set up for header split
3878	*/
3879	if (rxr->hdr_split) {
3880		/* Use a standard mbuf for the header */
3881		srrctl |= IGB_HDR_BUF << E1000_SRRCTL_BSIZEHDRSIZE_SHIFT;
3882		srrctl |= E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
3883	} else
3884		srrctl |= E1000_SRRCTL_DESCTYPE_ADV_ONEBUF;
3885
3886	/*
3887	** Set up for jumbo frames
3888	*/
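	/*
	** Note: SRRCTL expresses the packet buffer size in 1KB
	** units (hence the BSIZEPKT shift); RCTL carries the
	** matching legacy buffer size bits.
	*/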
3889	if (ifp->if_mtu > ETHERMTU) {
3890		rctl |= E1000_RCTL_LPE;
3891		srrctl |= 4096 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
3892		rctl |= E1000_RCTL_SZ_4096 | E1000_RCTL_BSEX;
3893
3894		/* Set maximum packet len */
3895		psize = adapter->max_frame_size;
3896		/* are we on a vlan? */
3897		if (adapter->ifp->if_vlantrunk != NULL)
3898			psize += VLAN_TAG_SIZE;
3899		E1000_WRITE_REG(&adapter->hw, E1000_RLPML, psize);
3900	} else {
3901		rctl &= ~E1000_RCTL_LPE;
3902		srrctl |= 2048 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
3903		rctl |= E1000_RCTL_SZ_2048;
3904	}
3905
3906	/* Setup the Base and Length of the Rx Descriptor Rings */
3907	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
3908		u64 bus_addr = rxr->rxdma.dma_paddr;
3909		u32 rxdctl;
3910
3911		E1000_WRITE_REG(hw, E1000_RDLEN(i),
3912		    adapter->num_rx_desc * sizeof(struct e1000_rx_desc));
3913		E1000_WRITE_REG(hw, E1000_RDBAH(i),
3914		    (uint32_t)(bus_addr >> 32));
3915		E1000_WRITE_REG(hw, E1000_RDBAL(i),
3916		    (uint32_t)bus_addr);
3917		E1000_WRITE_REG(hw, E1000_SRRCTL(i), srrctl);
3918		/* Enable this Queue */
3919		rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(i));
3920		rxdctl |= E1000_RXDCTL_QUEUE_ENABLE;
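		/*
		** The prefetch, host and write-back thresholds occupy
		** successive byte fields of RXDCTL; the low bits are
		** cleared first so the new values take effect.
		*/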
3921		rxdctl &= 0xFFF00000;
3922		rxdctl |= IGB_RX_PTHRESH;
3923		rxdctl |= IGB_RX_HTHRESH << 8;
3924		rxdctl |= IGB_RX_WTHRESH << 16;
3925		E1000_WRITE_REG(hw, E1000_RXDCTL(i), rxdctl);
3926	}
3927
3928	/*
3929	** Setup for RX MultiQueue
3930	*/
3931	rxcsum = E1000_READ_REG(hw, E1000_RXCSUM);
3932	if (adapter->num_queues > 1) {
3933		u32 random[10], mrqc, shift = 0;
3934		union igb_reta {
3935			u32 dword;
3936			u8  bytes[4];
3937		} reta;
3938
3939		arc4rand(&random, sizeof(random), 0);
3940		if (adapter->hw.mac.type == e1000_82575)
3941			shift = 6;
3942		/* Warning FM follows */
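		/*
		** The 128-entry redirection table is built one byte per
		** entry and written out four entries (one dword) at a
		** time; the 82575 wants the queue index shifted up
		** within each byte, hence the shift above.
		*/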
3943		for (int i = 0; i < 128; i++) {
3944			reta.bytes[i & 3] =
3945			    (i % adapter->num_queues) << shift;
3946			if ((i & 3) == 3)
3947				E1000_WRITE_REG(hw,
3948				    E1000_RETA(i >> 2), reta.dword);
3949		}
3950		/* Now fill in hash table */
3951		mrqc = E1000_MRQC_ENABLE_RSS_4Q;
3952		for (int i = 0; i < 10; i++)
3953			E1000_WRITE_REG_ARRAY(hw,
3954			    E1000_RSSRK(0), i, random[i]);
3955
3956		mrqc |= (E1000_MRQC_RSS_FIELD_IPV4 |
3957		    E1000_MRQC_RSS_FIELD_IPV4_TCP);
3958		mrqc |= (E1000_MRQC_RSS_FIELD_IPV6 |
3959		    E1000_MRQC_RSS_FIELD_IPV6_TCP);
3960		mrqc |= (E1000_MRQC_RSS_FIELD_IPV4_UDP |
3961		    E1000_MRQC_RSS_FIELD_IPV6_UDP);
3962		mrqc |= (E1000_MRQC_RSS_FIELD_IPV6_UDP_EX |
3963		    E1000_MRQC_RSS_FIELD_IPV6_TCP_EX);
3964
3965		E1000_WRITE_REG(hw, E1000_MRQC, mrqc);
3966
3967		/*
3968		** NOTE: Receive Full-Packet Checksum Offload
3969		** is mutually exclusive with Multiqueue. However
3970		** this is not the same as TCP/IP checksums which
3971		** still work.
3972		*/
3973		rxcsum |= E1000_RXCSUM_PCSD;
3974#if __FreeBSD_version >= 800000
3975		/* For SCTP Offload */
3976		if ((hw->mac.type == e1000_82576)
3977		    && (ifp->if_capenable & IFCAP_RXCSUM))
3978			rxcsum |= E1000_RXCSUM_CRCOFL;
3979#endif
3980	} else {
3981		/* Non RSS setup */
3982		if (ifp->if_capenable & IFCAP_RXCSUM) {
3983			rxcsum |= E1000_RXCSUM_IPPCSE;
3984#if __FreeBSD_version >= 800000
3985			if (adapter->hw.mac.type == e1000_82576)
3986				rxcsum |= E1000_RXCSUM_CRCOFL;
3987#endif
3988		} else
3989			rxcsum &= ~E1000_RXCSUM_TUOFL;
3990	}
3991	E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum);
3992
3993	/* Setup the Receive Control Register */
3994	rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
3995	rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_LBM_NO |
3996		   E1000_RCTL_RDMTS_HALF |
3997		   (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
3998	/* Strip CRC bytes. */
3999	rctl |= E1000_RCTL_SECRC;
4000	/* Make sure VLAN Filters are off */
4001	rctl &= ~E1000_RCTL_VFE;
4002	/* Don't store bad packets */
4003	rctl &= ~E1000_RCTL_SBP;
4004
4005	/* Enable Receives */
4006	E1000_WRITE_REG(hw, E1000_RCTL, rctl);
4007
4008	/*
4009	 * Setup the HW Rx Head and Tail Descriptor Pointers
4010	 *   - needs to be after enable
4011	 */
4012	for (int i = 0; i < adapter->num_queues; i++) {
4013		E1000_WRITE_REG(hw, E1000_RDH(i), 0);
4014		E1000_WRITE_REG(hw, E1000_RDT(i),
4015		     adapter->num_rx_desc - 1);
4016	}
4017	return;
4018}
4019
4020/*********************************************************************
4021 *
4022 *  Free receive rings.
4023 *
4024 **********************************************************************/
4025static void
4026igb_free_receive_structures(struct adapter *adapter)
4027{
4028	struct rx_ring *rxr = adapter->rx_rings;
4029
4030	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4031		struct lro_ctrl	*lro = &rxr->lro;
4032		igb_free_receive_buffers(rxr);
4033		tcp_lro_free(lro);
4034		igb_dma_free(adapter, &rxr->rxdma);
4035	}
4036
4037	free(adapter->rx_rings, M_DEVBUF);
4038}
4039
4040/*********************************************************************
4041 *
4042 *  Free receive ring data structures.
4043 *
4044 **********************************************************************/
4045static void
4046igb_free_receive_buffers(struct rx_ring *rxr)
4047{
4048	struct adapter		*adapter = rxr->adapter;
4049	struct igb_rx_buf	*rxbuf;
4050	int i;
4051
4052	INIT_DEBUGOUT("igb_free_receive_buffers: begin");
4053
4054	/* Cleanup any existing buffers */
4055	if (rxr->rx_buffers != NULL) {
4056		for (i = 0; i < adapter->num_rx_desc; i++) {
4057			rxbuf = &rxr->rx_buffers[i];
4058			if (rxbuf->m_head != NULL) {
4059				bus_dmamap_sync(rxr->htag, rxbuf->hmap,
4060				    BUS_DMASYNC_POSTREAD);
4061				bus_dmamap_unload(rxr->htag, rxbuf->hmap);
4062				rxbuf->m_head->m_flags |= M_PKTHDR;
4063				m_freem(rxbuf->m_head);
4064			}
4065			if (rxbuf->m_pack != NULL) {
4066				bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
4067				    BUS_DMASYNC_POSTREAD);
4068				bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
4069				rxbuf->m_pack->m_flags |= M_PKTHDR;
4070				m_freem(rxbuf->m_pack);
4071			}
4072			rxbuf->m_head = NULL;
4073			rxbuf->m_pack = NULL;
4074			if (rxbuf->hmap != NULL) {
4075				bus_dmamap_destroy(rxr->htag, rxbuf->hmap);
4076				rxbuf->hmap = NULL;
4077			}
4078			if (rxbuf->pmap != NULL) {
4079				bus_dmamap_destroy(rxr->ptag, rxbuf->pmap);
4080				rxbuf->pmap = NULL;
4081			}
4082		}
4083		if (rxr->rx_buffers != NULL) {
4084			free(rxr->rx_buffers, M_DEVBUF);
4085			rxr->rx_buffers = NULL;
4086		}
4087	}
4088
4089	if (rxr->htag != NULL) {
4090		bus_dma_tag_destroy(rxr->htag);
4091		rxr->htag = NULL;
4092	}
4093	if (rxr->ptag != NULL) {
4094		bus_dma_tag_destroy(rxr->ptag);
4095		rxr->ptag = NULL;
4096	}
4097}
4098
4099static __inline void
4100igb_rx_discard(struct rx_ring *rxr, int i)
4101{
4102	struct adapter		*adapter = rxr->adapter;
4103	struct igb_rx_buf	*rbuf;
4104	struct mbuf             *mh, *mp;
4105
4106	rbuf = &rxr->rx_buffers[i];
4107	if (rxr->fmp != NULL) {
4108		rxr->fmp->m_flags |= M_PKTHDR;
4109		m_freem(rxr->fmp);
4110		rxr->fmp = NULL;
4111		rxr->lmp = NULL;
4112	}
4113
4114	mh = rbuf->m_head;
4115	mp = rbuf->m_pack;
4116
4117	/* Reuse loaded DMA map and just update mbuf chain */
4118	mh->m_len = MHLEN;
4119	mh->m_flags |= M_PKTHDR;
4120	mh->m_next = NULL;
4121
4122	mp->m_len = mp->m_pkthdr.len = adapter->rx_mbuf_sz;
4123	mp->m_data = mp->m_ext.ext_buf;
4124	mp->m_next = NULL;
4125	return;
4126}
4127
4128static __inline void
4129igb_rx_input(struct rx_ring *rxr, struct ifnet *ifp, struct mbuf *m, u32 ptype)
4130{
4131
4132	/*
4133	 * At the moment LRO is only done for IPv4/TCP packets whose TCP
4134	 * checksum has been verified by the hardware, and the packet must
4135	 * not carry a VLAN tag in its ethernet header.
4136	 */
4137	if (rxr->lro_enabled &&
4138	    (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 &&
4139	    (ptype & E1000_RXDADV_PKTTYPE_ETQF) == 0 &&
4140	    (ptype & (E1000_RXDADV_PKTTYPE_IPV4 | E1000_RXDADV_PKTTYPE_TCP)) ==
4141	    (E1000_RXDADV_PKTTYPE_IPV4 | E1000_RXDADV_PKTTYPE_TCP) &&
4142	    (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) ==
4143	    (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) {
4144		/*
4145		 * Send to the stack if:
4146		 **  - LRO not enabled, or
4147		 **  - no LRO resources, or
4148		 **  - lro enqueue fails
4149		 */
4150		if (rxr->lro.lro_cnt != 0)
4151			if (tcp_lro_rx(&rxr->lro, m, 0) == 0)
4152				return;
4153	}
4154	(*ifp->if_input)(ifp, m);
4155}
4156
4157/*********************************************************************
4158 *
4159 *  This routine executes in interrupt context. It replenishes
4160 *  the mbufs in the descriptor and sends data which has been
4161 *  the mbufs in the descriptor ring and sends data which has been
4162 *  DMA'd into host memory up to the stack.
4163 *  We loop at most count times if count is > 0, or until done if
4164 *  count < 0.
4165 *
4166 *  Return TRUE if more to clean, FALSE otherwise
4167 *********************************************************************/
4168static bool
4169igb_rxeof(struct igb_queue *que, int count)
4170{
4171	struct adapter		*adapter = que->adapter;
4172	struct rx_ring		*rxr = que->rxr;
4173	struct ifnet		*ifp = adapter->ifp;
4174	struct lro_ctrl		*lro = &rxr->lro;
4175	struct lro_entry	*queued;
4176	int			i, processed = 0;
4177	u32			ptype, staterr = 0;
4178	union e1000_adv_rx_desc	*cur;
4179
4180	IGB_RX_LOCK(rxr);
4181	/* Sync the ring. */
4182	bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4183	    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
4184
4185	/* Main clean loop */
4186	for (i = rxr->next_to_check; count != 0;) {
4187		struct mbuf		*sendmp, *mh, *mp;
4188		struct igb_rx_buf	*rxbuf;
4189		u16			hlen, plen, hdr, vtag;
4190		bool			eop = FALSE;
4191
4192		cur = &rxr->rx_base[i];
4193		staterr = le32toh(cur->wb.upper.status_error);
4194		if ((staterr & E1000_RXD_STAT_DD) == 0)
4195			break;
4196		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
4197			break;
4198		count--;
4199		sendmp = mh = mp = NULL;
4200		cur->wb.upper.status_error = 0;
4201		rxbuf = &rxr->rx_buffers[i];
4202		plen = le16toh(cur->wb.upper.length);
4203		ptype = le32toh(cur->wb.lower.lo_dword.data) & IGB_PKTTYPE_MASK;
4204		vtag = le16toh(cur->wb.upper.vlan);
4205		hdr = le16toh(cur->wb.lower.lo_dword.hs_rss.hdr_info);
4206		eop = ((staterr & E1000_RXD_STAT_EOP) == E1000_RXD_STAT_EOP);
4207
4208		/* Make sure all segments of a bad packet are discarded */
4209		if (((staterr & E1000_RXDEXT_ERR_FRAME_ERR_MASK) != 0) ||
4210		    (rxr->discard)) {
4211			ifp->if_ierrors++;
4212			++rxr->rx_discarded;
4213			if (!eop) /* Catch subsequent segs */
4214				rxr->discard = TRUE;
4215			else
4216				rxr->discard = FALSE;
4217			igb_rx_discard(rxr, i);
4218			goto next_desc;
4219		}
4220
4221		/*
4222		** The way the hardware is configured to
4223		** split, it will ONLY use the header buffer
4224		** when header split is enabled; otherwise we
4225		** get normal behavior, i.e. both header and
4226		** payload are DMA'd into the payload buffer.
4227		**
4228		** The fmp test catches the case where a
4229		** packet spans multiple descriptors; in that
4230		** case only the first header is valid.
4231		*/
4232		if (rxr->hdr_split && rxr->fmp == NULL) {
4233			hlen = (hdr & E1000_RXDADV_HDRBUFLEN_MASK) >>
4234			    E1000_RXDADV_HDRBUFLEN_SHIFT;
4235			if (hlen > IGB_HDR_BUF)
4236				hlen = IGB_HDR_BUF;
4237			/* Handle the header mbuf */
4238			mh = rxr->rx_buffers[i].m_head;
4239			mh->m_len = hlen;
4240			/* clear buf info for refresh */
4241			rxbuf->m_head = NULL;
4242			/*
4243			** Get the payload length; this
4244			** could be zero if it is a small
4245			** packet.
4246			*/
4247			if (plen > 0) {
4248				mp = rxr->rx_buffers[i].m_pack;
4249				mp->m_len = plen;
4250				mh->m_next = mp;
4251				/* clear buf info for refresh */
4252				rxbuf->m_pack = NULL;
4253				rxr->rx_split_packets++;
4254			}
4255		} else {
4256			/*
4257			** Either no header split, or a
4258			** secondary piece of a fragmented
4259			** split packet.
4260			*/
4261			mh = rxr->rx_buffers[i].m_pack;
4262			mh->m_len = plen;
4263			/* clear buf info for refresh */
4264			rxbuf->m_pack = NULL;
4265		}
4266
4267		++processed; /* So we know when to refresh */
4268
4269		/* Initial frame - setup */
4270		if (rxr->fmp == NULL) {
4271			mh->m_pkthdr.len = mh->m_len;
4272			/* Store the first mbuf */
4273			rxr->fmp = mh;
4274			rxr->lmp = mh;
4275			if (mp != NULL) {
4276				/* Add payload if split */
4277				mh->m_pkthdr.len += mp->m_len;
4278				rxr->lmp = mh->m_next;
4279			}
4280		} else {
4281			/* Chain mbuf's together */
4282			rxr->lmp->m_next = mh;
4283			rxr->lmp = rxr->lmp->m_next;
4284			rxr->fmp->m_pkthdr.len += mh->m_len;
4285		}
4286
4287		if (eop) {
4288			rxr->fmp->m_pkthdr.rcvif = ifp;
4289			ifp->if_ipackets++;
4290			rxr->rx_packets++;
4291			/* capture data for AIM */
4292			rxr->packets++;
4293			rxr->bytes += rxr->fmp->m_pkthdr.len;
4294			rxr->rx_bytes += rxr->fmp->m_pkthdr.len;
4295
4296			if ((ifp->if_capenable & IFCAP_RXCSUM) != 0)
4297				igb_rx_checksum(staterr, rxr->fmp, ptype);
4298
4299			if ((ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 &&
4300			    (staterr & E1000_RXD_STAT_VP) != 0) {
4301				rxr->fmp->m_pkthdr.ether_vtag = vtag;
4302				rxr->fmp->m_flags |= M_VLANTAG;
4303			}
4304#if __FreeBSD_version >= 800000
4305			rxr->fmp->m_pkthdr.flowid = que->msix;
4306			rxr->fmp->m_flags |= M_FLOWID;
4307#endif
4308			sendmp = rxr->fmp;
4309			/* Make sure to set M_PKTHDR. */
4310			sendmp->m_flags |= M_PKTHDR;
4311			rxr->fmp = NULL;
4312			rxr->lmp = NULL;
4313		}
4314
4315next_desc:
4316		bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4317		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4318
4319		/* Advance our pointers to the next descriptor. */
4320		if (++i == adapter->num_rx_desc)
4321			i = 0;
4322		/*
4323		** Send to the stack or LRO
4324		*/
4325		if (sendmp != NULL)
4326			igb_rx_input(rxr, ifp, sendmp, ptype);
4327
4328		/* Every 8 descriptors we go to refresh mbufs */
4329		if (processed == 8) {
4330                        igb_refresh_mbufs(rxr, i);
4331                        processed = 0;
4332		}
4333	}
4334
4335	/* Catch any remainders */
4336	if (processed != 0) {
4337		igb_refresh_mbufs(rxr, i);
4338		processed = 0;
4339	}
4340
4341	rxr->next_to_check = i;
4342
4343	/*
4344	 * Flush any outstanding LRO work
4345	 */
4346	while ((queued = SLIST_FIRST(&lro->lro_active)) != NULL) {
4347		SLIST_REMOVE_HEAD(&lro->lro_active, next);
4348		tcp_lro_flush(lro, queued);
4349	}
4350
4351	IGB_RX_UNLOCK(rxr);
4352
4353	/*
4354	** We still have cleaning to do?
4355	** Schedule another interrupt if so.
4356	*/
4357	if ((staterr & E1000_RXD_STAT_DD) != 0)
4358		return (TRUE);
4359
4360	return (FALSE);
4361}
4362
4363/*********************************************************************
4364 *
4365 *  Verify that the hardware indicated that the checksum is valid.
4366 *  Inform the stack about the status of checksum so that stack
4367 *  doesn't spend time verifying the checksum.
4368 *
4369 *********************************************************************/
4370static void
4371igb_rx_checksum(u32 staterr, struct mbuf *mp, u32 ptype)
4372{
4373	u16 status = (u16)staterr;
4374	u8  errors = (u8) (staterr >> 24);
4375	int sctp;
4376
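	/*
	** The advanced descriptor packs its status bits in the low
	** word of staterr and its error bits in the top byte, hence
	** the extractions above.
	*/
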
4377	/* Ignore Checksum bit is set */
4378	if (status & E1000_RXD_STAT_IXSM) {
4379		mp->m_pkthdr.csum_flags = 0;
4380		return;
4381	}
4382
4383	if ((ptype & E1000_RXDADV_PKTTYPE_ETQF) == 0 &&
4384	    (ptype & E1000_RXDADV_PKTTYPE_SCTP) != 0)
4385		sctp = 1;
4386	else
4387		sctp = 0;
4388	if (status & E1000_RXD_STAT_IPCS) {
4389		/* Did it pass? */
4390		if (!(errors & E1000_RXD_ERR_IPE)) {
4391			/* IP Checksum Good */
4392			mp->m_pkthdr.csum_flags = CSUM_IP_CHECKED;
4393			mp->m_pkthdr.csum_flags |= CSUM_IP_VALID;
4394		} else
4395			mp->m_pkthdr.csum_flags = 0;
4396	}
4397
4398	if (status & (E1000_RXD_STAT_TCPCS | E1000_RXD_STAT_UDPCS)) {
4399		u16 type = (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
4400#if __FreeBSD_version >= 800000
4401		if (sctp) /* reassign */
4402			type = CSUM_SCTP_VALID;
4403#endif
4404		/* Did it pass? */
4405		if (!(errors & E1000_RXD_ERR_TCPE)) {
4406			mp->m_pkthdr.csum_flags |= type;
4407			if (sctp == 0)
4408				mp->m_pkthdr.csum_data = htons(0xffff);
4409		}
4410	}
4411	return;
4412}
4413
4414/*
4415 * This routine is run via a vlan
4416 * config EVENT
4417 */
4418static void
4419igb_register_vlan(void *arg, struct ifnet *ifp, u16 vtag)
4420{
4421	struct adapter	*adapter = ifp->if_softc;
4422	u32		index, bit;
4423
4424	if (ifp->if_softc !=  arg)   /* Not our event */
4425		return;
4426
4427	if ((vtag == 0) || (vtag > 4095))       /* Invalid */
4428                return;
4429
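	/*
	** The 4096-entry VLAN filter table is kept as 128 32-bit
	** words; the upper bits of the tag pick the word and the
	** low five bits pick the bit within it.
	*/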
4430	index = (vtag >> 5) & 0x7F;
4431	bit = vtag & 0x1F;
4432	igb_shadow_vfta[index] |= (1 << bit);
4433	++adapter->num_vlans;
4434	/* Re-init to load the changes */
4435	igb_init(adapter);
4436}
4437
4438/*
4439 * This routine is run via a vlan
4440 * unconfig EVENT
4441 */
4442static void
4443igb_unregister_vlan(void *arg, struct ifnet *ifp, u16 vtag)
4444{
4445	struct adapter	*adapter = ifp->if_softc;
4446	u32		index, bit;
4447
4448	if (ifp->if_softc !=  arg)
4449		return;
4450
4451	if ((vtag == 0) || (vtag > 4095))       /* Invalid */
4452                return;
4453
4454	index = (vtag >> 5) & 0x7F;
4455	bit = vtag & 0x1F;
4456	igb_shadow_vfta[index] &= ~(1 << bit);
4457	--adapter->num_vlans;
4458	/* Re-init to load the changes */
4459	igb_init(adapter);
4460}
4461
4462static void
4463igb_setup_vlan_hw_support(struct adapter *adapter)
4464{
4465	struct e1000_hw *hw = &adapter->hw;
4466	u32             reg;
4467
4468	/*
4469	** We get here through init_locked, meaning
4470	** a soft reset; this has already cleared
4471	** the VFTA and other state, so if no
4472	** vlans have been registered do nothing.
4473	*/
4474	if (adapter->num_vlans == 0)
4475                return;
4476
4477	/*
4478	** A soft reset zeroes out the VFTA, so
4479	** we need to repopulate it now.
4480	*/
4481	for (int i = 0; i < IGB_VFTA_SIZE; i++)
4482                if (igb_shadow_vfta[i] != 0)
4483			E1000_WRITE_REG_ARRAY(hw, E1000_VFTA,
4484                            i, igb_shadow_vfta[i]);
4485
4486	reg = E1000_READ_REG(hw, E1000_CTRL);
4487	reg |= E1000_CTRL_VME;
4488	E1000_WRITE_REG(hw, E1000_CTRL, reg);
4489
4490	/* Enable the Filter Table */
4491	reg = E1000_READ_REG(hw, E1000_RCTL);
4492	reg &= ~E1000_RCTL_CFIEN;
4493	reg |= E1000_RCTL_VFE;
4494	E1000_WRITE_REG(hw, E1000_RCTL, reg);
4495
4496	/* Update the frame size */
4497	E1000_WRITE_REG(&adapter->hw, E1000_RLPML,
4498	    adapter->max_frame_size + VLAN_TAG_SIZE);
4499}
4500
4501static void
4502igb_enable_intr(struct adapter *adapter)
4503{
4504	/* With RSS set up what to auto clear */
4505	if (adapter->msix_mem) {
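		/*
		** Under MSIX: EIAC selects which queue interrupts
		** auto-clear, EIAM auto-masks them and EIMS enables
		** them; link state changes still arrive via IMS.
		*/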
4506		E1000_WRITE_REG(&adapter->hw, E1000_EIAC,
4507		    adapter->eims_mask);
4508		E1000_WRITE_REG(&adapter->hw, E1000_EIAM,
4509		    adapter->eims_mask);
4510		E1000_WRITE_REG(&adapter->hw, E1000_EIMS,
4511		    adapter->eims_mask);
4512		E1000_WRITE_REG(&adapter->hw, E1000_IMS,
4513		    E1000_IMS_LSC);
4514	} else {
4515		E1000_WRITE_REG(&adapter->hw, E1000_IMS,
4516		    IMS_ENABLE_MASK);
4517	}
4518	E1000_WRITE_FLUSH(&adapter->hw);
4519
4520	return;
4521}
4522
4523static void
4524igb_disable_intr(struct adapter *adapter)
4525{
4526	if (adapter->msix_mem) {
4527		E1000_WRITE_REG(&adapter->hw, E1000_EIMC, ~0);
4528		E1000_WRITE_REG(&adapter->hw, E1000_EIAC, 0);
4529	}
4530	E1000_WRITE_REG(&adapter->hw, E1000_IMC, ~0);
4531	E1000_WRITE_FLUSH(&adapter->hw);
4532	return;
4533}
4534
4535/*
4536 * Bit of a misnomer: what this really means is
4537 * to enable OS management of the system, i.e.
4538 * to disable special hardware management features.
4539 */
4540static void
4541igb_init_manageability(struct adapter *adapter)
4542{
4543	if (adapter->has_manage) {
4544		int manc2h = E1000_READ_REG(&adapter->hw, E1000_MANC2H);
4545		int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
4546
4547		/* disable hardware interception of ARP */
4548		manc &= ~(E1000_MANC_ARP_EN);
4549
4550                /* enable receiving management packets to the host */
4551		manc |= E1000_MANC_EN_MNG2HOST;
4552		manc2h |= 1 << 5;  /* Mng Port 623 */
4553		manc2h |= 1 << 6;  /* Mng Port 664 */
4554		E1000_WRITE_REG(&adapter->hw, E1000_MANC2H, manc2h);
4555		E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
4556	}
4557}
4558
4559/*
4560 * Give control back to hardware management
4561 * controller if there is one.
4562 */
4563static void
4564igb_release_manageability(struct adapter *adapter)
4565{
4566	if (adapter->has_manage) {
4567		int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
4568
4569		/* re-enable hardware interception of ARP */
4570		manc |= E1000_MANC_ARP_EN;
4571		manc &= ~E1000_MANC_EN_MNG2HOST;
4572
4573		E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
4574	}
4575}
4576
4577/*
4578 * igb_get_hw_control sets CTRL_EXT:DRV_LOAD bit.
4579 * For ASF and Pass Through versions of f/w this means that
4580 * the driver is loaded.
4581 *
4582 */
4583static void
4584igb_get_hw_control(struct adapter *adapter)
4585{
4586	u32 ctrl_ext;
4587
4588	/* Let firmware know the driver has taken over */
4589	ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4590	E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
4591	    ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
4592}
4593
4594/*
4595 * igb_release_hw_control resets CTRL_EXT:DRV_LOAD bit.
4596 * For ASF and Pass Through versions of f/w this means that the
4597 * driver is no longer loaded.
4598 *
4599 */
4600static void
4601igb_release_hw_control(struct adapter *adapter)
4602{
4603	u32 ctrl_ext;
4604
4605	/* Let firmware take over control of h/w */
4606	ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4607	E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
4608	    ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
4609}
4610
4611static int
4612igb_is_valid_ether_addr(uint8_t *addr)
4613{
4614	char zero_addr[6] = { 0, 0, 0, 0, 0, 0 };
4615
4616	if ((addr[0] & 1) || (!bcmp(addr, zero_addr, ETHER_ADDR_LEN))) {
4617		return (FALSE);
4618	}
4619
4620	return (TRUE);
4621}
4622
4623
4624/*
4625 * Enable PCI Wake On Lan capability
4626 */
4627static void
4628igb_enable_wakeup(device_t dev)
4629{
4630	u16     cap, status;
4631	u8      id;
4632
4633	/* First find the capabilities pointer */
4634	cap = pci_read_config(dev, PCIR_CAP_PTR, 2);
4635	/* Read the PM Capabilities */
4636	id = pci_read_config(dev, cap, 1);
4637	if (id != PCIY_PMG)     /* Something wrong */
4638		return;
4639	/* OK, we have the power capabilities, so
4640	   now get the status register */
4641	cap += PCIR_POWER_STATUS;
4642	status = pci_read_config(dev, cap, 2);
4643	status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE;
4644	pci_write_config(dev, cap, status, 2);
4645	return;
4646}
4647
4648static void
4649igb_led_func(void *arg, int onoff)
4650{
4651	struct adapter	*adapter = arg;
4652
4653	IGB_CORE_LOCK(adapter);
4654	if (onoff) {
4655		e1000_setup_led(&adapter->hw);
4656		e1000_led_on(&adapter->hw);
4657	} else {
4658		e1000_led_off(&adapter->hw);
4659		e1000_cleanup_led(&adapter->hw);
4660	}
4661	IGB_CORE_UNLOCK(adapter);
4662}
4663
4664/**********************************************************************
4665 *
4666 *  Update the board statistics counters.
4667 *
4668 **********************************************************************/
4669static void
4670igb_update_stats_counters(struct adapter *adapter)
4671{
4672	struct ifnet   *ifp;
4673
4674	if(adapter->hw.phy.media_type == e1000_media_type_copper ||
4675	   (E1000_READ_REG(&adapter->hw, E1000_STATUS) & E1000_STATUS_LU)) {
4676		adapter->stats.symerrs += E1000_READ_REG(&adapter->hw, E1000_SYMERRS);
4677		adapter->stats.sec += E1000_READ_REG(&adapter->hw, E1000_SEC);
4678	}
4679	adapter->stats.crcerrs += E1000_READ_REG(&adapter->hw, E1000_CRCERRS);
4680	adapter->stats.mpc += E1000_READ_REG(&adapter->hw, E1000_MPC);
4681	adapter->stats.scc += E1000_READ_REG(&adapter->hw, E1000_SCC);
4682	adapter->stats.ecol += E1000_READ_REG(&adapter->hw, E1000_ECOL);
4683
4684	adapter->stats.mcc += E1000_READ_REG(&adapter->hw, E1000_MCC);
4685	adapter->stats.latecol += E1000_READ_REG(&adapter->hw, E1000_LATECOL);
4686	adapter->stats.colc += E1000_READ_REG(&adapter->hw, E1000_COLC);
4687	adapter->stats.dc += E1000_READ_REG(&adapter->hw, E1000_DC);
4688	adapter->stats.rlec += E1000_READ_REG(&adapter->hw, E1000_RLEC);
4689	adapter->stats.xonrxc += E1000_READ_REG(&adapter->hw, E1000_XONRXC);
4690	adapter->stats.xontxc += E1000_READ_REG(&adapter->hw, E1000_XONTXC);
4691	adapter->stats.xoffrxc += E1000_READ_REG(&adapter->hw, E1000_XOFFRXC);
4692	adapter->stats.xofftxc += E1000_READ_REG(&adapter->hw, E1000_XOFFTXC);
4693	adapter->stats.fcruc += E1000_READ_REG(&adapter->hw, E1000_FCRUC);
4694	adapter->stats.prc64 += E1000_READ_REG(&adapter->hw, E1000_PRC64);
4695	adapter->stats.prc127 += E1000_READ_REG(&adapter->hw, E1000_PRC127);
4696	adapter->stats.prc255 += E1000_READ_REG(&adapter->hw, E1000_PRC255);
4697	adapter->stats.prc511 += E1000_READ_REG(&adapter->hw, E1000_PRC511);
4698	adapter->stats.prc1023 += E1000_READ_REG(&adapter->hw, E1000_PRC1023);
4699	adapter->stats.prc1522 += E1000_READ_REG(&adapter->hw, E1000_PRC1522);
4700	adapter->stats.gprc += E1000_READ_REG(&adapter->hw, E1000_GPRC);
4701	adapter->stats.bprc += E1000_READ_REG(&adapter->hw, E1000_BPRC);
4702	adapter->stats.mprc += E1000_READ_REG(&adapter->hw, E1000_MPRC);
4703	adapter->stats.gptc += E1000_READ_REG(&adapter->hw, E1000_GPTC);
4704
4705	/* For the 64-bit byte counters the low dword must be read first. */
4706	/* Both registers clear on the read of the high dword */
4707
4708	adapter->stats.gorc += E1000_READ_REG(&adapter->hw, E1000_GORCH);
4709	adapter->stats.gotc += E1000_READ_REG(&adapter->hw, E1000_GOTCH);
4710
4711	adapter->stats.rnbc += E1000_READ_REG(&adapter->hw, E1000_RNBC);
4712	adapter->stats.ruc += E1000_READ_REG(&adapter->hw, E1000_RUC);
4713	adapter->stats.rfc += E1000_READ_REG(&adapter->hw, E1000_RFC);
4714	adapter->stats.roc += E1000_READ_REG(&adapter->hw, E1000_ROC);
4715	adapter->stats.rjc += E1000_READ_REG(&adapter->hw, E1000_RJC);
4716
4717	adapter->stats.tor += E1000_READ_REG(&adapter->hw, E1000_TORH);
4718	adapter->stats.tot += E1000_READ_REG(&adapter->hw, E1000_TOTH);
4719
4720	adapter->stats.tpr += E1000_READ_REG(&adapter->hw, E1000_TPR);
4721	adapter->stats.tpt += E1000_READ_REG(&adapter->hw, E1000_TPT);
4722	adapter->stats.ptc64 += E1000_READ_REG(&adapter->hw, E1000_PTC64);
4723	adapter->stats.ptc127 += E1000_READ_REG(&adapter->hw, E1000_PTC127);
4724	adapter->stats.ptc255 += E1000_READ_REG(&adapter->hw, E1000_PTC255);
4725	adapter->stats.ptc511 += E1000_READ_REG(&adapter->hw, E1000_PTC511);
4726	adapter->stats.ptc1023 += E1000_READ_REG(&adapter->hw, E1000_PTC1023);
4727	adapter->stats.ptc1522 += E1000_READ_REG(&adapter->hw, E1000_PTC1522);
4728	adapter->stats.mptc += E1000_READ_REG(&adapter->hw, E1000_MPTC);
4729	adapter->stats.bptc += E1000_READ_REG(&adapter->hw, E1000_BPTC);
4730
4731	adapter->stats.algnerrc +=
4732		E1000_READ_REG(&adapter->hw, E1000_ALGNERRC);
4733	adapter->stats.rxerrc +=
4734		E1000_READ_REG(&adapter->hw, E1000_RXERRC);
4735	adapter->stats.tncrs +=
4736		E1000_READ_REG(&adapter->hw, E1000_TNCRS);
4737	adapter->stats.cexterr +=
4738		E1000_READ_REG(&adapter->hw, E1000_CEXTERR);
4739	adapter->stats.tsctc +=
4740		E1000_READ_REG(&adapter->hw, E1000_TSCTC);
4741	adapter->stats.tsctfc +=
4742		E1000_READ_REG(&adapter->hw, E1000_TSCTFC);
4743	ifp = adapter->ifp;
4744
4745	ifp->if_collisions = adapter->stats.colc;
4746
4747	/* Rx Errors */
4748	ifp->if_ierrors = adapter->dropped_pkts + adapter->stats.rxerrc +
4749	    adapter->stats.crcerrs + adapter->stats.algnerrc +
4750	    adapter->stats.ruc + adapter->stats.roc +
4751	    adapter->stats.mpc + adapter->stats.cexterr;
4752
4753	/* Tx Errors */
4754	ifp->if_oerrors = adapter->stats.ecol +
4755	    adapter->stats.latecol + adapter->watchdog_events;
4756}
4757
4758
4759/**********************************************************************
4760 *
4761 *  This routine is called only when igb_display_debug_stats is enabled.
4762 *  This routine provides a way to take a look at important statistics
4763 *  maintained by the driver and hardware.
4764 *
4765 **********************************************************************/
4766static void
4767igb_print_debug_info(struct adapter *adapter)
4768{
4769	device_t dev = adapter->dev;
4770	struct igb_queue *que = adapter->queues;
4771	struct rx_ring *rxr = adapter->rx_rings;
4772	struct tx_ring *txr = adapter->tx_rings;
4773	uint8_t *hw_addr = adapter->hw.hw_addr;
4774
4775	device_printf(dev, "Adapter hardware address = %p \n", hw_addr);
4776	device_printf(dev, "CTRL = 0x%x RCTL = 0x%x \n",
4777	    E1000_READ_REG(&adapter->hw, E1000_CTRL),
4778	    E1000_READ_REG(&adapter->hw, E1000_RCTL));
4779
4780#if	(DEBUG_HW > 0)  /* Don't output these errors normally */
4781	device_printf(dev, "IMS = 0x%x EIMS = 0x%x \n",
4782	    E1000_READ_REG(&adapter->hw, E1000_IMS),
4783	    E1000_READ_REG(&adapter->hw, E1000_EIMS));
4784#endif
4785
4786	device_printf(dev, "Packet buffer = Tx=%dk Rx=%dk \n",
4787	    ((E1000_READ_REG(&adapter->hw, E1000_PBA) & 0xffff0000) >> 16),
4788	    (E1000_READ_REG(&adapter->hw, E1000_PBA) & 0xffff) );
4789	device_printf(dev, "Flow control watermarks high = %d low = %d\n",
4790	    adapter->hw.fc.high_water,
4791	    adapter->hw.fc.low_water);
4792
4793	for (int i = 0; i < adapter->num_queues; i++, rxr++, txr++) {
4794		device_printf(dev, "Queue(%d) tdh = %d, tdt = %d  ", i,
4795		    E1000_READ_REG(&adapter->hw, E1000_TDH(i)),
4796		    E1000_READ_REG(&adapter->hw, E1000_TDT(i)));
4797		device_printf(dev, "rdh = %d, rdt = %d\n",
4798		    E1000_READ_REG(&adapter->hw, E1000_RDH(i)),
4799		    E1000_READ_REG(&adapter->hw, E1000_RDT(i)));
4800		device_printf(dev, "TX(%d) no descriptors avail event = %lld\n",
4801		    txr->me, (long long)txr->no_desc_avail);
4802		device_printf(dev, "TX(%d) Packets sent = %lld\n",
4803		    txr->me, (long long)txr->tx_packets);
4804		device_printf(dev, "RX(%d) Packets received = %lld  ",
4805		    rxr->me, (long long)rxr->rx_packets);
4806	}
4807
4808	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4809		struct lro_ctrl *lro = &rxr->lro;
4810		device_printf(dev, "Queue(%d) rdh = %d, rdt = %d\n", i,
4811		    E1000_READ_REG(&adapter->hw, E1000_RDH(i)),
4812		    E1000_READ_REG(&adapter->hw, E1000_RDT(i)));
4813		device_printf(dev, "RX(%d) Packets received = %lld\n", rxr->me,
4814		    (long long)rxr->rx_packets);
4815		device_printf(dev, " Split Packets = %lld ",
4816		    (long long)rxr->rx_split_packets);
4817		device_printf(dev, " Byte count = %lld\n",
4818		    (long long)rxr->rx_bytes);
4819		device_printf(dev,"RX(%d) LRO Queued= %d  ",
4820		    i, lro->lro_queued);
4821		device_printf(dev,"LRO Flushed= %d\n",lro->lro_flushed);
4822	}
4823
4824	for (int i = 0; i < adapter->num_queues; i++, que++)
4825		device_printf(dev,"QUE(%d) IRQs = %llx\n",
4826		    i, (long long)que->irqs);
4827
4828	device_printf(dev, "LINK MSIX IRQ Handled = %u\n", adapter->link_irq);
4829	device_printf(dev, "Mbuf defrag failed = %ld\n",
4830	    adapter->mbuf_defrag_failed);
4831	device_printf(dev, "Std mbuf header failed = %ld\n",
4832	    adapter->mbuf_header_failed);
4833	device_printf(dev, "Std mbuf packet failed = %ld\n",
4834	    adapter->mbuf_packet_failed);
4835	device_printf(dev, "Driver dropped packets = %ld\n",
4836	    adapter->dropped_pkts);
4837	device_printf(dev, "Driver tx dma failure in xmit = %ld\n",
4838		adapter->no_tx_dma_setup);
4839}
4840
4841static void
4842igb_print_hw_stats(struct adapter *adapter)
4843{
4844	device_t dev = adapter->dev;
4845
4846	device_printf(dev, "Excessive collisions = %lld\n",
4847	    (long long)adapter->stats.ecol);
4848#if	(DEBUG_HW > 0)  /* Don't output these errors normally */
4849	device_printf(dev, "Symbol errors = %lld\n",
4850	    (long long)adapter->stats.symerrs);
4851#endif
4852	device_printf(dev, "Sequence errors = %lld\n",
4853	    (long long)adapter->stats.sec);
4854	device_printf(dev, "Defer count = %lld\n",
4855	    (long long)adapter->stats.dc);
4856	device_printf(dev, "Missed Packets = %lld\n",
4857	    (long long)adapter->stats.mpc);
4858	device_printf(dev, "Receive No Buffers = %lld\n",
4859	    (long long)adapter->stats.rnbc);
4860	/* RLEC is inaccurate on some hardware, calculate our own. */
4861	device_printf(dev, "Receive Length Errors = %lld\n",
4862	    ((long long)adapter->stats.roc + (long long)adapter->stats.ruc));
4863	device_printf(dev, "Receive errors = %lld\n",
4864	    (long long)adapter->stats.rxerrc);
4865	device_printf(dev, "Crc errors = %lld\n",
4866	    (long long)adapter->stats.crcerrs);
4867	device_printf(dev, "Alignment errors = %lld\n",
4868	    (long long)adapter->stats.algnerrc);
4869	/* On 82575 these are collision counts */
4870	device_printf(dev, "Collision/Carrier extension errors = %lld\n",
4871	    (long long)adapter->stats.cexterr);
4872	device_printf(dev, "RX overruns = %ld\n", adapter->rx_overruns);
4873	device_printf(dev, "watchdog timeouts = %ld\n",
4874	    adapter->watchdog_events);
4875	device_printf(dev, "XON Rcvd = %lld\n",
4876	    (long long)adapter->stats.xonrxc);
4877	device_printf(dev, "XON Xmtd = %lld\n",
4878	    (long long)adapter->stats.xontxc);
4879	device_printf(dev, "XOFF Rcvd = %lld\n",
4880	    (long long)adapter->stats.xoffrxc);
4881	device_printf(dev, "XOFF Xmtd = %lld\n",
4882	    (long long)adapter->stats.xofftxc);
4883	device_printf(dev, "Good Packets Rcvd = %lld\n",
4884	    (long long)adapter->stats.gprc);
4885	device_printf(dev, "Good Packets Xmtd = %lld\n",
4886	    (long long)adapter->stats.gptc);
4887	device_printf(dev, "TSO Contexts Xmtd = %lld\n",
4888	    (long long)adapter->stats.tsctc);
4889	device_printf(dev, "TSO Contexts Failed = %lld\n",
4890	    (long long)adapter->stats.tsctfc);
4891}
4892
4893/**********************************************************************
4894 *
4895 *  This routine provides a way to dump out the adapter eeprom,
4896 *  often a useful debug/service tool. This only dumps the first
4897 *  32 words; the data that matters is within that range.
4898 *
4899 **********************************************************************/
4900static void
4901igb_print_nvm_info(struct adapter *adapter)
4902{
4903	u16	eeprom_data;
4904	int	i, j, row = 0;
4905
4906	/* It's a bit crude, but it gets the job done */
4907	printf("\nInterface EEPROM Dump:\n");
4908	printf("Offset\n0x0000  ");
4909	for (i = 0, j = 0; i < 32; i++, j++) {
4910		if (j == 8) { /* Make the offset block */
4911			j = 0; ++row;
4912			printf("\n0x00%x0  ",row);
4913		}
4914		e1000_read_nvm(&adapter->hw, i, 1, &eeprom_data);
4915		printf("%04x ", eeprom_data);
4916	}
4917	printf("\n");
4918}
4919
4920static int
4921igb_sysctl_debug_info(SYSCTL_HANDLER_ARGS)
4922{
4923	struct adapter *adapter;
4924	int error;
4925	int result;
4926
4927	result = -1;
4928	error = sysctl_handle_int(oidp, &result, 0, req);
4929
4930	if (error || !req->newptr)
4931		return (error);
4932
4933	if (result == 1) {
4934		adapter = (struct adapter *)arg1;
4935		igb_print_debug_info(adapter);
4936	}
4937	/*
4938	 * This value will cause a hex dump of the
4939	 * first 32 16-bit words of the EEPROM to
4940	 * the screen.
4941	 */
4942	if (result == 2) {
4943		adapter = (struct adapter *)arg1;
4944		igb_print_nvm_info(adapter);
4945        }
4946
4947	return (error);
4948}
4949
4950
4951static int
4952igb_sysctl_stats(SYSCTL_HANDLER_ARGS)
4953{
4954	struct adapter *adapter;
4955	int error;
4956	int result;
4957
4958	result = -1;
4959	error = sysctl_handle_int(oidp, &result, 0, req);
4960
4961	if (error || !req->newptr)
4962		return (error);
4963
4964	if (result == 1) {
4965		adapter = (struct adapter *)arg1;
4966		igb_print_hw_stats(adapter);
4967	}
4968
4969	return (error);
4970}
4971
4972static void
4973igb_add_rx_process_limit(struct adapter *adapter, const char *name,
4974	const char *description, int *limit, int value)
4975{
4976	*limit = value;
4977	SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
4978	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
4979	    OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW, limit, value, description);
4980}
4981