if_igb.c revision 206431
1/******************************************************************************
2
3  Copyright (c) 2001-2010, Intel Corporation
4  All rights reserved.
5
6  Redistribution and use in source and binary forms, with or without
7  modification, are permitted provided that the following conditions are met:
8
9   1. Redistributions of source code must retain the above copyright notice,
10      this list of conditions and the following disclaimer.
11
12   2. Redistributions in binary form must reproduce the above copyright
13      notice, this list of conditions and the following disclaimer in the
14      documentation and/or other materials provided with the distribution.
15
16   3. Neither the name of the Intel Corporation nor the names of its
17      contributors may be used to endorse or promote products derived from
18      this software without specific prior written permission.
19
20  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
24  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30  POSSIBILITY OF SUCH DAMAGE.
31
32******************************************************************************/
33/*$FreeBSD: head/sys/dev/e1000/if_igb.c 206431 2010-04-09 21:18:46Z jfv $*/
34
35
36#ifdef HAVE_KERNEL_OPTION_HEADERS
37#include "opt_device_polling.h"
38#include "opt_inet.h"
39#include "opt_altq.h"
40#endif
41
42#include <sys/param.h>
43#include <sys/systm.h>
44#if __FreeBSD_version >= 800000
45#include <sys/buf_ring.h>
46#endif
47#include <sys/bus.h>
48#include <sys/endian.h>
49#include <sys/kernel.h>
50#include <sys/kthread.h>
51#include <sys/malloc.h>
52#include <sys/mbuf.h>
53#include <sys/module.h>
54#include <sys/rman.h>
55#include <sys/socket.h>
56#include <sys/sockio.h>
57#include <sys/sysctl.h>
58#include <sys/taskqueue.h>
59#include <sys/eventhandler.h>
60#include <sys/pcpu.h>
61#include <sys/smp.h>
62#include <machine/smp.h>
63#include <machine/bus.h>
64#include <machine/resource.h>
65
66#include <net/bpf.h>
67#include <net/ethernet.h>
68#include <net/if.h>
69#include <net/if_arp.h>
70#include <net/if_dl.h>
71#include <net/if_media.h>
72
73#include <net/if_types.h>
74#include <net/if_vlan_var.h>
75
76#include <netinet/in_systm.h>
77#include <netinet/in.h>
78#include <netinet/if_ether.h>
79#include <netinet/ip.h>
80#include <netinet/ip6.h>
81#include <netinet/tcp.h>
82#include <netinet/tcp_lro.h>
83#include <netinet/udp.h>
84
85#include <machine/in_cksum.h>
86#include <dev/led/led.h>
87#include <dev/pci/pcivar.h>
88#include <dev/pci/pcireg.h>
89
90#include "e1000_api.h"
91#include "e1000_82575.h"
92#include "if_igb.h"
93
94/*********************************************************************
95 *  Set this to one to display debug statistics
96 *********************************************************************/
97int	igb_display_debug_stats = 0;
98
99/*********************************************************************
100 *  Driver version:
101 *********************************************************************/
102char igb_driver_version[] = "version - 1.9.4";
103
104
105/*********************************************************************
106 *  PCI Device ID Table
107 *
108 *  Used by probe to select the devices to load the driver on
109 *  Last field stores an index into igb_strings
110 *  Last entry must be all 0s
111 *
112 *  { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
113 *********************************************************************/
114
115static igb_vendor_info_t igb_vendor_info_array[] =
116{
117	{ 0x8086, E1000_DEV_ID_82575EB_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
118	{ 0x8086, E1000_DEV_ID_82575EB_FIBER_SERDES,
119						PCI_ANY_ID, PCI_ANY_ID, 0},
120	{ 0x8086, E1000_DEV_ID_82575GB_QUAD_COPPER,
121						PCI_ANY_ID, PCI_ANY_ID, 0},
122	{ 0x8086, E1000_DEV_ID_82576,		PCI_ANY_ID, PCI_ANY_ID, 0},
123	{ 0x8086, E1000_DEV_ID_82576_NS,	PCI_ANY_ID, PCI_ANY_ID, 0},
124	{ 0x8086, E1000_DEV_ID_82576_NS_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
125	{ 0x8086, E1000_DEV_ID_82576_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
126	{ 0x8086, E1000_DEV_ID_82576_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
127	{ 0x8086, E1000_DEV_ID_82576_SERDES_QUAD,
128						PCI_ANY_ID, PCI_ANY_ID, 0},
129	{ 0x8086, E1000_DEV_ID_82576_QUAD_COPPER,
130						PCI_ANY_ID, PCI_ANY_ID, 0},
131	{ 0x8086, E1000_DEV_ID_82580_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
132	{ 0x8086, E1000_DEV_ID_82580_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
133	{ 0x8086, E1000_DEV_ID_82580_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
134	{ 0x8086, E1000_DEV_ID_82580_SGMII,	PCI_ANY_ID, PCI_ANY_ID, 0},
135	{ 0x8086, E1000_DEV_ID_82580_COPPER_DUAL,
136						PCI_ANY_ID, PCI_ANY_ID, 0},
137	/* required last entry */
138	{ 0, 0, 0, 0, 0}
139};
140
141/*********************************************************************
142 *  Table of branding strings for all supported NICs.
143 *********************************************************************/
144
145static char *igb_strings[] = {
146	"Intel(R) PRO/1000 Network Connection"
147};
148
149/*********************************************************************
150 *  Function prototypes
151 *********************************************************************/
152static int	igb_probe(device_t);
153static int	igb_attach(device_t);
154static int	igb_detach(device_t);
155static int	igb_shutdown(device_t);
156static int	igb_suspend(device_t);
157static int	igb_resume(device_t);
158static void	igb_start(struct ifnet *);
159static void	igb_start_locked(struct tx_ring *, struct ifnet *ifp);
160#if __FreeBSD_version >= 800000
161static int	igb_mq_start(struct ifnet *, struct mbuf *);
162static int	igb_mq_start_locked(struct ifnet *,
163		    struct tx_ring *, struct mbuf *);
164static void	igb_qflush(struct ifnet *);
165#endif
166static int	igb_ioctl(struct ifnet *, u_long, caddr_t);
167static void	igb_init(void *);
168static void	igb_init_locked(struct adapter *);
169static void	igb_stop(void *);
170static void	igb_media_status(struct ifnet *, struct ifmediareq *);
171static int	igb_media_change(struct ifnet *);
172static void	igb_identify_hardware(struct adapter *);
173static int	igb_allocate_pci_resources(struct adapter *);
174static int	igb_allocate_msix(struct adapter *);
175static int	igb_allocate_legacy(struct adapter *);
176static int	igb_setup_msix(struct adapter *);
177static void	igb_free_pci_resources(struct adapter *);
178static void	igb_local_timer(void *);
179static void	igb_reset(struct adapter *);
180static void	igb_setup_interface(device_t, struct adapter *);
181static int	igb_allocate_queues(struct adapter *);
182static void	igb_configure_queues(struct adapter *);
183
184static int	igb_allocate_transmit_buffers(struct tx_ring *);
185static void	igb_setup_transmit_structures(struct adapter *);
186static void	igb_setup_transmit_ring(struct tx_ring *);
187static void	igb_initialize_transmit_units(struct adapter *);
188static void	igb_free_transmit_structures(struct adapter *);
189static void	igb_free_transmit_buffers(struct tx_ring *);
190
191static int	igb_allocate_receive_buffers(struct rx_ring *);
192static int	igb_setup_receive_structures(struct adapter *);
193static int	igb_setup_receive_ring(struct rx_ring *);
194static void	igb_initialize_receive_units(struct adapter *);
195static void	igb_free_receive_structures(struct adapter *);
196static void	igb_free_receive_buffers(struct rx_ring *);
197static void	igb_free_receive_ring(struct rx_ring *);
198
199static void	igb_enable_intr(struct adapter *);
200static void	igb_disable_intr(struct adapter *);
201static void	igb_update_stats_counters(struct adapter *);
202static bool	igb_txeof(struct tx_ring *);
203
204static __inline	void igb_rx_discard(struct rx_ring *, int);
205static __inline void igb_rx_input(struct rx_ring *,
206		    struct ifnet *, struct mbuf *, u32);
207
208static bool	igb_rxeof(struct igb_queue *, int);
209static void	igb_rx_checksum(u32, struct mbuf *, u32);
210static int	igb_tx_ctx_setup(struct tx_ring *, struct mbuf *);
211static bool	igb_tso_setup(struct tx_ring *, struct mbuf *, u32 *);
212static void	igb_set_promisc(struct adapter *);
213static void	igb_disable_promisc(struct adapter *);
214static void	igb_set_multi(struct adapter *);
215static void	igb_print_hw_stats(struct adapter *);
216static void	igb_update_link_status(struct adapter *);
217static void	igb_refresh_mbufs(struct rx_ring *, int);
218
219static void	igb_register_vlan(void *, struct ifnet *, u16);
220static void	igb_unregister_vlan(void *, struct ifnet *, u16);
221static void	igb_setup_vlan_hw_support(struct adapter *);
222
223static int	igb_xmit(struct tx_ring *, struct mbuf **);
224static int	igb_dma_malloc(struct adapter *, bus_size_t,
225		    struct igb_dma_alloc *, int);
226static void	igb_dma_free(struct adapter *, struct igb_dma_alloc *);
227static void	igb_print_debug_info(struct adapter *);
228static void	igb_print_nvm_info(struct adapter *);
229static int 	igb_is_valid_ether_addr(u8 *);
230static int	igb_sysctl_stats(SYSCTL_HANDLER_ARGS);
231static int	igb_sysctl_debug_info(SYSCTL_HANDLER_ARGS);
232/* Management and WOL Support */
233static void	igb_init_manageability(struct adapter *);
234static void	igb_release_manageability(struct adapter *);
235static void     igb_get_hw_control(struct adapter *);
236static void     igb_release_hw_control(struct adapter *);
237static void     igb_enable_wakeup(device_t);
238static void     igb_led_func(void *, int);
239
240static int	igb_irq_fast(void *);
241static void	igb_add_rx_process_limit(struct adapter *, const char *,
242		    const char *, int *, int);
243static void	igb_handle_rxtx(void *context, int pending);
244static void	igb_handle_que(void *context, int pending);
245static void	igb_handle_link(void *context, int pending);
246
247/* These are MSIX only irq handlers */
248static void	igb_msix_que(void *);
249static void	igb_msix_link(void *);
250
251#ifdef DEVICE_POLLING
252static poll_handler_t igb_poll;
253#endif /* POLLING */
254
255/*********************************************************************
256 *  FreeBSD Device Interface Entry Points
257 *********************************************************************/
258
259static device_method_t igb_methods[] = {
260	/* Device interface */
261	DEVMETHOD(device_probe, igb_probe),
262	DEVMETHOD(device_attach, igb_attach),
263	DEVMETHOD(device_detach, igb_detach),
264	DEVMETHOD(device_shutdown, igb_shutdown),
265	DEVMETHOD(device_suspend, igb_suspend),
266	DEVMETHOD(device_resume, igb_resume),
267	{0, 0}
268};
269
270static driver_t igb_driver = {
271	"igb", igb_methods, sizeof(struct adapter),
272};
273
274static devclass_t igb_devclass;
275DRIVER_MODULE(igb, pci, igb_driver, igb_devclass, 0, 0);
276MODULE_DEPEND(igb, pci, 1, 1, 1);
277MODULE_DEPEND(igb, ether, 1, 1, 1);
278
279/*********************************************************************
280 *  Tunable default values.
281 *********************************************************************/
282
283/* Descriptor defaults */
284static int igb_rxd = IGB_DEFAULT_RXD;
285static int igb_txd = IGB_DEFAULT_TXD;
286TUNABLE_INT("hw.igb.rxd", &igb_rxd);
287TUNABLE_INT("hw.igb.txd", &igb_txd);
288
289/*
290** AIM: Adaptive Interrupt Moderation
291** which means that the interrupt rate
292** is varied over time based on the
293** traffic for that interrupt vector
294*/
295static int igb_enable_aim = TRUE;
296TUNABLE_INT("hw.igb.enable_aim", &igb_enable_aim);
297
298/*
299 * MSIX should be the default for best performance,
300 * but this allows it to be forced off for testing.
301 */
302static int igb_enable_msix = 1;
303TUNABLE_INT("hw.igb.enable_msix", &igb_enable_msix);
304
305/*
306 * Header split has seemed beneficial in many
307 * circumstances tested; however, there have
308 * been some stability issues, so the default is
309 * off.
310 */
311static bool igb_header_split = FALSE;
312TUNABLE_INT("hw.igb.hdr_split", &igb_header_split);
313
314/*
315** This will autoconfigure based on
316** the number of CPUs if left at 0.
317*/
318static int igb_num_queues = 0;
319TUNABLE_INT("hw.igb.num_queues", &igb_num_queues);
320
321/* How many packets rxeof tries to clean at a time */
322static int igb_rx_process_limit = 100;
323TUNABLE_INT("hw.igb.rx_process_limit", &igb_rx_process_limit);
324
325/* Flow control setting - default to FULL */
326static int igb_fc_setting = e1000_fc_full;
327TUNABLE_INT("hw.igb.fc_setting", &igb_fc_setting);
328
329/*
330** Shadow VFTA table; this is needed because
331** the real filter table gets cleared during
332** a soft reset and the driver needs to be able
333** to repopulate it.
334*/
335static u32 igb_shadow_vfta[IGB_VFTA_SIZE];
336
337
338/*********************************************************************
339 *  Device identification routine
340 *
341 *  igb_probe determines if the driver should be loaded on
342 *  an adapter based on its PCI vendor/device ID.
343 *
344 *  return BUS_PROBE_DEFAULT on success, positive on failure
345 *********************************************************************/
346
347static int
348igb_probe(device_t dev)
349{
350	char		adapter_name[60];
351	uint16_t	pci_vendor_id = 0;
352	uint16_t	pci_device_id = 0;
353	uint16_t	pci_subvendor_id = 0;
354	uint16_t	pci_subdevice_id = 0;
355	igb_vendor_info_t *ent;
356
357	INIT_DEBUGOUT("igb_probe: begin");
358
359	pci_vendor_id = pci_get_vendor(dev);
360	if (pci_vendor_id != IGB_VENDOR_ID)
361		return (ENXIO);
362
363	pci_device_id = pci_get_device(dev);
364	pci_subvendor_id = pci_get_subvendor(dev);
365	pci_subdevice_id = pci_get_subdevice(dev);
366
367	ent = igb_vendor_info_array;
368	while (ent->vendor_id != 0) {
369		if ((pci_vendor_id == ent->vendor_id) &&
370		    (pci_device_id == ent->device_id) &&
371
372		    ((pci_subvendor_id == ent->subvendor_id) ||
373		    (ent->subvendor_id == PCI_ANY_ID)) &&
374
375		    ((pci_subdevice_id == ent->subdevice_id) ||
376		    (ent->subdevice_id == PCI_ANY_ID))) {
377			sprintf(adapter_name, "%s %s",
378				igb_strings[ent->index],
379				igb_driver_version);
380			device_set_desc_copy(dev, adapter_name);
381			return (BUS_PROBE_DEFAULT);
382		}
383		ent++;
384	}
385
386	return (ENXIO);
387}
388
389/*********************************************************************
390 *  Device initialization routine
391 *
392 *  The attach entry point is called when the driver is being loaded.
393 *  This routine identifies the type of hardware, allocates all resources
394 *  and initializes the hardware.
395 *
396 *  return 0 on success, positive on failure
397 *********************************************************************/
398
399static int
400igb_attach(device_t dev)
401{
402	struct adapter	*adapter;
403	int		error = 0;
404	u16		eeprom_data;
405
406	INIT_DEBUGOUT("igb_attach: begin");
407
408	adapter = device_get_softc(dev);
409	adapter->dev = adapter->osdep.dev = dev;
410	IGB_CORE_LOCK_INIT(adapter, device_get_nameunit(dev));
411
412	/* SYSCTL stuff */
413	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
414	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
415	    OID_AUTO, "debug", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
416	    igb_sysctl_debug_info, "I", "Debug Information");
417
418	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
419	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
420	    OID_AUTO, "stats", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
421	    igb_sysctl_stats, "I", "Statistics");
422
423	SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
424	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
425	    OID_AUTO, "flow_control", CTLTYPE_INT|CTLFLAG_RW,
426	    &igb_fc_setting, 0, "Flow Control");
427
428	SYSCTL_ADD_INT(device_get_sysctl_ctx(dev),
429	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
430	    OID_AUTO, "enable_aim", CTLTYPE_INT|CTLFLAG_RW,
431	    &igb_enable_aim, 1, "Interrupt Moderation");
432
433	callout_init_mtx(&adapter->timer, &adapter->core_mtx, 0);
434
435	/* Determine hardware and mac info */
436	igb_identify_hardware(adapter);
437
438	/* Setup PCI resources */
439	if (igb_allocate_pci_resources(adapter)) {
440		device_printf(dev, "Allocation of PCI resources failed\n");
441		error = ENXIO;
442		goto err_pci;
443	}
444
445	/* Do Shared Code initialization */
446	if (e1000_setup_init_funcs(&adapter->hw, TRUE)) {
447		device_printf(dev, "Setup of Shared code failed\n");
448		error = ENXIO;
449		goto err_pci;
450	}
451
452	e1000_get_bus_info(&adapter->hw);
453
454	/* Sysctls for limiting the amount of work done in the taskqueue */
455	igb_add_rx_process_limit(adapter, "rx_processing_limit",
456	    "max number of rx packets to process", &adapter->rx_process_limit,
457	    igb_rx_process_limit);
458
459	/*
460	 * Validate the number of transmit and receive descriptors. They
461	 * must not exceed the hardware maximum and must be a multiple
462	 * of IGB_DBA_ALIGN.
463	 */
464	if (((igb_txd * sizeof(struct e1000_tx_desc)) % IGB_DBA_ALIGN) != 0 ||
465	    (igb_txd > IGB_MAX_TXD) || (igb_txd < IGB_MIN_TXD)) {
466		device_printf(dev, "Using %d TX descriptors instead of %d!\n",
467		    IGB_DEFAULT_TXD, igb_txd);
468		adapter->num_tx_desc = IGB_DEFAULT_TXD;
469	} else
470		adapter->num_tx_desc = igb_txd;
471	if (((igb_rxd * sizeof(struct e1000_rx_desc)) % IGB_DBA_ALIGN) != 0 ||
472	    (igb_rxd > IGB_MAX_RXD) || (igb_rxd < IGB_MIN_RXD)) {
473		device_printf(dev, "Using %d RX descriptors instead of %d!\n",
474		    IGB_DEFAULT_RXD, igb_rxd);
475		adapter->num_rx_desc = IGB_DEFAULT_RXD;
476	} else
477		adapter->num_rx_desc = igb_rxd;
478
479	adapter->hw.mac.autoneg = DO_AUTO_NEG;
480	adapter->hw.phy.autoneg_wait_to_complete = FALSE;
481	adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
482
483	/* Copper options */
484	if (adapter->hw.phy.media_type == e1000_media_type_copper) {
485		adapter->hw.phy.mdix = AUTO_ALL_MODES;
486		adapter->hw.phy.disable_polarity_correction = FALSE;
487		adapter->hw.phy.ms_type = IGB_MASTER_SLAVE;
488	}
489
490	/*
491	 * Set the frame limits assuming
492	 * standard ethernet sized frames.
493	 */
494	adapter->max_frame_size = ETHERMTU + ETHER_HDR_LEN + ETHERNET_FCS_SIZE;
495	adapter->min_frame_size = ETH_ZLEN + ETHERNET_FCS_SIZE;
496
497	/*
498	** Allocate and Setup Queues
499	*/
500	if (igb_allocate_queues(adapter)) {
501		error = ENOMEM;
502		goto err_pci;
503	}
504
505	/*
506	** Start from a known state; this is
507	** important when reading the NVM and
508	** MAC address from it.
509	*/
510	e1000_reset_hw(&adapter->hw);
511
512	/* Make sure we have a good EEPROM before we read from it */
513	if (e1000_validate_nvm_checksum(&adapter->hw) < 0) {
514		/*
515		** Some PCI-E parts fail the first check due to
516		** the link being in a sleep state; call it again.
517		** If it fails a second time, it's a real issue.
518		*/
519		if (e1000_validate_nvm_checksum(&adapter->hw) < 0) {
520			device_printf(dev,
521			    "The EEPROM Checksum Is Not Valid\n");
522			error = EIO;
523			goto err_late;
524		}
525	}
526
527	/*
528	** Copy the permanent MAC address out of the EEPROM
529	*/
530	if (e1000_read_mac_addr(&adapter->hw) < 0) {
531		device_printf(dev, "EEPROM read error while reading MAC"
532		    " address\n");
533		error = EIO;
534		goto err_late;
535	}
536	/* Check its sanity */
537	if (!igb_is_valid_ether_addr(adapter->hw.mac.addr)) {
538		device_printf(dev, "Invalid MAC address\n");
539		error = EIO;
540		goto err_late;
541	}
542
543	/*
544	** Configure Interrupts
545	*/
546	if ((adapter->msix > 1) && (igb_enable_msix))
547		error = igb_allocate_msix(adapter);
548	else /* MSI or Legacy */
549		error = igb_allocate_legacy(adapter);
550	if (error)
551		goto err_late;
552
553	/* Setup OS specific network interface */
554	igb_setup_interface(dev, adapter);
555
556	/* Now get a good starting state */
557	igb_reset(adapter);
558
559	/* Initialize statistics */
560	igb_update_stats_counters(adapter);
561
562	adapter->hw.mac.get_link_status = 1;
563	igb_update_link_status(adapter);
564
565	/* Indicate SOL/IDER usage */
566	if (e1000_check_reset_block(&adapter->hw))
567		device_printf(dev,
568		    "PHY reset is blocked due to SOL/IDER session.\n");
569
570	/* Determine if we have to control management hardware */
571	adapter->has_manage = e1000_enable_mng_pass_thru(&adapter->hw);
572
573	/*
574	 * Setup Wake-on-Lan
575	 */
576	/* APME bit in EEPROM is mapped to WUC.APME */
577	eeprom_data = E1000_READ_REG(&adapter->hw, E1000_WUC) & E1000_WUC_APME;
578	if (eeprom_data)
579		adapter->wol = E1000_WUFC_MAG;
580
581	/* Register for VLAN events */
582	adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
583	     igb_register_vlan, adapter, EVENTHANDLER_PRI_FIRST);
584	adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
585	     igb_unregister_vlan, adapter, EVENTHANDLER_PRI_FIRST);
586
587	/* Tell the stack that the interface is not active */
588	adapter->ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
589
590	adapter->led_dev = led_create(igb_led_func, adapter,
591	    device_get_nameunit(dev));
592
593	INIT_DEBUGOUT("igb_attach: end");
594
595	return (0);
596
597err_late:
598	igb_free_transmit_structures(adapter);
599	igb_free_receive_structures(adapter);
600	igb_release_hw_control(adapter);
601err_pci:
602	igb_free_pci_resources(adapter);
603	IGB_CORE_LOCK_DESTROY(adapter);
604
605	return (error);
606}
607
608/*********************************************************************
609 *  Device removal routine
610 *
611 *  The detach entry point is called when the driver is being removed.
612 *  This routine stops the adapter and deallocates all the resources
613 *  that were allocated for driver operation.
614 *
615 *  return 0 on success, positive on failure
616 *********************************************************************/
617
618static int
619igb_detach(device_t dev)
620{
621	struct adapter	*adapter = device_get_softc(dev);
622	struct ifnet	*ifp = adapter->ifp;
623
624	INIT_DEBUGOUT("igb_detach: begin");
625
626	/* Make sure VLANs are not using the driver */
627	if (adapter->ifp->if_vlantrunk != NULL) {
628		device_printf(dev,"Vlan in use, detach first\n");
629		return (EBUSY);
630	}
631
632	if (adapter->led_dev != NULL)
633		led_destroy(adapter->led_dev);
634
635#ifdef DEVICE_POLLING
636	if (ifp->if_capenable & IFCAP_POLLING)
637		ether_poll_deregister(ifp);
638#endif
639
640	IGB_CORE_LOCK(adapter);
641	adapter->in_detach = 1;
642	igb_stop(adapter);
643	IGB_CORE_UNLOCK(adapter);
644
645	e1000_phy_hw_reset(&adapter->hw);
646
647	/* Give control back to firmware */
648	igb_release_manageability(adapter);
649	igb_release_hw_control(adapter);
650
651	if (adapter->wol) {
652		E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
653		E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
654		igb_enable_wakeup(dev);
655	}
656
657	/* Unregister VLAN events */
658	if (adapter->vlan_attach != NULL)
659		EVENTHANDLER_DEREGISTER(vlan_config, adapter->vlan_attach);
660	if (adapter->vlan_detach != NULL)
661		EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach);
662
663	ether_ifdetach(adapter->ifp);
664
665	callout_drain(&adapter->timer);
666
667	igb_free_pci_resources(adapter);
668	bus_generic_detach(dev);
669	if_free(ifp);
670
671	igb_free_transmit_structures(adapter);
672	igb_free_receive_structures(adapter);
673
674	IGB_CORE_LOCK_DESTROY(adapter);
675
676	return (0);
677}
678
679/*********************************************************************
680 *
681 *  Shutdown entry point
682 *
683 **********************************************************************/
684
685static int
686igb_shutdown(device_t dev)
687{
688	return igb_suspend(dev);
689}
690
691/*
692 * Suspend/resume device methods.
693 */
694static int
695igb_suspend(device_t dev)
696{
697	struct adapter *adapter = device_get_softc(dev);
698
699	IGB_CORE_LOCK(adapter);
700
701	igb_stop(adapter);
702
703        igb_release_manageability(adapter);
704	igb_release_hw_control(adapter);
705
706        if (adapter->wol) {
707                E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
708                E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
709                igb_enable_wakeup(dev);
710        }
711
712	IGB_CORE_UNLOCK(adapter);
713
714	return bus_generic_suspend(dev);
715}
716
717static int
718igb_resume(device_t dev)
719{
720	struct adapter *adapter = device_get_softc(dev);
721	struct ifnet *ifp = adapter->ifp;
722
723	IGB_CORE_LOCK(adapter);
724	igb_init_locked(adapter);
725	igb_init_manageability(adapter);
726
727	if ((ifp->if_flags & IFF_UP) &&
728	    (ifp->if_drv_flags & IFF_DRV_RUNNING))
729		igb_start(ifp);
730
731	IGB_CORE_UNLOCK(adapter);
732
733	return bus_generic_resume(dev);
734}
735
736
737/*********************************************************************
738 *  Transmit entry point
739 *
740 *  igb_start is called by the stack to initiate a transmit.
741 *  The driver will remain in this routine as long as there are
742 *  packets to transmit and transmit resources are available.
743 *  In case resources are not available, the stack is notified
744 *  and the packet is requeued.
745 **********************************************************************/
746
747static void
748igb_start_locked(struct tx_ring *txr, struct ifnet *ifp)
749{
750	struct adapter	*adapter = ifp->if_softc;
751	struct mbuf	*m_head;
752
753	IGB_TX_LOCK_ASSERT(txr);
754
755	if ((ifp->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
756	    IFF_DRV_RUNNING)
757		return;
758	if (!adapter->link_active)
759		return;
760
761	while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
762
763		IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
764		if (m_head == NULL)
765			break;
766		/*
767		 *  Encapsulation can modify our pointer, and/or make it
768		 *  NULL on failure.  In that event, we can't requeue.
769		 */
770		if (igb_xmit(txr, &m_head)) {
771			if (m_head == NULL)
772				break;
773			ifp->if_drv_flags |= IFF_DRV_OACTIVE;
774			IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
775			break;
776		}
777
778		/* Send a copy of the frame to the BPF listener */
779		ETHER_BPF_MTAP(ifp, m_head);
780
781		/* Set watchdog on */
782		txr->watchdog_check = TRUE;
783	}
784}
785
786/*
787 * Legacy TX driver routine, called from the
788 * stack, always uses tx[0], and spins for it.
789 * Should not be used with multiqueue tx
790 */
791static void
792igb_start(struct ifnet *ifp)
793{
794	struct adapter	*adapter = ifp->if_softc;
795	struct tx_ring	*txr = adapter->tx_rings;
796
797	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
798		IGB_TX_LOCK(txr);
799		igb_start_locked(txr, ifp);
800		IGB_TX_UNLOCK(txr);
801	}
802	return;
803}
804
805#if __FreeBSD_version >= 800000
806/*
807** Multiqueue Transmit driver
808**
809*/
810static int
811igb_mq_start(struct ifnet *ifp, struct mbuf *m)
812{
813	struct adapter	*adapter = ifp->if_softc;
814	struct tx_ring	*txr;
815	int 		i = 0, err = 0;
816
817	/* Which queue to use */
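	/* A stack-supplied flowid keeps a flow on one ring; otherwise use the current CPU */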
818	if ((m->m_flags & M_FLOWID) != 0)
819		i = m->m_pkthdr.flowid % adapter->num_queues;
820	else
821		i = curcpu % adapter->num_queues;
822
823	txr = &adapter->tx_rings[i];
824
825	if (IGB_TX_TRYLOCK(txr)) {
826		err = igb_mq_start_locked(ifp, txr, m);
827		IGB_TX_UNLOCK(txr);
828	} else
829		err = drbr_enqueue(ifp, txr->br, m);
830
831	return (err);
832}
833
834static int
835igb_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr, struct mbuf *m)
836{
837	struct adapter  *adapter = txr->adapter;
838        struct mbuf     *next;
839        int             err = 0, enq;
840
841	IGB_TX_LOCK_ASSERT(txr);
842
843	if ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) !=
844	    IFF_DRV_RUNNING || adapter->link_active == 0) {
845		if (m != NULL)
846			err = drbr_enqueue(ifp, txr->br, m);
847		return (err);
848	}
849
850	enq = 0;
851	if (m == NULL) {
852		next = drbr_dequeue(ifp, txr->br);
853	} else if (drbr_needs_enqueue(ifp, txr->br)) {
854		if ((err = drbr_enqueue(ifp, txr->br, m)) != 0)
855			return (err);
856		next = drbr_dequeue(ifp, txr->br);
857	} else
858		next = m;
859	/* Process the queue */
860	while (next != NULL) {
861		if ((err = igb_xmit(txr, &next)) != 0) {
862			if (next != NULL)
863				err = drbr_enqueue(ifp, txr->br, next);
864			break;
865		}
866		enq++;
867		drbr_stats_update(ifp, next->m_pkthdr.len, next->m_flags);
868		ETHER_BPF_MTAP(ifp, next);
869		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
870			break;
871		if (txr->tx_avail <= IGB_TX_OP_THRESHOLD) {
872			ifp->if_drv_flags |= IFF_DRV_OACTIVE;
873			break;
874		}
875		next = drbr_dequeue(ifp, txr->br);
876	}
877	if (enq > 0) {
878		/* Set the watchdog */
879		txr->watchdog_check = TRUE;
880	}
881	return (err);
882}
883
884/*
885** Flush all ring buffers
886*/
887static void
888igb_qflush(struct ifnet *ifp)
889{
890	struct adapter	*adapter = ifp->if_softc;
891	struct tx_ring	*txr = adapter->tx_rings;
892	struct mbuf	*m;
893
894	for (int i = 0; i < adapter->num_queues; i++, txr++) {
895		IGB_TX_LOCK(txr);
896		while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
897			m_freem(m);
898		IGB_TX_UNLOCK(txr);
899	}
900	if_qflush(ifp);
901}
902#endif /* __FreeBSD_version >= 800000 */
903
904/*********************************************************************
905 *  Ioctl entry point
906 *
907 *  igb_ioctl is called when the user wants to configure the
908 *  interface.
909 *
910 *  return 0 on success, positive on failure
911 **********************************************************************/
912
913static int
914igb_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
915{
916	struct adapter	*adapter = ifp->if_softc;
917	struct ifreq *ifr = (struct ifreq *)data;
918#ifdef INET
919	struct ifaddr *ifa = (struct ifaddr *)data;
920#endif
921	int error = 0;
922
923	if (adapter->in_detach)
924		return (error);
925
926	switch (command) {
927	case SIOCSIFADDR:
928#ifdef INET
929		if (ifa->ifa_addr->sa_family == AF_INET) {
930			/*
931			 * XXX
932			 * Since resetting the hardware takes a very long time
933			 * and results in link renegotiation, we only
934			 * initialize the hardware when it is absolutely
935			 * required.
936			 */
937			ifp->if_flags |= IFF_UP;
938			if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) {
939				IGB_CORE_LOCK(adapter);
940				igb_init_locked(adapter);
941				IGB_CORE_UNLOCK(adapter);
942			}
943			if (!(ifp->if_flags & IFF_NOARP))
944				arp_ifinit(ifp, ifa);
945		} else
946#endif
947			error = ether_ioctl(ifp, command, data);
948		break;
949	case SIOCSIFMTU:
950	    {
951		int max_frame_size;
952
953		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)");
954
955		IGB_CORE_LOCK(adapter);
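		/* Largest frame size the driver accepts; used to bound the requested MTU */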
956		max_frame_size = 9234;
957		if (ifr->ifr_mtu > max_frame_size - ETHER_HDR_LEN -
958		    ETHER_CRC_LEN) {
959			IGB_CORE_UNLOCK(adapter);
960			error = EINVAL;
961			break;
962		}
963
964		ifp->if_mtu = ifr->ifr_mtu;
965		adapter->max_frame_size =
966		    ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
967		igb_init_locked(adapter);
968		IGB_CORE_UNLOCK(adapter);
969		break;
970	    }
971	case SIOCSIFFLAGS:
972		IOCTL_DEBUGOUT("ioctl rcv'd:\
973		    SIOCSIFFLAGS (Set Interface Flags)");
974		IGB_CORE_LOCK(adapter);
975		if (ifp->if_flags & IFF_UP) {
976			if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) {
977				if ((ifp->if_flags ^ adapter->if_flags) &
978				    (IFF_PROMISC | IFF_ALLMULTI)) {
979					igb_disable_promisc(adapter);
980					igb_set_promisc(adapter);
981				}
982			} else
983				igb_init_locked(adapter);
984		} else
985			if (ifp->if_drv_flags & IFF_DRV_RUNNING)
986				igb_stop(adapter);
987		adapter->if_flags = ifp->if_flags;
988		IGB_CORE_UNLOCK(adapter);
989		break;
990	case SIOCADDMULTI:
991	case SIOCDELMULTI:
992		IOCTL_DEBUGOUT("ioctl rcv'd: SIOC(ADD|DEL)MULTI");
993		if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
994			IGB_CORE_LOCK(adapter);
995			igb_disable_intr(adapter);
996			igb_set_multi(adapter);
997#ifdef DEVICE_POLLING
998			if (!(ifp->if_capenable & IFCAP_POLLING))
999#endif
1000				igb_enable_intr(adapter);
1001			IGB_CORE_UNLOCK(adapter);
1002		}
1003		break;
1004	case SIOCSIFMEDIA:
1005		/* Check SOL/IDER usage */
1006		IGB_CORE_LOCK(adapter);
1007		if (e1000_check_reset_block(&adapter->hw)) {
1008			IGB_CORE_UNLOCK(adapter);
1009			device_printf(adapter->dev, "Media change is"
1010			    " blocked due to SOL/IDER session.\n");
1011			break;
1012		}
1013		IGB_CORE_UNLOCK(adapter);
1014	case SIOCGIFMEDIA:
1015		IOCTL_DEBUGOUT("ioctl rcv'd: \
1016		    SIOCxIFMEDIA (Get/Set Interface Media)");
1017		error = ifmedia_ioctl(ifp, ifr, &adapter->media, command);
1018		break;
1019	case SIOCSIFCAP:
1020	    {
1021		int mask, reinit;
1022
1023		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFCAP (Set Capabilities)");
1024		reinit = 0;
1025		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
1026#ifdef DEVICE_POLLING
1027		if (mask & IFCAP_POLLING) {
1028			if (ifr->ifr_reqcap & IFCAP_POLLING) {
1029				error = ether_poll_register(igb_poll, ifp);
1030				if (error)
1031					return (error);
1032				IGB_CORE_LOCK(adapter);
1033				igb_disable_intr(adapter);
1034				ifp->if_capenable |= IFCAP_POLLING;
1035				IGB_CORE_UNLOCK(adapter);
1036			} else {
1037				error = ether_poll_deregister(ifp);
1038				/* Enable interrupt even in error case */
1039				IGB_CORE_LOCK(adapter);
1040				igb_enable_intr(adapter);
1041				ifp->if_capenable &= ~IFCAP_POLLING;
1042				IGB_CORE_UNLOCK(adapter);
1043			}
1044		}
1045#endif
1046		if (mask & IFCAP_HWCSUM) {
1047			ifp->if_capenable ^= IFCAP_HWCSUM;
1048			reinit = 1;
1049		}
1050		if (mask & IFCAP_TSO4) {
1051			ifp->if_capenable ^= IFCAP_TSO4;
1052			reinit = 1;
1053		}
1054		if (mask & IFCAP_VLAN_HWTAGGING) {
1055			ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
1056			reinit = 1;
1057		}
1058		if (mask & IFCAP_VLAN_HWFILTER) {
1059			ifp->if_capenable ^= IFCAP_VLAN_HWFILTER;
1060			reinit = 1;
1061		}
1062		if (mask & IFCAP_LRO) {
1063			ifp->if_capenable ^= IFCAP_LRO;
1064			reinit = 1;
1065		}
1066		if (reinit && (ifp->if_drv_flags & IFF_DRV_RUNNING))
1067			igb_init(adapter);
1068		VLAN_CAPABILITIES(ifp);
1069		break;
1070	    }
1071
1072	default:
1073		error = ether_ioctl(ifp, command, data);
1074		break;
1075	}
1076
1077	return (error);
1078}
1079
1080
1081/*********************************************************************
1082 *  Init entry point
1083 *
1084 *  This routine is used in two ways. It is used by the stack as
1085 *  the init entry point in the network interface structure. It is also used
1086 *  by the driver as a hw/sw initialization routine to get to a
1087 *  consistent state.
1088 *
1089 *
1090 **********************************************************************/
1091
1092static void
1093igb_init_locked(struct adapter *adapter)
1094{
1095	struct ifnet	*ifp = adapter->ifp;
1096	device_t	dev = adapter->dev;
1097
1098	INIT_DEBUGOUT("igb_init: begin");
1099
1100	IGB_CORE_LOCK_ASSERT(adapter);
1101
1102	igb_disable_intr(adapter);
1103	callout_stop(&adapter->timer);
1104
1105	/* Get the latest mac address, User can use a LAA */
1106        bcopy(IF_LLADDR(adapter->ifp), adapter->hw.mac.addr,
1107              ETHER_ADDR_LEN);
1108
1109	/* Put the address into the Receive Address Array */
1110	e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);
1111
1112	igb_reset(adapter);
1113	igb_update_link_status(adapter);
1114
1115	E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);
1116
1117        /* Use real VLAN Filter support? */
1118	if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) {
1119		if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
1120			/* Use real VLAN Filter support */
1121			igb_setup_vlan_hw_support(adapter);
1122		else {
1123			u32 ctrl;
1124			ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
1125			ctrl |= E1000_CTRL_VME;
1126			E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
1127		}
1128	}
1129
1130	/* Set hardware offload abilities */
1131	ifp->if_hwassist = 0;
1132	if (ifp->if_capenable & IFCAP_TXCSUM) {
1133		ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
1134#if __FreeBSD_version >= 800000
1135		if (adapter->hw.mac.type == e1000_82576)
1136			ifp->if_hwassist |= CSUM_SCTP;
1137#endif
1138	}
1139
1140	if (ifp->if_capenable & IFCAP_TSO4)
1141		ifp->if_hwassist |= CSUM_TSO;
1142
1143	/* Configure for OS presence */
1144	igb_init_manageability(adapter);
1145
1146	/* Prepare transmit descriptors and buffers */
1147	igb_setup_transmit_structures(adapter);
1148	igb_initialize_transmit_units(adapter);
1149
1150	/* Setup Multicast table */
1151	igb_set_multi(adapter);
1152
1153	/*
1154	** Figure out the desired mbuf pool
1155	** for doing jumbo/packetsplit
1156	*/
1157	if (ifp->if_mtu > ETHERMTU)
1158		adapter->rx_mbuf_sz = MJUMPAGESIZE;
1159	else
1160		adapter->rx_mbuf_sz = MCLBYTES;
1161
1162	/* Prepare receive descriptors and buffers */
1163	if (igb_setup_receive_structures(adapter)) {
1164		device_printf(dev, "Could not setup receive structures\n");
1165		return;
1166	}
1167	igb_initialize_receive_units(adapter);
1168
1169	/* Don't lose promiscuous settings */
1170	igb_set_promisc(adapter);
1171
1172	ifp->if_drv_flags |= IFF_DRV_RUNNING;
1173	ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
1174
1175	callout_reset(&adapter->timer, hz, igb_local_timer, adapter);
1176	e1000_clear_hw_cntrs_base_generic(&adapter->hw);
1177
1178	if (adapter->msix > 1) /* Set up queue routing */
1179		igb_configure_queues(adapter);
1180
1181	/* Set up VLAN tag offload and filter */
1182	igb_setup_vlan_hw_support(adapter);
1183
1184	/* this clears any pending interrupts */
1185	E1000_READ_REG(&adapter->hw, E1000_ICR);
1186#ifdef DEVICE_POLLING
1187	/*
1188	 * Only enable interrupts if we are not polling; make sure
1189	 * they are off otherwise.
1190	 */
1191	if (ifp->if_capenable & IFCAP_POLLING)
1192		igb_disable_intr(adapter);
1193	else
1194#endif /* DEVICE_POLLING */
1195	{
1196	igb_enable_intr(adapter);
1197	E1000_WRITE_REG(&adapter->hw, E1000_ICS, E1000_ICS_LSC);
1198	}
1199
1200	/* Don't reset the phy next time init gets called */
1201	adapter->hw.phy.reset_disable = TRUE;
1202}
1203
1204static void
1205igb_init(void *arg)
1206{
1207	struct adapter *adapter = arg;
1208
1209	IGB_CORE_LOCK(adapter);
1210	igb_init_locked(adapter);
1211	IGB_CORE_UNLOCK(adapter);
1212}
1213
1214
1215static void
1216igb_handle_rxtx(void *context, int pending)
1217{
1218	struct igb_queue	*que = context;
1219	struct adapter		*adapter = que->adapter;
1220	struct tx_ring		*txr = adapter->tx_rings;
1221	struct ifnet		*ifp;
1222
1223	ifp = adapter->ifp;
1224
1225	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1226		if (igb_rxeof(que, adapter->rx_process_limit))
1227			taskqueue_enqueue(adapter->tq, &adapter->rxtx_task);
1228		IGB_TX_LOCK(txr);
1229		igb_txeof(txr);
1230
1231#if __FreeBSD_version >= 800000
1232		if (!drbr_empty(ifp, txr->br))
1233			igb_mq_start_locked(ifp, txr, NULL);
1234#else
1235		if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1236			igb_start_locked(txr, ifp);
1237#endif
1238		IGB_TX_UNLOCK(txr);
1239	}
1240
1241	igb_enable_intr(adapter);
1242}
1243
1244static void
1245igb_handle_que(void *context, int pending)
1246{
1247	struct igb_queue *que = context;
1248	struct adapter *adapter = que->adapter;
1249	struct tx_ring *txr = que->txr;
1250	struct ifnet	*ifp = adapter->ifp;
1251	u32		loop = IGB_MAX_LOOP;
1252	bool		more;
1253
1254	/* RX first */
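	/* (IGB_MAX_LOOP bounds how much cleanup a single task run may do) */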
1255	do {
1256		more = igb_rxeof(que, -1);
1257	} while (loop-- && more);
1258
1259	if (IGB_TX_TRYLOCK(txr)) {
1260		loop = IGB_MAX_LOOP;
1261		do {
1262			more = igb_txeof(txr);
1263		} while (loop-- && more);
1264#if __FreeBSD_version >= 800000
1265		igb_mq_start_locked(ifp, txr, NULL);
1266#else
1267		if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1268			igb_start_locked(txr, ifp);
1269#endif
1270		IGB_TX_UNLOCK(txr);
1271	}
1272
1273	/* Reenable this interrupt */
1274#ifdef DEVICE_POLLING
1275	if (!(ifp->if_capenable & IFCAP_POLLING))
1276#endif
1277	E1000_WRITE_REG(&adapter->hw, E1000_EIMS, que->eims);
1278}
1279
1280/* Deal with link in a sleepable context */
1281static void
1282igb_handle_link(void *context, int pending)
1283{
1284	struct adapter *adapter = context;
1285
1286	adapter->hw.mac.get_link_status = 1;
1287	igb_update_link_status(adapter);
1288}
1289
1290/*********************************************************************
1291 *
1292 *  MSI/Legacy Deferred
1293 *  Interrupt Service routine
1294 *
1295 *********************************************************************/
1296static int
1297igb_irq_fast(void *arg)
1298{
1299	struct adapter	*adapter = arg;
1300	uint32_t	reg_icr;
1301
1302
1303	reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1304
1305	/* Hot eject?  */
1306	if (reg_icr == 0xffffffff)
1307		return FILTER_STRAY;
1308
1309	/* Definitely not our interrupt.  */
1310	if (reg_icr == 0x0)
1311		return FILTER_STRAY;
1312
1313	if ((reg_icr & E1000_ICR_INT_ASSERTED) == 0)
1314		return FILTER_STRAY;
1315
1316	/*
1317	 * Mask interrupts until the taskqueue is finished running.  This is
1318	 * cheap, just assume that it is needed.  This also works around the
1319	 * MSI message reordering errata on certain systems.
1320	 */
1321	igb_disable_intr(adapter);
1322	taskqueue_enqueue(adapter->tq, &adapter->rxtx_task);
1323
1324	/* Link status change */
1325	if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC))
1326		taskqueue_enqueue(adapter->tq, &adapter->link_task);
1327
1328	if (reg_icr & E1000_ICR_RXO)
1329		adapter->rx_overruns++;
1330	return FILTER_HANDLED;
1331}
1332
1333#ifdef DEVICE_POLLING
1334/*********************************************************************
1335 *
1336 *  Legacy polling routine: if using this code you MUST be sure that
1337 *  multiqueue is not defined, i.e., set igb_num_queues to 1.
1338 *
1339 *********************************************************************/
1340#if __FreeBSD_version >= 800000
1341#define POLL_RETURN_COUNT(a) (a)
1342static int
1343#else
1344#define POLL_RETURN_COUNT(a)
1345static void
1346#endif
1347igb_poll(struct ifnet *ifp, enum poll_cmd cmd, int count)
1348{
1349	struct adapter		*adapter = ifp->if_softc;
1350	struct igb_queue	*que = adapter->queues;
1351	struct tx_ring		*txr = adapter->tx_rings;
1352	u32			reg_icr, rx_done = 0;
1353	u32			loop = IGB_MAX_LOOP;
1354	bool			more;
1355
1356	IGB_CORE_LOCK(adapter);
1357	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
1358		IGB_CORE_UNLOCK(adapter);
1359		return POLL_RETURN_COUNT(rx_done);
1360	}
1361
1362	if (cmd == POLL_AND_CHECK_STATUS) {
1363		reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1364		/* Link status change */
1365		if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC))
1366			taskqueue_enqueue(adapter->tq, &adapter->link_task);
1367
1368		if (reg_icr & E1000_ICR_RXO)
1369			adapter->rx_overruns++;
1370	}
1371	IGB_CORE_UNLOCK(adapter);
1372
1373	/* TODO: rx_count */
1374	rx_done = igb_rxeof(que, count) ? 1 : 0;
1375
1376	IGB_TX_LOCK(txr);
1377	do {
1378		more = igb_txeof(txr);
1379	} while (loop-- && more);
1380#if __FreeBSD_version >= 800000
1381	if (!drbr_empty(ifp, txr->br))
1382		igb_mq_start_locked(ifp, txr, NULL);
1383#else
1384	if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1385		igb_start_locked(txr, ifp);
1386#endif
1387	IGB_TX_UNLOCK(txr);
1388	return POLL_RETURN_COUNT(rx_done);
1389}
1390#endif /* DEVICE_POLLING */
1391
1392/*********************************************************************
1393 *
1394 *  MSIX Que Interrupt Service routine
1395 *
1396 **********************************************************************/
1397static void
1398igb_msix_que(void *arg)
1399{
1400	struct igb_queue *que = arg;
1401	struct adapter *adapter = que->adapter;
1402	struct tx_ring *txr = que->txr;
1403	struct rx_ring *rxr = que->rxr;
1404	u32		newitr = 0;
1405	bool		more_tx, more_rx;
1406
1407	E1000_WRITE_REG(&adapter->hw, E1000_EIMC, que->eims);
1408	++que->irqs;
1409
1410	IGB_TX_LOCK(txr);
1411	more_tx = igb_txeof(txr);
1412	IGB_TX_UNLOCK(txr);
1413
1414	more_rx = igb_rxeof(que, adapter->rx_process_limit);
1415
1416	if (igb_enable_aim == FALSE)
1417		goto no_calc;
1418	/*
1419	** Do Adaptive Interrupt Moderation:
1420        **  - Write out last calculated setting
1421	**  - Calculate based on average size over
1422	**    the last interval.
1423	*/
1424        if (que->eitr_setting)
1425                E1000_WRITE_REG(&adapter->hw,
1426                    E1000_EITR(que->msix), que->eitr_setting);
1427
1428        que->eitr_setting = 0;
1429
1430        /* Idle, do nothing */
1431        if ((txr->bytes == 0) && (rxr->bytes == 0))
1432                goto no_calc;
1433
1434        /* Use half the default if sub-gig */
1435        if (adapter->link_speed != 1000)
1436                newitr = IGB_DEFAULT_ITR / 2;
1437        else {
1438		if ((txr->bytes) && (txr->packets))
1439                	newitr = txr->bytes/txr->packets;
1440		if ((rxr->bytes) && (rxr->packets))
1441			newitr = max(newitr,
1442			    (rxr->bytes / rxr->packets));
1443                newitr += 24; /* account for hardware frame, crc */
1444		/* set an upper boundary */
1445		newitr = min(newitr, 3000);
1446		/* Be nice to the mid range */
1447                if ((newitr > 300) && (newitr < 1200))
1448                        newitr = (newitr / 3);
1449                else
1450                        newitr = (newitr / 2);
1451        }
1452        newitr &= 0x7FFC;  /* Mask invalid bits */
1453        if (adapter->hw.mac.type == e1000_82575)
1454                newitr |= newitr << 16;
1455        else
1456                newitr |= E1000_EITR_CNT_IGNR;
1457
1458        /* save for next interrupt */
1459        que->eitr_setting = newitr;
1460
1461        /* Reset state */
1462        txr->bytes = 0;
1463        txr->packets = 0;
1464        rxr->bytes = 0;
1465        rxr->packets = 0;
1466
1467no_calc:
1468	/* Schedule a clean task if needed */
1469	if (more_tx || more_rx)
1470		taskqueue_enqueue(que->tq, &que->que_task);
1471	else
1472		/* Reenable this interrupt */
1473		E1000_WRITE_REG(&adapter->hw, E1000_EIMS, que->eims);
1474	return;
1475}
1476
1477
1478/*********************************************************************
1479 *
1480 *  MSIX Link Interrupt Service routine
1481 *
1482 **********************************************************************/
1483
1484static void
1485igb_msix_link(void *arg)
1486{
1487	struct adapter	*adapter = arg;
1488	u32       	icr;
1489
1490	++adapter->link_irq;
1491	icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1492	if (!(icr & E1000_ICR_LSC))
1493		goto spurious;
1494	taskqueue_enqueue(adapter->tq, &adapter->link_task);
1495
1496spurious:
1497	/* Rearm */
1498	E1000_WRITE_REG(&adapter->hw, E1000_IMS, E1000_IMS_LSC);
1499	E1000_WRITE_REG(&adapter->hw, E1000_EIMS, adapter->link_mask);
1500	return;
1501}
1502
1503
1504/*********************************************************************
1505 *
1506 *  Media Ioctl callback
1507 *
1508 *  This routine is called whenever the user queries the status of
1509 *  the interface using ifconfig.
1510 *
1511 **********************************************************************/
1512static void
1513igb_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
1514{
1515	struct adapter *adapter = ifp->if_softc;
1516	u_char fiber_type = IFM_1000_SX;
1517
1518	INIT_DEBUGOUT("igb_media_status: begin");
1519
1520	IGB_CORE_LOCK(adapter);
1521	igb_update_link_status(adapter);
1522
1523	ifmr->ifm_status = IFM_AVALID;
1524	ifmr->ifm_active = IFM_ETHER;
1525
1526	if (!adapter->link_active) {
1527		IGB_CORE_UNLOCK(adapter);
1528		return;
1529	}
1530
1531	ifmr->ifm_status |= IFM_ACTIVE;
1532
1533	if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
1534	    (adapter->hw.phy.media_type == e1000_media_type_internal_serdes))
1535		ifmr->ifm_active |= fiber_type | IFM_FDX;
1536	else {
1537		switch (adapter->link_speed) {
1538		case 10:
1539			ifmr->ifm_active |= IFM_10_T;
1540			break;
1541		case 100:
1542			ifmr->ifm_active |= IFM_100_TX;
1543			break;
1544		case 1000:
1545			ifmr->ifm_active |= IFM_1000_T;
1546			break;
1547		}
1548		if (adapter->link_duplex == FULL_DUPLEX)
1549			ifmr->ifm_active |= IFM_FDX;
1550		else
1551			ifmr->ifm_active |= IFM_HDX;
1552	}
1553	IGB_CORE_UNLOCK(adapter);
1554}
1555
1556/*********************************************************************
1557 *
1558 *  Media Ioctl callback
1559 *
1560 *  This routine is called when the user changes speed/duplex using
1561 *  the media/mediaopt option with ifconfig.
1562 *
1563 **********************************************************************/
1564static int
1565igb_media_change(struct ifnet *ifp)
1566{
1567	struct adapter *adapter = ifp->if_softc;
1568	struct ifmedia  *ifm = &adapter->media;
1569
1570	INIT_DEBUGOUT("igb_media_change: begin");
1571
1572	if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
1573		return (EINVAL);
1574
1575	IGB_CORE_LOCK(adapter);
1576	switch (IFM_SUBTYPE(ifm->ifm_media)) {
1577	case IFM_AUTO:
1578		adapter->hw.mac.autoneg = DO_AUTO_NEG;
1579		adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
1580		break;
1581	case IFM_1000_LX:
1582	case IFM_1000_SX:
1583	case IFM_1000_T:
1584		adapter->hw.mac.autoneg = DO_AUTO_NEG;
1585		adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
1586		break;
1587	case IFM_100_TX:
1588		adapter->hw.mac.autoneg = FALSE;
1589		adapter->hw.phy.autoneg_advertised = 0;
1590		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1591			adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_FULL;
1592		else
1593			adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_HALF;
1594		break;
1595	case IFM_10_T:
1596		adapter->hw.mac.autoneg = FALSE;
1597		adapter->hw.phy.autoneg_advertised = 0;
1598		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1599			adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_FULL;
1600		else
1601			adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_HALF;
1602		break;
1603	default:
1604		device_printf(adapter->dev, "Unsupported media type\n");
1605	}
1606
1607	/* As the speed/duplex settings may have changed, we need to
1608	 * reset the PHY.
1609	 */
1610	adapter->hw.phy.reset_disable = FALSE;
1611
1612	igb_init_locked(adapter);
1613	IGB_CORE_UNLOCK(adapter);
1614
1615	return (0);
1616}
1617
1618
1619/*********************************************************************
1620 *
1621 *  This routine maps the mbufs to the Advanced TX descriptors
1622 *  used by the 82575 adapter.
1623 *
1624 **********************************************************************/
1625
1626static int
1627igb_xmit(struct tx_ring *txr, struct mbuf **m_headp)
1628{
1629	struct adapter		*adapter = txr->adapter;
1630	bus_dma_segment_t	segs[IGB_MAX_SCATTER];
1631	bus_dmamap_t		map;
1632	struct igb_tx_buffer	*tx_buffer, *tx_buffer_mapped;
1633	union e1000_adv_tx_desc	*txd = NULL;
1634	struct mbuf		*m_head;
1635	u32			olinfo_status = 0, cmd_type_len = 0;
1636	int			nsegs, i, j, error, first, last = 0;
1637	u32			hdrlen = 0;
1638
1639	m_head = *m_headp;
1640
1641
1642	/* Set basic descriptor constants */
1643	cmd_type_len |= E1000_ADVTXD_DTYP_DATA;
1644	cmd_type_len |= E1000_ADVTXD_DCMD_IFCS | E1000_ADVTXD_DCMD_DEXT;
1645	if (m_head->m_flags & M_VLANTAG)
1646		cmd_type_len |= E1000_ADVTXD_DCMD_VLE;
1647
1648        /*
1649         * Force a cleanup if the number of TX descriptors
1650         * available hits the threshold.
1651         */
1652	if (txr->tx_avail <= IGB_TX_CLEANUP_THRESHOLD) {
1653		igb_txeof(txr);
1654		/* Now do we at least have the minimum? */
1655		if (txr->tx_avail <= IGB_TX_OP_THRESHOLD) {
1656			txr->no_desc_avail++;
1657			return (ENOBUFS);
1658		}
1659	}
1660
1661	/*
1662         * Map the packet for DMA.
1663	 *
1664	 * Capture the first descriptor index;
1665	 * this descriptor will have the index
1666	 * of the EOP, which is the only one that
1667	 * now gets a DONE bit writeback.
1668	 */
1669	first = txr->next_avail_desc;
1670	tx_buffer = &txr->tx_buffers[first];
1671	tx_buffer_mapped = tx_buffer;
1672	map = tx_buffer->map;
1673
1674	error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
1675	    *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
1676
1677	if (error == EFBIG) {
1678		struct mbuf *m;
1679
1680		m = m_defrag(*m_headp, M_DONTWAIT);
1681		if (m == NULL) {
1682			adapter->mbuf_defrag_failed++;
1683			m_freem(*m_headp);
1684			*m_headp = NULL;
1685			return (ENOBUFS);
1686		}
1687		*m_headp = m;
1688
1689		/* Try it again */
1690		error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
1691		    *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
1692
1693		if (error == ENOMEM) {
1694			adapter->no_tx_dma_setup++;
1695			return (error);
1696		} else if (error != 0) {
1697			adapter->no_tx_dma_setup++;
1698			m_freem(*m_headp);
1699			*m_headp = NULL;
1700			return (error);
1701		}
1702	} else if (error == ENOMEM) {
1703		adapter->no_tx_dma_setup++;
1704		return (error);
1705	} else if (error != 0) {
1706		adapter->no_tx_dma_setup++;
1707		m_freem(*m_headp);
1708		*m_headp = NULL;
1709		return (error);
1710	}
1711
1712	/* Check again to be sure we have enough descriptors */
1713        if (nsegs > (txr->tx_avail - 2)) {
1714                txr->no_desc_avail++;
1715		bus_dmamap_unload(txr->txtag, map);
1716		return (ENOBUFS);
1717        }
1718	m_head = *m_headp;
1719
1720        /*
1721         * Set up the context descriptor:
1722         * used when any hardware offload is done.
1723	 * This includes CSUM, VLAN, and TSO. It
1724	 * will use the first descriptor.
1725         */
1726        if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
1727		if (igb_tso_setup(txr, m_head, &hdrlen)) {
1728			cmd_type_len |= E1000_ADVTXD_DCMD_TSE;
1729			olinfo_status |= E1000_TXD_POPTS_IXSM << 8;
1730			olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
1731		} else
1732			return (ENXIO);
1733	} else if (igb_tx_ctx_setup(txr, m_head))
1734		olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
1735
1736	/* Calculate payload length */
1737	olinfo_status |= ((m_head->m_pkthdr.len - hdrlen)
1738	    << E1000_ADVTXD_PAYLEN_SHIFT);
1739
1740	/* 82575 needs the queue index added */
1741	if (adapter->hw.mac.type == e1000_82575)
1742		olinfo_status |= txr->me << 4;
1743
1744	/* Set up our transmit descriptors */
1745	i = txr->next_avail_desc;
1746	for (j = 0; j < nsegs; j++) {
1747		bus_size_t seg_len;
1748		bus_addr_t seg_addr;
1749
1750		tx_buffer = &txr->tx_buffers[i];
1751		txd = (union e1000_adv_tx_desc *)&txr->tx_base[i];
1752		seg_addr = segs[j].ds_addr;
1753		seg_len  = segs[j].ds_len;
1754
1755		txd->read.buffer_addr = htole64(seg_addr);
1756		txd->read.cmd_type_len = htole32(cmd_type_len | seg_len);
1757		txd->read.olinfo_status = htole32(olinfo_status);
1758		last = i;
1759		if (++i == adapter->num_tx_desc)
1760			i = 0;
1761		tx_buffer->m_head = NULL;
1762		tx_buffer->next_eop = -1;
1763	}
1764
1765	txr->next_avail_desc = i;
1766	txr->tx_avail -= nsegs;
1767
1768        tx_buffer->m_head = m_head;
1769	tx_buffer_mapped->map = tx_buffer->map;
1770	tx_buffer->map = map;
1771        bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);
1772
1773        /*
1774         * Last Descriptor of Packet
1775	 * needs End Of Packet (EOP)
1776	 * and Report Status (RS)
1777         */
1778        txd->read.cmd_type_len |=
1779	    htole32(E1000_ADVTXD_DCMD_EOP | E1000_ADVTXD_DCMD_RS);
1780	/*
1781	 * Keep track in the first buffer which
1782	 * descriptor will be written back
1783	 */
1784	tx_buffer = &txr->tx_buffers[first];
1785	tx_buffer->next_eop = last;
1786	txr->watchdog_time = ticks;
1787
1788	/*
1789	 * Advance the Transmit Descriptor Tail (TDT); this tells the E1000
1790	 * that this frame is available to transmit.
1791	 */
1792	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
1793	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1794	E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), i);
1795	++txr->tx_packets;
1796
1797	return (0);
1798
1799}
1800
1801static void
1802igb_set_promisc(struct adapter *adapter)
1803{
1804	struct ifnet	*ifp = adapter->ifp;
1805	uint32_t	reg_rctl;
1806
1807	reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
1808
1809	if (ifp->if_flags & IFF_PROMISC) {
1810		reg_rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
1811		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
1812	} else if (ifp->if_flags & IFF_ALLMULTI) {
1813		reg_rctl |= E1000_RCTL_MPE;
1814		reg_rctl &= ~E1000_RCTL_UPE;
1815		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
1816	}
1817}
1818
1819static void
1820igb_disable_promisc(struct adapter *adapter)
1821{
1822	uint32_t	reg_rctl;
1823
1824	reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
1825
1826	reg_rctl &=  (~E1000_RCTL_UPE);
1827	reg_rctl &=  (~E1000_RCTL_MPE);
1828	E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
1829}
1830
1831
1832/*********************************************************************
1833 *  Multicast Update
1834 *
1835 *  This routine is called whenever the multicast address list is updated.
1836 *
1837 **********************************************************************/
1838
1839static void
1840igb_set_multi(struct adapter *adapter)
1841{
1842	struct ifnet	*ifp = adapter->ifp;
1843	struct ifmultiaddr *ifma;
1844	u32 reg_rctl = 0;
1845	u8  mta[MAX_NUM_MULTICAST_ADDRESSES * ETH_ADDR_LEN];
1846
1847	int mcnt = 0;
1848
1849	IOCTL_DEBUGOUT("igb_set_multi: begin");
1850
1851#if __FreeBSD_version < 800000
1852	IF_ADDR_LOCK(ifp);
1853#else
1854	if_maddr_rlock(ifp);
1855#endif
1856	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
1857		if (ifma->ifma_addr->sa_family != AF_LINK)
1858			continue;
1859
1860		if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
1861			break;
1862
1863		bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
1864		    &mta[mcnt * ETH_ADDR_LEN], ETH_ADDR_LEN);
1865		mcnt++;
1866	}
1867#if __FreeBSD_version < 800000
1868	IF_ADDR_UNLOCK(ifp);
1869#else
1870	if_maddr_runlock(ifp);
1871#endif
1872
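	/*
	 * If the list is larger than the hardware's exact filter table we
	 * fall back to multicast promiscuous (MPE) rather than program a
	 * partial list.
	 */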
1873	if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) {
1874		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
1875		reg_rctl |= E1000_RCTL_MPE;
1876		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
1877	} else
1878		e1000_update_mc_addr_list(&adapter->hw, mta, mcnt);
1879}
1880
1881
1882/*********************************************************************
1883 *  Timer routine:
1884 *  	This routine checks for link status,
1885 *	updates statistics, and does the watchdog.
1886 *
1887 **********************************************************************/
1888
1889static void
1890igb_local_timer(void *arg)
1891{
1892	struct adapter		*adapter = arg;
1893	struct ifnet		*ifp = adapter->ifp;
1894	device_t		dev = adapter->dev;
1895	struct tx_ring		*txr = adapter->tx_rings;
1896
1897
1898	IGB_CORE_LOCK_ASSERT(adapter);
1899
1900	igb_update_link_status(adapter);
1901	igb_update_stats_counters(adapter);
1902
1903	if (igb_display_debug_stats && (ifp->if_drv_flags & IFF_DRV_RUNNING))
1904		igb_print_hw_stats(adapter);
1905
1906        /*
1907        ** Watchdog: check for time since any descriptor was cleaned
1908        */
1909	for (int i = 0; i < adapter->num_queues; i++, txr++) {
1910		if (txr->watchdog_check == FALSE)
1911			continue;
1912		if ((ticks - txr->watchdog_time) > IGB_WATCHDOG)
1913			goto timeout;
1914	}
1915
1916	/* Trigger an RX interrupt on all queues */
1917#ifdef DEVICE_POLLING
1918	if (!(ifp->if_capenable & IFCAP_POLLING))
1919#endif
1920	E1000_WRITE_REG(&adapter->hw, E1000_EICS, adapter->rx_mask);
1921	callout_reset(&adapter->timer, hz, igb_local_timer, adapter);
1922	return;
1923
1924timeout:
1925	device_printf(adapter->dev, "Watchdog timeout -- resetting\n");
1926	device_printf(dev, "Queue(%d) hw tdh = %d, hw tdt = %d\n", txr->me,
1927            E1000_READ_REG(&adapter->hw, E1000_TDH(txr->me)),
1928            E1000_READ_REG(&adapter->hw, E1000_TDT(txr->me)));
1929	device_printf(dev, "TX(%d) desc avail = %d, "
1930            "Next TX to Clean = %d\n",
1931            txr->me, txr->tx_avail, txr->next_to_clean);
1932	adapter->ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
1933	adapter->watchdog_events++;
1934	igb_init_locked(adapter);
1935}
1936
1937static void
1938igb_update_link_status(struct adapter *adapter)
1939{
1940	struct e1000_hw *hw = &adapter->hw;
1941	struct ifnet *ifp = adapter->ifp;
1942	device_t dev = adapter->dev;
1943	struct tx_ring *txr = adapter->tx_rings;
1944	u32 link_check = 0;
1945
1946	/* Get the cached link value or read for real */
1947        switch (hw->phy.media_type) {
1948        case e1000_media_type_copper:
1949                if (hw->mac.get_link_status) {
1950			/* Do the work to read phy */
1951                        e1000_check_for_link(hw);
1952                        link_check = !hw->mac.get_link_status;
1953                } else
1954                        link_check = TRUE;
1955                break;
1956        case e1000_media_type_fiber:
1957                e1000_check_for_link(hw);
1958                link_check = (E1000_READ_REG(hw, E1000_STATUS) &
1959                                 E1000_STATUS_LU);
1960                break;
1961        case e1000_media_type_internal_serdes:
1962                e1000_check_for_link(hw);
1963                link_check = adapter->hw.mac.serdes_has_link;
1964                break;
1965        default:
1966        case e1000_media_type_unknown:
1967                break;
1968        }
1969
1970	/* Now we check if a transition has happened */
1971	if (link_check && (adapter->link_active == 0)) {
1972		e1000_get_speed_and_duplex(&adapter->hw,
1973		    &adapter->link_speed, &adapter->link_duplex);
1974		if (bootverbose)
1975			device_printf(dev, "Link is up %d Mbps %s\n",
1976			    adapter->link_speed,
1977			    ((adapter->link_duplex == FULL_DUPLEX) ?
1978			    "Full Duplex" : "Half Duplex"));
1979		adapter->link_active = 1;
1980		ifp->if_baudrate = adapter->link_speed * 1000000;
1981		/* This can sleep */
1982		if_link_state_change(ifp, LINK_STATE_UP);
1983	} else if (!link_check && (adapter->link_active == 1)) {
1984		ifp->if_baudrate = adapter->link_speed = 0;
1985		adapter->link_duplex = 0;
1986		if (bootverbose)
1987			device_printf(dev, "Link is Down\n");
1988		adapter->link_active = 0;
1989		/* This can sleep */
1990		if_link_state_change(ifp, LINK_STATE_DOWN);
1991		/* Turn off watchdogs */
1992		for (int i = 0; i < adapter->num_queues; i++, txr++)
1993			txr->watchdog_check = FALSE;
1994	}
1995}
1996
1997/*********************************************************************
1998 *
1999 *  This routine disables all traffic on the adapter by issuing a
2000 *  global reset on the MAC.
2001 *
2002 **********************************************************************/
2003
2004static void
2005igb_stop(void *arg)
2006{
2007	struct adapter	*adapter = arg;
2008	struct ifnet	*ifp = adapter->ifp;
2009	struct tx_ring *txr = adapter->tx_rings;
2010
2011	IGB_CORE_LOCK_ASSERT(adapter);
2012
2013	INIT_DEBUGOUT("igb_stop: begin");
2014
2015	igb_disable_intr(adapter);
2016
2017	callout_stop(&adapter->timer);
2018
2019	/* Tell the stack that the interface is no longer active */
2020	ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
2021
2022	/* Unarm watchdog timer. */
2023	for (int i = 0; i < adapter->num_queues; i++, txr++) {
2024		IGB_TX_LOCK(txr);
2025		txr->watchdog_check = FALSE;
2026		IGB_TX_UNLOCK(txr);
2027	}
2028
2029	e1000_reset_hw(&adapter->hw);
2030	E1000_WRITE_REG(&adapter->hw, E1000_WUC, 0);
2031
2032	e1000_led_off(&adapter->hw);
2033	e1000_cleanup_led(&adapter->hw);
2034}
2035
2036
2037/*********************************************************************
2038 *
2039 *  Determine hardware revision.
2040 *
2041 **********************************************************************/
2042static void
2043igb_identify_hardware(struct adapter *adapter)
2044{
2045	device_t dev = adapter->dev;
2046
2047	/* Make sure our PCI config space has the necessary stuff set */
2048	adapter->hw.bus.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2);
2049	if (!((adapter->hw.bus.pci_cmd_word & PCIM_CMD_BUSMASTEREN) &&
2050	    (adapter->hw.bus.pci_cmd_word & PCIM_CMD_MEMEN))) {
2051		device_printf(dev, "Memory Access and/or Bus Master bits "
2052		    "were not set!\n");
2053		adapter->hw.bus.pci_cmd_word |=
2054		(PCIM_CMD_BUSMASTEREN | PCIM_CMD_MEMEN);
2055		pci_write_config(dev, PCIR_COMMAND,
2056		    adapter->hw.bus.pci_cmd_word, 2);
2057	}
2058
2059	/* Save off the information about this board */
2060	adapter->hw.vendor_id = pci_get_vendor(dev);
2061	adapter->hw.device_id = pci_get_device(dev);
2062	adapter->hw.revision_id = pci_read_config(dev, PCIR_REVID, 1);
2063	adapter->hw.subsystem_vendor_id =
2064	    pci_read_config(dev, PCIR_SUBVEND_0, 2);
2065	adapter->hw.subsystem_device_id =
2066	    pci_read_config(dev, PCIR_SUBDEV_0, 2);
2067
2068	/* Do Shared Code Init and Setup */
2069	if (e1000_set_mac_type(&adapter->hw)) {
2070		device_printf(dev, "Setup init failure\n");
2071		return;
2072	}
2073}
2074
2075static int
2076igb_allocate_pci_resources(struct adapter *adapter)
2077{
2078	device_t	dev = adapter->dev;
2079	int		rid;
2080
2081	rid = PCIR_BAR(0);
2082	adapter->pci_mem = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
2083	    &rid, RF_ACTIVE);
2084	if (adapter->pci_mem == NULL) {
2085		device_printf(dev, "Unable to allocate bus resource: memory\n");
2086		return (ENXIO);
2087	}
2088	adapter->osdep.mem_bus_space_tag =
2089	    rman_get_bustag(adapter->pci_mem);
2090	adapter->osdep.mem_bus_space_handle =
2091	    rman_get_bushandle(adapter->pci_mem);
2092	adapter->hw.hw_addr = (u8 *)&adapter->osdep.mem_bus_space_handle;
2093
2094	adapter->num_queues = 1; /* Defaults for Legacy or MSI */
2095
2096	/* This will setup either MSI/X or MSI */
2097	adapter->msix = igb_setup_msix(adapter);
2098	adapter->hw.back = &adapter->osdep;
2099
2100	return (0);
2101}
2102
2103/*********************************************************************
2104 *
2105 *  Setup the Legacy or MSI Interrupt handler
2106 *
2107 **********************************************************************/
2108static int
2109igb_allocate_legacy(struct adapter *adapter)
2110{
2111	device_t		dev = adapter->dev;
2112	struct igb_queue	*que = adapter->queues;
2113	int			error, rid = 0;
2114
2115	/* Turn off all interrupts */
2116	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2117
2118	/* MSI RID is 1 */
2119	if (adapter->msix == 1)
2120		rid = 1;
2121
2122	/* We allocate a single interrupt resource */
2123	adapter->res = bus_alloc_resource_any(dev,
2124	    SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2125	if (adapter->res == NULL) {
2126		device_printf(dev, "Unable to allocate bus resource: "
2127		    "interrupt\n");
2128		return (ENXIO);
2129	}
2130
2131	/*
2132	 * Try allocating a fast interrupt and the associated deferred
2133	 * processing contexts.
2134	 */
2135	TASK_INIT(&adapter->rxtx_task, 0, igb_handle_rxtx, que);
2136	/* Make tasklet for deferred link handling */
2137	TASK_INIT(&adapter->link_task, 0, igb_handle_link, adapter);
2138	adapter->tq = taskqueue_create_fast("igb_taskq", M_NOWAIT,
2139	    taskqueue_thread_enqueue, &adapter->tq);
2140	taskqueue_start_threads(&adapter->tq, 1, PI_NET, "%s taskq",
2141	    device_get_nameunit(adapter->dev));
2142	if ((error = bus_setup_intr(dev, adapter->res,
2143	    INTR_TYPE_NET | INTR_MPSAFE, igb_irq_fast, NULL,
2144	    adapter, &adapter->tag)) != 0) {
2145		device_printf(dev, "Failed to register fast interrupt "
2146			    "handler: %d\n", error);
2147		taskqueue_free(adapter->tq);
2148		adapter->tq = NULL;
2149		return (error);
2150	}
2151
2152	return (0);
2153}
2154
2155
2156/*********************************************************************
2157 *
2158 *  Setup the MSIX Queue Interrupt handlers:
2159 *
2160 **********************************************************************/
2161static int
2162igb_allocate_msix(struct adapter *adapter)
2163{
2164	device_t		dev = adapter->dev;
2165	struct igb_queue	*que = adapter->queues;
2166	int			error, rid, vector = 0;
2167
2168
2169	for (int i = 0; i < adapter->num_queues; i++, vector++, que++) {
2170		rid = vector + 1;
2171		que->res = bus_alloc_resource_any(dev,
2172		    SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2173		if (que->res == NULL) {
2174			device_printf(dev,
2175			    "Unable to allocate bus resource: "
2176			    "MSIX Queue Interrupt\n");
2177			return (ENXIO);
2178		}
2179		error = bus_setup_intr(dev, que->res,
2180	    	    INTR_TYPE_NET | INTR_MPSAFE, NULL,
2181		    igb_msix_que, que, &que->tag);
2182		if (error) {
2183			que->res = NULL;
2184			device_printf(dev, "Failed to register Queue handler\n");
2185			return (error);
2186		}
2187		que->msix = vector;
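		/*
		 * On the 82575 the EICR/EIMS bit for a queue is fixed by
		 * queue number; on later MACs in MSI-X mode each vector
		 * simply owns bit (1 << vector).
		 */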
2188		if (adapter->hw.mac.type == e1000_82575)
2189			que->eims = E1000_EICR_TX_QUEUE0 << i;
2190		else
2191			que->eims = 1 << vector;
2192		/*
2193		** Bind the msix vector, and thus the
2194		** rings to the corresponding cpu.
2195		*/
2196		if (adapter->num_queues > 1)
2197			bus_bind_intr(dev, que->res, i);
2198		/* Make tasklet for deferred handling */
2199		TASK_INIT(&que->que_task, 0, igb_handle_que, que);
2200		que->tq = taskqueue_create_fast("igb_que", M_NOWAIT,
2201		    taskqueue_thread_enqueue, &que->tq);
2202		taskqueue_start_threads(&que->tq, 1, PI_NET, "%s que",
2203		    device_get_nameunit(adapter->dev));
2204	}
2205
2206	/* And Link */
2207	rid = vector + 1;
2208	adapter->res = bus_alloc_resource_any(dev,
2209	    SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2210	if (adapter->res == NULL) {
2211		device_printf(dev,
2212		    "Unable to allocate bus resource: "
2213		    "MSIX Link Interrupt\n");
2214		return (ENXIO);
2215	}
2216	if ((error = bus_setup_intr(dev, adapter->res,
2217	    INTR_TYPE_NET | INTR_MPSAFE, NULL,
2218	    igb_msix_link, adapter, &adapter->tag)) != 0) {
2219		device_printf(dev, "Failed to register Link handler\n");
2220		return (error);
2221	}
2222	adapter->linkvec = vector;
2223
2224	/* Make tasklet for deferred handling */
2225	TASK_INIT(&adapter->link_task, 0, igb_handle_link, adapter);
2226	adapter->tq = taskqueue_create_fast("igb_link", M_NOWAIT,
2227	    taskqueue_thread_enqueue, &adapter->tq);
2228	taskqueue_start_threads(&adapter->tq, 1, PI_NET, "%s link",
2229	    device_get_nameunit(adapter->dev));
2230
2231	return (0);
2232}
2233
2234
2235static void
2236igb_configure_queues(struct adapter *adapter)
2237{
2238	struct	e1000_hw	*hw = &adapter->hw;
2239	struct	igb_queue	*que;
2240	u32			tmp, ivar = 0;
2241	u32			newitr = IGB_DEFAULT_ITR;
2242
2243	/* First turn on RSS capability */
2244	if (adapter->hw.mac.type > e1000_82575)
2245		E1000_WRITE_REG(hw, E1000_GPIE,
2246		    E1000_GPIE_MSIX_MODE | E1000_GPIE_EIAME |
2247		    E1000_GPIE_PBA | E1000_GPIE_NSICR);
2248
2249	/* Turn on MSIX */
2250	switch (adapter->hw.mac.type) {
2251	case e1000_82580:
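		/*
		 * Each IVAR register on the 82580 covers two queues: the
		 * even queue's RX cause maps into bits 7:0 and its TX cause
		 * into bits 15:8; the odd queue's RX cause maps into bits
		 * 23:16 and its TX cause into bits 31:24, each entry tagged
		 * with E1000_IVAR_VALID.
		 */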
2252		/* RX entries */
2253		for (int i = 0; i < adapter->num_queues; i++) {
2254			u32 index = i >> 1;
2255			ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2256			que = &adapter->queues[i];
2257			if (i & 1) {
2258				ivar &= 0xFF00FFFF;
2259				ivar |= (que->msix | E1000_IVAR_VALID) << 16;
2260			} else {
2261				ivar &= 0xFFFFFF00;
2262				ivar |= que->msix | E1000_IVAR_VALID;
2263			}
2264			E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2265		}
2266		/* TX entries */
2267		for (int i = 0; i < adapter->num_queues; i++) {
2268			u32 index = i >> 1;
2269			ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2270			que = &adapter->queues[i];
2271			if (i & 1) {
2272				ivar &= 0x00FFFFFF;
2273				ivar |= (que->msix | E1000_IVAR_VALID) << 24;
2274			} else {
2275				ivar &= 0xFFFF00FF;
2276				ivar |= (que->msix | E1000_IVAR_VALID) << 8;
2277			}
2278			E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2279			adapter->eims_mask |= que->eims;
2280		}
2281
2282		/* And for the link interrupt */
2283		ivar = (adapter->linkvec | E1000_IVAR_VALID) << 8;
2284		adapter->link_mask = 1 << adapter->linkvec;
2285		adapter->eims_mask |= adapter->link_mask;
2286		E1000_WRITE_REG(hw, E1000_IVAR_MISC, ivar);
2287		break;
2288	case e1000_82576:
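		/*
		 * The 82576 IVAR layout differs: registers are indexed by
		 * (queue & 0x7); queues 0-7 use the low half (RX in bits
		 * 7:0, TX in bits 15:8) and queues 8-15 use the high half
		 * (RX in bits 23:16, TX in bits 31:24).
		 */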
2289		/* RX entries */
2290		for (int i = 0; i < adapter->num_queues; i++) {
2291			u32 index = i & 0x7; /* Each IVAR has two entries */
2292			ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2293			que = &adapter->queues[i];
2294			if (i < 8) {
2295				ivar &= 0xFFFFFF00;
2296				ivar |= que->msix | E1000_IVAR_VALID;
2297			} else {
2298				ivar &= 0xFF00FFFF;
2299				ivar |= (que->msix | E1000_IVAR_VALID) << 16;
2300			}
2301			E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2302			adapter->eims_mask |= que->eims;
2303		}
2304		/* TX entries */
2305		for (int i = 0; i < adapter->num_queues; i++) {
2306			u32 index = i & 0x7; /* Each IVAR has two entries */
2307			ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2308			que = &adapter->queues[i];
2309			if (i < 8) {
2310				ivar &= 0xFFFF00FF;
2311				ivar |= (que->msix | E1000_IVAR_VALID) << 8;
2312			} else {
2313				ivar &= 0x00FFFFFF;
2314				ivar |= (que->msix | E1000_IVAR_VALID) << 24;
2315			}
2316			E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2317			adapter->eims_mask |= que->eims;
2318		}
2319
2320		/* And for the link interrupt */
2321		ivar = (adapter->linkvec | E1000_IVAR_VALID) << 8;
2322		adapter->link_mask = 1 << adapter->linkvec;
2323		adapter->eims_mask |= adapter->link_mask;
2324		E1000_WRITE_REG(hw, E1000_IVAR_MISC, ivar);
2325		break;
2326
2327	case e1000_82575:
2328                /* enable MSI-X support*/
2329		tmp = E1000_READ_REG(hw, E1000_CTRL_EXT);
2330                tmp |= E1000_CTRL_EXT_PBA_CLR;
2331                /* Auto-Mask interrupts upon ICR read. */
2332                tmp |= E1000_CTRL_EXT_EIAME;
2333                tmp |= E1000_CTRL_EXT_IRCA;
2334                E1000_WRITE_REG(hw, E1000_CTRL_EXT, tmp);
2335
2336		/* Queues */
2337		for (int i = 0; i < adapter->num_queues; i++) {
2338			que = &adapter->queues[i];
2339			tmp = E1000_EICR_RX_QUEUE0 << i;
2340			tmp |= E1000_EICR_TX_QUEUE0 << i;
2341			que->eims = tmp;
2342			E1000_WRITE_REG_ARRAY(hw, E1000_MSIXBM(0),
2343			    i, que->eims);
2344			adapter->eims_mask |= que->eims;
2345		}
2346
2347		/* Link */
2348		E1000_WRITE_REG(hw, E1000_MSIXBM(adapter->linkvec),
2349		    E1000_EIMS_OTHER);
2350		adapter->link_mask |= E1000_EIMS_OTHER;
2351		adapter->eims_mask |= adapter->link_mask;
2352	default:
2353		break;
2354	}
2355
2356	/* Set the starting interrupt rate */
2357        if (hw->mac.type == e1000_82575)
2358                newitr |= newitr << 16;
2359        else
2360                newitr |= E1000_EITR_CNT_IGNR;
2361
2362	for (int i = 0; i < adapter->num_queues; i++) {
2363		que = &adapter->queues[i];
2364		E1000_WRITE_REG(hw, E1000_EITR(que->msix), newitr);
2365	}
2366
2367	return;
2368}
2369
2370
2371static void
2372igb_free_pci_resources(struct adapter *adapter)
2373{
2374	struct		igb_queue *que = adapter->queues;
2375	device_t	dev = adapter->dev;
2376	int		rid;
2377
2378	/*
2379	** There is a slight possibility of a failure mode
2380	** in attach that will result in entering this function
2381	** before interrupt resources have been initialized, and
2382	** in that case we do not want to execute the loops below.
2383	** We can detect this reliably by the state of the adapter
2384	** res pointer.
2385	*/
2386	if (adapter->res == NULL)
2387		goto mem;
2388
2389	/*
2390	 * First release all the interrupt resources:
2391	 */
2392	for (int i = 0; i < adapter->num_queues; i++, que++) {
2393		rid = que->msix + 1;
2394		if (que->tag != NULL) {
2395			bus_teardown_intr(dev, que->res, que->tag);
2396			que->tag = NULL;
2397		}
2398		if (que->res != NULL)
2399			bus_release_resource(dev,
2400			    SYS_RES_IRQ, rid, que->res);
2401	}
2402
2403	/* Clean the Legacy or Link interrupt last */
2404	if (adapter->linkvec) /* we are doing MSIX */
2405		rid = adapter->linkvec + 1;
2406	else
2407		rid = (adapter->msix != 0) ? 1 : 0;
2408
2409	if (adapter->tag != NULL) {
2410		bus_teardown_intr(dev, adapter->res, adapter->tag);
2411		adapter->tag = NULL;
2412	}
2413	if (adapter->res != NULL)
2414		bus_release_resource(dev, SYS_RES_IRQ, rid, adapter->res);
2415
2416mem:
2417	if (adapter->msix)
2418		pci_release_msi(dev);
2419
2420	if (adapter->msix_mem != NULL)
2421		bus_release_resource(dev, SYS_RES_MEMORY,
2422		    PCIR_BAR(IGB_MSIX_BAR), adapter->msix_mem);
2423
2424	if (adapter->pci_mem != NULL)
2425		bus_release_resource(dev, SYS_RES_MEMORY,
2426		    PCIR_BAR(0), adapter->pci_mem);
2427
2428}
2429
2430/*
2431 * Setup Either MSI/X or MSI
2432 */
2433static int
2434igb_setup_msix(struct adapter *adapter)
2435{
2436	device_t dev = adapter->dev;
2437	int rid, want, queues, msgs;
2438
2439	/* tuneable override */
2440	if (igb_enable_msix == 0)
2441		goto msi;
2442
2443	/* First try MSI/X */
2444	rid = PCIR_BAR(IGB_MSIX_BAR);
2445	adapter->msix_mem = bus_alloc_resource_any(dev,
2446	    SYS_RES_MEMORY, &rid, RF_ACTIVE);
2447       	if (!adapter->msix_mem) {
2448		/* May not be enabled */
2449		device_printf(adapter->dev,
2450		    "Unable to map MSIX table\n");
2451		goto msi;
2452	}
2453
2454	msgs = pci_msix_count(dev);
2455	if (msgs == 0) { /* system has msix disabled */
2456		bus_release_resource(dev, SYS_RES_MEMORY,
2457		    PCIR_BAR(IGB_MSIX_BAR), adapter->msix_mem);
2458		adapter->msix_mem = NULL;
2459		goto msi;
2460	}
2461
2462	/* Figure out a reasonable auto config value */
2463	queues = (mp_ncpus > (msgs-1)) ? (msgs-1) : mp_ncpus;
2464
2465	/* Manual override */
2466	if (igb_num_queues != 0)
2467		queues = igb_num_queues;
2468
2469	/* Can have max of 4 queues on 82575 */
2470	if ((adapter->hw.mac.type == e1000_82575) && (queues > 4))
2471		queues = 4;
2472
2473	/*
2474	** One vector (RX/TX pair) per queue
2475	** plus an additional for Link interrupt
2476	*/
2477	want = queues + 1;
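	/*
	 * Example: on a 4-core system reporting 10 MSI-X messages this
	 * yields queues = 4 and want = 5 (one vector per queue pair plus
	 * the link vector), so msgs is trimmed down to 5 below.
	 */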
2478	if (msgs >= want)
2479		msgs = want;
2480	else {
2481               	device_printf(adapter->dev,
2482		    "MSIX Configuration Problem, "
2483		    "%d vectors available, but %d wanted!\n",
2484		    msgs, want);
2485		return (ENXIO);
2486	}
2487	if ((msgs) && pci_alloc_msix(dev, &msgs) == 0) {
2488               	device_printf(adapter->dev,
2489		    "Using MSIX interrupts with %d vectors\n", msgs);
2490		adapter->num_queues = queues;
2491		return (msgs);
2492	}
2493msi:
2494       	msgs = pci_msi_count(dev);
2495       	if (msgs == 1 && pci_alloc_msi(dev, &msgs) == 0)
2496               	device_printf(adapter->dev,"Using MSI interrupt\n");
2497	return (msgs);
2498}
2499
2500/*********************************************************************
2501 *
2502 *  Set up a fresh starting state
2503 *
2504 **********************************************************************/
2505static void
2506igb_reset(struct adapter *adapter)
2507{
2508	device_t	dev = adapter->dev;
2509	struct e1000_hw *hw = &adapter->hw;
2510	struct e1000_fc_info *fc = &hw->fc;
2511	struct ifnet	*ifp = adapter->ifp;
2512	u32		pba = 0;
2513	u16		hwm;
2514
2515	INIT_DEBUGOUT("igb_reset: begin");
2516
2517	/* Let the firmware know the OS is in control */
2518	igb_get_hw_control(adapter);
2519
2520	/*
2521	 * Packet Buffer Allocation (PBA)
2522	 * Writing PBA sets the receive portion of the buffer;
2523	 * the remainder is used for the transmit buffer.
2524	 */
2525	switch (hw->mac.type) {
2526	case e1000_82575:
2527		pba = E1000_PBA_32K;
2528		break;
2529	case e1000_82576:
2530		pba = E1000_PBA_64K;
2531		break;
2532	case e1000_82580:
2533		pba = E1000_PBA_35K;
		break;
2534	default:
2535		break;
2536	}
2537
2538	/* Special needs in case of Jumbo frames */
2539	if ((hw->mac.type == e1000_82575) && (ifp->if_mtu > ETHERMTU)) {
2540		u32 tx_space, min_tx, min_rx;
2541		pba = E1000_READ_REG(hw, E1000_PBA);
2542		tx_space = pba >> 16;
2543		pba &= 0xffff;
2544		min_tx = (adapter->max_frame_size +
2545		    sizeof(struct e1000_tx_desc) - ETHERNET_FCS_SIZE) * 2;
2546		min_tx = roundup2(min_tx, 1024);
2547		min_tx >>= 10;
2548                min_rx = adapter->max_frame_size;
2549                min_rx = roundup2(min_rx, 1024);
2550                min_rx >>= 10;
2551		if (tx_space < min_tx &&
2552		    ((min_tx - tx_space) < pba)) {
2553			pba = pba - (min_tx - tx_space);
2554			/*
2555                         * if short on rx space, rx wins
2556                         * and must trump tx adjustment
2557			 */
2558                        if (pba < min_rx)
2559                                pba = min_rx;
2560		}
2561		E1000_WRITE_REG(hw, E1000_PBA, pba);
2562	}
2563
2564	INIT_DEBUGOUT1("igb_init: pba=%dK",pba);
2565
2566	/*
2567	 * These parameters control the automatic generation (Tx) and
2568	 * response (Rx) to Ethernet PAUSE frames.
2569	 * - High water mark should allow for at least two frames to be
2570	 *   received after sending an XOFF.
2571	 * - Low water mark works best when it is very near the high water mark.
2572	 *   This allows the receiver to restart by sending XON when it has
2573	 *   drained a bit.
2574	 */
2575	hwm = min(((pba << 10) * 9 / 10),
2576	    ((pba << 10) - 2 * adapter->max_frame_size));
2577
2578	if (hw->mac.type < e1000_82576) {
2579		fc->high_water = hwm & 0xFFF8;  /* 8-byte granularity */
2580		fc->low_water = fc->high_water - 8;
2581	} else {
2582		fc->high_water = hwm & 0xFFF0;  /* 16-byte granularity */
2583		fc->low_water = fc->high_water - 16;
2584	}
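	/*
	 * Example: a 64 KB receive PBA (as set above for the 82576) and an
	 * assumed 1518-byte max frame give hwm = min(58982, 62500) = 58982;
	 * masked to 16-byte granularity that is a high water mark of 58976
	 * and a low water mark of 58960.
	 */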
2585
2586	fc->pause_time = IGB_FC_PAUSE_TIME;
2587	fc->send_xon = TRUE;
2588
2589	/* Set Flow control, use the tunable location if sane */
2590	if ((igb_fc_setting >= 0) && (igb_fc_setting < 4))
2591		fc->requested_mode = igb_fc_setting;
2592	else
2593		fc->requested_mode = e1000_fc_none;
2594
2595	fc->current_mode = fc->requested_mode;
2596
2597	/* Issue a global reset */
2598	e1000_reset_hw(hw);
2599	E1000_WRITE_REG(hw, E1000_WUC, 0);
2600
2601	if (e1000_init_hw(hw) < 0)
2602		device_printf(dev, "Hardware Initialization Failed\n");
2603
2604	if (hw->mac.type == e1000_82580) {
2605		u32 reg;
2606
2607		hwm = (pba << 10) - (2 * adapter->max_frame_size);
2608		/*
2609		 * 0x80000000 - enable DMA COAL
2610		 * 0x10000000 - use L0s as low power
2611		 * 0x20000000 - use L1 as low power
2612		 * X << 16 - exit dma coal when rx data exceeds X kB
2613		 * Y - upper limit to stay in dma coal in units of 32usecs
2614		 */
2615		E1000_WRITE_REG(hw, E1000_DMACR,
2616		    0xA0000006 | ((hwm << 6) & 0x00FF0000));
2617
2618		/* set hwm to PBA -  2 * max frame size */
2619		E1000_WRITE_REG(hw, E1000_FCRTC, hwm);
2620		/*
2621		 * This sets the time to wait before requesting transition to
2622		 * low power state to number of usecs needed to receive 1 512
2623		 * byte frame at gigabit line rate
2624		 */
2625		E1000_WRITE_REG(hw, E1000_DMCTLX, 4);
2626
2627		/* free space in tx packet buffer to wake from DMA coal */
2628		E1000_WRITE_REG(hw, E1000_DMCTXTH,
2629		    (20480 - (2 * adapter->max_frame_size)) >> 6);
2630
2631		/* make low power state decision controlled by DMA coal */
2632		reg = E1000_READ_REG(hw, E1000_PCIEMISC);
2633		E1000_WRITE_REG(hw, E1000_PCIEMISC,
2634		    reg | E1000_PCIEMISC_LX_DECISION);
2635	}
2636
2637	E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);
2638	e1000_get_phy_info(hw);
2639	e1000_check_for_link(hw);
2640	return;
2641}
2642
2643/*********************************************************************
2644 *
2645 *  Setup networking device structure and register an interface.
2646 *
2647 **********************************************************************/
2648static void
2649igb_setup_interface(device_t dev, struct adapter *adapter)
2650{
2651	struct ifnet   *ifp;
2652
2653	INIT_DEBUGOUT("igb_setup_interface: begin");
2654
2655	ifp = adapter->ifp = if_alloc(IFT_ETHER);
2656	if (ifp == NULL)
2657		panic("%s: can not if_alloc()", device_get_nameunit(dev));
2658	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
2659	ifp->if_mtu = ETHERMTU;
2660	ifp->if_init =  igb_init;
2661	ifp->if_softc = adapter;
2662	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
2663	ifp->if_ioctl = igb_ioctl;
2664	ifp->if_start = igb_start;
2665#if __FreeBSD_version >= 800000
2666	ifp->if_transmit = igb_mq_start;
2667	ifp->if_qflush = igb_qflush;
2668#endif
2669	IFQ_SET_MAXLEN(&ifp->if_snd, adapter->num_tx_desc - 1);
2670	ifp->if_snd.ifq_drv_maxlen = adapter->num_tx_desc - 1;
2671	IFQ_SET_READY(&ifp->if_snd);
2672
2673	ether_ifattach(ifp, adapter->hw.mac.addr);
2674
2675	ifp->if_capabilities = ifp->if_capenable = 0;
2676
2677	ifp->if_capabilities = IFCAP_HWCSUM | IFCAP_VLAN_MTU;
2678	ifp->if_capabilities |= IFCAP_TSO4;
2679	ifp->if_capabilities |= IFCAP_JUMBO_MTU;
2680	if (igb_header_split)
2681		ifp->if_capabilities |= IFCAP_LRO;
2682
2683	ifp->if_capenable = ifp->if_capabilities;
2684#ifdef DEVICE_POLLING
2685	ifp->if_capabilities |= IFCAP_POLLING;
2686#endif
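	/*
	 * Polling support is advertised in if_capabilities above but is
	 * not turned on in if_capenable; it must be enabled explicitly
	 * at run time.
	 */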
2687
2688	/*
2689	 * Tell the upper layer(s) we
2690	 * support full VLAN capability.
2691	 */
2692	ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);
2693	ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU;
2694	ifp->if_capenable |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU;
2695
2696	/*
2697	** Don't turn this on by default: if vlans are
2698	** created on another pseudo device (e.g. lagg)
2699	** then vlan events are not passed through, breaking
2700	** operation, but with HW FILTER off it works. If
2701	** using vlans directly on the igb driver you can
2702	** enable this and get full hardware tag filtering.
2703	*/
2704	ifp->if_capabilities |= IFCAP_VLAN_HWFILTER;
2705
2706	/*
2707	 * Specify the media types supported by this adapter and register
2708	 * callbacks to update media and link information
2709	 */
2710	ifmedia_init(&adapter->media, IFM_IMASK,
2711	    igb_media_change, igb_media_status);
2712	if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
2713	    (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
2714		ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_SX | IFM_FDX,
2715			    0, NULL);
2716		ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_SX, 0, NULL);
2717	} else {
2718		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T, 0, NULL);
2719		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T | IFM_FDX,
2720			    0, NULL);
2721		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX,
2722			    0, NULL);
2723		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
2724			    0, NULL);
2725		if (adapter->hw.phy.type != e1000_phy_ife) {
2726			ifmedia_add(&adapter->media,
2727				IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
2728			ifmedia_add(&adapter->media,
2729				IFM_ETHER | IFM_1000_T, 0, NULL);
2730		}
2731	}
2732	ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
2733	ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
2734}
2735
2736
2737/*
2738 * Manage DMA'able memory.
2739 */
2740static void
2741igb_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
2742{
2743	if (error)
2744		return;
2745	*(bus_addr_t *) arg = segs[0].ds_addr;
2746}
2747
2748static int
2749igb_dma_malloc(struct adapter *adapter, bus_size_t size,
2750        struct igb_dma_alloc *dma, int mapflags)
2751{
2752	int error;
2753
2754	error = bus_dma_tag_create(bus_get_dma_tag(adapter->dev), /* parent */
2755				IGB_DBA_ALIGN, 0,	/* alignment, bounds */
2756				BUS_SPACE_MAXADDR,	/* lowaddr */
2757				BUS_SPACE_MAXADDR,	/* highaddr */
2758				NULL, NULL,		/* filter, filterarg */
2759				size,			/* maxsize */
2760				1,			/* nsegments */
2761				size,			/* maxsegsize */
2762				0,			/* flags */
2763				NULL,			/* lockfunc */
2764				NULL,			/* lockarg */
2765				&dma->dma_tag);
2766	if (error) {
2767		device_printf(adapter->dev,
2768		    "%s: bus_dma_tag_create failed: %d\n",
2769		    __func__, error);
2770		goto fail_0;
2771	}
2772
2773	error = bus_dmamem_alloc(dma->dma_tag, (void**) &dma->dma_vaddr,
2774	    BUS_DMA_NOWAIT, &dma->dma_map);
2775	if (error) {
2776		device_printf(adapter->dev,
2777		    "%s: bus_dmamem_alloc(%ju) failed: %d\n",
2778		    __func__, (uintmax_t)size, error);
2779		goto fail_1;
2780	}
2781
2782	dma->dma_paddr = 0;
2783	error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
2784	    size, igb_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT);
2785	if (error || dma->dma_paddr == 0) {
2786		device_printf(adapter->dev,
2787		    "%s: bus_dmamap_load failed: %d\n",
2788		    __func__, error);
2789		goto fail_3;
2790	}
2791
2792	return (0);
2793
2794fail_3:
2795	bus_dmamap_unload(dma->dma_tag, dma->dma_map);
2796fail_2:
2797	bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
fail_1:
2798	bus_dma_tag_destroy(dma->dma_tag);
2799fail_0:
2800	dma->dma_map = NULL;
2801	dma->dma_tag = NULL;
2802
2803	return (error);
2804}
2805
2806static void
2807igb_dma_free(struct adapter *adapter, struct igb_dma_alloc *dma)
2808{
2809	if (dma->dma_tag == NULL)
2810		return;
2811	if (dma->dma_map != NULL) {
2812		bus_dmamap_sync(dma->dma_tag, dma->dma_map,
2813		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
2814		bus_dmamap_unload(dma->dma_tag, dma->dma_map);
2815		bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
2816		dma->dma_map = NULL;
2817	}
2818	bus_dma_tag_destroy(dma->dma_tag);
2819	dma->dma_tag = NULL;
2820}
2821
2822
2823/*********************************************************************
2824 *
2825 *  Allocate memory for the transmit and receive rings, and then
2826 *  the descriptors associated with each, called only once at attach.
2827 *
2828 **********************************************************************/
2829static int
2830igb_allocate_queues(struct adapter *adapter)
2831{
2832	device_t dev = adapter->dev;
2833	struct igb_queue	*que = NULL;
2834	struct tx_ring		*txr = NULL;
2835	struct rx_ring		*rxr = NULL;
2836	int rsize, tsize, error = E1000_SUCCESS;
2837	int txconf = 0, rxconf = 0;
2838
2839	/* First allocate the top level queue structs */
2840	if (!(adapter->queues =
2841	    (struct igb_queue *) malloc(sizeof(struct igb_queue) *
2842	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2843		device_printf(dev, "Unable to allocate queue memory\n");
2844		error = ENOMEM;
2845		goto fail;
2846	}
2847
2848	/* Next allocate the TX ring struct memory */
2849	if (!(adapter->tx_rings =
2850	    (struct tx_ring *) malloc(sizeof(struct tx_ring) *
2851	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2852		device_printf(dev, "Unable to allocate TX ring memory\n");
2853		error = ENOMEM;
2854		goto tx_fail;
2855	}
2856
2857	/* Now allocate the RX */
2858	if (!(adapter->rx_rings =
2859	    (struct rx_ring *) malloc(sizeof(struct rx_ring) *
2860	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2861		device_printf(dev, "Unable to allocate RX ring memory\n");
2862		error = ENOMEM;
2863		goto rx_fail;
2864	}
2865
2866	tsize = roundup2(adapter->num_tx_desc *
2867	    sizeof(union e1000_adv_tx_desc), IGB_DBA_ALIGN);
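	/*
	 * The ring size is rounded up to IGB_DBA_ALIGN so the descriptor
	 * area allocated below satisfies the controller's base address
	 * alignment requirement (the RX rings are rounded the same way
	 * further down).
	 */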
2868	/*
2869	 * Now set up the TX queues; txconf is needed to handle the
2870	 * possibility that things fail midcourse and we need to
2871	 * unwind the allocations gracefully.
2872	 */
2873	for (int i = 0; i < adapter->num_queues; i++, txconf++) {
2874		/* Set up some basics */
2875		txr = &adapter->tx_rings[i];
2876		txr->adapter = adapter;
2877		txr->me = i;
2878
2879		/* Initialize the TX lock */
2880		snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
2881		    device_get_nameunit(dev), txr->me);
2882		mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF);
2883
2884		if (igb_dma_malloc(adapter, tsize,
2885			&txr->txdma, BUS_DMA_NOWAIT)) {
2886			device_printf(dev,
2887			    "Unable to allocate TX Descriptor memory\n");
2888			error = ENOMEM;
2889			goto err_tx_desc;
2890		}
2891		txr->tx_base = (struct e1000_tx_desc *)txr->txdma.dma_vaddr;
2892		bzero((void *)txr->tx_base, tsize);
2893
2894        	/* Now allocate transmit buffers for the ring */
2895        	if (igb_allocate_transmit_buffers(txr)) {
2896			device_printf(dev,
2897			    "Critical Failure setting up transmit buffers\n");
2898			error = ENOMEM;
2899			goto err_tx_desc;
2900        	}
2901#if __FreeBSD_version >= 800000
2902		/* Allocate a buf ring */
2903		txr->br = buf_ring_alloc(IGB_BR_SIZE, M_DEVBUF,
2904		    M_WAITOK, &txr->tx_mtx);
2905#endif
2906	}
2907
2908	/*
2909	 * Next the RX queues...
2910	 */
2911	rsize = roundup2(adapter->num_rx_desc *
2912	    sizeof(union e1000_adv_rx_desc), IGB_DBA_ALIGN);
2913	for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
2914		rxr = &adapter->rx_rings[i];
2915		rxr->adapter = adapter;
2916		rxr->me = i;
2917
2918		/* Initialize the RX lock */
2919		snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
2920		    device_get_nameunit(dev), rxr->me);
2921		mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF);
2922
2923		if (igb_dma_malloc(adapter, rsize,
2924			&rxr->rxdma, BUS_DMA_NOWAIT)) {
2925			device_printf(dev,
2926			    "Unable to allocate RxDescriptor memory\n");
2927			error = ENOMEM;
2928			goto err_rx_desc;
2929		}
2930		rxr->rx_base = (union e1000_adv_rx_desc *)rxr->rxdma.dma_vaddr;
2931		bzero((void *)rxr->rx_base, rsize);
2932
2933        	/* Allocate receive buffers for the ring*/
2934		if (igb_allocate_receive_buffers(rxr)) {
2935			device_printf(dev,
2936			    "Critical Failure setting up receive buffers\n");
2937			error = ENOMEM;
2938			goto err_rx_desc;
2939		}
2940	}
2941
2942	/*
2943	** Finally set up the queue holding structs
2944	*/
2945	for (int i = 0; i < adapter->num_queues; i++) {
2946		que = &adapter->queues[i];
2947		que->adapter = adapter;
2948		que->txr = &adapter->tx_rings[i];
2949		que->rxr = &adapter->rx_rings[i];
2950	}
2951
2952	return (0);
2953
2954err_rx_desc:
2955	for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
2956		igb_dma_free(adapter, &rxr->rxdma);
2957err_tx_desc:
2958	for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
2959		igb_dma_free(adapter, &txr->txdma);
2960	free(adapter->rx_rings, M_DEVBUF);
2961rx_fail:
#if __FreeBSD_version >= 800000
2962	buf_ring_free(txr->br, M_DEVBUF);
#endif
2963	free(adapter->tx_rings, M_DEVBUF);
2964tx_fail:
2965	free(adapter->queues, M_DEVBUF);
2966fail:
2967	return (error);
2968}
2969
2970/*********************************************************************
2971 *
2972 *  Allocate memory for tx_buffer structures. The tx_buffer stores all
2973 *  the information needed to transmit a packet on the wire. This is
2974 *  called only once at attach, setup is done every reset.
2975 *
2976 **********************************************************************/
2977static int
2978igb_allocate_transmit_buffers(struct tx_ring *txr)
2979{
2980	struct adapter *adapter = txr->adapter;
2981	device_t dev = adapter->dev;
2982	struct igb_tx_buffer *txbuf;
2983	int error, i;
2984
2985	/*
2986	 * Setup DMA descriptor areas.
2987	 */
2988	if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
2989			       1, 0,			/* alignment, bounds */
2990			       BUS_SPACE_MAXADDR,	/* lowaddr */
2991			       BUS_SPACE_MAXADDR,	/* highaddr */
2992			       NULL, NULL,		/* filter, filterarg */
2993			       IGB_TSO_SIZE,		/* maxsize */
2994			       IGB_MAX_SCATTER,		/* nsegments */
2995			       PAGE_SIZE,		/* maxsegsize */
2996			       0,			/* flags */
2997			       NULL,			/* lockfunc */
2998			       NULL,			/* lockfuncarg */
2999			       &txr->txtag))) {
3000		device_printf(dev,"Unable to allocate TX DMA tag\n");
3001		goto fail;
3002	}
3003
3004	if (!(txr->tx_buffers =
3005	    (struct igb_tx_buffer *) malloc(sizeof(struct igb_tx_buffer) *
3006	    adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3007		device_printf(dev, "Unable to allocate tx_buffer memory\n");
3008		error = ENOMEM;
3009		goto fail;
3010	}
3011
3012        /* Create the descriptor buffer dma maps */
3013	txbuf = txr->tx_buffers;
3014	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3015		error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
3016		if (error != 0) {
3017			device_printf(dev, "Unable to create TX DMA map\n");
3018			goto fail;
3019		}
3020	}
3021
3022	return 0;
3023fail:
3024	/* We free all, it handles case where we are in the middle */
3025	igb_free_transmit_structures(adapter);
3026	return (error);
3027}
3028
3029/*********************************************************************
3030 *
3031 *  Initialize a transmit ring.
3032 *
3033 **********************************************************************/
3034static void
3035igb_setup_transmit_ring(struct tx_ring *txr)
3036{
3037	struct adapter *adapter = txr->adapter;
3038	struct igb_tx_buffer *txbuf;
3039	int i;
3040
3041	/* Clear the old descriptor contents */
3042	IGB_TX_LOCK(txr);
3043	bzero((void *)txr->tx_base,
3044	      (sizeof(union e1000_adv_tx_desc)) * adapter->num_tx_desc);
3045	/* Reset indices */
3046	txr->next_avail_desc = 0;
3047	txr->next_to_clean = 0;
3048
3049	/* Free any existing tx buffers. */
3050        txbuf = txr->tx_buffers;
3051	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3052		if (txbuf->m_head != NULL) {
3053			bus_dmamap_sync(txr->txtag, txbuf->map,
3054			    BUS_DMASYNC_POSTWRITE);
3055			bus_dmamap_unload(txr->txtag, txbuf->map);
3056			m_freem(txbuf->m_head);
3057			txbuf->m_head = NULL;
3058		}
3059		/* clear the watch index */
3060		txbuf->next_eop = -1;
3061        }
3062
3063	/* Set number of descriptors available */
3064	txr->tx_avail = adapter->num_tx_desc;
3065
3066	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3067	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3068	IGB_TX_UNLOCK(txr);
3069}
3070
3071/*********************************************************************
3072 *
3073 *  Initialize all transmit rings.
3074 *
3075 **********************************************************************/
3076static void
3077igb_setup_transmit_structures(struct adapter *adapter)
3078{
3079	struct tx_ring *txr = adapter->tx_rings;
3080
3081	for (int i = 0; i < adapter->num_queues; i++, txr++)
3082		igb_setup_transmit_ring(txr);
3083
3084	return;
3085}
3086
3087/*********************************************************************
3088 *
3089 *  Enable transmit unit.
3090 *
3091 **********************************************************************/
3092static void
3093igb_initialize_transmit_units(struct adapter *adapter)
3094{
3095	struct tx_ring	*txr = adapter->tx_rings;
3096	struct e1000_hw *hw = &adapter->hw;
3097	u32		tctl, txdctl;
3098
3099	 INIT_DEBUGOUT("igb_initialize_transmit_units: begin");
3100
3101	/* Setup the Tx Descriptor Rings */
3102	for (int i = 0; i < adapter->num_queues; i++, txr++) {
3103		u64 bus_addr = txr->txdma.dma_paddr;
3104
3105		E1000_WRITE_REG(hw, E1000_TDLEN(i),
3106		    adapter->num_tx_desc * sizeof(struct e1000_tx_desc));
3107		E1000_WRITE_REG(hw, E1000_TDBAH(i),
3108		    (uint32_t)(bus_addr >> 32));
3109		E1000_WRITE_REG(hw, E1000_TDBAL(i),
3110		    (uint32_t)bus_addr);
3111
3112		/* Setup the HW Tx Head and Tail descriptor pointers */
3113		E1000_WRITE_REG(hw, E1000_TDT(i), 0);
3114		E1000_WRITE_REG(hw, E1000_TDH(i), 0);
3115
3116		HW_DEBUGOUT2("Base = %x, Length = %x\n",
3117		    E1000_READ_REG(hw, E1000_TDBAL(i)),
3118		    E1000_READ_REG(hw, E1000_TDLEN(i)));
3119
3120		txr->watchdog_check = FALSE;
3121
3122		txdctl = E1000_READ_REG(hw, E1000_TXDCTL(i));
3123		txdctl |= IGB_TX_PTHRESH;
3124		txdctl |= IGB_TX_HTHRESH << 8;
3125		txdctl |= IGB_TX_WTHRESH << 16;
3126		txdctl |= E1000_TXDCTL_QUEUE_ENABLE;
3127		E1000_WRITE_REG(hw, E1000_TXDCTL(i), txdctl);
3128	}
3129
3130	/* Program the Transmit Control Register */
3131	tctl = E1000_READ_REG(hw, E1000_TCTL);
3132	tctl &= ~E1000_TCTL_CT;
3133	tctl |= (E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN |
3134		   (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT));
3135
3136	e1000_config_collision_dist(hw);
3137
3138	/* This write will effectively turn on the transmit unit. */
3139	E1000_WRITE_REG(hw, E1000_TCTL, tctl);
3140}
3141
3142/*********************************************************************
3143 *
3144 *  Free all transmit rings.
3145 *
3146 **********************************************************************/
3147static void
3148igb_free_transmit_structures(struct adapter *adapter)
3149{
3150	struct tx_ring *txr = adapter->tx_rings;
3151
3152	for (int i = 0; i < adapter->num_queues; i++, txr++) {
3153		IGB_TX_LOCK(txr);
3154		igb_free_transmit_buffers(txr);
3155		igb_dma_free(adapter, &txr->txdma);
3156		IGB_TX_UNLOCK(txr);
3157		IGB_TX_LOCK_DESTROY(txr);
3158	}
3159	free(adapter->tx_rings, M_DEVBUF);
3160}
3161
3162/*********************************************************************
3163 *
3164 *  Free transmit ring related data structures.
3165 *
3166 **********************************************************************/
3167static void
3168igb_free_transmit_buffers(struct tx_ring *txr)
3169{
3170	struct adapter *adapter = txr->adapter;
3171	struct igb_tx_buffer *tx_buffer;
3172	int             i;
3173
3174	INIT_DEBUGOUT("free_transmit_ring: begin");
3175
3176	if (txr->tx_buffers == NULL)
3177		return;
3178
3179	tx_buffer = txr->tx_buffers;
3180	for (i = 0; i < adapter->num_tx_desc; i++, tx_buffer++) {
3181		if (tx_buffer->m_head != NULL) {
3182			bus_dmamap_sync(txr->txtag, tx_buffer->map,
3183			    BUS_DMASYNC_POSTWRITE);
3184			bus_dmamap_unload(txr->txtag,
3185			    tx_buffer->map);
3186			m_freem(tx_buffer->m_head);
3187			tx_buffer->m_head = NULL;
3188			if (tx_buffer->map != NULL) {
3189				bus_dmamap_destroy(txr->txtag,
3190				    tx_buffer->map);
3191				tx_buffer->map = NULL;
3192			}
3193		} else if (tx_buffer->map != NULL) {
3194			bus_dmamap_unload(txr->txtag,
3195			    tx_buffer->map);
3196			bus_dmamap_destroy(txr->txtag,
3197			    tx_buffer->map);
3198			tx_buffer->map = NULL;
3199		}
3200	}
3201#if __FreeBSD_version >= 800000
3202	if (txr->br != NULL)
3203		buf_ring_free(txr->br, M_DEVBUF);
3204#endif
3205	if (txr->tx_buffers != NULL) {
3206		free(txr->tx_buffers, M_DEVBUF);
3207		txr->tx_buffers = NULL;
3208	}
3209	if (txr->txtag != NULL) {
3210		bus_dma_tag_destroy(txr->txtag);
3211		txr->txtag = NULL;
3212	}
3213	return;
3214}
3215
3216/**********************************************************************
3217 *
3218 *  Setup work for hardware segmentation offload (TSO)
3219 *
3220 **********************************************************************/
3221static boolean_t
3222igb_tso_setup(struct tx_ring *txr, struct mbuf *mp, u32 *hdrlen)
3223{
3224	struct adapter *adapter = txr->adapter;
3225	struct e1000_adv_tx_context_desc *TXD;
3226	struct igb_tx_buffer        *tx_buffer;
3227	u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0;
3228	u32 mss_l4len_idx = 0;
3229	u16 vtag = 0;
3230	int ctxd, ehdrlen, ip_hlen, tcp_hlen;
3231	struct ether_vlan_header *eh;
3232	struct ip *ip;
3233	struct tcphdr *th;
3234
3235
3236	/*
3237	 * Determine where frame payload starts.
3238	 * Jump over vlan headers if already present
3239	 */
3240	eh = mtod(mp, struct ether_vlan_header *);
3241	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN))
3242		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3243	else
3244		ehdrlen = ETHER_HDR_LEN;
3245
3246	/* Ensure we have at least the IP+TCP header in the first mbuf. */
3247	if (mp->m_len < ehdrlen + sizeof(struct ip) + sizeof(struct tcphdr))
3248		return FALSE;
3249
3250	/* Only supports IPV4 for now */
3251	ctxd = txr->next_avail_desc;
3252	tx_buffer = &txr->tx_buffers[ctxd];
3253	TXD = (struct e1000_adv_tx_context_desc *) &txr->tx_base[ctxd];
3254
3255	ip = (struct ip *)(mp->m_data + ehdrlen);
3256	if (ip->ip_p != IPPROTO_TCP)
3257                return FALSE;   /* not TCP, TSO not possible */
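	/*
	 * For TSO the hardware regenerates the IP and TCP checksums for
	 * every segment, so the IP checksum is cleared here and the TCP
	 * checksum field is seeded with the pseudo-header sum (addresses
	 * and protocol only, no length).
	 */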
3258	ip->ip_sum = 0;
3259	ip_hlen = ip->ip_hl << 2;
3260	th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
3261	th->th_sum = in_pseudo(ip->ip_src.s_addr,
3262	    ip->ip_dst.s_addr, htons(IPPROTO_TCP));
3263	tcp_hlen = th->th_off << 2;
3264	/*
3265	 * Calculate header length, this is used
3266	 * in the transmit desc in igb_xmit
3267	 */
3268	*hdrlen = ehdrlen + ip_hlen + tcp_hlen;
3269
3270	/* VLAN MACLEN IPLEN */
3271	if (mp->m_flags & M_VLANTAG) {
3272		vtag = htole16(mp->m_pkthdr.ether_vtag);
3273		vlan_macip_lens |= (vtag << E1000_ADVTXD_VLAN_SHIFT);
3274	}
3275
3276	vlan_macip_lens |= (ehdrlen << E1000_ADVTXD_MACLEN_SHIFT);
3277	vlan_macip_lens |= ip_hlen;
3278	TXD->vlan_macip_lens |= htole32(vlan_macip_lens);
3279
3280	/* ADV DTYPE TUCMD */
3281	type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
3282	type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP;
3283	type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4;
3284	TXD->type_tucmd_mlhl |= htole32(type_tucmd_mlhl);
3285
3286	/* MSS L4LEN IDX */
3287	mss_l4len_idx |= (mp->m_pkthdr.tso_segsz << E1000_ADVTXD_MSS_SHIFT);
3288	mss_l4len_idx |= (tcp_hlen << E1000_ADVTXD_L4LEN_SHIFT);
3289	/* 82575 needs the queue index added */
3290	if (adapter->hw.mac.type == e1000_82575)
3291		mss_l4len_idx |= txr->me << 4;
3292	TXD->mss_l4len_idx = htole32(mss_l4len_idx);
3293
3294	TXD->seqnum_seed = htole32(0);
3295	tx_buffer->m_head = NULL;
3296	tx_buffer->next_eop = -1;
3297
3298	if (++ctxd == adapter->num_tx_desc)
3299		ctxd = 0;
3300
3301	txr->tx_avail--;
3302	txr->next_avail_desc = ctxd;
3303	return TRUE;
3304}
3305
3306
3307/*********************************************************************
3308 *
3309 *  Context Descriptor setup for VLAN or CSUM
3310 *
3311 **********************************************************************/
3312
3313static bool
3314igb_tx_ctx_setup(struct tx_ring *txr, struct mbuf *mp)
3315{
3316	struct adapter *adapter = txr->adapter;
3317	struct e1000_adv_tx_context_desc *TXD;
3318	struct igb_tx_buffer        *tx_buffer;
3319	u32 vlan_macip_lens, type_tucmd_mlhl, mss_l4len_idx;
3320	struct ether_vlan_header *eh;
3321	struct ip *ip = NULL;
3322	struct ip6_hdr *ip6;
3323	int  ehdrlen, ctxd, ip_hlen = 0;
3324	u16	etype, vtag = 0;
3325	u8	ipproto = 0;
3326	bool	offload = TRUE;
3327
3328	if ((mp->m_pkthdr.csum_flags & CSUM_OFFLOAD) == 0)
3329		offload = FALSE;
3330
3331	vlan_macip_lens = type_tucmd_mlhl = mss_l4len_idx = 0;
3332	ctxd = txr->next_avail_desc;
3333	tx_buffer = &txr->tx_buffers[ctxd];
3334	TXD = (struct e1000_adv_tx_context_desc *) &txr->tx_base[ctxd];
3335
3336	/*
3337	** In advanced descriptors the vlan tag must
3338	** be placed into the context descriptor, thus
3339	** we need to be here just for that setup.
3340	*/
3341	if (mp->m_flags & M_VLANTAG) {
3342		vtag = htole16(mp->m_pkthdr.ether_vtag);
3343		vlan_macip_lens |= (vtag << E1000_ADVTXD_VLAN_SHIFT);
3344	} else if (offload == FALSE)
3345		return FALSE;
3346
3347	/*
3348	 * Determine where frame payload starts.
3349	 * Jump over vlan headers if already present,
3350	 * helpful for QinQ too.
3351	 */
3352	eh = mtod(mp, struct ether_vlan_header *);
3353	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
3354		etype = ntohs(eh->evl_proto);
3355		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3356	} else {
3357		etype = ntohs(eh->evl_encap_proto);
3358		ehdrlen = ETHER_HDR_LEN;
3359	}
3360
3361	/* Set the ether header length */
3362	vlan_macip_lens |= ehdrlen << E1000_ADVTXD_MACLEN_SHIFT;
3363
3364	switch (etype) {
3365		case ETHERTYPE_IP:
3366			ip = (struct ip *)(mp->m_data + ehdrlen);
3367			ip_hlen = ip->ip_hl << 2;
3368			if (mp->m_len < ehdrlen + ip_hlen) {
3369				offload = FALSE;
3370				break;
3371			}
3372			ipproto = ip->ip_p;
3373			type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4;
3374			break;
3375		case ETHERTYPE_IPV6:
3376			ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
3377			ip_hlen = sizeof(struct ip6_hdr);
3378			if (mp->m_len < ehdrlen + ip_hlen)
3379				return (FALSE);
3380			ipproto = ip6->ip6_nxt;
3381			type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV6;
3382			break;
3383		default:
3384			offload = FALSE;
3385			break;
3386	}
3387
3388	vlan_macip_lens |= ip_hlen;
3389	type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
3390
3391	switch (ipproto) {
3392		case IPPROTO_TCP:
3393			if (mp->m_pkthdr.csum_flags & CSUM_TCP)
3394				type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP;
3395			break;
3396		case IPPROTO_UDP:
3397			if (mp->m_pkthdr.csum_flags & CSUM_UDP)
3398				type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_UDP;
3399			break;
3400#if __FreeBSD_version >= 800000
3401		case IPPROTO_SCTP:
3402			if (mp->m_pkthdr.csum_flags & CSUM_SCTP)
3403				type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_SCTP;
3404			break;
3405#endif
3406		default:
3407			offload = FALSE;
3408			break;
3409	}
3410
3411	/* 82575 needs the queue index added */
3412	if (adapter->hw.mac.type == e1000_82575)
3413		mss_l4len_idx = txr->me << 4;
3414
3415	/* Now copy bits into descriptor */
3416	TXD->vlan_macip_lens |= htole32(vlan_macip_lens);
3417	TXD->type_tucmd_mlhl |= htole32(type_tucmd_mlhl);
3418	TXD->seqnum_seed = htole32(0);
3419	TXD->mss_l4len_idx = htole32(mss_l4len_idx);
3420
3421	tx_buffer->m_head = NULL;
3422	tx_buffer->next_eop = -1;
3423
3424	/* We've consumed the first desc, adjust counters */
3425	if (++ctxd == adapter->num_tx_desc)
3426		ctxd = 0;
3427	txr->next_avail_desc = ctxd;
3428	--txr->tx_avail;
3429
3430        return (offload);
3431}
3432
3433
3434/**********************************************************************
3435 *
3436 *  Examine each tx_buffer in the used queue. If the hardware is done
3437 *  processing the packet then free associated resources. The
3438 *  tx_buffer is put back on the free queue.
3439 *
3440 *  A TRUE return means there is more work in the ring to clean; FALSE means it is empty.
3441 **********************************************************************/
3442static bool
3443igb_txeof(struct tx_ring *txr)
3444{
3445	struct adapter	*adapter = txr->adapter;
3446        int first, last, done;
3447        struct igb_tx_buffer *tx_buffer;
3448        struct e1000_tx_desc   *tx_desc, *eop_desc;
3449	struct ifnet   *ifp = adapter->ifp;
3450
3451	IGB_TX_LOCK_ASSERT(txr);
3452
3453        if (txr->tx_avail == adapter->num_tx_desc)
3454                return FALSE;
3455
3456        first = txr->next_to_clean;
3457        tx_desc = &txr->tx_base[first];
3458        tx_buffer = &txr->tx_buffers[first];
3459	last = tx_buffer->next_eop;
3460        eop_desc = &txr->tx_base[last];
3461
3462	/*
3463	 * Get the index of the first descriptor
3464	 * AFTER the EOP of the first packet, so the
3465	 * inner while loop below can use a simple
3466	 * comparison to know when to stop.
3467	 */
3468	if (++last == adapter->num_tx_desc)
3469 		last = 0;
3470	done = last;
3471
3472        bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3473            BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
3474
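	/*
	 * DD is written back by the hardware only on descriptors that
	 * requested Report Status (the EOP descriptor set up in igb_xmit),
	 * so once it is seen every descriptor of that packet can be
	 * reclaimed.
	 */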
3475        while (eop_desc->upper.fields.status & E1000_TXD_STAT_DD) {
3476		/* We clean the range of the packet */
3477		while (first != done) {
3478                	tx_desc->upper.data = 0;
3479                	tx_desc->lower.data = 0;
3480                	tx_desc->buffer_addr = 0;
3481                	++txr->tx_avail;
3482
3483			if (tx_buffer->m_head) {
3484				txr->bytes +=
3485				    tx_buffer->m_head->m_pkthdr.len;
3486				bus_dmamap_sync(txr->txtag,
3487				    tx_buffer->map,
3488				    BUS_DMASYNC_POSTWRITE);
3489				bus_dmamap_unload(txr->txtag,
3490				    tx_buffer->map);
3491
3492                        	m_freem(tx_buffer->m_head);
3493                        	tx_buffer->m_head = NULL;
3494                	}
3495			tx_buffer->next_eop = -1;
3496			txr->watchdog_time = ticks;
3497
3498	                if (++first == adapter->num_tx_desc)
3499				first = 0;
3500
3501	                tx_buffer = &txr->tx_buffers[first];
3502			tx_desc = &txr->tx_base[first];
3503		}
3504		++txr->packets;
3505		++ifp->if_opackets;
3506		/* See if we can continue to the next packet */
3507		last = tx_buffer->next_eop;
3508		if (last != -1) {
3509        		eop_desc = &txr->tx_base[last];
3510			/* Get new done point */
3511			if (++last == adapter->num_tx_desc) last = 0;
3512			done = last;
3513		} else
3514			break;
3515        }
3516        bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3517            BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3518
3519        txr->next_to_clean = first;
3520
3521        /*
3522         * If we have enough room, clear IFF_DRV_OACTIVE
3523         * to tell the stack that it is OK to send packets.
3524         */
3525        if (txr->tx_avail > IGB_TX_CLEANUP_THRESHOLD) {
3526                ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
3527		/* All clean, turn off the watchdog */
3528                if (txr->tx_avail == adapter->num_tx_desc) {
3529			txr->watchdog_check = FALSE;
3530			return FALSE;
3531		}
3532        }
3533
3534	return (TRUE);
3535}
3536
3537
3538/*********************************************************************
3539 *
3540 *  Refresh mbuf buffers for RX descriptor rings
3541 *   - keeps its own state, so discards due to resource
3542 *     exhaustion are unnecessary; if an mbuf cannot be obtained
3543 *     the routine just returns, keeping its placeholder, and can
3544 *     simply be called again later to retry.
3545 *
3546 **********************************************************************/
3547static void
3548igb_refresh_mbufs(struct rx_ring *rxr, int limit)
3549{
3550	struct adapter		*adapter = rxr->adapter;
3551	bus_dma_segment_t	hseg[1];
3552	bus_dma_segment_t	pseg[1];
3553	struct igb_rx_buf	*rxbuf;
3554	struct mbuf		*mh, *mp;
3555	int			i, nsegs, error, cleaned;
3556
3557	i = rxr->next_to_refresh;
3558	cleaned = -1; /* Signify no completions */
3559	while (i != limit) {
3560		rxbuf = &rxr->rx_buffers[i];
3561		if (rxbuf->m_head == NULL) {
3562			mh = m_gethdr(M_DONTWAIT, MT_DATA);
3563			if (mh == NULL)
3564				goto update;
3565			mh->m_pkthdr.len = mh->m_len = MHLEN;
3567			mh->m_flags |= M_PKTHDR;
3568			m_adj(mh, ETHER_ALIGN);
3569			/* Get the memory mapping */
3570			error = bus_dmamap_load_mbuf_sg(rxr->htag,
3571			    rxbuf->hmap, mh, hseg, &nsegs, BUS_DMA_NOWAIT);
3572			if (error != 0) {
3573				printf("GET BUF: dmamap load"
3574				    " failure - %d\n", error);
3575				m_free(mh);
3576				goto update;
3577			}
3578			rxbuf->m_head = mh;
3579			bus_dmamap_sync(rxr->htag, rxbuf->hmap,
3580			    BUS_DMASYNC_PREREAD);
3581			rxr->rx_base[i].read.hdr_addr =
3582			    htole64(hseg[0].ds_addr);
3583		}
3584
3585		if (rxbuf->m_pack == NULL) {
3586			mp = m_getjcl(M_DONTWAIT, MT_DATA,
3587			    M_PKTHDR, adapter->rx_mbuf_sz);
3588			if (mp == NULL)
3589				goto update;
3590			mp->m_pkthdr.len = mp->m_len = adapter->rx_mbuf_sz;
3591			/* Get the memory mapping */
3592			error = bus_dmamap_load_mbuf_sg(rxr->ptag,
3593			    rxbuf->pmap, mp, pseg, &nsegs, BUS_DMA_NOWAIT);
3594			if (error != 0) {
3595				printf("GET BUF: dmamap load"
3596				    " failure - %d\n", error);
3597				m_free(mp);
3598				goto update;
3599			}
3600			rxbuf->m_pack = mp;
3601			bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
3602			    BUS_DMASYNC_PREREAD);
3603			rxr->rx_base[i].read.pkt_addr =
3604			    htole64(pseg[0].ds_addr);
3605		}
3606
3607		cleaned = i;
3608		/* Calculate next index */
3609		if (++i == adapter->num_rx_desc)
3610			i = 0;
3611		/* This is the work marker for refresh */
3612		rxr->next_to_refresh = i;
3613	}
3614update:
3615	if (cleaned != -1) /* If we refreshed some, bump tail */
3616		E1000_WRITE_REG(&adapter->hw,
3617		    E1000_RDT(rxr->me), cleaned);
3618	return;
3619}
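/*
 * Editor's note: because igb_refresh_mbufs() keeps its own
 * next_to_refresh marker and simply returns on an mbuf shortage, a
 * caller can safely invoke it again later and it will resume where
 * it left off.  A hedged sketch of the calling pattern (mirroring
 * the batching done in igb_rxeof() further below); "more_work()" is
 * a hypothetical placeholder, not a driver function.
 */
#if 0
static void
rx_refresh_caller_sketch(struct adapter *adapter, struct rx_ring *rxr)
{
	int i = rxr->next_to_check, processed = 0;

	while (more_work(rxr, i)) {		/* hypothetical predicate */
		/* ... consume descriptor i and pass the frame up ... */
		if (++i == adapter->num_rx_desc)
			i = 0;
		if (++processed == 8) {		/* refresh in small batches */
			igb_refresh_mbufs(rxr, i);
			processed = 0;
		}
	}
	if (processed != 0)			/* catch the remainder */
		igb_refresh_mbufs(rxr, i);
}
#endif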
3620
3621
3622/*********************************************************************
3623 *
3624 *  Allocate memory for rx_buffer structures. Since we use one
3625 *  rx_buffer per received packet, the maximum number of rx_buffer's
3626 *  that we'll need is equal to the number of receive descriptors
3627 *  that we've allocated.
3628 *
3629 **********************************************************************/
3630static int
3631igb_allocate_receive_buffers(struct rx_ring *rxr)
3632{
3633	struct	adapter 	*adapter = rxr->adapter;
3634	device_t 		dev = adapter->dev;
3635	struct igb_rx_buf	*rxbuf;
3636	int             	i, bsize, error;
3637
3638	bsize = sizeof(struct igb_rx_buf) * adapter->num_rx_desc;
3639	if (!(rxr->rx_buffers =
3640	    (struct igb_rx_buf *) malloc(bsize,
3641	    M_DEVBUF, M_NOWAIT | M_ZERO))) {
3642		device_printf(dev, "Unable to allocate rx_buffer memory\n");
3643		error = ENOMEM;
3644		goto fail;
3645	}
3646
3647	if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
3648				   1, 0,		/* alignment, bounds */
3649				   BUS_SPACE_MAXADDR,	/* lowaddr */
3650				   BUS_SPACE_MAXADDR,	/* highaddr */
3651				   NULL, NULL,		/* filter, filterarg */
3652				   MSIZE,		/* maxsize */
3653				   1,			/* nsegments */
3654				   MSIZE,		/* maxsegsize */
3655				   0,			/* flags */
3656				   NULL,		/* lockfunc */
3657				   NULL,		/* lockfuncarg */
3658				   &rxr->htag))) {
3659		device_printf(dev, "Unable to create RX DMA tag\n");
3660		goto fail;
3661	}
3662
3663	if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
3664				   1, 0,		/* alignment, bounds */
3665				   BUS_SPACE_MAXADDR,	/* lowaddr */
3666				   BUS_SPACE_MAXADDR,	/* highaddr */
3667				   NULL, NULL,		/* filter, filterarg */
3668				   MJUMPAGESIZE,	/* maxsize */
3669				   1,			/* nsegments */
3670				   MJUMPAGESIZE,	/* maxsegsize */
3671				   0,			/* flags */
3672				   NULL,		/* lockfunc */
3673				   NULL,		/* lockfuncarg */
3674				   &rxr->ptag))) {
3675		device_printf(dev, "Unable to create RX payload DMA tag\n");
3676		goto fail;
3677	}
3678
3679	for (i = 0; i < adapter->num_rx_desc; i++) {
3680		rxbuf = &rxr->rx_buffers[i];
3681		error = bus_dmamap_create(rxr->htag,
3682		    BUS_DMA_NOWAIT, &rxbuf->hmap);
3683		if (error) {
3684			device_printf(dev,
3685			    "Unable to create RX head DMA maps\n");
3686			goto fail;
3687		}
3688		error = bus_dmamap_create(rxr->ptag,
3689		    BUS_DMA_NOWAIT, &rxbuf->pmap);
3690		if (error) {
3691			device_printf(dev,
3692			    "Unable to create RX packet DMA maps\n");
3693			goto fail;
3694		}
3695	}
3696
3697	return (0);
3698
3699fail:
3700	/* Frees all, but can handle partial completion */
3701	igb_free_receive_structures(adapter);
3702	return (error);
3703}
3704
3705
3706static void
3707igb_free_receive_ring(struct rx_ring *rxr)
3708{
3709	struct	adapter		*adapter;
3710	struct igb_rx_buf	*rxbuf;
3711	int i;
3712
3713	adapter = rxr->adapter;
3714	for (i = 0; i < adapter->num_rx_desc; i++) {
3715		rxbuf = &rxr->rx_buffers[i];
3716		if (rxbuf->m_head != NULL) {
3717			bus_dmamap_sync(rxr->htag, rxbuf->hmap,
3718			    BUS_DMASYNC_POSTREAD);
3719			bus_dmamap_unload(rxr->htag, rxbuf->hmap);
3720			rxbuf->m_head->m_flags |= M_PKTHDR;
3721			m_freem(rxbuf->m_head);
3722		}
3723		if (rxbuf->m_pack != NULL) {
3724			bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
3725			    BUS_DMASYNC_POSTREAD);
3726			bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
3727			rxbuf->m_pack->m_flags |= M_PKTHDR;
3728			m_freem(rxbuf->m_pack);
3729		}
3730		rxbuf->m_head = NULL;
3731		rxbuf->m_pack = NULL;
3732	}
3733}
3734
3735
3736/*********************************************************************
3737 *
3738 *  Initialize a receive ring and its buffers.
3739 *
3740 **********************************************************************/
3741static int
3742igb_setup_receive_ring(struct rx_ring *rxr)
3743{
3744	struct	adapter		*adapter;
3745	struct  ifnet		*ifp;
3746	device_t		dev;
3747	struct igb_rx_buf	*rxbuf;
3748	bus_dma_segment_t	pseg[1], hseg[1];
3749	struct lro_ctrl		*lro = &rxr->lro;
3750	int			rsize, nsegs, error = 0;
3751
3752	adapter = rxr->adapter;
3753	dev = adapter->dev;
3754	ifp = adapter->ifp;
3755
3756	/* Clear the ring contents */
3757	IGB_RX_LOCK(rxr);
3758	rsize = roundup2(adapter->num_rx_desc *
3759	    sizeof(union e1000_adv_rx_desc), IGB_DBA_ALIGN);
3760	bzero((void *)rxr->rx_base, rsize);
3761
3762	/*
3763	** Free current RX buffer structures and their mbufs
3764	*/
3765	igb_free_receive_ring(rxr);
3766
3767        /* Now replenish the ring mbufs */
3768	for (int j = 0; j != adapter->num_rx_desc; ++j) {
3769		struct mbuf	*mh, *mp;
3770
3771		rxbuf = &rxr->rx_buffers[j];
3772
3773		/* First the header */
3774		rxbuf->m_head = m_gethdr(M_DONTWAIT, MT_DATA);
3775		if (rxbuf->m_head == NULL)
3776                        goto fail;
3777		m_adj(rxbuf->m_head, ETHER_ALIGN);
3778		mh = rxbuf->m_head;
3779		mh->m_len = mh->m_pkthdr.len = MHLEN;
3780		mh->m_flags |= M_PKTHDR;
3781		/* Get the memory mapping */
3782		error = bus_dmamap_load_mbuf_sg(rxr->htag,
3783		    rxbuf->hmap, rxbuf->m_head, hseg,
3784		    &nsegs, BUS_DMA_NOWAIT);
3785		if (error != 0) /* Nothing elegant to do here */
3786                        goto fail;
3787		bus_dmamap_sync(rxr->htag,
3788		    rxbuf->hmap, BUS_DMASYNC_PREREAD);
3789		/* Update descriptor */
3790		rxr->rx_base[j].read.hdr_addr = htole64(hseg[0].ds_addr);
3791
3792		/* Now the payload cluster */
3793		rxbuf->m_pack = m_getjcl(M_DONTWAIT, MT_DATA,
3794		    M_PKTHDR, adapter->rx_mbuf_sz);
3795		if (rxbuf->m_pack == NULL)
3796                        goto fail;
3797		mp = rxbuf->m_pack;
3798		mp->m_pkthdr.len = mp->m_len = adapter->rx_mbuf_sz;
3799		/* Get the memory mapping */
3800		error = bus_dmamap_load_mbuf_sg(rxr->ptag,
3801		    rxbuf->pmap, mp, pseg,
3802		    &nsegs, BUS_DMA_NOWAIT);
3803		if (error != 0)
3804                        goto fail;
3805		bus_dmamap_sync(rxr->ptag,
3806		    rxbuf->pmap, BUS_DMASYNC_PREREAD);
3807		/* Update descriptor */
3808		rxr->rx_base[j].read.pkt_addr = htole64(pseg[0].ds_addr);
3809        }
3810
3811	/* Setup our descriptor indices */
3812	rxr->next_to_check = 0;
3813	rxr->next_to_refresh = 0;
3814	rxr->lro_enabled = FALSE;
3815
3816	if (igb_header_split)
3817		rxr->hdr_split = TRUE;
3818	else
3819		ifp->if_capabilities &= ~IFCAP_LRO;
3820
3821	rxr->fmp = NULL;
3822	rxr->lmp = NULL;
3823	rxr->discard = FALSE;
3824
3825	bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
3826	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3827
3828	/*
3829	** Now set up the LRO interface; note
3830	** that LRO is only offered when header
3831	** split is enabled, since the two are
3832	** meant to be used together.
3833	*/
3834	if (ifp->if_capenable & IFCAP_LRO) {
3835		int err = tcp_lro_init(lro);
3836		if (err) {
3837			device_printf(dev, "LRO Initialization failed!\n");
3838			goto fail;
3839		}
3840		INIT_DEBUGOUT("RX LRO Initialized\n");
3841		rxr->lro_enabled = TRUE;
3842		lro->ifp = adapter->ifp;
3843	}
3844
3845	IGB_RX_UNLOCK(rxr);
3846	return (0);
3847
3848fail:
3849	igb_free_receive_ring(rxr);
3850	IGB_RX_UNLOCK(rxr);
3851	return (error);
3852}
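/*
 * Editor's note: a condensed view of the software LRO life cycle the
 * ring setup above participates in.  Every call used here appears
 * elsewhere in this file; the function itself is only an
 * illustrative sketch, not driver code.
 */
#if 0
static void
lro_lifecycle_sketch(struct lro_ctrl *lro, struct ifnet *ifp, struct mbuf *m)
{
	struct lro_entry *queued;

	if (tcp_lro_init(lro) != 0)		/* once, at ring setup */
		return;
	lro->ifp = ifp;

	if (tcp_lro_rx(lro, m, 0) != 0)		/* per received packet */
		(*ifp->if_input)(ifp, m);	/* fall back to the stack */

	/* At the end of an RX pass, flush anything still queued. */
	while ((queued = SLIST_FIRST(&lro->lro_active)) != NULL) {
		SLIST_REMOVE_HEAD(&lro->lro_active, next);
		tcp_lro_flush(lro, queued);
	}

	tcp_lro_free(lro);			/* once, at ring teardown */
}
#endif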
3853
3854/*********************************************************************
3855 *
3856 *  Initialize all receive rings.
3857 *
3858 **********************************************************************/
3859static int
3860igb_setup_receive_structures(struct adapter *adapter)
3861{
3862	struct rx_ring *rxr = adapter->rx_rings;
3863	int i, j;
3864
3865	for (i = 0; i < adapter->num_queues; i++, rxr++)
3866		if (igb_setup_receive_ring(rxr))
3867			goto fail;
3868
3869	return (0);
3870fail:
3871	/*
3872	 * Free the RX buffers allocated so far; we only handle
3873	 * the rings that completed, since the failing ring has
3874	 * already cleaned up after itself. 'i' is the index of
3875	 * the failed ring, so free every ring before it.
3876	 */
3877	rxr = adapter->rx_rings;
3878	for (j = 0; j < i; j++, rxr++)
3879		igb_free_receive_ring(rxr);
3882
3883	return (ENOBUFS);
3884}
3885
3886/*********************************************************************
3887 *
3888 *  Enable receive unit.
3889 *
3890 **********************************************************************/
3891static void
3892igb_initialize_receive_units(struct adapter *adapter)
3893{
3894	struct rx_ring	*rxr = adapter->rx_rings;
3895	struct ifnet	*ifp = adapter->ifp;
3896	struct e1000_hw *hw = &adapter->hw;
3897	u32		rctl, rxcsum, psize, srrctl = 0;
3898
3899	INIT_DEBUGOUT("igb_initialize_receive_units: begin");
3900
3901	/*
3902	 * Make sure receives are disabled while setting
3903	 * up the descriptor ring
3904	 */
3905	rctl = E1000_READ_REG(hw, E1000_RCTL);
3906	E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
3907
3908	/*
3909	** Set up for header split
3910	*/
3911	if (rxr->hdr_split) {
3912		/* Use a standard mbuf for the header */
3913		srrctl |= IGB_HDR_BUF << E1000_SRRCTL_BSIZEHDRSIZE_SHIFT;
3914		srrctl |= E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
3915	} else
3916		srrctl |= E1000_SRRCTL_DESCTYPE_ADV_ONEBUF;
3917
3918	/*
3919	** Set up for jumbo frames
3920	*/
3921	if (ifp->if_mtu > ETHERMTU) {
3922		rctl |= E1000_RCTL_LPE;
3923		srrctl |= 4096 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
3924		rctl |= E1000_RCTL_SZ_4096 | E1000_RCTL_BSEX;
3925
3926		/* Set maximum packet len */
3927		psize = adapter->max_frame_size;
3928		/* are we on a vlan? */
3929		if (adapter->ifp->if_vlantrunk != NULL)
3930			psize += VLAN_TAG_SIZE;
3931		E1000_WRITE_REG(&adapter->hw, E1000_RLPML, psize);
3932	} else {
3933		rctl &= ~E1000_RCTL_LPE;
3934		srrctl |= 2048 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
3935		rctl |= E1000_RCTL_SZ_2048;
3936	}
3937
3938	/* Setup the Base and Length of the Rx Descriptor Rings */
3939	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
3940		u64 bus_addr = rxr->rxdma.dma_paddr;
3941		u32 rxdctl;
3942
3943		E1000_WRITE_REG(hw, E1000_RDLEN(i),
3944		    adapter->num_rx_desc * sizeof(union e1000_adv_rx_desc));
3945		E1000_WRITE_REG(hw, E1000_RDBAH(i),
3946		    (uint32_t)(bus_addr >> 32));
3947		E1000_WRITE_REG(hw, E1000_RDBAL(i),
3948		    (uint32_t)bus_addr);
3949		E1000_WRITE_REG(hw, E1000_SRRCTL(i), srrctl);
3950		/* Enable this Queue */
3951		rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(i));
3952		rxdctl |= E1000_RXDCTL_QUEUE_ENABLE;
3953		rxdctl &= 0xFFF00000;
3954		rxdctl |= IGB_RX_PTHRESH;
3955		rxdctl |= IGB_RX_HTHRESH << 8;
3956		rxdctl |= IGB_RX_WTHRESH << 16;
3957		E1000_WRITE_REG(hw, E1000_RXDCTL(i), rxdctl);
3958	}
3959
3960	/*
3961	** Setup for RX MultiQueue
3962	*/
3963	rxcsum = E1000_READ_REG(hw, E1000_RXCSUM);
3964	if (adapter->num_queues > 1) {
3965		u32 random[10], mrqc, shift = 0;
3966		union igb_reta {
3967			u32 dword;
3968			u8  bytes[4];
3969		} reta;
3970
3971		arc4rand(&random, sizeof(random), 0);
3972		if (adapter->hw.mac.type == e1000_82575)
3973			shift = 6;
3974		/* Warning FM follows */
3975		for (int i = 0; i < 128; i++) {
3976			reta.bytes[i & 3] =
3977			    (i % adapter->num_queues) << shift;
3978			if ((i & 3) == 3)
3979				E1000_WRITE_REG(hw,
3980				    E1000_RETA(i >> 2), reta.dword);
3981		}
3982		/* Now fill in hash table */
3983		mrqc = E1000_MRQC_ENABLE_RSS_4Q;
3984		for (int i = 0; i < 10; i++)
3985			E1000_WRITE_REG_ARRAY(hw,
3986			    E1000_RSSRK(0), i, random[i]);
3987
3988		mrqc |= (E1000_MRQC_RSS_FIELD_IPV4 |
3989		    E1000_MRQC_RSS_FIELD_IPV4_TCP);
3990		mrqc |= (E1000_MRQC_RSS_FIELD_IPV6 |
3991		    E1000_MRQC_RSS_FIELD_IPV6_TCP);
3992		mrqc |= (E1000_MRQC_RSS_FIELD_IPV4_UDP |
3993		    E1000_MRQC_RSS_FIELD_IPV6_UDP);
3994		mrqc |= (E1000_MRQC_RSS_FIELD_IPV6_UDP_EX |
3995		    E1000_MRQC_RSS_FIELD_IPV6_TCP_EX);
3996
3997		E1000_WRITE_REG(hw, E1000_MRQC, mrqc);
3998
3999		/*
4000		** NOTE: Receive Full-Packet Checksum Offload
4001		** is mutually exclusive with Multiqueue. However,
4002		** this is not the same as the TCP/IP checksum
4003		** offloads, which still work.
4004		*/
4005		rxcsum |= E1000_RXCSUM_PCSD;
4006#if __FreeBSD_version >= 800000
4007		/* For SCTP Offload */
4008		if ((hw->mac.type == e1000_82576)
4009		    && (ifp->if_capenable & IFCAP_RXCSUM))
4010			rxcsum |= E1000_RXCSUM_CRCOFL;
4011#endif
4012	} else {
4013		/* Non RSS setup */
4014		if (ifp->if_capenable & IFCAP_RXCSUM) {
4015			rxcsum |= E1000_RXCSUM_IPPCSE;
4016#if __FreeBSD_version >= 800000
4017			if (adapter->hw.mac.type == e1000_82576)
4018				rxcsum |= E1000_RXCSUM_CRCOFL;
4019#endif
4020		} else
4021			rxcsum &= ~E1000_RXCSUM_TUOFL;
4022	}
4023	E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum);
4024
4025	/* Setup the Receive Control Register */
4026	rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
4027	rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_LBM_NO |
4028		   E1000_RCTL_RDMTS_HALF |
4029		   (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
4030	/* Strip CRC bytes. */
4031	rctl |= E1000_RCTL_SECRC;
4032	/* Make sure VLAN Filters are off */
4033	rctl &= ~E1000_RCTL_VFE;
4034	/* Don't store bad packets */
4035	rctl &= ~E1000_RCTL_SBP;
4036
4037	/* Enable Receives */
4038	E1000_WRITE_REG(hw, E1000_RCTL, rctl);
4039
4040	/*
4041	 * Setup the HW Rx Head and Tail Descriptor Pointers
4042	 *   - needs to be after enable
4043	 */
4044	for (int i = 0; i < adapter->num_queues; i++) {
4045		E1000_WRITE_REG(hw, E1000_RDH(i), 0);
4046		E1000_WRITE_REG(hw, E1000_RDT(i),
4047		     adapter->num_rx_desc - 1);
4048	}
4049	return;
4050}
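/*
 * Editor's note: the RSS redirection table (RETA) loop above packs
 * 128 one-byte queue indices into 32 32-bit registers, four entries
 * per register, issuing a register write whenever the low two bits
 * of the entry index roll over.  Self-contained sketch of just the
 * packing; the register write is left as a commented placeholder and
 * the values shown are illustrative.
 */
#if 0
static void
reta_packing_sketch(void)
{
	union {
		u32 dword;
		u8  bytes[4];
	} reta;
	int num_queues = 2;
	int shift = 0;			/* 6 only on 82575-class parts */

	for (int i = 0; i < 128; i++) {
		reta.bytes[i & 3] = (i % num_queues) << shift;
		if ((i & 3) == 3) {
			/* One 32-bit write covers entries i-3 .. i:	*/
			/* E1000_WRITE_REG(hw, E1000_RETA(i >> 2), reta.dword); */
		}
	}
}
#endif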
4051
4052/*********************************************************************
4053 *
4054 *  Free receive rings.
4055 *
4056 **********************************************************************/
4057static void
4058igb_free_receive_structures(struct adapter *adapter)
4059{
4060	struct rx_ring *rxr = adapter->rx_rings;
4061
4062	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4063		struct lro_ctrl	*lro = &rxr->lro;
4064		igb_free_receive_buffers(rxr);
4065		tcp_lro_free(lro);
4066		igb_dma_free(adapter, &rxr->rxdma);
4067	}
4068
4069	free(adapter->rx_rings, M_DEVBUF);
4070}
4071
4072/*********************************************************************
4073 *
4074 *  Free receive ring data structures.
4075 *
4076 **********************************************************************/
4077static void
4078igb_free_receive_buffers(struct rx_ring *rxr)
4079{
4080	struct adapter		*adapter = rxr->adapter;
4081	struct igb_rx_buf	*rxbuf;
4082	int i;
4083
4084	INIT_DEBUGOUT("free_receive_structures: begin");
4085
4086	/* Cleanup any existing buffers */
4087	if (rxr->rx_buffers != NULL) {
4088		for (i = 0; i < adapter->num_rx_desc; i++) {
4089			rxbuf = &rxr->rx_buffers[i];
4090			if (rxbuf->m_head != NULL) {
4091				bus_dmamap_sync(rxr->htag, rxbuf->hmap,
4092				    BUS_DMASYNC_POSTREAD);
4093				bus_dmamap_unload(rxr->htag, rxbuf->hmap);
4094				rxbuf->m_head->m_flags |= M_PKTHDR;
4095				m_freem(rxbuf->m_head);
4096			}
4097			if (rxbuf->m_pack != NULL) {
4098				bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
4099				    BUS_DMASYNC_POSTREAD);
4100				bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
4101				rxbuf->m_pack->m_flags |= M_PKTHDR;
4102				m_freem(rxbuf->m_pack);
4103			}
4104			rxbuf->m_head = NULL;
4105			rxbuf->m_pack = NULL;
4106			if (rxbuf->hmap != NULL) {
4107				bus_dmamap_destroy(rxr->htag, rxbuf->hmap);
4108				rxbuf->hmap = NULL;
4109			}
4110			if (rxbuf->pmap != NULL) {
4111				bus_dmamap_destroy(rxr->ptag, rxbuf->pmap);
4112				rxbuf->pmap = NULL;
4113			}
4114		}
4115		if (rxr->rx_buffers != NULL) {
4116			free(rxr->rx_buffers, M_DEVBUF);
4117			rxr->rx_buffers = NULL;
4118		}
4119	}
4120
4121	if (rxr->htag != NULL) {
4122		bus_dma_tag_destroy(rxr->htag);
4123		rxr->htag = NULL;
4124	}
4125	if (rxr->ptag != NULL) {
4126		bus_dma_tag_destroy(rxr->ptag);
4127		rxr->ptag = NULL;
4128	}
4129}
4130
4131static __inline void
4132igb_rx_discard(struct rx_ring *rxr, int i)
4133{
4134	struct adapter		*adapter = rxr->adapter;
4135	struct igb_rx_buf	*rbuf;
4136	struct mbuf             *mh, *mp;
4137
4138	rbuf = &rxr->rx_buffers[i];
4139	if (rxr->fmp != NULL) {
4140		rxr->fmp->m_flags |= M_PKTHDR;
4141		m_freem(rxr->fmp);
4142		rxr->fmp = NULL;
4143		rxr->lmp = NULL;
4144	}
4145
4146	mh = rbuf->m_head;
4147	mp = rbuf->m_pack;
4148
4149	/* Reuse loaded DMA map and just update mbuf chain */
4150	mh->m_len = MHLEN;
4151	mh->m_flags |= M_PKTHDR;
4152	mh->m_next = NULL;
4153
4154	mp->m_len = mp->m_pkthdr.len = adapter->rx_mbuf_sz;
4155	mp->m_data = mp->m_ext.ext_buf;
4156	mp->m_next = NULL;
4157	return;
4158}
4159
4160static __inline void
4161igb_rx_input(struct rx_ring *rxr, struct ifnet *ifp, struct mbuf *m, u32 ptype)
4162{
4163
4164	/*
4165	 * At the moment LRO is only applied to IPv4/TCP packets whose TCP
4166	 * checksum has been verified by the hardware, and which carry no
4167	 * VLAN tag in the Ethernet header.
4168	 */
4169	if (rxr->lro_enabled &&
4170	    (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 &&
4171	    (ptype & E1000_RXDADV_PKTTYPE_ETQF) == 0 &&
4172	    (ptype & (E1000_RXDADV_PKTTYPE_IPV4 | E1000_RXDADV_PKTTYPE_TCP)) ==
4173	    (E1000_RXDADV_PKTTYPE_IPV4 | E1000_RXDADV_PKTTYPE_TCP) &&
4174	    (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) ==
4175	    (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) {
4176		/*
4177		 * Send to the stack if:
4178		 *  - LRO not enabled, or
4179		 *  - no LRO resources, or
4180		 *  - LRO enqueue fails
4181		 */
4182		if (rxr->lro.lro_cnt != 0)
4183			if (tcp_lro_rx(&rxr->lro, m, 0) == 0)
4184				return;
4185	}
4186	(*ifp->if_input)(ifp, m);
4187}
4188
4189/*********************************************************************
4190 *
4191 *  This routine executes in interrupt context. It replenishes
4192 *  the mbufs in the descriptor and sends data which has been
4193 *  the mbufs in the descriptor ring and sends data which has
4194 *  been dma'ed into host memory up to the upper layer.
4195 *  We loop at most count times if count is > 0, or until done if
4196 *  count < 0.
4197 *
4198 *  Return TRUE if more to clean, FALSE otherwise
4199 *********************************************************************/
4200static bool
4201igb_rxeof(struct igb_queue *que, int count)
4202{
4203	struct adapter		*adapter = que->adapter;
4204	struct rx_ring		*rxr = que->rxr;
4205	struct ifnet		*ifp = adapter->ifp;
4206	struct lro_ctrl		*lro = &rxr->lro;
4207	struct lro_entry	*queued;
4208	int			i, processed = 0;
4209	u32			ptype, staterr = 0;
4210	union e1000_adv_rx_desc	*cur;
4211
4212	IGB_RX_LOCK(rxr);
4213	/* Sync the ring. */
4214	bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4215	    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
4216
4217	/* Main clean loop */
4218	for (i = rxr->next_to_check; count != 0;) {
4219		struct mbuf		*sendmp, *mh, *mp;
4220		struct igb_rx_buf	*rxbuf;
4221		u16			hlen, plen, hdr, vtag;
4222		bool			eop = FALSE;
4223
4224		cur = &rxr->rx_base[i];
4225		staterr = le32toh(cur->wb.upper.status_error);
4226		if ((staterr & E1000_RXD_STAT_DD) == 0)
4227			break;
4228		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
4229			break;
4230		count--;
4231		sendmp = mh = mp = NULL;
4232		cur->wb.upper.status_error = 0;
4233		rxbuf = &rxr->rx_buffers[i];
4234		plen = le16toh(cur->wb.upper.length);
4235		ptype = le32toh(cur->wb.lower.lo_dword.data) & IGB_PKTTYPE_MASK;
4236		vtag = le16toh(cur->wb.upper.vlan);
4237		hdr = le16toh(cur->wb.lower.lo_dword.hs_rss.hdr_info);
4238		eop = ((staterr & E1000_RXD_STAT_EOP) == E1000_RXD_STAT_EOP);
4239
4240		/* Make sure all segments of a bad packet are discarded */
4241		if (((staterr & E1000_RXDEXT_ERR_FRAME_ERR_MASK) != 0) ||
4242		    (rxr->discard)) {
4243			ifp->if_ierrors++;
4244			++rxr->rx_discarded;
4245			if (!eop) /* Catch subsequent segs */
4246				rxr->discard = TRUE;
4247			else
4248				rxr->discard = FALSE;
4249			igb_rx_discard(rxr, i);
4250			goto next_desc;
4251		}
4252
4253		/*
4254		** As configured, the hardware will ONLY use
4255		** the header buffer when header split is
4256		** enabled; otherwise we get the normal
4257		** behavior, i.e., both header and payload
4258		** are DMA'd into the payload buffer.
4259		**
4260		** The fmp test catches the case where a
4261		** packet spans multiple descriptors; in that
4262		** case only the first header is valid.
4263		*/
4264		if (rxr->hdr_split && rxr->fmp == NULL) {
4265			hlen = (hdr & E1000_RXDADV_HDRBUFLEN_MASK) >>
4266			    E1000_RXDADV_HDRBUFLEN_SHIFT;
4267			if (hlen > IGB_HDR_BUF)
4268				hlen = IGB_HDR_BUF;
4269			/* Handle the header mbuf */
4270			mh = rxr->rx_buffers[i].m_head;
4271			mh->m_len = hlen;
4272			/* clear buf info for refresh */
4273			rxbuf->m_head = NULL;
4274			/*
4275			** Get the payload length; this
4276			** could be zero if it is a small
4277			** packet.
4278			*/
4279			if (plen > 0) {
4280				mp = rxr->rx_buffers[i].m_pack;
4281				mp->m_len = plen;
4282				mh->m_next = mp;
4283				/* clear buf info for refresh */
4284				rxbuf->m_pack = NULL;
4285				rxr->rx_split_packets++;
4286			}
4287		} else {
4288			/*
4289			** Either no header split, or a
4290			** secondary piece of a fragmented
4291			** split packet.
4292			*/
4293			mh = rxr->rx_buffers[i].m_pack;
4294			mh->m_len = plen;
4295			/* clear buf info for refresh */
4296			rxbuf->m_pack = NULL;
4297		}
4298
4299		++processed; /* So we know when to refresh */
4300
4301		/* Initial frame - setup */
4302		if (rxr->fmp == NULL) {
4303			mh->m_pkthdr.len = mh->m_len;
4304			/* Store the first mbuf */
4305			rxr->fmp = mh;
4306			rxr->lmp = mh;
4307			if (mp != NULL) {
4308				/* Add payload if split */
4309				mh->m_pkthdr.len += mp->m_len;
4310				rxr->lmp = mh->m_next;
4311			}
4312		} else {
4313			/* Chain mbuf's together */
4314			rxr->lmp->m_next = mh;
4315			rxr->lmp = rxr->lmp->m_next;
4316			rxr->fmp->m_pkthdr.len += mh->m_len;
4317		}
4318
4319		if (eop) {
4320			rxr->fmp->m_pkthdr.rcvif = ifp;
4321			ifp->if_ipackets++;
4322			rxr->rx_packets++;
4323			/* capture data for AIM */
4324			rxr->packets++;
4325			rxr->bytes += rxr->fmp->m_pkthdr.len;
4326			rxr->rx_bytes += rxr->fmp->m_pkthdr.len;
4327
4328			if ((ifp->if_capenable & IFCAP_RXCSUM) != 0)
4329				igb_rx_checksum(staterr, rxr->fmp, ptype);
4330
4331			if ((ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 &&
4332			    (staterr & E1000_RXD_STAT_VP) != 0) {
4333				rxr->fmp->m_pkthdr.ether_vtag = vtag;
4334				rxr->fmp->m_flags |= M_VLANTAG;
4335			}
4336#if __FreeBSD_version >= 800000
4337			rxr->fmp->m_pkthdr.flowid = que->msix;
4338			rxr->fmp->m_flags |= M_FLOWID;
4339#endif
4340			sendmp = rxr->fmp;
4341			/* Make sure to set M_PKTHDR. */
4342			sendmp->m_flags |= M_PKTHDR;
4343			rxr->fmp = NULL;
4344			rxr->lmp = NULL;
4345		}
4346
4347next_desc:
4348		bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4349		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4350
4351		/* Advance our pointers to the next descriptor. */
4352		if (++i == adapter->num_rx_desc)
4353			i = 0;
4354		/*
4355		** Send to the stack or LRO
4356		*/
4357		if (sendmp != NULL)
4358			igb_rx_input(rxr, ifp, sendmp, ptype);
4359
4360		/* Every 8 descriptors we go to refresh mbufs */
4361		if (processed == 8) {
4362                        igb_refresh_mbufs(rxr, i);
4363                        processed = 0;
4364		}
4365	}
4366
4367	/* Catch any remainders */
4368	if (processed != 0) {
4369		igb_refresh_mbufs(rxr, i);
4370		processed = 0;
4371	}
4372
4373	rxr->next_to_check = i;
4374
4375	/*
4376	 * Flush any outstanding LRO work
4377	 */
4378	while ((queued = SLIST_FIRST(&lro->lro_active)) != NULL) {
4379		SLIST_REMOVE_HEAD(&lro->lro_active, next);
4380		tcp_lro_flush(lro, queued);
4381	}
4382
4383	IGB_RX_UNLOCK(rxr);
4384
4385	/*
4386	** Do we still have cleaning to do?
4387	** If so, the caller will schedule another pass.
4388	*/
4389	if ((staterr & E1000_RXD_STAT_DD) != 0)
4390		return (TRUE);
4391
4392	return (FALSE);
4393}
4394
4395/*********************************************************************
4396 *
4397 *  Verify that the hardware indicated that the checksum is valid.
4398 *  Inform the stack about the status of the checksum so that the
4399 *  stack doesn't spend time verifying it again.
4400 *
4401 *********************************************************************/
4402static void
4403igb_rx_checksum(u32 staterr, struct mbuf *mp, u32 ptype)
4404{
4405	u16 status = (u16)staterr;
4406	u8  errors = (u8) (staterr >> 24);
4407	int sctp;
4408
4409	/* Ignore Checksum bit is set */
4410	if (status & E1000_RXD_STAT_IXSM) {
4411		mp->m_pkthdr.csum_flags = 0;
4412		return;
4413	}
4414
4415	if ((ptype & E1000_RXDADV_PKTTYPE_ETQF) == 0 &&
4416	    (ptype & E1000_RXDADV_PKTTYPE_SCTP) != 0)
4417		sctp = 1;
4418	else
4419		sctp = 0;
4420	if (status & E1000_RXD_STAT_IPCS) {
4421		/* Did it pass? */
4422		if (!(errors & E1000_RXD_ERR_IPE)) {
4423			/* IP Checksum Good */
4424			mp->m_pkthdr.csum_flags = CSUM_IP_CHECKED;
4425			mp->m_pkthdr.csum_flags |= CSUM_IP_VALID;
4426		} else
4427			mp->m_pkthdr.csum_flags = 0;
4428	}
4429
4430	if (status & (E1000_RXD_STAT_TCPCS | E1000_RXD_STAT_UDPCS)) {
4431		u16 type = (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
4432#if __FreeBSD_version >= 800000
4433		if (sctp) /* reassign */
4434			type = CSUM_SCTP_VALID;
4435#endif
4436		/* Did it pass? */
4437		if (!(errors & E1000_RXD_ERR_TCPE)) {
4438			mp->m_pkthdr.csum_flags |= type;
4439			if (sctp == 0)
4440				mp->m_pkthdr.csum_data = htons(0xffff);
4441		}
4442	}
4443	return;
4444}
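/*
 * Editor's note: worked example of the mapping above.  For a good
 * TCP packet over IPv4 the descriptor status carries IPCS and TCPCS
 * with no error bits set, so the mbuf leaves this routine marked as
 * shown below (flags and value taken directly from the code above);
 * the sketch is illustrative, not driver code.
 */
#if 0
static void
csum_flags_example(struct mbuf *mp)
{
	/* staterr had E1000_RXD_STAT_IPCS | E1000_RXD_STAT_TCPCS, errors == 0 */
	mp->m_pkthdr.csum_flags = CSUM_IP_CHECKED | CSUM_IP_VALID |
	    CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
	mp->m_pkthdr.csum_data = htons(0xffff);
	/* The stack can then skip software checksum verification. */
}
#endif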
4445
4446/*
4447 * This routine is run via a vlan
4448 * config EVENT
4449 */
4450static void
4451igb_register_vlan(void *arg, struct ifnet *ifp, u16 vtag)
4452{
4453	struct adapter	*adapter = ifp->if_softc;
4454	u32		index, bit;
4455
4456	if (ifp->if_softc !=  arg)   /* Not our event */
4457		return;
4458
4459	if ((vtag == 0) || (vtag > 4095))       /* Invalid */
4460                return;
4461
4462	index = (vtag >> 5) & 0x7F;
4463	bit = vtag & 0x1F;
4464	igb_shadow_vfta[index] |= (1 << bit);
4465	++adapter->num_vlans;
4466	/* Re-init to load the changes */
4467	igb_init(adapter);
4468}
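/*
 * Editor's note: the VFTA shadow is an array of 32-bit words (128 of
 * them, covering all 4096 VLAN ids), so a 12-bit VLAN id splits into
 * a word index (upper 7 bits) and a bit position (lower 5 bits).
 * Worked example for vtag 100; illustrative sketch only.
 */
#if 0
static void
vfta_index_example(void)
{
	u16 vtag  = 100;
	u32 index = (vtag >> 5) & 0x7F;		/* 100 >> 5   == 3 */
	u32 bit   = vtag & 0x1F;		/* 100 & 0x1F == 4 */

	igb_shadow_vfta[index] |= (1 << bit);	/* sets bit 4 of word 3 */
}
#endif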
4469
4470/*
4471 * This routine is run via a vlan
4472 * unconfig EVENT
4473 */
4474static void
4475igb_unregister_vlan(void *arg, struct ifnet *ifp, u16 vtag)
4476{
4477	struct adapter	*adapter = ifp->if_softc;
4478	u32		index, bit;
4479
4480	if (ifp->if_softc !=  arg)
4481		return;
4482
4483	if ((vtag == 0) || (vtag > 4095))       /* Invalid */
4484                return;
4485
4486	index = (vtag >> 5) & 0x7F;
4487	bit = vtag & 0x1F;
4488	igb_shadow_vfta[index] &= ~(1 << bit);
4489	--adapter->num_vlans;
4490	/* Re-init to load the changes */
4491	igb_init(adapter);
4492}
4493
4494static void
4495igb_setup_vlan_hw_support(struct adapter *adapter)
4496{
4497	struct e1000_hw *hw = &adapter->hw;
4498	u32             reg;
4499
4500	/*
4501	** We get here through init_locked, meaning
4502	** a soft reset, which has already cleared
4503	** the VFTA and other state; if no vlans
4504	** have been registered there is nothing to do.
4505	*/
4506	if (adapter->num_vlans == 0)
4507                return;
4508
4509	/*
4510	** A soft reset zeroes out the VFTA, so
4511	** we need to repopulate it now.
4512	*/
4513	for (int i = 0; i < IGB_VFTA_SIZE; i++)
4514                if (igb_shadow_vfta[i] != 0)
4515			E1000_WRITE_REG_ARRAY(hw, E1000_VFTA,
4516                            i, igb_shadow_vfta[i]);
4517
4518	reg = E1000_READ_REG(hw, E1000_CTRL);
4519	reg |= E1000_CTRL_VME;
4520	E1000_WRITE_REG(hw, E1000_CTRL, reg);
4521
4522	/* Enable the Filter Table */
4523	reg = E1000_READ_REG(hw, E1000_RCTL);
4524	reg &= ~E1000_RCTL_CFIEN;
4525	reg |= E1000_RCTL_VFE;
4526	E1000_WRITE_REG(hw, E1000_RCTL, reg);
4527
4528	/* Update the frame size */
4529	E1000_WRITE_REG(&adapter->hw, E1000_RLPML,
4530	    adapter->max_frame_size + VLAN_TAG_SIZE);
4531}
4532
4533static void
4534igb_enable_intr(struct adapter *adapter)
4535{
4536	/* With RSS set up what to auto clear */
4537	if (adapter->msix_mem) {
4538		E1000_WRITE_REG(&adapter->hw, E1000_EIAC,
4539		    adapter->eims_mask);
4540		E1000_WRITE_REG(&adapter->hw, E1000_EIAM,
4541		    adapter->eims_mask);
4542		E1000_WRITE_REG(&adapter->hw, E1000_EIMS,
4543		    adapter->eims_mask);
4544		E1000_WRITE_REG(&adapter->hw, E1000_IMS,
4545		    E1000_IMS_LSC);
4546	} else {
4547		E1000_WRITE_REG(&adapter->hw, E1000_IMS,
4548		    IMS_ENABLE_MASK);
4549	}
4550	E1000_WRITE_FLUSH(&adapter->hw);
4551
4552	return;
4553}
4554
4555static void
4556igb_disable_intr(struct adapter *adapter)
4557{
4558	if (adapter->msix_mem) {
4559		E1000_WRITE_REG(&adapter->hw, E1000_EIMC, ~0);
4560		E1000_WRITE_REG(&adapter->hw, E1000_EIAC, 0);
4561	}
4562	E1000_WRITE_REG(&adapter->hw, E1000_IMC, ~0);
4563	E1000_WRITE_FLUSH(&adapter->hw);
4564	return;
4565}
4566
4567/*
4568 * Bit of a misnomer: what this really means is
4569 * to enable OS management of the system, i.e.,
4570 * to disable the special hardware management features.
4571 */
4572static void
4573igb_init_manageability(struct adapter *adapter)
4574{
4575	if (adapter->has_manage) {
4576		int manc2h = E1000_READ_REG(&adapter->hw, E1000_MANC2H);
4577		int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
4578
4579		/* disable hardware interception of ARP */
4580		manc &= ~(E1000_MANC_ARP_EN);
4581
4582                /* enable receiving management packets to the host */
4583		manc |= E1000_MANC_EN_MNG2HOST;
4584		manc2h |= 1 << 5;  /* Mng Port 623 */
4585		manc2h |= 1 << 6;  /* Mng Port 664 */
4586		E1000_WRITE_REG(&adapter->hw, E1000_MANC2H, manc2h);
4587		E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
4588	}
4589}
4590
4591/*
4592 * Give control back to hardware management
4593 * controller if there is one.
4594 */
4595static void
4596igb_release_manageability(struct adapter *adapter)
4597{
4598	if (adapter->has_manage) {
4599		int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
4600
4601		/* re-enable hardware interception of ARP */
4602		manc |= E1000_MANC_ARP_EN;
4603		manc &= ~E1000_MANC_EN_MNG2HOST;
4604
4605		E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
4606	}
4607}
4608
4609/*
4610 * igb_get_hw_control sets CTRL_EXT:DRV_LOAD bit.
4611 * For ASF and Pass Through versions of f/w this means that
4612 * the driver is loaded.
4613 *
4614 */
4615static void
4616igb_get_hw_control(struct adapter *adapter)
4617{
4618	u32 ctrl_ext;
4619
4620	/* Let firmware know the driver has taken over */
4621	ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4622	E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
4623	    ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
4624}
4625
4626/*
4627 * igb_release_hw_control resets CTRL_EXT:DRV_LOAD bit.
4628 * For ASF and Pass Through versions of f/w this means that the
4629 * driver is no longer loaded.
4630 *
4631 */
4632static void
4633igb_release_hw_control(struct adapter *adapter)
4634{
4635	u32 ctrl_ext;
4636
4637	/* Let firmware take over control of h/w */
4638	ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4639	E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
4640	    ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
4641}
4642
4643static int
4644igb_is_valid_ether_addr(uint8_t *addr)
4645{
4646	char zero_addr[6] = { 0, 0, 0, 0, 0, 0 };
4647
4648	if ((addr[0] & 1) || (!bcmp(addr, zero_addr, ETHER_ADDR_LEN))) {
4649		return (FALSE);
4650	}
4651
4652	return (TRUE);
4653}
4654
4655
4656/*
4657 * Enable PCI Wake On Lan capability
4658 */
4659static void
4660igb_enable_wakeup(device_t dev)
4661{
4662	u16     cap, status;
4663	u8      id;
4664
4665	/* First find the capabilities pointer*/
4666	cap = pci_read_config(dev, PCIR_CAP_PTR, 2);
4667	/* Read the PM Capabilities */
4668	id = pci_read_config(dev, cap, 1);
4669	if (id != PCIY_PMG)     /* Something wrong */
4670		return;
4671	/* OK, we have the power capabilities, so
4672	   now get the status register */
4673	cap += PCIR_POWER_STATUS;
4674	status = pci_read_config(dev, cap, 2);
4675	status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE;
4676	pci_write_config(dev, cap, status, 2);
4677	return;
4678}
4679
4680static void
4681igb_led_func(void *arg, int onoff)
4682{
4683	struct adapter	*adapter = arg;
4684
4685	IGB_CORE_LOCK(adapter);
4686	if (onoff) {
4687		e1000_setup_led(&adapter->hw);
4688		e1000_led_on(&adapter->hw);
4689	} else {
4690		e1000_led_off(&adapter->hw);
4691		e1000_cleanup_led(&adapter->hw);
4692	}
4693	IGB_CORE_UNLOCK(adapter);
4694}
4695
4696/**********************************************************************
4697 *
4698 *  Update the board statistics counters.
4699 *
4700 **********************************************************************/
4701static void
4702igb_update_stats_counters(struct adapter *adapter)
4703{
4704	struct ifnet   *ifp;
4705
4706	if (adapter->hw.phy.media_type == e1000_media_type_copper ||
4707	   (E1000_READ_REG(&adapter->hw, E1000_STATUS) & E1000_STATUS_LU)) {
4708		adapter->stats.symerrs +=
4709		    E1000_READ_REG(&adapter->hw, E1000_SYMERRS);
4710		adapter->stats.sec +=
4711		    E1000_READ_REG(&adapter->hw, E1000_SEC);
4712	}
4713	adapter->stats.crcerrs += E1000_READ_REG(&adapter->hw, E1000_CRCERRS);
4714	adapter->stats.mpc += E1000_READ_REG(&adapter->hw, E1000_MPC);
4715	adapter->stats.scc += E1000_READ_REG(&adapter->hw, E1000_SCC);
4716	adapter->stats.ecol += E1000_READ_REG(&adapter->hw, E1000_ECOL);
4717
4718	adapter->stats.mcc += E1000_READ_REG(&adapter->hw, E1000_MCC);
4719	adapter->stats.latecol += E1000_READ_REG(&adapter->hw, E1000_LATECOL);
4720	adapter->stats.colc += E1000_READ_REG(&adapter->hw, E1000_COLC);
4721	adapter->stats.dc += E1000_READ_REG(&adapter->hw, E1000_DC);
4722	adapter->stats.rlec += E1000_READ_REG(&adapter->hw, E1000_RLEC);
4723	adapter->stats.xonrxc += E1000_READ_REG(&adapter->hw, E1000_XONRXC);
4724	adapter->stats.xontxc += E1000_READ_REG(&adapter->hw, E1000_XONTXC);
4725	adapter->stats.xoffrxc += E1000_READ_REG(&adapter->hw, E1000_XOFFRXC);
4726	adapter->stats.xofftxc += E1000_READ_REG(&adapter->hw, E1000_XOFFTXC);
4727	adapter->stats.fcruc += E1000_READ_REG(&adapter->hw, E1000_FCRUC);
4728	adapter->stats.prc64 += E1000_READ_REG(&adapter->hw, E1000_PRC64);
4729	adapter->stats.prc127 += E1000_READ_REG(&adapter->hw, E1000_PRC127);
4730	adapter->stats.prc255 += E1000_READ_REG(&adapter->hw, E1000_PRC255);
4731	adapter->stats.prc511 += E1000_READ_REG(&adapter->hw, E1000_PRC511);
4732	adapter->stats.prc1023 += E1000_READ_REG(&adapter->hw, E1000_PRC1023);
4733	adapter->stats.prc1522 += E1000_READ_REG(&adapter->hw, E1000_PRC1522);
4734	adapter->stats.gprc += E1000_READ_REG(&adapter->hw, E1000_GPRC);
4735	adapter->stats.bprc += E1000_READ_REG(&adapter->hw, E1000_BPRC);
4736	adapter->stats.mprc += E1000_READ_REG(&adapter->hw, E1000_MPRC);
4737	adapter->stats.gptc += E1000_READ_REG(&adapter->hw, E1000_GPTC);
4738
4739	/* For the 64-bit byte counters the low dword must be read first. */
4740	/* Both registers clear on the read of the high dword */
4741
4742	adapter->stats.gorc += E1000_READ_REG(&adapter->hw, E1000_GORCH);
4743	adapter->stats.gotc += E1000_READ_REG(&adapter->hw, E1000_GOTCH);
4744
4745	adapter->stats.rnbc += E1000_READ_REG(&adapter->hw, E1000_RNBC);
4746	adapter->stats.ruc += E1000_READ_REG(&adapter->hw, E1000_RUC);
4747	adapter->stats.rfc += E1000_READ_REG(&adapter->hw, E1000_RFC);
4748	adapter->stats.roc += E1000_READ_REG(&adapter->hw, E1000_ROC);
4749	adapter->stats.rjc += E1000_READ_REG(&adapter->hw, E1000_RJC);
4750
4751	adapter->stats.tor += E1000_READ_REG(&adapter->hw, E1000_TORH);
4752	adapter->stats.tot += E1000_READ_REG(&adapter->hw, E1000_TOTH);
4753
4754	adapter->stats.tpr += E1000_READ_REG(&adapter->hw, E1000_TPR);
4755	adapter->stats.tpt += E1000_READ_REG(&adapter->hw, E1000_TPT);
4756	adapter->stats.ptc64 += E1000_READ_REG(&adapter->hw, E1000_PTC64);
4757	adapter->stats.ptc127 += E1000_READ_REG(&adapter->hw, E1000_PTC127);
4758	adapter->stats.ptc255 += E1000_READ_REG(&adapter->hw, E1000_PTC255);
4759	adapter->stats.ptc511 += E1000_READ_REG(&adapter->hw, E1000_PTC511);
4760	adapter->stats.ptc1023 += E1000_READ_REG(&adapter->hw, E1000_PTC1023);
4761	adapter->stats.ptc1522 += E1000_READ_REG(&adapter->hw, E1000_PTC1522);
4762	adapter->stats.mptc += E1000_READ_REG(&adapter->hw, E1000_MPTC);
4763	adapter->stats.bptc += E1000_READ_REG(&adapter->hw, E1000_BPTC);
4764
4765	adapter->stats.algnerrc +=
4766		E1000_READ_REG(&adapter->hw, E1000_ALGNERRC);
4767	adapter->stats.rxerrc +=
4768		E1000_READ_REG(&adapter->hw, E1000_RXERRC);
4769	adapter->stats.tncrs +=
4770		E1000_READ_REG(&adapter->hw, E1000_TNCRS);
4771	adapter->stats.cexterr +=
4772		E1000_READ_REG(&adapter->hw, E1000_CEXTERR);
4773	adapter->stats.tsctc +=
4774		E1000_READ_REG(&adapter->hw, E1000_TSCTC);
4775	adapter->stats.tsctfc +=
4776		E1000_READ_REG(&adapter->hw, E1000_TSCTFC);
4777	ifp = adapter->ifp;
4778
4779	ifp->if_collisions = adapter->stats.colc;
4780
4781	/* Rx Errors */
4782	ifp->if_ierrors = adapter->dropped_pkts + adapter->stats.rxerrc +
4783	    adapter->stats.crcerrs + adapter->stats.algnerrc +
4784	    adapter->stats.ruc + adapter->stats.roc +
4785	    adapter->stats.mpc + adapter->stats.cexterr;
4786
4787	/* Tx Errors */
4788	ifp->if_oerrors = adapter->stats.ecol +
4789	    adapter->stats.latecol + adapter->watchdog_events;
4790}
4791
4792
4793/**********************************************************************
4794 *
4795 *  This routine is called only when igb_display_debug_stats is enabled.
4796 *  This routine provides a way to take a look at important statistics
4797 *  maintained by the driver and hardware.
4798 *
4799 **********************************************************************/
4800static void
4801igb_print_debug_info(struct adapter *adapter)
4802{
4803	device_t dev = adapter->dev;
4804	struct igb_queue *que = adapter->queues;
4805	struct rx_ring *rxr = adapter->rx_rings;
4806	struct tx_ring *txr = adapter->tx_rings;
4807	uint8_t *hw_addr = adapter->hw.hw_addr;
4808
4809	device_printf(dev, "Adapter hardware address = %p \n", hw_addr);
4810	device_printf(dev, "CTRL = 0x%x RCTL = 0x%x \n",
4811	    E1000_READ_REG(&adapter->hw, E1000_CTRL),
4812	    E1000_READ_REG(&adapter->hw, E1000_RCTL));
4813
4814#if	(DEBUG_HW > 0)  /* Don't output these errors normally */
4815	device_printf(dev, "IMS = 0x%x EIMS = 0x%x \n",
4816	    E1000_READ_REG(&adapter->hw, E1000_IMS),
4817	    E1000_READ_REG(&adapter->hw, E1000_EIMS));
4818#endif
4819
4820	device_printf(dev, "Packet buffer = Tx=%dk Rx=%dk \n",
4821	    ((E1000_READ_REG(&adapter->hw, E1000_PBA) & 0xffff0000) >> 16),
4822	    (E1000_READ_REG(&adapter->hw, E1000_PBA) & 0xffff) );
4823	device_printf(dev, "Flow control watermarks high = %d low = %d\n",
4824	    adapter->hw.fc.high_water,
4825	    adapter->hw.fc.low_water);
4826
4827	for (int i = 0; i < adapter->num_queues; i++, rxr++, txr++) {
4828		device_printf(dev, "Queue(%d) tdh = %d, tdt = %d  ", i,
4829		    E1000_READ_REG(&adapter->hw, E1000_TDH(i)),
4830		    E1000_READ_REG(&adapter->hw, E1000_TDT(i)));
4831		device_printf(dev, "rdh = %d, rdt = %d\n",
4832		    E1000_READ_REG(&adapter->hw, E1000_RDH(i)),
4833		    E1000_READ_REG(&adapter->hw, E1000_RDT(i)));
4834		device_printf(dev, "TX(%d) no descriptors avail event = %lld\n",
4835		    txr->me, (long long)txr->no_desc_avail);
4836		device_printf(dev, "TX(%d) Packets sent = %lld\n",
4837		    txr->me, (long long)txr->tx_packets);
4838		device_printf(dev, "RX(%d) Packets received = %lld  ",
4839		    rxr->me, (long long)rxr->rx_packets);
4840	}
4841
4842	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4843		struct lro_ctrl *lro = &rxr->lro;
4844		device_printf(dev, "Queue(%d) rdh = %d, rdt = %d\n", i,
4845		    E1000_READ_REG(&adapter->hw, E1000_RDH(i)),
4846		    E1000_READ_REG(&adapter->hw, E1000_RDT(i)));
4847		device_printf(dev, "RX(%d) Packets received = %lld\n", rxr->me,
4848		    (long long)rxr->rx_packets);
4849		device_printf(dev, " Split Packets = %lld ",
4850		    (long long)rxr->rx_split_packets);
4851		device_printf(dev, " Byte count = %lld\n",
4852		    (long long)rxr->rx_bytes);
4853		device_printf(dev, "RX(%d) LRO Queued = %d  ",
4854		    i, lro->lro_queued);
4855		device_printf(dev, "LRO Flushed = %d\n", lro->lro_flushed);
4856	}
4857
4858	for (int i = 0; i < adapter->num_queues; i++, que++)
4859		device_printf(dev, "QUE(%d) IRQs = %lld\n",
4860		    i, (long long)que->irqs);
4861
4862	device_printf(dev, "LINK MSIX IRQ Handled = %u\n", adapter->link_irq);
4863	device_printf(dev, "Mbuf defrag failed = %ld\n",
4864	    adapter->mbuf_defrag_failed);
4865	device_printf(dev, "Std mbuf header failed = %ld\n",
4866	    adapter->mbuf_header_failed);
4867	device_printf(dev, "Std mbuf packet failed = %ld\n",
4868	    adapter->mbuf_packet_failed);
4869	device_printf(dev, "Driver dropped packets = %ld\n",
4870	    adapter->dropped_pkts);
4871	device_printf(dev, "Driver tx dma failure in xmit = %ld\n",
4872		adapter->no_tx_dma_setup);
4873}
4874
4875static void
4876igb_print_hw_stats(struct adapter *adapter)
4877{
4878	device_t dev = adapter->dev;
4879
4880	device_printf(dev, "Excessive collisions = %lld\n",
4881	    (long long)adapter->stats.ecol);
4882#if	(DEBUG_HW > 0)  /* Don't output these errors normally */
4883	device_printf(dev, "Symbol errors = %lld\n",
4884	    (long long)adapter->stats.symerrs);
4885#endif
4886	device_printf(dev, "Sequence errors = %lld\n",
4887	    (long long)adapter->stats.sec);
4888	device_printf(dev, "Defer count = %lld\n",
4889	    (long long)adapter->stats.dc);
4890	device_printf(dev, "Missed Packets = %lld\n",
4891	    (long long)adapter->stats.mpc);
4892	device_printf(dev, "Receive No Buffers = %lld\n",
4893	    (long long)adapter->stats.rnbc);
4894	/* RLEC is inaccurate on some hardware, calculate our own. */
4895	device_printf(dev, "Receive Length Errors = %lld\n",
4896	    ((long long)adapter->stats.roc + (long long)adapter->stats.ruc));
4897	device_printf(dev, "Receive errors = %lld\n",
4898	    (long long)adapter->stats.rxerrc);
4899	device_printf(dev, "Crc errors = %lld\n",
4900	    (long long)adapter->stats.crcerrs);
4901	device_printf(dev, "Alignment errors = %lld\n",
4902	    (long long)adapter->stats.algnerrc);
4903	/* On 82575 these are collision counts */
4904	device_printf(dev, "Collision/Carrier extension errors = %lld\n",
4905	    (long long)adapter->stats.cexterr);
4906	device_printf(dev, "RX overruns = %ld\n", adapter->rx_overruns);
4907	device_printf(dev, "watchdog timeouts = %ld\n",
4908	    adapter->watchdog_events);
4909	device_printf(dev, "XON Rcvd = %lld\n",
4910	    (long long)adapter->stats.xonrxc);
4911	device_printf(dev, "XON Xmtd = %lld\n",
4912	    (long long)adapter->stats.xontxc);
4913	device_printf(dev, "XOFF Rcvd = %lld\n",
4914	    (long long)adapter->stats.xoffrxc);
4915	device_printf(dev, "XOFF Xmtd = %lld\n",
4916	    (long long)adapter->stats.xofftxc);
4917	device_printf(dev, "Good Packets Rcvd = %lld\n",
4918	    (long long)adapter->stats.gprc);
4919	device_printf(dev, "Good Packets Xmtd = %lld\n",
4920	    (long long)adapter->stats.gptc);
4921	device_printf(dev, "TSO Contexts Xmtd = %lld\n",
4922	    (long long)adapter->stats.tsctc);
4923	device_printf(dev, "TSO Contexts Failed = %lld\n",
4924	    (long long)adapter->stats.tsctfc);
4925}
4926
4927/**********************************************************************
4928 *
4929 *  This routine provides a way to dump out the adapter eeprom,
4930 *  often a useful debug/service tool. This only dumps the first
4931 *  32 words; the data that matters lies within that range.
4932 *
4933 **********************************************************************/
4934static void
4935igb_print_nvm_info(struct adapter *adapter)
4936{
4937	u16	eeprom_data;
4938	int	i, j, row = 0;
4939
4940	/* It's a bit crude, but it gets the job done */
4941	printf("\nInterface EEPROM Dump:\n");
4942	printf("Offset\n0x0000  ");
4943	for (i = 0, j = 0; i < 32; i++, j++) {
4944		if (j == 8) { /* Make the offset block */
4945			j = 0; ++row;
4946			printf("\n0x00%x0  ",row);
4947		}
4948		e1000_read_nvm(&adapter->hw, i, 1, &eeprom_data);
4949		printf("%04x ", eeprom_data);
4950	}
4951	printf("\n");
4952}
4953
4954static int
4955igb_sysctl_debug_info(SYSCTL_HANDLER_ARGS)
4956{
4957	struct adapter *adapter;
4958	int error;
4959	int result;
4960
4961	result = -1;
4962	error = sysctl_handle_int(oidp, &result, 0, req);
4963
4964	if (error || !req->newptr)
4965		return (error);
4966
4967	if (result == 1) {
4968		adapter = (struct adapter *)arg1;
4969		igb_print_debug_info(adapter);
4970	}
4971	/*
4972	 * This value will cause a hex dump of the
4973	 * first 32 16-bit words of the EEPROM to
4974	 * the screen.
4975	 */
4976	if (result == 2) {
4977		adapter = (struct adapter *)arg1;
4978		igb_print_nvm_info(adapter);
4979        }
4980
4981	return (error);
4982}
4983
4984
4985static int
4986igb_sysctl_stats(SYSCTL_HANDLER_ARGS)
4987{
4988	struct adapter *adapter;
4989	int error;
4990	int result;
4991
4992	result = -1;
4993	error = sysctl_handle_int(oidp, &result, 0, req);
4994
4995	if (error || !req->newptr)
4996		return (error);
4997
4998	if (result == 1) {
4999		adapter = (struct adapter *)arg1;
5000		igb_print_hw_stats(adapter);
5001	}
5002
5003	return (error);
5004}
5005
5006static void
5007igb_add_rx_process_limit(struct adapter *adapter, const char *name,
5008	const char *description, int *limit, int value)
5009{
5010	*limit = value;
5011	SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
5012	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
5013	    OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW, limit, value, description);
5014}
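/*
 * Editor's note: a hedged usage sketch for the helper above.  The
 * sysctl name, description, adapter field and default value shown
 * here are illustrative assumptions, not necessarily the exact names
 * used elsewhere in this driver.
 */
#if 0
	igb_add_rx_process_limit(adapter, "rx_processing_limit",
	    "max number of rx packets to process per interrupt",
	    &adapter->rx_process_limit, 100);
#endif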
5015