if_igb.c revision 200268
1/******************************************************************************
2
3  Copyright (c) 2001-2009, Intel Corporation
4  All rights reserved.
5
6  Redistribution and use in source and binary forms, with or without
7  modification, are permitted provided that the following conditions are met:
8
9   1. Redistributions of source code must retain the above copyright notice,
10      this list of conditions and the following disclaimer.
11
12   2. Redistributions in binary form must reproduce the above copyright
13      notice, this list of conditions and the following disclaimer in the
14      documentation and/or other materials provided with the distribution.
15
16   3. Neither the name of the Intel Corporation nor the names of its
17      contributors may be used to endorse or promote products derived from
18      this software without specific prior written permission.
19
20  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
24  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30  POSSIBILITY OF SUCH DAMAGE.
31
32******************************************************************************/
33/*$FreeBSD: head/sys/dev/e1000/if_igb.c 200268 2009-12-08 18:54:37Z jfv $*/
34
35
36#ifdef HAVE_KERNEL_OPTION_HEADERS
37#include "opt_device_polling.h"
38#include "opt_inet.h"
39#endif
40
41#include <sys/param.h>
42#include <sys/systm.h>
43#if __FreeBSD_version >= 800000
44#include <sys/buf_ring.h>
45#endif
46#include <sys/bus.h>
47#include <sys/endian.h>
48#include <sys/kernel.h>
49#include <sys/kthread.h>
50#include <sys/malloc.h>
51#include <sys/mbuf.h>
52#include <sys/module.h>
53#include <sys/rman.h>
54#include <sys/socket.h>
55#include <sys/sockio.h>
56#include <sys/sysctl.h>
57#include <sys/taskqueue.h>
58#include <sys/eventhandler.h>
59#include <sys/pcpu.h>
60#include <sys/smp.h>
61#include <machine/smp.h>
62#include <machine/bus.h>
63#include <machine/resource.h>
64
65#ifdef IGB_IEEE1588
66#include <sys/ieee1588.h>
67#endif
68
69#include <net/bpf.h>
70#include <net/ethernet.h>
71#include <net/if.h>
72#include <net/if_arp.h>
73#include <net/if_dl.h>
74#include <net/if_media.h>
75
76#include <net/if_types.h>
77#include <net/if_vlan_var.h>
78
79#include <netinet/in_systm.h>
80#include <netinet/in.h>
81#include <netinet/if_ether.h>
82#include <netinet/ip.h>
83#include <netinet/ip6.h>
84#include <netinet/tcp.h>
85#include <netinet/tcp_lro.h>
86#include <netinet/udp.h>
87
88#include <machine/in_cksum.h>
89#include <dev/pci/pcivar.h>
90#include <dev/pci/pcireg.h>
91
92#include "e1000_api.h"
93#include "e1000_82575.h"
94#include "if_igb.h"
95
96/*********************************************************************
97 *  Set this to one to display debug statistics
98 *********************************************************************/
99int	igb_display_debug_stats = 0;
100
101/*********************************************************************
102 *  Driver version:
103 *********************************************************************/
104char igb_driver_version[] = "version - 1.8.4";
105
106
107/*********************************************************************
108 *  PCI Device ID Table
109 *
110 *  Used by probe to select the devices to load on.
111 *  Last field stores an index into igb_strings.
112 *  Last entry must be all 0s
113 *
114 *  { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
115 *********************************************************************/
116
117static igb_vendor_info_t igb_vendor_info_array[] =
118{
119	{ 0x8086, E1000_DEV_ID_82575EB_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
120	{ 0x8086, E1000_DEV_ID_82575EB_FIBER_SERDES,
121						PCI_ANY_ID, PCI_ANY_ID, 0},
122	{ 0x8086, E1000_DEV_ID_82575GB_QUAD_COPPER,
123						PCI_ANY_ID, PCI_ANY_ID, 0},
124	{ 0x8086, E1000_DEV_ID_82576,		PCI_ANY_ID, PCI_ANY_ID, 0},
125	{ 0x8086, E1000_DEV_ID_82576_NS,	PCI_ANY_ID, PCI_ANY_ID, 0},
126	{ 0x8086, E1000_DEV_ID_82576_NS_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
127	{ 0x8086, E1000_DEV_ID_82576_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
128	{ 0x8086, E1000_DEV_ID_82576_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
129	{ 0x8086, E1000_DEV_ID_82576_SERDES_QUAD,
130						PCI_ANY_ID, PCI_ANY_ID, 0},
131	{ 0x8086, E1000_DEV_ID_82576_QUAD_COPPER,
132						PCI_ANY_ID, PCI_ANY_ID, 0},
133	{ 0x8086, E1000_DEV_ID_82580_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
134	{ 0x8086, E1000_DEV_ID_82580_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
135	{ 0x8086, E1000_DEV_ID_82580_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
136	{ 0x8086, E1000_DEV_ID_82580_SGMII,	PCI_ANY_ID, PCI_ANY_ID, 0},
137	{ 0x8086, E1000_DEV_ID_82580_COPPER_DUAL,
138						PCI_ANY_ID, PCI_ANY_ID, 0},
139	/* required last entry */
140	{ 0, 0, 0, 0, 0}
141};
142
143/*********************************************************************
144 *  Table of branding strings for all supported NICs.
145 *********************************************************************/
146
147static char *igb_strings[] = {
148	"Intel(R) PRO/1000 Network Connection"
149};
150
151/*********************************************************************
152 *  Function prototypes
153 *********************************************************************/
154static int	igb_probe(device_t);
155static int	igb_attach(device_t);
156static int	igb_detach(device_t);
157static int	igb_shutdown(device_t);
158static int	igb_suspend(device_t);
159static int	igb_resume(device_t);
160static void	igb_start(struct ifnet *);
161static void	igb_start_locked(struct tx_ring *, struct ifnet *ifp);
162#if __FreeBSD_version >= 800000
163static int	igb_mq_start(struct ifnet *, struct mbuf *);
164static int	igb_mq_start_locked(struct ifnet *,
165		    struct tx_ring *, struct mbuf *);
166static void	igb_qflush(struct ifnet *);
167#endif
168static int	igb_ioctl(struct ifnet *, u_long, caddr_t);
169static void	igb_init(void *);
170static void	igb_init_locked(struct adapter *);
171static void	igb_stop(void *);
172static void	igb_media_status(struct ifnet *, struct ifmediareq *);
173static int	igb_media_change(struct ifnet *);
174static void	igb_identify_hardware(struct adapter *);
175static int	igb_allocate_pci_resources(struct adapter *);
176static int	igb_allocate_msix(struct adapter *);
177static int	igb_allocate_legacy(struct adapter *);
178static int	igb_setup_msix(struct adapter *);
179static void	igb_free_pci_resources(struct adapter *);
180static void	igb_local_timer(void *);
181static void	igb_reset(struct adapter *);
182static void	igb_setup_interface(device_t, struct adapter *);
183static int	igb_allocate_queues(struct adapter *);
184static void	igb_configure_queues(struct adapter *);
185
186static int	igb_allocate_transmit_buffers(struct tx_ring *);
187static void	igb_setup_transmit_structures(struct adapter *);
188static void	igb_setup_transmit_ring(struct tx_ring *);
189static void	igb_initialize_transmit_units(struct adapter *);
190static void	igb_free_transmit_structures(struct adapter *);
191static void	igb_free_transmit_buffers(struct tx_ring *);
192
193static int	igb_allocate_receive_buffers(struct rx_ring *);
194static int	igb_setup_receive_structures(struct adapter *);
195static int	igb_setup_receive_ring(struct rx_ring *);
196static void	igb_initialize_receive_units(struct adapter *);
197static void	igb_free_receive_structures(struct adapter *);
198static void	igb_free_receive_buffers(struct rx_ring *);
199
200static void	igb_enable_intr(struct adapter *);
201static void	igb_disable_intr(struct adapter *);
202static void	igb_update_stats_counters(struct adapter *);
203static bool	igb_txeof(struct tx_ring *);
204static bool	igb_rxeof(struct rx_ring *, int);
205static void	igb_rx_checksum(u32, struct mbuf *, bool);
206static int	igb_tx_ctx_setup(struct tx_ring *, struct mbuf *);
207static bool	igb_tso_setup(struct tx_ring *, struct mbuf *, u32 *);
208static void	igb_set_promisc(struct adapter *);
209static void	igb_disable_promisc(struct adapter *);
210static void	igb_set_multi(struct adapter *);
211static void	igb_print_hw_stats(struct adapter *);
212static void	igb_update_link_status(struct adapter *);
213static int	igb_get_buf(struct rx_ring *, int, int);
214
215static void	igb_register_vlan(void *, struct ifnet *, u16);
216static void	igb_unregister_vlan(void *, struct ifnet *, u16);
217static void	igb_setup_vlan_hw_support(struct adapter *);
218
219static int	igb_xmit(struct tx_ring *, struct mbuf **);
220static int	igb_dma_malloc(struct adapter *, bus_size_t,
221		    struct igb_dma_alloc *, int);
222static void	igb_dma_free(struct adapter *, struct igb_dma_alloc *);
223static void	igb_print_debug_info(struct adapter *);
224static void	igb_print_nvm_info(struct adapter *);
225static int 	igb_is_valid_ether_addr(u8 *);
226static int	igb_sysctl_stats(SYSCTL_HANDLER_ARGS);
227static int	igb_sysctl_debug_info(SYSCTL_HANDLER_ARGS);
228/* Management and WOL Support */
229static void	igb_init_manageability(struct adapter *);
230static void	igb_release_manageability(struct adapter *);
231static void     igb_get_hw_control(struct adapter *);
232static void     igb_release_hw_control(struct adapter *);
233static void     igb_enable_wakeup(device_t);
234
235static int	igb_irq_fast(void *);
236static void	igb_add_rx_process_limit(struct adapter *, const char *,
237		    const char *, int *, int);
238static void	igb_handle_rxtx(void *context, int pending);
239static void	igb_handle_tx(void *context, int pending);
240static void	igb_handle_rx(void *context, int pending);
241
242/* These are MSIX only irq handlers */
243static void	igb_msix_rx(void *);
244static void	igb_msix_tx(void *);
245static void	igb_msix_link(void *);
246
247/* Adaptive Interrupt Moderation */
248static void	igb_update_aim(struct rx_ring *);
249
250/*********************************************************************
251 *  FreeBSD Device Interface Entry Points
252 *********************************************************************/
253
254static device_method_t igb_methods[] = {
255	/* Device interface */
256	DEVMETHOD(device_probe, igb_probe),
257	DEVMETHOD(device_attach, igb_attach),
258	DEVMETHOD(device_detach, igb_detach),
259	DEVMETHOD(device_shutdown, igb_shutdown),
260	DEVMETHOD(device_suspend, igb_suspend),
261	DEVMETHOD(device_resume, igb_resume),
262	{0, 0}
263};
264
265static driver_t igb_driver = {
266	"igb", igb_methods, sizeof(struct adapter),
267};
268
269static devclass_t igb_devclass;
270DRIVER_MODULE(igb, pci, igb_driver, igb_devclass, 0, 0);
271MODULE_DEPEND(igb, pci, 1, 1, 1);
272MODULE_DEPEND(igb, ether, 1, 1, 1);
273
274/*********************************************************************
275 *  Tunable default values.
276 *********************************************************************/
277
278/* Descriptor defaults */
279static int igb_rxd = IGB_DEFAULT_RXD;
280static int igb_txd = IGB_DEFAULT_TXD;
281TUNABLE_INT("hw.igb.rxd", &igb_rxd);
282TUNABLE_INT("hw.igb.txd", &igb_txd);
283
284/*
285** These parameters are used in Adaptive
286** Interrupt Moderation. The value is set
287** into EITR and controls the interrupt
288** frequency. A static scheme with a rate of
289** your choosing can be created by setting
290** igb_ave_latency to the desired value and
291** then setting igb_enable_aim to FALSE.
292** This will result in all EITR registers
293** getting set to that value statically.
294*/
295static int igb_enable_aim = TRUE;
296TUNABLE_INT("hw.igb.enable_aim", &igb_enable_aim);
297static int igb_low_latency = IGB_LOW_LATENCY;
298TUNABLE_INT("hw.igb.low_latency", &igb_low_latency);
299static int igb_ave_latency = IGB_AVE_LATENCY;
300TUNABLE_INT("hw.igb.ave_latency", &igb_ave_latency);
301static int igb_bulk_latency = IGB_BULK_LATENCY;
302TUNABLE_INT("hw.igb.bulk_latency", &igb_bulk_latency);
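/*
** The same AIM knobs are also exposed per-device at run time via the
** SYSCTL_ADD_INT() calls in igb_attach() below, so they can be changed
** without a reboot. Illustrative only (unit 0 assumed):
**
**	sysctl dev.igb.0.enable_aim=0
**	sysctl dev.igb.0.ave_latency=<EITR value>
*/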
303
304/*
305 * MSIX should be the default for best performance,
306 * but this allows it to be forced off for testing.
307 */
308static int igb_enable_msix = 1;
309TUNABLE_INT("hw.igb.enable_msix", &igb_enable_msix);
310
311/*
312 * Header split has seemed to be beneficial in
313 * all circumstances tested, so it's on by default;
314 * however, this variable allows it to be disabled
315 * for debugging purposes.
316 */
317static bool igb_header_split = TRUE;
318TUNABLE_INT("hw.igb.hdr_split", &igb_header_split);
319
320/*
321** This will autoconfigure based on the number
322** of CPUs if left at 0. Only a matched pair of
323** TX and RX rings is allowed.
324*/
325static int igb_num_queues = 0;
326TUNABLE_INT("hw.igb.num_queues", &igb_num_queues);
327
328/* How many packets rxeof tries to clean at a time */
329static int igb_rx_process_limit = 100;
330TUNABLE_INT("hw.igb.rx_process_limit", &igb_rx_process_limit);
331
332/* Flow control setting - default to FULL */
333static int igb_fc_setting = e1000_fc_full;
334TUNABLE_INT("hw.igb.fc_setting", &igb_fc_setting);
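/*
** Illustrative loader.conf(5) usage for the boot-time tunables above.
** The values are examples only and must satisfy the limits checked in
** igb_attach(), e.g. descriptor counts between IGB_MIN_TXD/RXD and
** IGB_MAX_TXD/RXD and a multiple of the descriptor alignment:
**
**	hw.igb.rxd="2048"
**	hw.igb.txd="2048"
**	hw.igb.enable_msix="0"
**	hw.igb.num_queues="2"
**	hw.igb.rx_process_limit="200"
*/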
335
336/*
337** Shadow VFTA table; this is needed because
338** the real filter table gets cleared during
339** a soft reset and the driver needs to be able
340** to repopulate it.
341*/
342static u32 igb_shadow_vfta[IGB_VFTA_SIZE];
343
344
345/*********************************************************************
346 *  Device identification routine
347 *
348 *  igb_probe determines if the driver should be loaded on the
349 *  adapter based on the PCI vendor/device ID of the adapter.
350 *
351 *  return BUS_PROBE_DEFAULT on success, positive on failure
352 *********************************************************************/
353
354static int
355igb_probe(device_t dev)
356{
357	char		adapter_name[60];
358	uint16_t	pci_vendor_id = 0;
359	uint16_t	pci_device_id = 0;
360	uint16_t	pci_subvendor_id = 0;
361	uint16_t	pci_subdevice_id = 0;
362	igb_vendor_info_t *ent;
363
364	INIT_DEBUGOUT("igb_probe: begin");
365
366	pci_vendor_id = pci_get_vendor(dev);
367	if (pci_vendor_id != IGB_VENDOR_ID)
368		return (ENXIO);
369
370	pci_device_id = pci_get_device(dev);
371	pci_subvendor_id = pci_get_subvendor(dev);
372	pci_subdevice_id = pci_get_subdevice(dev);
373
374	ent = igb_vendor_info_array;
375	while (ent->vendor_id != 0) {
376		if ((pci_vendor_id == ent->vendor_id) &&
377		    (pci_device_id == ent->device_id) &&
378
379		    ((pci_subvendor_id == ent->subvendor_id) ||
380		    (ent->subvendor_id == PCI_ANY_ID)) &&
381
382		    ((pci_subdevice_id == ent->subdevice_id) ||
383		    (ent->subdevice_id == PCI_ANY_ID))) {
384			sprintf(adapter_name, "%s %s",
385				igb_strings[ent->index],
386				igb_driver_version);
387			device_set_desc_copy(dev, adapter_name);
388			return (BUS_PROBE_DEFAULT);
389		}
390		ent++;
391	}
392
393	return (ENXIO);
394}
395
396/*********************************************************************
397 *  Device initialization routine
398 *
399 *  The attach entry point is called when the driver is being loaded.
400 *  This routine identifies the type of hardware, allocates all resources
401 *  and initializes the hardware.
402 *
403 *  return 0 on success, positive on failure
404 *********************************************************************/
405
406static int
407igb_attach(device_t dev)
408{
409	struct adapter	*adapter;
410	int		error = 0;
411	u16		eeprom_data;
412
413	INIT_DEBUGOUT("igb_attach: begin");
414
415	adapter = device_get_softc(dev);
416	adapter->dev = adapter->osdep.dev = dev;
417	IGB_CORE_LOCK_INIT(adapter, device_get_nameunit(dev));
418
419	/* SYSCTL stuff */
420	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
421	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
422	    OID_AUTO, "debug", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
423	    igb_sysctl_debug_info, "I", "Debug Information");
424
425	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
426	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
427	    OID_AUTO, "stats", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
428	    igb_sysctl_stats, "I", "Statistics");
429
430	SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
431	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
432	    OID_AUTO, "flow_control", CTLTYPE_INT|CTLFLAG_RW,
433	    &igb_fc_setting, 0, "Flow Control");
434
435	SYSCTL_ADD_INT(device_get_sysctl_ctx(dev),
436	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
437	    OID_AUTO, "enable_aim", CTLTYPE_INT|CTLFLAG_RW,
438	    &igb_enable_aim, 1, "Interrupt Moderation");
439
440	SYSCTL_ADD_INT(device_get_sysctl_ctx(dev),
441	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
442	    OID_AUTO, "low_latency", CTLTYPE_INT|CTLFLAG_RW,
443	    &igb_low_latency, 1, "Low Latency");
444
445	SYSCTL_ADD_INT(device_get_sysctl_ctx(dev),
446	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
447	    OID_AUTO, "ave_latency", CTLTYPE_INT|CTLFLAG_RW,
448	    &igb_ave_latency, 1, "Average Latency");
449
450	SYSCTL_ADD_INT(device_get_sysctl_ctx(dev),
451	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
452	    OID_AUTO, "bulk_latency", CTLTYPE_INT|CTLFLAG_RW,
453	    &igb_bulk_latency, 1, "Bulk Latency");
454
455	callout_init_mtx(&adapter->timer, &adapter->core_mtx, 0);
456
457	/* Determine hardware and mac info */
458	igb_identify_hardware(adapter);
459
460	/* Setup PCI resources */
461	if (igb_allocate_pci_resources(adapter)) {
462		device_printf(dev, "Allocation of PCI resources failed\n");
463		error = ENXIO;
464		goto err_pci;
465	}
466
467	/* Do Shared Code initialization */
468	if (e1000_setup_init_funcs(&adapter->hw, TRUE)) {
469		device_printf(dev, "Setup of Shared code failed\n");
470		error = ENXIO;
471		goto err_pci;
472	}
473
474	e1000_get_bus_info(&adapter->hw);
475
476	/* Sysctls for limiting the amount of work done in the taskqueue */
477	igb_add_rx_process_limit(adapter, "rx_processing_limit",
478	    "max number of rx packets to process", &adapter->rx_process_limit,
479	    igb_rx_process_limit);
480
481	/*
482	 * Validate number of transmit and receive descriptors. It
483	 * must not exceed the hardware maximum, and must be a
484	 * multiple of IGB_DBA_ALIGN.
485	 */
486	if (((igb_txd * sizeof(struct e1000_tx_desc)) % IGB_DBA_ALIGN) != 0 ||
487	    (igb_txd > IGB_MAX_TXD) || (igb_txd < IGB_MIN_TXD)) {
488		device_printf(dev, "Using %d TX descriptors instead of %d!\n",
489		    IGB_DEFAULT_TXD, igb_txd);
490		adapter->num_tx_desc = IGB_DEFAULT_TXD;
491	} else
492		adapter->num_tx_desc = igb_txd;
493	if (((igb_rxd * sizeof(struct e1000_rx_desc)) % IGB_DBA_ALIGN) != 0 ||
494	    (igb_rxd > IGB_MAX_RXD) || (igb_rxd < IGB_MIN_RXD)) {
495		device_printf(dev, "Using %d RX descriptors instead of %d!\n",
496		    IGB_DEFAULT_RXD, igb_rxd);
497		adapter->num_rx_desc = IGB_DEFAULT_RXD;
498	} else
499		adapter->num_rx_desc = igb_rxd;
500
501	adapter->hw.mac.autoneg = DO_AUTO_NEG;
502	adapter->hw.phy.autoneg_wait_to_complete = FALSE;
503	adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
504
505	/* Copper options */
506	if (adapter->hw.phy.media_type == e1000_media_type_copper) {
507		adapter->hw.phy.mdix = AUTO_ALL_MODES;
508		adapter->hw.phy.disable_polarity_correction = FALSE;
509		adapter->hw.phy.ms_type = IGB_MASTER_SLAVE;
510	}
511
512	/*
513	 * Set the frame limits assuming
514	 * standard ethernet sized frames.
515	 */
516	adapter->max_frame_size = ETHERMTU + ETHER_HDR_LEN + ETHERNET_FCS_SIZE;
517	adapter->min_frame_size = ETH_ZLEN + ETHERNET_FCS_SIZE;
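	/*
	 * With the usual values (ETHERMTU 1500, ETHER_HDR_LEN 14,
	 * ETHERNET_FCS_SIZE 4, ETH_ZLEN 60) this works out to a
	 * 1518 byte maximum and a 64 byte minimum frame.
	 */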
518
519	/*
520	** Allocate and Setup Queues
521	*/
522	if (igb_allocate_queues(adapter)) {
523		error = ENOMEM;
524		goto err_pci;
525	}
526
527	/*
528	** Start from a known state; this is
529	** important for reading the NVM and
530	** MAC address from the hardware.
531	*/
532	e1000_reset_hw(&adapter->hw);
533
534	/* Make sure we have a good EEPROM before we read from it */
535	if (e1000_validate_nvm_checksum(&adapter->hw) < 0) {
536		/*
537		** Some PCI-E parts fail the first check due to
538		** the link being in a sleep state; call it again,
539		** and if it fails a second time it's a real issue.
540		*/
541		if (e1000_validate_nvm_checksum(&adapter->hw) < 0) {
542			device_printf(dev,
543			    "The EEPROM Checksum Is Not Valid\n");
544			error = EIO;
545			goto err_late;
546		}
547	}
548
549	/*
550	** Copy the permanent MAC address out of the EEPROM
551	*/
552	if (e1000_read_mac_addr(&adapter->hw) < 0) {
553		device_printf(dev, "EEPROM read error while reading MAC"
554		    " address\n");
555		error = EIO;
556		goto err_late;
557	}
558	/* Check its sanity */
559	if (!igb_is_valid_ether_addr(adapter->hw.mac.addr)) {
560		device_printf(dev, "Invalid MAC address\n");
561		error = EIO;
562		goto err_late;
563	}
564
565	/*
566	** Configure Interrupts
567	*/
568	if ((adapter->msix > 1) && (igb_enable_msix))
569		error = igb_allocate_msix(adapter);
570	else /* MSI or Legacy */
571		error = igb_allocate_legacy(adapter);
572	if (error)
573		goto err_late;
574
575	/* Setup OS specific network interface */
576	igb_setup_interface(dev, adapter);
577
578	/* Now get a good starting state */
579	igb_reset(adapter);
580
581#ifdef IGB_IEEE1588
582        /*
583        ** Setup the timer: IEEE 1588 support
584        */
585        adapter->cycles.read = igb_read_clock;
586        adapter->cycles.mask = (u64)-1;
587        adapter->cycles.mult = 1;
588        adapter->cycles.shift = IGB_TSYNC_SHIFT;
589        E1000_WRITE_REG(&adapter->hw, E1000_TIMINCA, (1<<24) |
590            IGB_TSYNC_CYCLE_TIME * IGB_TSYNC_SHIFT);
591        E1000_WRITE_REG(&adapter->hw, E1000_SYSTIML, 0x00000000);
592        E1000_WRITE_REG(&adapter->hw, E1000_SYSTIMH, 0xFF800000);
593
594	// JFV - this is not complete yet
595#endif
596
597	/* Initialize statistics */
598	igb_update_stats_counters(adapter);
599
600	adapter->hw.mac.get_link_status = 1;
601	igb_update_link_status(adapter);
602
603	/* Indicate SOL/IDER usage */
604	if (e1000_check_reset_block(&adapter->hw))
605		device_printf(dev,
606		    "PHY reset is blocked due to SOL/IDER session.\n");
607
608	/* Determine if we have to control management hardware */
609	adapter->has_manage = e1000_enable_mng_pass_thru(&adapter->hw);
610
611	/*
612	 * Setup Wake-on-Lan
613	 */
614	/* APME bit in EEPROM is mapped to WUC.APME */
615	eeprom_data = E1000_READ_REG(&adapter->hw, E1000_WUC) & E1000_WUC_APME;
616	if (eeprom_data)
617		adapter->wol = E1000_WUFC_MAG;
618
619	/* Register for VLAN events */
620	adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
621	     igb_register_vlan, adapter, EVENTHANDLER_PRI_FIRST);
622	adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
623	     igb_unregister_vlan, adapter, EVENTHANDLER_PRI_FIRST);
624
625	/* Tell the stack that the interface is not active */
626	adapter->ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
627
628	INIT_DEBUGOUT("igb_attach: end");
629
630	return (0);
631
632err_late:
633	igb_free_transmit_structures(adapter);
634	igb_free_receive_structures(adapter);
635	igb_release_hw_control(adapter);
636err_pci:
637	igb_free_pci_resources(adapter);
638	IGB_CORE_LOCK_DESTROY(adapter);
639
640	return (error);
641}
642
643/*********************************************************************
644 *  Device removal routine
645 *
646 *  The detach entry point is called when the driver is being removed.
647 *  This routine stops the adapter and deallocates all the resources
648 *  that were allocated for driver operation.
649 *
650 *  return 0 on success, positive on failure
651 *********************************************************************/
652
653static int
654igb_detach(device_t dev)
655{
656	struct adapter	*adapter = device_get_softc(dev);
657	struct ifnet	*ifp = adapter->ifp;
658
659	INIT_DEBUGOUT("igb_detach: begin");
660
661	/* Make sure VLANs are not using the driver */
662	if (adapter->ifp->if_vlantrunk != NULL) {
663		device_printf(dev,"Vlan in use, detach first\n");
664		return (EBUSY);
665	}
666
667	IGB_CORE_LOCK(adapter);
668	adapter->in_detach = 1;
669	igb_stop(adapter);
670	IGB_CORE_UNLOCK(adapter);
671
672	e1000_phy_hw_reset(&adapter->hw);
673
674	/* Give control back to firmware */
675	igb_release_manageability(adapter);
676	igb_release_hw_control(adapter);
677
678	if (adapter->wol) {
679		E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
680		E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
681		igb_enable_wakeup(dev);
682	}
683
684	/* Unregister VLAN events */
685	if (adapter->vlan_attach != NULL)
686		EVENTHANDLER_DEREGISTER(vlan_config, adapter->vlan_attach);
687	if (adapter->vlan_detach != NULL)
688		EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach);
689
690	ether_ifdetach(adapter->ifp);
691
692	callout_drain(&adapter->timer);
693
694	igb_free_pci_resources(adapter);
695	bus_generic_detach(dev);
696	if_free(ifp);
697
698	igb_free_transmit_structures(adapter);
699	igb_free_receive_structures(adapter);
700
701	IGB_CORE_LOCK_DESTROY(adapter);
702
703	return (0);
704}
705
706/*********************************************************************
707 *
708 *  Shutdown entry point
709 *
710 **********************************************************************/
711
712static int
713igb_shutdown(device_t dev)
714{
715	return igb_suspend(dev);
716}
717
718/*
719 * Suspend/resume device methods.
720 */
721static int
722igb_suspend(device_t dev)
723{
724	struct adapter *adapter = device_get_softc(dev);
725
726	IGB_CORE_LOCK(adapter);
727
728	igb_stop(adapter);
729
730        igb_release_manageability(adapter);
731	igb_release_hw_control(adapter);
732
733        if (adapter->wol) {
734                E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
735                E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
736                igb_enable_wakeup(dev);
737        }
738
739	IGB_CORE_UNLOCK(adapter);
740
741	return bus_generic_suspend(dev);
742}
743
744static int
745igb_resume(device_t dev)
746{
747	struct adapter *adapter = device_get_softc(dev);
748	struct ifnet *ifp = adapter->ifp;
749
750	IGB_CORE_LOCK(adapter);
751	igb_init_locked(adapter);
752	igb_init_manageability(adapter);
753
754	if ((ifp->if_flags & IFF_UP) &&
755	    (ifp->if_drv_flags & IFF_DRV_RUNNING))
756		igb_start(ifp);
757
758	IGB_CORE_UNLOCK(adapter);
759
760	return bus_generic_resume(dev);
761}
762
763
764/*********************************************************************
765 *  Transmit entry point
766 *
767 *  igb_start is called by the stack to initiate a transmit.
768 *  The driver will remain in this routine as long as there are
769 *  packets to transmit and transmit resources are available.
770 *  If resources are not available, the stack is notified and
771 *  the packet is requeued.
772 **********************************************************************/
773
774static void
775igb_start_locked(struct tx_ring *txr, struct ifnet *ifp)
776{
777	struct adapter	*adapter = ifp->if_softc;
778	struct mbuf	*m_head;
779
780	IGB_TX_LOCK_ASSERT(txr);
781
782	if ((ifp->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
783	    IFF_DRV_RUNNING)
784		return;
785	if (!adapter->link_active)
786		return;
787
788	while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
789
790		IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
791		if (m_head == NULL)
792			break;
793		/*
794		 *  Encapsulation can modify our pointer, and/or make it
795		 *  NULL on failure.  In that event, we can't requeue.
796		 */
797		if (igb_xmit(txr, &m_head)) {
798			if (m_head == NULL)
799				break;
800			ifp->if_drv_flags |= IFF_DRV_OACTIVE;
801			IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
802			break;
803		}
804
805		/* Send a copy of the frame to the BPF listener */
806		ETHER_BPF_MTAP(ifp, m_head);
807
808		/* Set watchdog on */
809		txr->watchdog_check = TRUE;
810	}
811}
812
813/*
814 * Legacy TX driver routine, called from the
815 * stack, always uses tx[0], and spins for it.
816 * It should not be used with multiqueue TX.
817 */
818static void
819igb_start(struct ifnet *ifp)
820{
821	struct adapter	*adapter = ifp->if_softc;
822	struct tx_ring	*txr = adapter->tx_rings;
823
824	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
825		IGB_TX_LOCK(txr);
826		igb_start_locked(txr, ifp);
827		IGB_TX_UNLOCK(txr);
828	}
829	return;
830}
831
832#if __FreeBSD_version >= 800000
833/*
834** Multiqueue Transmit driver
835**
836*/
837static int
838igb_mq_start(struct ifnet *ifp, struct mbuf *m)
839{
840	struct adapter	*adapter = ifp->if_softc;
841	struct tx_ring	*txr;
842	int 		i = 0, err = 0;
843
844	/* Which queue to use */
845	if ((m->m_flags & M_FLOWID) != 0)
846		i = m->m_pkthdr.flowid % adapter->num_queues;
847	txr = &adapter->tx_rings[i];
848
849	if (IGB_TX_TRYLOCK(txr)) {
850		err = igb_mq_start_locked(ifp, txr, m);
851		IGB_TX_UNLOCK(txr);
852	} else
853		err = drbr_enqueue(ifp, txr->br, m);
854
855	return (err);
856}
857
858static int
859igb_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr, struct mbuf *m)
860{
861	struct adapter  *adapter = txr->adapter;
862        struct mbuf     *next;
863        int             err = 0;
864
865	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
866		err = drbr_enqueue(ifp, txr->br, m);
867		return (err);
868	}
869
870	if (m == NULL) /* Called by the taskqueue */
871		goto process;
872
873	/* If nothing queued go right to xmit */
874	if (drbr_empty(ifp, txr->br)) {
875		if (igb_xmit(txr, &m)) {
876			if (m && (err = drbr_enqueue(ifp, txr->br, m)) != 0)
877                                return (err);
878		} else {
879			/* Success, update stats */
880			drbr_stats_update(ifp, m->m_pkthdr.len, m->m_flags);
881			/* Send a copy of the frame to the BPF listener */
882			ETHER_BPF_MTAP(ifp, m);
883			/* Set the watchdog */
884			txr->watchdog_check = TRUE;
885                }
886
887        } else if ((err = drbr_enqueue(ifp, txr->br, m)) != 0)
888		return (err);
889
890process:
891	if (drbr_empty(ifp, txr->br))
892		return (err);
893
894	/* Process the queue */
895	while (TRUE) {
896		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
897			break;
898		next = drbr_dequeue(ifp, txr->br);
899		if (next == NULL)
900			break;
901		if (igb_xmit(txr, &next))
902			break;
903		ETHER_BPF_MTAP(ifp, next);
904		/* Set the watchdog */
905		txr->watchdog_check = TRUE;
906	}
907
908	if (txr->tx_avail <= IGB_TX_OP_THRESHOLD)
909		ifp->if_drv_flags |= IFF_DRV_OACTIVE;
910
911	return (err);
912}
913
914/*
915** Flush all ring buffers
916*/
917static void
918igb_qflush(struct ifnet *ifp)
919{
920	struct adapter	*adapter = ifp->if_softc;
921	struct tx_ring	*txr = adapter->tx_rings;
922	struct mbuf	*m;
923
924	for (int i = 0; i < adapter->num_queues; i++, txr++) {
925		IGB_TX_LOCK(txr);
926		while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
927			m_freem(m);
928		IGB_TX_UNLOCK(txr);
929	}
930	if_qflush(ifp);
931}
932#endif /* __FreeBSD_version >= 800000 */
933
934/*********************************************************************
935 *  Ioctl entry point
936 *
937 *  igb_ioctl is called when the user wants to configure the
938 *  interface.
939 *
940 *  return 0 on success, positive on failure
941 **********************************************************************/
942
943static int
944igb_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
945{
946	struct adapter	*adapter = ifp->if_softc;
947	struct ifreq *ifr = (struct ifreq *)data;
948#ifdef INET
949	struct ifaddr *ifa = (struct ifaddr *)data;
950#endif
951	int error = 0;
952
953	if (adapter->in_detach)
954		return (error);
955
956	switch (command) {
957	case SIOCSIFADDR:
958#ifdef INET
959		if (ifa->ifa_addr->sa_family == AF_INET) {
960			/*
961			 * XXX
962			 * Since resetting hardware takes a very long time
963			 * and results in link renegotiation, we
964			 * initialize the hardware only when it is absolutely
965			 * required.
966			 */
967			ifp->if_flags |= IFF_UP;
968			if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) {
969				IGB_CORE_LOCK(adapter);
970				igb_init_locked(adapter);
971				IGB_CORE_UNLOCK(adapter);
972			}
973			arp_ifinit(ifp, ifa);
974		} else
975#endif
976			error = ether_ioctl(ifp, command, data);
977		break;
978	case SIOCSIFMTU:
979	    {
980		int max_frame_size;
981
982		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)");
983
984		IGB_CORE_LOCK(adapter);
985		max_frame_size = 9234;
986		if (ifr->ifr_mtu > max_frame_size - ETHER_HDR_LEN -
987		    ETHER_CRC_LEN) {
988			IGB_CORE_UNLOCK(adapter);
989			error = EINVAL;
990			break;
991		}
992
993		ifp->if_mtu = ifr->ifr_mtu;
994		adapter->max_frame_size =
995		    ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
996		igb_init_locked(adapter);
997		IGB_CORE_UNLOCK(adapter);
998		break;
999	    }
1000	case SIOCSIFFLAGS:
1001		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFFLAGS (Set Interface Flags)");
1003		IGB_CORE_LOCK(adapter);
1004		if (ifp->if_flags & IFF_UP) {
1005			if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) {
1006				if ((ifp->if_flags ^ adapter->if_flags) &
1007				    (IFF_PROMISC | IFF_ALLMULTI)) {
1008					igb_disable_promisc(adapter);
1009					igb_set_promisc(adapter);
1010				}
1011			} else
1012				igb_init_locked(adapter);
1013		} else
1014			if (ifp->if_drv_flags & IFF_DRV_RUNNING)
1015				igb_stop(adapter);
1016		adapter->if_flags = ifp->if_flags;
1017		IGB_CORE_UNLOCK(adapter);
1018		break;
1019	case SIOCADDMULTI:
1020	case SIOCDELMULTI:
1021		IOCTL_DEBUGOUT("ioctl rcv'd: SIOC(ADD|DEL)MULTI");
1022		if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1023			IGB_CORE_LOCK(adapter);
1024			igb_disable_intr(adapter);
1025			igb_set_multi(adapter);
1026			igb_enable_intr(adapter);
1027			IGB_CORE_UNLOCK(adapter);
1028		}
1029		break;
1030	case SIOCSIFMEDIA:
1031		/* Check SOL/IDER usage */
1032		IGB_CORE_LOCK(adapter);
1033		if (e1000_check_reset_block(&adapter->hw)) {
1034			IGB_CORE_UNLOCK(adapter);
1035			device_printf(adapter->dev, "Media change is"
1036			    " blocked due to SOL/IDER session.\n");
1037			break;
1038		}
1039		IGB_CORE_UNLOCK(adapter);
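		/* FALLTHROUGH */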
1040	case SIOCGIFMEDIA:
1041		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCxIFMEDIA (Get/Set Interface Media)");
1043		error = ifmedia_ioctl(ifp, ifr, &adapter->media, command);
1044		break;
1045	case SIOCSIFCAP:
1046	    {
1047		int mask, reinit;
1048
1049		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFCAP (Set Capabilities)");
1050		reinit = 0;
1051		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
1052		if (mask & IFCAP_HWCSUM) {
1053			ifp->if_capenable ^= IFCAP_HWCSUM;
1054			reinit = 1;
1055		}
1056		if (mask & IFCAP_TSO4) {
1057			ifp->if_capenable ^= IFCAP_TSO4;
1058			reinit = 1;
1059		}
1060		if (mask & IFCAP_VLAN_HWTAGGING) {
1061			ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
1062			reinit = 1;
1063		}
1064		if ((mask & IFCAP_LRO) && (igb_header_split)) {
1065			ifp->if_capenable ^= IFCAP_LRO;
1066			reinit = 1;
1067		}
1068		if (reinit && (ifp->if_drv_flags & IFF_DRV_RUNNING))
1069			igb_init(adapter);
1070		VLAN_CAPABILITIES(ifp);
1071		break;
1072	    }
1073
1074#ifdef IGB_IEEE1588
1075	/*
1076	** IOCTL support for Precision Time (IEEE 1588) Support
1077	*/
1078	case SIOCSHWTSTAMP:
1079		error = igb_hwtstamp_ioctl(adapter, ifp);
1080		break;
1081#endif
1082
1083	default:
1084		error = ether_ioctl(ifp, command, data);
1085		break;
1086	}
1087
1088	return (error);
1089}
1090
1091
1092/*********************************************************************
1093 *  Init entry point
1094 *
1095 *  This routine is used in two ways. It is used by the stack as
1096 *  the init entry point in the network interface structure. It is also used
1097 *  by the driver as a hw/sw initialization routine to get to a
1098 *  consistent state.
1099 *
1100 *  return 0 on success, positive on failure
1101 **********************************************************************/
1102
1103static void
1104igb_init_locked(struct adapter *adapter)
1105{
1106	struct ifnet	*ifp = adapter->ifp;
1107	device_t	dev = adapter->dev;
1108
1109	INIT_DEBUGOUT("igb_init: begin");
1110
1111	IGB_CORE_LOCK_ASSERT(adapter);
1112
1113	igb_disable_intr(adapter);
1114	callout_stop(&adapter->timer);
1115
1116	/* Get the latest mac address; the user may have set a LAA */
1117        bcopy(IF_LLADDR(adapter->ifp), adapter->hw.mac.addr,
1118              ETHER_ADDR_LEN);
1119
1120	/* Put the address into the Receive Address Array */
1121	e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);
1122
1123	igb_reset(adapter);
1124	igb_update_link_status(adapter);
1125
1126	/* Set hardware offload abilities */
1127	ifp->if_hwassist = 0;
1128	if (ifp->if_capenable & IFCAP_TXCSUM) {
1129		ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
1130#if __FreeBSD_version >= 800000
1131		if (adapter->hw.mac.type == e1000_82576)
1132			ifp->if_hwassist |= CSUM_SCTP;
1133#endif
1134	}
1135
1136	if (ifp->if_capenable & IFCAP_TSO4)
1137		ifp->if_hwassist |= CSUM_TSO;
1138
1139	/* Configure for OS presence */
1140	igb_init_manageability(adapter);
1141
1142	/* Prepare transmit descriptors and buffers */
1143	igb_setup_transmit_structures(adapter);
1144	igb_initialize_transmit_units(adapter);
1145
1146	/* Setup Multicast table */
1147	igb_set_multi(adapter);
1148
1149	/*
1150	** Figure out the desired mbuf pool
1151	** for doing jumbo/packetsplit
1152	*/
1153	if (ifp->if_mtu > ETHERMTU)
1154		adapter->rx_mbuf_sz = MJUMPAGESIZE;
1155	else
1156		adapter->rx_mbuf_sz = MCLBYTES;
1157
1158	/* Prepare receive descriptors and buffers */
1159	if (igb_setup_receive_structures(adapter)) {
1160		device_printf(dev, "Could not setup receive structures\n");
1161		return;
1162	}
1163	igb_initialize_receive_units(adapter);
1164
1165	/* Don't lose promiscuous settings */
1166	igb_set_promisc(adapter);
1167
1168	ifp->if_drv_flags |= IFF_DRV_RUNNING;
1169	ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
1170
1171	callout_reset(&adapter->timer, hz, igb_local_timer, adapter);
1172	e1000_clear_hw_cntrs_base_generic(&adapter->hw);
1173
1174	if (adapter->msix > 1) /* Set up queue routing */
1175		igb_configure_queues(adapter);
1176
1177	/* Set up VLAN tag offload and filter */
1178	igb_setup_vlan_hw_support(adapter);
1179
1180	/* this clears any pending interrupts */
1181	E1000_READ_REG(&adapter->hw, E1000_ICR);
1182	igb_enable_intr(adapter);
1183	E1000_WRITE_REG(&adapter->hw, E1000_ICS, E1000_ICS_LSC);
1184
1185	/* Don't reset the phy next time init gets called */
1186	adapter->hw.phy.reset_disable = TRUE;
1187}
1188
1189static void
1190igb_init(void *arg)
1191{
1192	struct adapter *adapter = arg;
1193
1194	IGB_CORE_LOCK(adapter);
1195	igb_init_locked(adapter);
1196	IGB_CORE_UNLOCK(adapter);
1197}
1198
1199
1200static void
1201igb_handle_rxtx(void *context, int pending)
1202{
1203	struct adapter	*adapter = context;
1204	struct tx_ring	*txr = adapter->tx_rings;
1205	struct rx_ring	*rxr = adapter->rx_rings;
1206	struct ifnet	*ifp;
1207
1208	ifp = adapter->ifp;
1209
1210	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1211		if (igb_rxeof(rxr, adapter->rx_process_limit))
1212			taskqueue_enqueue(adapter->tq, &adapter->rxtx_task);
1213		IGB_TX_LOCK(txr);
1214		igb_txeof(txr);
1215
1216#if __FreeBSD_version >= 800000
1217		if (!drbr_empty(ifp, txr->br))
1218			igb_mq_start_locked(ifp, txr, NULL);
1219#else
1220		if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1221			igb_start_locked(txr, ifp);
1222#endif
1223		IGB_TX_UNLOCK(txr);
1224	}
1225
1226	igb_enable_intr(adapter);
1227}
1228
1229static void
1230igb_handle_rx(void *context, int pending)
1231{
1232	struct rx_ring  *rxr = context;
1233	struct adapter  *adapter = rxr->adapter;
1234	u32		loop = IGB_MAX_LOOP;
1235	bool		more;
1236
1237	do {
1238		more = igb_rxeof(rxr, -1);
1239	} while (loop-- && more);
1240
1241	/* Reenable this interrupt */
1242	E1000_WRITE_REG(&adapter->hw, E1000_EIMS, rxr->eims);
1243}
1244
1245static void
1246igb_handle_tx(void *context, int pending)
1247{
1248	struct tx_ring  *txr = context;
1249	struct adapter  *adapter = txr->adapter;
1250	struct ifnet	*ifp = adapter->ifp;
1251	u32		loop = IGB_MAX_LOOP;
1252	bool		more;
1253
1254	IGB_TX_LOCK(txr);
1255	do {
1256		more = igb_txeof(txr);
1257	} while (loop-- && more);
1258#if __FreeBSD_version >= 800000
1259	if (!drbr_empty(ifp, txr->br))
1260		igb_mq_start_locked(ifp, txr, NULL);
1261#else
1262	if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1263		igb_start_locked(txr, ifp);
1264#endif
1265	IGB_TX_UNLOCK(txr);
1266	/* Reenable this interrupt */
1267	E1000_WRITE_REG(&adapter->hw, E1000_EIMS, txr->eims);
1268}
1269
1270
1271/*********************************************************************
1272 *
1273 *  MSI/Legacy Deferred
1274 *  Interrupt Service routine
1275 *
1276 *********************************************************************/
1277static int
1278igb_irq_fast(void *arg)
1279{
1280	struct adapter	*adapter = arg;
1281	uint32_t	reg_icr;
1282
1283
1284	reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1285
1286	/* Hot eject?  */
1287	if (reg_icr == 0xffffffff)
1288		return FILTER_STRAY;
1289
1290	/* Definitely not our interrupt.  */
1291	if (reg_icr == 0x0)
1292		return FILTER_STRAY;
1293
1294	if ((reg_icr & E1000_ICR_INT_ASSERTED) == 0)
1295		return FILTER_STRAY;
1296
1297	/*
1298	 * Mask interrupts until the taskqueue is finished running.  This is
1299	 * cheap, just assume that it is needed.  This also works around the
1300	 * MSI message reordering errata on certain systems.
1301	 */
1302	igb_disable_intr(adapter);
1303	taskqueue_enqueue(adapter->tq, &adapter->rxtx_task);
1304
1305	/* Link status change */
1306	if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
1307		adapter->hw.mac.get_link_status = 1;
1308		igb_update_link_status(adapter);
1309	}
1310
1311	if (reg_icr & E1000_ICR_RXO)
1312		adapter->rx_overruns++;
1313	return FILTER_HANDLED;
1314}
1315
1316
1317/*********************************************************************
1318 *
1319 *  MSIX TX Interrupt Service routine
1320 *
1321 **********************************************************************/
1322static void
1323igb_msix_tx(void *arg)
1324{
1325	struct tx_ring *txr = arg;
1326	struct adapter *adapter = txr->adapter;
1327	bool		more;
1328
1329	E1000_WRITE_REG(&adapter->hw, E1000_EIMC, txr->eims);
1330
1331	IGB_TX_LOCK(txr);
1332	++txr->tx_irq;
1333	more = igb_txeof(txr);
1334	IGB_TX_UNLOCK(txr);
1335
1336	/* Schedule a clean task if needed */
1337	if (more)
1338		taskqueue_enqueue(txr->tq, &txr->tx_task);
1339	else
1340		/* Reenable this interrupt */
1341		E1000_WRITE_REG(&adapter->hw, E1000_EIMS, txr->eims);
1342	return;
1343}
1344
1345/*********************************************************************
1346 *
1347 *  MSIX RX Interrupt Service routine
1348 *
1349 **********************************************************************/
1350
1351static void
1352igb_msix_rx(void *arg)
1353{
1354	struct rx_ring *rxr = arg;
1355	struct adapter *adapter = rxr->adapter;
1356	bool		more;
1357
1358	E1000_WRITE_REG(&adapter->hw, E1000_EIMC, rxr->eims);
1359
1360	++rxr->rx_irq;
1361	more = igb_rxeof(rxr, adapter->rx_process_limit);
1362
1363	/* Update interrupt rate */
1364	if (igb_enable_aim == TRUE)
1365		igb_update_aim(rxr);
1366
1367	/* Schedule another clean */
1368	if (more)
1369		taskqueue_enqueue(rxr->tq, &rxr->rx_task);
1370	else
1371		/* Reenable this interrupt */
1372		E1000_WRITE_REG(&adapter->hw, E1000_EIMS, rxr->eims);
1373	return;
1374}
1375
1376
1377/*********************************************************************
1378 *
1379 *  MSIX Link Interrupt Service routine
1380 *
1381 **********************************************************************/
1382
1383static void
1384igb_msix_link(void *arg)
1385{
1386	struct adapter	*adapter = arg;
1387	u32       	icr;
1388
1389	++adapter->link_irq;
1390	icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1391	if (!(icr & E1000_ICR_LSC))
1392		goto spurious;
1393	adapter->hw.mac.get_link_status = 1;
1394	igb_update_link_status(adapter);
1395
1396spurious:
1397	/* Rearm */
1398	E1000_WRITE_REG(&adapter->hw, E1000_IMS, E1000_IMS_LSC);
1399	E1000_WRITE_REG(&adapter->hw, E1000_EIMS, adapter->link_mask);
1400	return;
1401}
1402
1403
1404/*
1405** Routine to adjust the RX EITR value based on traffic,
1406** it's a simple three-state model, but it seems to help.
1407**
1408** Note that the three EITR values are tuneable using
1409** sysctl in real time. The feature can be effectively
1410** nullified by setting them equal.
1411*/
1412#define BULK_THRESHOLD	10000
1413#define AVE_THRESHOLD	1600
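/*
** These thresholds are compared against rxr->bytes, the byte count the
** ring has accumulated since it was last cleared at the end of
** igb_update_aim() below.
*/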
1414
1415static void
1416igb_update_aim(struct rx_ring *rxr)
1417{
1418	struct adapter	*adapter = rxr->adapter;
1419	u32		olditr, newitr;
1420
1421	/* Update interrupt moderation based on traffic */
1422	olditr = rxr->eitr_setting;
1423	newitr = olditr;
1424
1425	/* Idle, don't change setting */
1426	if (rxr->bytes == 0)
1427		return;
1428
1429	if (olditr == igb_low_latency) {
1430		if (rxr->bytes > AVE_THRESHOLD)
1431			newitr = igb_ave_latency;
1432	} else if (olditr == igb_ave_latency) {
1433		if (rxr->bytes < AVE_THRESHOLD)
1434			newitr = igb_low_latency;
1435		else if (rxr->bytes > BULK_THRESHOLD)
1436			newitr = igb_bulk_latency;
1437	} else if (olditr == igb_bulk_latency) {
1438		if (rxr->bytes < BULK_THRESHOLD)
1439			newitr = igb_ave_latency;
1440	}
1441
1442	if (olditr != newitr) {
1443		/* Change interrupt rate */
1444		rxr->eitr_setting = newitr;
1445		E1000_WRITE_REG(&adapter->hw, E1000_EITR(rxr->me),
1446		    newitr | (newitr << 16));
1447	}
1448
1449	rxr->bytes = 0;
1450        return;
1451}
1452
1453
1454/*********************************************************************
1455 *
1456 *  Media Ioctl callback
1457 *
1458 *  This routine is called whenever the user queries the status of
1459 *  the interface using ifconfig.
1460 *
1461 **********************************************************************/
1462static void
1463igb_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
1464{
1465	struct adapter *adapter = ifp->if_softc;
1466	u_char fiber_type = IFM_1000_SX;
1467
1468	INIT_DEBUGOUT("igb_media_status: begin");
1469
1470	IGB_CORE_LOCK(adapter);
1471	igb_update_link_status(adapter);
1472
1473	ifmr->ifm_status = IFM_AVALID;
1474	ifmr->ifm_active = IFM_ETHER;
1475
1476	if (!adapter->link_active) {
1477		IGB_CORE_UNLOCK(adapter);
1478		return;
1479	}
1480
1481	ifmr->ifm_status |= IFM_ACTIVE;
1482
1483	if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
1484	    (adapter->hw.phy.media_type == e1000_media_type_internal_serdes))
1485		ifmr->ifm_active |= fiber_type | IFM_FDX;
1486	else {
1487		switch (adapter->link_speed) {
1488		case 10:
1489			ifmr->ifm_active |= IFM_10_T;
1490			break;
1491		case 100:
1492			ifmr->ifm_active |= IFM_100_TX;
1493			break;
1494		case 1000:
1495			ifmr->ifm_active |= IFM_1000_T;
1496			break;
1497		}
1498		if (adapter->link_duplex == FULL_DUPLEX)
1499			ifmr->ifm_active |= IFM_FDX;
1500		else
1501			ifmr->ifm_active |= IFM_HDX;
1502	}
1503	IGB_CORE_UNLOCK(adapter);
1504}
1505
1506/*********************************************************************
1507 *
1508 *  Media Ioctl callback
1509 *
1510 *  This routine is called when the user changes speed/duplex using
1511 *  media/mediaopt options with ifconfig.
1512 *
1513 **********************************************************************/
1514static int
1515igb_media_change(struct ifnet *ifp)
1516{
1517	struct adapter *adapter = ifp->if_softc;
1518	struct ifmedia  *ifm = &adapter->media;
1519
1520	INIT_DEBUGOUT("igb_media_change: begin");
1521
1522	if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
1523		return (EINVAL);
1524
1525	IGB_CORE_LOCK(adapter);
1526	switch (IFM_SUBTYPE(ifm->ifm_media)) {
1527	case IFM_AUTO:
1528		adapter->hw.mac.autoneg = DO_AUTO_NEG;
1529		adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
1530		break;
1531	case IFM_1000_LX:
1532	case IFM_1000_SX:
1533	case IFM_1000_T:
1534		adapter->hw.mac.autoneg = DO_AUTO_NEG;
1535		adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
1536		break;
1537	case IFM_100_TX:
1538		adapter->hw.mac.autoneg = FALSE;
1539		adapter->hw.phy.autoneg_advertised = 0;
1540		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1541			adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_FULL;
1542		else
1543			adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_HALF;
1544		break;
1545	case IFM_10_T:
1546		adapter->hw.mac.autoneg = FALSE;
1547		adapter->hw.phy.autoneg_advertised = 0;
1548		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1549			adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_FULL;
1550		else
1551			adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_HALF;
1552		break;
1553	default:
1554		device_printf(adapter->dev, "Unsupported media type\n");
1555	}
1556
1557	/* As the speed/duplex settings may have changed we need to
1558	 * reset the PHY.
1559	 */
1560	adapter->hw.phy.reset_disable = FALSE;
1561
1562	igb_init_locked(adapter);
1563	IGB_CORE_UNLOCK(adapter);
1564
1565	return (0);
1566}
1567
1568
1569/*********************************************************************
1570 *
1571 *  This routine maps the mbufs to Advanced TX descriptors,
1572 *  as used by the 82575 adapter.
1573 *
1574 **********************************************************************/
1575
1576static int
1577igb_xmit(struct tx_ring *txr, struct mbuf **m_headp)
1578{
1579	struct adapter		*adapter = txr->adapter;
1580	bus_dma_segment_t	segs[IGB_MAX_SCATTER];
1581	bus_dmamap_t		map;
1582	struct igb_tx_buffer	*tx_buffer, *tx_buffer_mapped;
1583	union e1000_adv_tx_desc	*txd = NULL;
1584	struct mbuf		*m_head;
1585	u32			olinfo_status = 0, cmd_type_len = 0;
1586	int			nsegs, i, j, error, first, last = 0;
1587	u32			hdrlen = 0;
1588
1589	m_head = *m_headp;
1590
1591
1592	/* Set basic descriptor constants */
1593	cmd_type_len |= E1000_ADVTXD_DTYP_DATA;
1594	cmd_type_len |= E1000_ADVTXD_DCMD_IFCS | E1000_ADVTXD_DCMD_DEXT;
1595	if (m_head->m_flags & M_VLANTAG)
1596		cmd_type_len |= E1000_ADVTXD_DCMD_VLE;
1597
1598        /*
1599         * Force a cleanup if the number of TX descriptors
1600         * available hits the threshold
1601         */
1602	if (txr->tx_avail <= IGB_TX_CLEANUP_THRESHOLD) {
1603		igb_txeof(txr);
1604		/* Now do we at least have a minimal number? */
1605		if (txr->tx_avail <= IGB_TX_OP_THRESHOLD) {
1606			txr->no_desc_avail++;
1607			return (ENOBUFS);
1608		}
1609	}
1610
1611	/*
1612         * Map the packet for DMA.
1613	 *
1614	 * Capture the first descriptor index,
1615	 * this descriptor will have the index
1616	 * of the EOP which is the only one that
1617	 * now gets a DONE bit writeback.
1618	 */
1619	first = txr->next_avail_desc;
1620	tx_buffer = &txr->tx_buffers[first];
1621	tx_buffer_mapped = tx_buffer;
1622	map = tx_buffer->map;
1623
1624	error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
1625	    *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
1626
1627	if (error == EFBIG) {
1628		struct mbuf *m;
1629
1630		m = m_defrag(*m_headp, M_DONTWAIT);
1631		if (m == NULL) {
1632			adapter->mbuf_defrag_failed++;
1633			m_freem(*m_headp);
1634			*m_headp = NULL;
1635			return (ENOBUFS);
1636		}
1637		*m_headp = m;
1638
1639		/* Try it again */
1640		error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
1641		    *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
1642
1643		if (error == ENOMEM) {
1644			adapter->no_tx_dma_setup++;
1645			return (error);
1646		} else if (error != 0) {
1647			adapter->no_tx_dma_setup++;
1648			m_freem(*m_headp);
1649			*m_headp = NULL;
1650			return (error);
1651		}
1652	} else if (error == ENOMEM) {
1653		adapter->no_tx_dma_setup++;
1654		return (error);
1655	} else if (error != 0) {
1656		adapter->no_tx_dma_setup++;
1657		m_freem(*m_headp);
1658		*m_headp = NULL;
1659		return (error);
1660	}
1661
1662	/* Check again to be sure we have enough descriptors */
1663        if (nsegs > (txr->tx_avail - 2)) {
1664                txr->no_desc_avail++;
1665		bus_dmamap_unload(txr->txtag, map);
1666		return (ENOBUFS);
1667        }
1668	m_head = *m_headp;
1669
1670        /*
1671         * Set up the context descriptor:
1672         * used when any hardware offload is done.
1673	 * This includes CSUM, VLAN, and TSO. It
1674	 * will use the first descriptor.
1675         */
1676        if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
1677		if (igb_tso_setup(txr, m_head, &hdrlen)) {
1678			cmd_type_len |= E1000_ADVTXD_DCMD_TSE;
1679			olinfo_status |= E1000_TXD_POPTS_IXSM << 8;
1680			olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
1681		} else
1682			return (ENXIO);
1683	} else if (igb_tx_ctx_setup(txr, m_head))
1684		olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
1685
1686#ifdef IGB_IEEE1588
1687	/* This is changing soon to an mtag detection; when the mbuf
1688	** is found to carry a TSTAMP mtag this code should set:
1689	**	cmd_type_len |= E1000_ADVTXD_MAC_TSTAMP; */
1690#endif
1691	/* Calculate payload length */
1692	olinfo_status |= ((m_head->m_pkthdr.len - hdrlen)
1693	    << E1000_ADVTXD_PAYLEN_SHIFT);
1694
1695	/* 82575 needs the queue index added */
1696	if (adapter->hw.mac.type == e1000_82575)
1697		olinfo_status |= txr->me << 4;
1698
1699	/* Set up our transmit descriptors */
1700	i = txr->next_avail_desc;
1701	for (j = 0; j < nsegs; j++) {
1702		bus_size_t seg_len;
1703		bus_addr_t seg_addr;
1704
1705		tx_buffer = &txr->tx_buffers[i];
1706		txd = (union e1000_adv_tx_desc *)&txr->tx_base[i];
1707		seg_addr = segs[j].ds_addr;
1708		seg_len  = segs[j].ds_len;
1709
1710		txd->read.buffer_addr = htole64(seg_addr);
1711		txd->read.cmd_type_len = htole32(
1712		    adapter->txd_cmd | cmd_type_len | seg_len);
1713		txd->read.olinfo_status = htole32(olinfo_status);
1714		last = i;
1715		if (++i == adapter->num_tx_desc)
1716			i = 0;
1717		tx_buffer->m_head = NULL;
1718		tx_buffer->next_eop = -1;
1719	}
1720
1721	txr->next_avail_desc = i;
1722	txr->tx_avail -= nsegs;
1723
1724        tx_buffer->m_head = m_head;
1725	tx_buffer_mapped->map = tx_buffer->map;
1726	tx_buffer->map = map;
1727        bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);
1728
1729        /*
1730         * Last Descriptor of Packet
1731	 * needs End Of Packet (EOP)
1732	 * and Report Status (RS)
1733         */
1734        txd->read.cmd_type_len |=
1735	    htole32(E1000_TXD_CMD_EOP | E1000_TXD_CMD_RS);
1736	/*
1737	 * Keep track, in the first buffer, of which
1738	 * descriptor will be written back
1739	 */
1740	tx_buffer = &txr->tx_buffers[first];
1741	tx_buffer->next_eop = last;
1742
1743	/*
1744	 * Advance the Transmit Descriptor Tail (TDT), this tells the E1000
1745	 * that this frame is available to transmit.
1746	 */
1747	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
1748	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1749	E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), i);
1750	++txr->tx_packets;
1751
1752	return (0);
1753
1754}
1755
1756static void
1757igb_set_promisc(struct adapter *adapter)
1758{
1759	struct ifnet	*ifp = adapter->ifp;
1760	uint32_t	reg_rctl;
1761
1762	reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
1763
1764	if (ifp->if_flags & IFF_PROMISC) {
1765		reg_rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
1766		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
1767	} else if (ifp->if_flags & IFF_ALLMULTI) {
1768		reg_rctl |= E1000_RCTL_MPE;
1769		reg_rctl &= ~E1000_RCTL_UPE;
1770		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
1771	}
1772}
1773
1774static void
1775igb_disable_promisc(struct adapter *adapter)
1776{
1777	uint32_t	reg_rctl;
1778
1779	reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
1780
1781	reg_rctl &=  (~E1000_RCTL_UPE);
1782	reg_rctl &=  (~E1000_RCTL_MPE);
1783	E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
1784}
1785
1786
1787/*********************************************************************
1788 *  Multicast Update
1789 *
1790 *  This routine is called whenever the multicast address list is updated.
1791 *
1792 **********************************************************************/
1793
1794static void
1795igb_set_multi(struct adapter *adapter)
1796{
1797	struct ifnet	*ifp = adapter->ifp;
1798	struct ifmultiaddr *ifma;
1799	u32 reg_rctl = 0;
1800	u8  mta[MAX_NUM_MULTICAST_ADDRESSES * ETH_ADDR_LEN];
1801
1802	int mcnt = 0;
1803
1804	IOCTL_DEBUGOUT("igb_set_multi: begin");
1805
1806#if __FreeBSD_version < 800000
1807	IF_ADDR_LOCK(ifp);
1808#else
1809	if_maddr_rlock(ifp);
1810#endif
1811	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
1812		if (ifma->ifma_addr->sa_family != AF_LINK)
1813			continue;
1814
1815		if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
1816			break;
1817
1818		bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
1819		    &mta[mcnt * ETH_ADDR_LEN], ETH_ADDR_LEN);
1820		mcnt++;
1821	}
1822#if __FreeBSD_version < 800000
1823	IF_ADDR_UNLOCK(ifp);
1824#else
1825	if_maddr_runlock(ifp);
1826#endif
1827	if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) {
1828		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
1829		reg_rctl |= E1000_RCTL_MPE;
1830		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
1831	} else
1832		e1000_update_mc_addr_list(&adapter->hw, mta, mcnt);
1833}
1834
1835
1836/*********************************************************************
1837 *  Timer routine:
1838 *  	This routine checks for link status,
1839 *	updates statistics, and does the watchdog.
1840 *
1841 **********************************************************************/
1842
1843static void
1844igb_local_timer(void *arg)
1845{
1846	struct adapter		*adapter = arg;
1847	struct ifnet		*ifp = adapter->ifp;
1848	device_t		dev = adapter->dev;
1849	struct tx_ring		*txr = adapter->tx_rings;
1850
1851
1852	IGB_CORE_LOCK_ASSERT(adapter);
1853
1854	igb_update_link_status(adapter);
1855	igb_update_stats_counters(adapter);
1856
1857	if (igb_display_debug_stats && (ifp->if_drv_flags & IFF_DRV_RUNNING))
1858		igb_print_hw_stats(adapter);
1859
1860        /*
1861        ** Watchdog: check for time since any descriptor was cleaned
1862        */
1863	for (int i = 0; i < adapter->num_queues; i++, txr++) {
1864		if (txr->watchdog_check == FALSE)
1865			continue;
1866		if ((ticks - txr->watchdog_time) > IGB_WATCHDOG)
1867			goto timeout;
1868	}
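	/*
	** Illustrative timing note (assumption, not from the original
	** source): "ticks" advances at hz per second, so the check above
	** fires roughly IGB_WATCHDOG/hz seconds after the last descriptor
	** was cleaned; e.g. if IGB_WATCHDOG were defined as (10 * hz) in
	** if_igb.h, a queue would have to stall for about 10 seconds.
	** The actual value of IGB_WATCHDOG lives in the header; the figure
	** here is only an assumed example.
	*/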
1869
1870	/* Trigger an RX interrupt on all queues */
1871	E1000_WRITE_REG(&adapter->hw, E1000_EICS, adapter->rx_mask);
1872	callout_reset(&adapter->timer, hz, igb_local_timer, adapter);
1873	return;
1874
1875timeout:
1876	device_printf(adapter->dev, "Watchdog timeout -- resetting\n");
1877	device_printf(dev,"Queue(%d) tdh = %d, hw tdt = %d\n", txr->me,
1878            E1000_READ_REG(&adapter->hw, E1000_TDH(txr->me)),
1879            E1000_READ_REG(&adapter->hw, E1000_TDT(txr->me)));
1880	device_printf(dev, "TX(%d) desc avail = %d, "
1881            "Next TX to Clean = %d\n",
1882            txr->me, txr->tx_avail, txr->next_to_clean);
1883	adapter->ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
1884	adapter->watchdog_events++;
1885	igb_init_locked(adapter);
1886}
1887
1888static void
1889igb_update_link_status(struct adapter *adapter)
1890{
1891	struct e1000_hw *hw = &adapter->hw;
1892	struct ifnet *ifp = adapter->ifp;
1893	device_t dev = adapter->dev;
1894	struct tx_ring *txr = adapter->tx_rings;
1895	u32 link_check = 0;
1896
1897	/* Get the cached link value or read for real */
1898        switch (hw->phy.media_type) {
1899        case e1000_media_type_copper:
1900                if (hw->mac.get_link_status) {
1901			/* Do the work to read phy */
1902                        e1000_check_for_link(hw);
1903                        link_check = !hw->mac.get_link_status;
1904                } else
1905                        link_check = TRUE;
1906                break;
1907        case e1000_media_type_fiber:
1908                e1000_check_for_link(hw);
1909                link_check = (E1000_READ_REG(hw, E1000_STATUS) &
1910                                 E1000_STATUS_LU);
1911                break;
1912        case e1000_media_type_internal_serdes:
1913                e1000_check_for_link(hw);
1914                link_check = adapter->hw.mac.serdes_has_link;
1915                break;
1916        default:
1917        case e1000_media_type_unknown:
1918                break;
1919        }
1920
1921	/* Now we check if a transition has happened */
1922	if (link_check && (adapter->link_active == 0)) {
1923		e1000_get_speed_and_duplex(&adapter->hw,
1924		    &adapter->link_speed, &adapter->link_duplex);
1925		if (bootverbose)
1926			device_printf(dev, "Link is up %d Mbps %s\n",
1927			    adapter->link_speed,
1928			    ((adapter->link_duplex == FULL_DUPLEX) ?
1929			    "Full Duplex" : "Half Duplex"));
1930		adapter->link_active = 1;
1931		ifp->if_baudrate = adapter->link_speed * 1000000;
1932		if_link_state_change(ifp, LINK_STATE_UP);
1933	} else if (!link_check && (adapter->link_active == 1)) {
1934		ifp->if_baudrate = adapter->link_speed = 0;
1935		adapter->link_duplex = 0;
1936		if (bootverbose)
1937			device_printf(dev, "Link is Down\n");
1938		adapter->link_active = 0;
1939		if_link_state_change(ifp, LINK_STATE_DOWN);
1940		/* Turn off watchdogs */
1941		for (int i = 0; i < adapter->num_queues; i++, txr++)
1942			txr->watchdog_check = FALSE;
1943	}
1944}
1945
1946/*********************************************************************
1947 *
1948 *  This routine disables all traffic on the adapter by issuing a
1949 *  global reset on the MAC and deallocates TX/RX buffers.
1950 *
1951 **********************************************************************/
1952
1953static void
1954igb_stop(void *arg)
1955{
1956	struct adapter	*adapter = arg;
1957	struct ifnet	*ifp = adapter->ifp;
1958
1959	IGB_CORE_LOCK_ASSERT(adapter);
1960
1961	INIT_DEBUGOUT("igb_stop: begin");
1962
1963	igb_disable_intr(adapter);
1964
1965	callout_stop(&adapter->timer);
1966
1967	/* Tell the stack that the interface is no longer active */
1968	ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
1969
1970	e1000_reset_hw(&adapter->hw);
1971	E1000_WRITE_REG(&adapter->hw, E1000_WUC, 0);
1972}
1973
1974
1975/*********************************************************************
1976 *
1977 *  Determine hardware revision.
1978 *
1979 **********************************************************************/
1980static void
1981igb_identify_hardware(struct adapter *adapter)
1982{
1983	device_t dev = adapter->dev;
1984
1985	/* Make sure our PCI config space has the necessary stuff set */
1986	adapter->hw.bus.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2);
1987	if (!((adapter->hw.bus.pci_cmd_word & PCIM_CMD_BUSMASTEREN) &&
1988	    (adapter->hw.bus.pci_cmd_word & PCIM_CMD_MEMEN))) {
1989		device_printf(dev, "Memory Access and/or Bus Master bits "
1990		    "were not set!\n");
1991		adapter->hw.bus.pci_cmd_word |=
1992		(PCIM_CMD_BUSMASTEREN | PCIM_CMD_MEMEN);
1993		pci_write_config(dev, PCIR_COMMAND,
1994		    adapter->hw.bus.pci_cmd_word, 2);
1995	}
1996
1997	/* Save off the information about this board */
1998	adapter->hw.vendor_id = pci_get_vendor(dev);
1999	adapter->hw.device_id = pci_get_device(dev);
2000	adapter->hw.revision_id = pci_read_config(dev, PCIR_REVID, 1);
2001	adapter->hw.subsystem_vendor_id =
2002	    pci_read_config(dev, PCIR_SUBVEND_0, 2);
2003	adapter->hw.subsystem_device_id =
2004	    pci_read_config(dev, PCIR_SUBDEV_0, 2);
2005
2006	/* Do Shared Code Init and Setup */
2007	if (e1000_set_mac_type(&adapter->hw)) {
2008		device_printf(dev, "Setup init failure\n");
2009		return;
2010	}
2011}
2012
2013static int
2014igb_allocate_pci_resources(struct adapter *adapter)
2015{
2016	device_t	dev = adapter->dev;
2017	int		rid;
2018
2019	rid = PCIR_BAR(0);
2020	adapter->pci_mem = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
2021	    &rid, RF_ACTIVE);
2022	if (adapter->pci_mem == NULL) {
2023		device_printf(dev, "Unable to allocate bus resource: memory\n");
2024		return (ENXIO);
2025	}
2026	adapter->osdep.mem_bus_space_tag =
2027	    rman_get_bustag(adapter->pci_mem);
2028	adapter->osdep.mem_bus_space_handle =
2029	    rman_get_bushandle(adapter->pci_mem);
2030	adapter->hw.hw_addr = (u8 *)&adapter->osdep.mem_bus_space_handle;
2031
2032	adapter->num_queues = 1; /* Defaults for Legacy or MSI */
2033
2034	/* This will setup either MSI/X or MSI */
2035	adapter->msix = igb_setup_msix(adapter);
2036	adapter->hw.back = &adapter->osdep;
2037
2038	return (0);
2039}
2040
2041/*********************************************************************
2042 *
2043 *  Setup the Legacy or MSI Interrupt handler
2044 *
2045 **********************************************************************/
2046static int
2047igb_allocate_legacy(struct adapter *adapter)
2048{
2049	device_t dev = adapter->dev;
2050	int error, rid = 0;
2051
2052	/* Turn off all interrupts */
2053	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2054
2055	/* MSI RID is 1 */
2056	if (adapter->msix == 1)
2057		rid = 1;
2058
2059	/* We allocate a single interrupt resource */
2060	adapter->res = bus_alloc_resource_any(dev,
2061	    SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2062	if (adapter->res == NULL) {
2063		device_printf(dev, "Unable to allocate bus resource: "
2064		    "interrupt\n");
2065		return (ENXIO);
2066	}
2067
2068	/*
2069	 * Try allocating a fast interrupt and the associated deferred
2070	 * processing contexts.
2071	 */
2072	TASK_INIT(&adapter->rxtx_task, 0, igb_handle_rxtx, adapter);
2073	adapter->tq = taskqueue_create_fast("igb_taskq", M_NOWAIT,
2074	    taskqueue_thread_enqueue, &adapter->tq);
2075	taskqueue_start_threads(&adapter->tq, 1, PI_NET, "%s taskq",
2076	    device_get_nameunit(adapter->dev));
2077	if ((error = bus_setup_intr(dev, adapter->res,
2078	    INTR_TYPE_NET | INTR_MPSAFE, igb_irq_fast, NULL,
2079	    adapter, &adapter->tag)) != 0) {
2080		device_printf(dev, "Failed to register fast interrupt "
2081			    "handler: %d\n", error);
2082		taskqueue_free(adapter->tq);
2083		adapter->tq = NULL;
2084		return (error);
2085	}
2086
2087	return (0);
2088}
2089
2090
2091/*********************************************************************
2092 *
2093 *  Setup the MSIX Interrupt handlers:
2094 *
2095 **********************************************************************/
2096static int
2097igb_allocate_msix(struct adapter *adapter)
2098{
2099	device_t dev = adapter->dev;
2100	struct tx_ring *txr = adapter->tx_rings;
2101	struct rx_ring *rxr = adapter->rx_rings;
2102	int error, rid, vector = 0;
2103
2104	/*
2105	 * Setup the interrupt handlers
2106	 */
2107
2108	/* TX Setup */
2109	for (int i = 0; i < adapter->num_queues; i++, vector++, txr++) {
2110		rid = vector +1;
2111		txr->res = bus_alloc_resource_any(dev,
2112		    SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2113		if (txr->res == NULL) {
2114			device_printf(dev,
2115			    "Unable to allocate bus resource: "
2116			    "MSIX TX Interrupt\n");
2117			return (ENXIO);
2118		}
2119		error = bus_setup_intr(dev, txr->res,
2120	    	    INTR_TYPE_NET | INTR_MPSAFE, NULL,
2121		    igb_msix_tx, txr, &txr->tag);
2122		if (error) {
2123			txr->res = NULL;
2124			device_printf(dev, "Failed to register TX handler");
2125			return (error);
2126		}
2127		txr->msix = vector;
2128		if (adapter->hw.mac.type == e1000_82575)
2129			txr->eims = E1000_EICR_TX_QUEUE0 << i;
2130		else
2131			txr->eims = 1 << vector;
2132		/*
2133		** Bind the msix vector, and thus the
2134		** ring to the corresponding cpu.
2135		*/
2136		if (adapter->num_queues > 1)
2137			bus_bind_intr(dev, txr->res, i);
2138		/* Make tasklet for deferred handling - one per queue */
2139		TASK_INIT(&txr->tx_task, 0, igb_handle_tx, txr);
2140		txr->tq = taskqueue_create_fast("igb_txq", M_NOWAIT,
2141		    taskqueue_thread_enqueue, &txr->tq);
2142		taskqueue_start_threads(&txr->tq, 1, PI_NET, "%s txq",
2143		    device_get_nameunit(adapter->dev));
2144	}
2145
2146	/* RX Setup */
2147	for (int i = 0; i < adapter->num_queues; i++, vector++, rxr++) {
2148		rid = vector +1;
2149		rxr->res = bus_alloc_resource_any(dev,
2150		    SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2151		if (rxr->res == NULL) {
2152			device_printf(dev,
2153			    "Unable to allocate bus resource: "
2154			    "MSIX RX Interrupt\n");
2155			return (ENXIO);
2156		}
2157		error = bus_setup_intr(dev, rxr->res,
2158	    	    INTR_TYPE_NET | INTR_MPSAFE, NULL,
2159		    igb_msix_rx, rxr, &rxr->tag);
2160		if (error) {
2161			rxr->res = NULL;
2162			device_printf(dev, "Failed to register RX handler");
2163			return (error);
2164		}
2165		rxr->msix = vector;
2166		if (adapter->hw.mac.type == e1000_82575)
2167			rxr->eims = E1000_EICR_RX_QUEUE0 << i;
2168		else
2169			rxr->eims = 1 << vector;
2170		/* Get a mask for local timer */
2171		adapter->rx_mask |= rxr->eims;
2172		/*
2173		** Bind the msix vector, and thus the
2174		** ring to the corresponding cpu.
2175		** Notice that this makes an RX/TX pair
2176		** bound to each CPU, limited by the MSIX
2177		** vectors.
2178		*/
2179		if (adapter->num_queues > 1)
2180			bus_bind_intr(dev, rxr->res, i);
2181
2182		/* Make tasklet for deferred handling - one per queue */
2183		TASK_INIT(&rxr->rx_task, 0, igb_handle_rx, rxr);
2184		rxr->tq = taskqueue_create_fast("igb_rxq", M_NOWAIT,
2185		    taskqueue_thread_enqueue, &rxr->tq);
2186		taskqueue_start_threads(&rxr->tq, 1, PI_NET, "%s rxq",
2187		    device_get_nameunit(adapter->dev));
2188	}
2189
2190	/* And Link */
2191	rid = vector +1;
2192	adapter->res = bus_alloc_resource_any(dev,
2193	    SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2194	if (adapter->res == NULL) {
2195		device_printf(dev,
2196		    "Unable to allocate bus resource: "
2197		    "MSIX Link Interrupt\n");
2198		return (ENXIO);
2199	}
2200	if ((error = bus_setup_intr(dev, adapter->res,
2201	    INTR_TYPE_NET | INTR_MPSAFE, NULL,
2202	    igb_msix_link, adapter, &adapter->tag)) != 0) {
2203		device_printf(dev, "Failed to register Link handler");
2204		return (error);
2205	}
2206	adapter->linkvec = vector;
2207
2208	return (0);
2209}
2210
2211
2212static void
2213igb_configure_queues(struct adapter *adapter)
2214{
2215	struct	e1000_hw *hw = &adapter->hw;
2216	struct	tx_ring	*txr;
2217	struct	rx_ring	*rxr;
2218	u32	tmp, ivar = 0;
2219
2220	/* First turn on RSS capability */
2221	if (adapter->hw.mac.type > e1000_82575)
2222		E1000_WRITE_REG(hw, E1000_GPIE,
2223		    E1000_GPIE_MSIX_MODE | E1000_GPIE_EIAME |
2224		    E1000_GPIE_PBA | E1000_GPIE_NSICR);
2225
2226	/* Turn on MSIX */
2227	switch (adapter->hw.mac.type) {
2228	case e1000_82580:
2229		/* RX */
2230		for (int i = 0; i < adapter->num_queues; i++) {
2231			u32 index = i >> 1;
2232			ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2233			rxr = &adapter->rx_rings[i];
2234			if (i & 1) {
2235				ivar &= 0xFF00FFFF;
2236				ivar |= (rxr->msix | E1000_IVAR_VALID) << 16;
2237			} else {
2238				ivar &= 0xFFFFFF00;
2239				ivar |= rxr->msix | E1000_IVAR_VALID;
2240			}
2241			E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2242			adapter->eims_mask |= rxr->eims;
2243		}
2244		/* TX */
2245		for (int i = 0; i < adapter->num_queues; i++) {
2246			u32 index = i >> 1;
2247			ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2248			txr = &adapter->tx_rings[i];
2249			if (i & 1) {
2250				ivar &= 0x00FFFFFF;
2251				ivar |= (txr->msix | E1000_IVAR_VALID) << 24;
2252			} else {
2253				ivar &= 0xFFFF00FF;
2254				ivar |= (txr->msix | E1000_IVAR_VALID) << 8;
2255			}
2256			E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2257			adapter->eims_mask |= txr->eims;
2258		}
2259
2260		/* And for the link interrupt */
2261		ivar = (adapter->linkvec | E1000_IVAR_VALID) << 8;
2262		adapter->link_mask = 1 << adapter->linkvec;
2263		adapter->eims_mask |= adapter->link_mask;
2264		E1000_WRITE_REG(hw, E1000_IVAR_MISC, ivar);
2265		break;
2266	case e1000_82576:
2267		/* RX */
2268		for (int i = 0; i < adapter->num_queues; i++) {
2269			u32 index = i & 0x7; /* Each IVAR has two entries */
2270			ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2271			rxr = &adapter->rx_rings[i];
2272			if (i < 8) {
2273				ivar &= 0xFFFFFF00;
2274				ivar |= rxr->msix | E1000_IVAR_VALID;
2275			} else {
2276				ivar &= 0xFF00FFFF;
2277				ivar |= (rxr->msix | E1000_IVAR_VALID) << 16;
2278			}
2279			E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2280			adapter->eims_mask |= rxr->eims;
2281		}
2282		/* TX */
2283		for (int i = 0; i < adapter->num_queues; i++) {
2284			u32 index = i & 0x7; /* Each IVAR has two entries */
2285			ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2286			txr = &adapter->tx_rings[i];
2287			if (i < 8) {
2288				ivar &= 0xFFFF00FF;
2289				ivar |= (txr->msix | E1000_IVAR_VALID) << 8;
2290			} else {
2291				ivar &= 0x00FFFFFF;
2292				ivar |= (txr->msix | E1000_IVAR_VALID) << 24;
2293			}
2294			E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2295			adapter->eims_mask |= txr->eims;
2296		}
2297
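		/*
		** Sketch of the 82576 IVAR layout implied by the masks and
		** shifts above (derived from this code, not authoritative
		** datasheet text): each 32-bit IVAR register holds four
		** 8-bit entries, so IVAR[n] ends up as
		**   byte 0: RX queue n        byte 1: TX queue n
		**   byte 2: RX queue n + 8    byte 3: TX queue n + 8
		** with each byte carrying (msix vector | E1000_IVAR_VALID).
		*/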
2298		/* And for the link interrupt */
2299		ivar = (adapter->linkvec | E1000_IVAR_VALID) << 8;
2300		adapter->link_mask = 1 << adapter->linkvec;
2301		adapter->eims_mask |= adapter->link_mask;
2302		E1000_WRITE_REG(hw, E1000_IVAR_MISC, ivar);
2303		break;
2304
2305	case e1000_82575:
2306                /* enable MSI-X support*/
2307		tmp = E1000_READ_REG(hw, E1000_CTRL_EXT);
2308                tmp |= E1000_CTRL_EXT_PBA_CLR;
2309                /* Auto-Mask interrupts upon ICR read. */
2310                tmp |= E1000_CTRL_EXT_EIAME;
2311                tmp |= E1000_CTRL_EXT_IRCA;
2312                E1000_WRITE_REG(hw, E1000_CTRL_EXT, tmp);
2313
2314		/* TX */
2315		for (int i = 0; i < adapter->num_queues; i++) {
2316			txr = &adapter->tx_rings[i];
2317			E1000_WRITE_REG(hw, E1000_MSIXBM(txr->msix),
2318			    txr->eims);
2319			adapter->eims_mask |= txr->eims;
2320		}
2321
2322		/* RX */
2323		for (int i = 0; i < adapter->num_queues; i++) {
2324			rxr = &adapter->rx_rings[i];
2325			E1000_WRITE_REG(hw, E1000_MSIXBM(rxr->msix),
2326			    rxr->eims);
2327			adapter->eims_mask |= rxr->eims;
2328		}
2329
2330		/* Link */
2331		E1000_WRITE_REG(hw, E1000_MSIXBM(adapter->linkvec),
2332		    E1000_EIMS_OTHER);
2333		adapter->link_mask |= E1000_EIMS_OTHER;
2334		adapter->eims_mask |= adapter->link_mask;
		break;
2335	default:
2336		break;
2337	}
2338
2339	return;
2340}
2341
2342
2343static void
2344igb_free_pci_resources(struct adapter *adapter)
2345{
2346	struct          tx_ring *txr = adapter->tx_rings;
2347	struct          rx_ring *rxr = adapter->rx_rings;
2348	device_t	dev = adapter->dev;
2349	int		rid;
2350
2351	/*
2352	** There is a slight possibility of a failure mode
2353	** in attach that will result in entering this function
2354	** before interrupt resources have been initialized, and
2355	** in that case we do not want to execute the loops below.
2356	** We can detect this reliably by the state of the adapter's
2357	** res pointer.
2358	*/
2359	if (adapter->res == NULL)
2360		goto mem;
2361
2362	/*
2363	 * First release all the TX/RX interrupt resources:
2364	 */
2365	for (int i = 0; i < adapter->num_queues; i++, txr++) {
2366		rid = txr->msix + 1;
2367		if (txr->tag != NULL) {
2368			bus_teardown_intr(dev, txr->res, txr->tag);
2369			txr->tag = NULL;
2370		}
2371		if (txr->res != NULL)
2372			bus_release_resource(dev, SYS_RES_IRQ, rid, txr->res);
2373	}
2374
2375	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
2376		rid = rxr->msix + 1;
2377		if (rxr->tag != NULL) {
2378			bus_teardown_intr(dev, rxr->res, rxr->tag);
2379			rxr->tag = NULL;
2380		}
2381		if (rxr->res != NULL)
2382			bus_release_resource(dev, SYS_RES_IRQ, rid, rxr->res);
2383	}
2384
2385	/* Clean the Legacy or Link interrupt last */
2386	if (adapter->linkvec) /* we are doing MSIX */
2387		rid = adapter->linkvec + 1;
2388	else
2389		rid = (adapter->msix != 0) ? 1 : 0;
2390
2391	if (adapter->tag != NULL) {
2392		bus_teardown_intr(dev, adapter->res, adapter->tag);
2393		adapter->tag = NULL;
2394	}
2395	if (adapter->res != NULL)
2396		bus_release_resource(dev, SYS_RES_IRQ, rid, adapter->res);
2397
2398mem:
2399	if (adapter->msix)
2400		pci_release_msi(dev);
2401
2402	if (adapter->msix_mem != NULL)
2403		bus_release_resource(dev, SYS_RES_MEMORY,
2404		    PCIR_BAR(IGB_MSIX_BAR), adapter->msix_mem);
2405
2406	if (adapter->pci_mem != NULL)
2407		bus_release_resource(dev, SYS_RES_MEMORY,
2408		    PCIR_BAR(0), adapter->pci_mem);
2409
2410}
2411
2412/*
2413 * Setup Either MSI/X or MSI
2414 */
2415static int
2416igb_setup_msix(struct adapter *adapter)
2417{
2418	device_t dev = adapter->dev;
2419	int rid, want, queues, msgs;
2420
2421	/* tuneable override */
2422	if (igb_enable_msix == 0)
2423		goto msi;
2424
2425	/* First try MSI/X */
2426	rid = PCIR_BAR(IGB_MSIX_BAR);
2427	adapter->msix_mem = bus_alloc_resource_any(dev,
2428	    SYS_RES_MEMORY, &rid, RF_ACTIVE);
2429       	if (!adapter->msix_mem) {
2430		/* May not be enabled */
2431		device_printf(adapter->dev,
2432		    "Unable to map MSIX table\n");
2433		goto msi;
2434	}
2435
2436	msgs = pci_msix_count(dev);
2437	if (msgs == 0) { /* system has msix disabled */
2438		bus_release_resource(dev, SYS_RES_MEMORY,
2439		    PCIR_BAR(IGB_MSIX_BAR), adapter->msix_mem);
2440		adapter->msix_mem = NULL;
2441		goto msi;
2442	}
2443
2444	/* Figure out a reasonable auto config value */
2445	queues = (mp_ncpus > ((msgs-1)/2)) ? (msgs-1)/2 : mp_ncpus;
2446
2447	if (igb_num_queues == 0)
2448		igb_num_queues = queues;
2449	/*
2450	** Two vectors (RX/TX pair) per queue
2451	** plus an additional for Link interrupt
2452	*/
2453	want = (igb_num_queues * 2) + 1;
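	/*
	** Worked example (illustrative numbers only): on a 4-core system
	** advertising 10 MSIX messages, queues = min(mp_ncpus, (10-1)/2)
	** = 4, so want = 4*2 + 1 = 9: one TX and one RX vector per queue
	** plus the link vector.
	*/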
2454	if (msgs >= want)
2455		msgs = want;
2456	else {
2457               	device_printf(adapter->dev,
2458		    "MSIX Configuration Problem, "
2459		    "%d vectors configured, but %d queues wanted!\n",
2460		    msgs, want);
2461		return (ENXIO);
2462	}
2463	if ((msgs) && pci_alloc_msix(dev, &msgs) == 0) {
2464               	device_printf(adapter->dev,
2465		    "Using MSIX interrupts with %d vectors\n", msgs);
2466		adapter->num_queues = igb_num_queues;
2467		return (msgs);
2468	}
2469msi:
2470       	msgs = pci_msi_count(dev);
2471       	if (msgs == 1 && pci_alloc_msi(dev, &msgs) == 0)
2472               	device_printf(adapter->dev,"Using MSI interrupt\n");
2473	return (msgs);
2474}
2475
2476/*********************************************************************
2477 *
2478 *  Set up a fresh starting state
2479 *
2480 **********************************************************************/
2481static void
2482igb_reset(struct adapter *adapter)
2483{
2484	device_t	dev = adapter->dev;
2485	struct e1000_hw *hw = &adapter->hw;
2486	struct e1000_fc_info *fc = &hw->fc;
2487	struct ifnet	*ifp = adapter->ifp;
2488	u32		pba = 0;
2489	u16		hwm;
2490
2491	INIT_DEBUGOUT("igb_reset: begin");
2492
2493	/* Let the firmware know the OS is in control */
2494	igb_get_hw_control(adapter);
2495
2496	/*
2497	 * Packet Buffer Allocation (PBA)
2498	 * Writing PBA sets the receive portion of the buffer;
2499	 * the remainder is used for the transmit buffer.
2500	 */
2501	switch (hw->mac.type) {
2502	case e1000_82575:
2503		pba = E1000_PBA_32K;
2504		break;
2505	case e1000_82576:
2506		pba = E1000_PBA_64K;
2507		break;
2508	case e1000_82580:
2509		pba = E1000_PBA_35K;
		break;
2510	default:
2511		break;
2512	}
2513
2514	/* Special needs in case of Jumbo frames */
2515	if ((hw->mac.type == e1000_82575) && (ifp->if_mtu > ETHERMTU)) {
2516		u32 tx_space, min_tx, min_rx;
2517		pba = E1000_READ_REG(hw, E1000_PBA);
2518		tx_space = pba >> 16;
2519		pba &= 0xffff;
2520		min_tx = (adapter->max_frame_size +
2521		    sizeof(struct e1000_tx_desc) - ETHERNET_FCS_SIZE) * 2;
2522		min_tx = roundup2(min_tx, 1024);
2523		min_tx >>= 10;
2524                min_rx = adapter->max_frame_size;
2525                min_rx = roundup2(min_rx, 1024);
2526                min_rx >>= 10;
2527		if (tx_space < min_tx &&
2528		    ((min_tx - tx_space) < pba)) {
2529			pba = pba - (min_tx - tx_space);
2530			/*
2531                         * if short on rx space, rx wins
2532                         * and must trump tx adjustment
2533			 */
2534                        if (pba < min_rx)
2535                                pba = min_rx;
2536		}
2537		E1000_WRITE_REG(hw, E1000_PBA, pba);
2538	}
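	/*
	** Illustrative arithmetic for the block above (hypothetical
	** 9018-byte max frame, not a value taken from this driver, and
	** assuming the usual 16-byte descriptor and 4-byte FCS):
	** min_tx = (9018 + 16 - 4) * 2 = 18060, rounded up to 18432
	** bytes and shifted down to 18 KB; min_rx = 9018 rounded up to
	** 9216 bytes, i.e. 9 KB. Any TX space reclaimed from the PBA is
	** capped so the RX portion never drops below min_rx.
	*/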
2539
2540	INIT_DEBUGOUT1("igb_reset: pba=%dK", pba);
2541
2542	/*
2543	 * These parameters control the automatic generation (Tx) and
2544	 * response (Rx) to Ethernet PAUSE frames.
2545	 * - High water mark should allow for at least two frames to be
2546	 *   received after sending an XOFF.
2547	 * - Low water mark works best when it is very near the high water mark.
2548	 *   This allows the receiver to restart by sending XON when it has
2549	 *   drained a bit.
2550	 */
2551	hwm = min(((pba << 10) * 9 / 10),
2552	    ((pba << 10) - 2 * adapter->max_frame_size));
2553
2554	if (hw->mac.type < e1000_82576) {
2555		fc->high_water = hwm & 0xFFF8;  /* 8-byte granularity */
2556		fc->low_water = fc->high_water - 8;
2557	} else {
2558		fc->high_water = hwm & 0xFFF0;  /* 16-byte granularity */
2559		fc->low_water = fc->high_water - 16;
2560	}
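	/*
	** Illustrative arithmetic (assumed 32 KB PBA and 1522-byte max
	** frame, not values read from hardware): hwm = min(32768 * 9/10,
	** 32768 - 2 * 1522) = min(29491, 29724) = 29491; on an 82575 the
	** high water mark then becomes 29491 & 0xFFF8 = 29488 bytes and
	** the low water mark 29480 bytes.
	*/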
2561
2562	fc->pause_time = IGB_FC_PAUSE_TIME;
2563	fc->send_xon = TRUE;
2564
2565	/* Set Flow control, use the tunable value if it is sane (0-3) */
2566	if ((igb_fc_setting >= 0) && (igb_fc_setting < 4))
2567		fc->requested_mode = igb_fc_setting;
2568	else
2569		fc->requested_mode = e1000_fc_none;
2570
2571	fc->current_mode = fc->requested_mode;
2572
2573	/* Issue a global reset */
2574	e1000_reset_hw(hw);
2575	E1000_WRITE_REG(hw, E1000_WUC, 0);
2576
2577	if (e1000_init_hw(hw) < 0)
2578		device_printf(dev, "Hardware Initialization Failed\n");
2579
2580	if (hw->mac.type == e1000_82580) {
2581		u32 reg;
2582
2583		hwm = (pba << 10) - (2 * adapter->max_frame_size);
2584		/*
2585		 * 0x80000000 - enable DMA COAL
2586		 * 0x10000000 - use L0s as low power
2587		 * 0x20000000 - use L1 as low power
2588		 * X << 16 - exit dma coal when rx data exceeds X kB
2589		 * Y - upper limit to stay in dma coal in units of 32usecs
2590		 */
2591		E1000_WRITE_REG(hw, E1000_DMACR,
2592		    0xA0000006 | ((hwm << 6) & 0x00FF0000));
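		/*
		** Decoding the constant above as a sketch, following the
		** bit legend in the preceding comment (not new register
		** documentation): 0xA0000006 = 0x80000000 (enable DMA
		** coalescing) | 0x20000000 (use L1 as low power) | 0x6
		** (upper limit of 6 * 32 usecs), while bits 23:16 carry
		** the RX-data exit threshold in kB taken from (hwm << 6).
		*/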
2593
2594		/* set hwm to PBA -  2 * max frame size */
2595		E1000_WRITE_REG(hw, E1000_FCRTC, hwm);
2596		/*
2597		 * This sets the time to wait before requesting transition to
2598		 * low power state to number of usecs needed to receive 1 512
2599		 * byte frame at gigabit line rate
2600		 */
2601		E1000_WRITE_REG(hw, E1000_DMCTLX, 4);
2602
2603		/* free space in tx packet buffer to wake from DMA coal */
2604		E1000_WRITE_REG(hw, E1000_DMCTXTH,
2605		    (20480 - (2 * adapter->max_frame_size)) >> 6);
2606
2607		/* make low power state decision controlled by DMA coal */
2608		reg = E1000_READ_REG(hw, E1000_PCIEMISC);
2609		E1000_WRITE_REG(hw, E1000_PCIEMISC,
2610		    reg | E1000_PCIEMISC_LX_DECISION);
2611	}
2612
2613	E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);
2614	e1000_get_phy_info(hw);
2615	e1000_check_for_link(hw);
2616	return;
2617}
2618
2619/*********************************************************************
2620 *
2621 *  Setup networking device structure and register an interface.
2622 *
2623 **********************************************************************/
2624static void
2625igb_setup_interface(device_t dev, struct adapter *adapter)
2626{
2627	struct ifnet   *ifp;
2628
2629	INIT_DEBUGOUT("igb_setup_interface: begin");
2630
2631	ifp = adapter->ifp = if_alloc(IFT_ETHER);
2632	if (ifp == NULL)
2633		panic("%s: can not if_alloc()", device_get_nameunit(dev));
2634	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
2635	ifp->if_mtu = ETHERMTU;
2636	ifp->if_init =  igb_init;
2637	ifp->if_softc = adapter;
2638	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
2639	ifp->if_ioctl = igb_ioctl;
2640	ifp->if_start = igb_start;
2641#if __FreeBSD_version >= 800000
2642	ifp->if_transmit = igb_mq_start;
2643	ifp->if_qflush = igb_qflush;
2644#endif
2645	IFQ_SET_MAXLEN(&ifp->if_snd, adapter->num_tx_desc - 2);
2646	ifp->if_snd.ifq_drv_maxlen = adapter->num_tx_desc - 2;
2647	IFQ_SET_READY(&ifp->if_snd);
2648
2649	ether_ifattach(ifp, adapter->hw.mac.addr);
2650
2651	ifp->if_capabilities = ifp->if_capenable = 0;
2652
2653	ifp->if_capabilities = IFCAP_HWCSUM | IFCAP_VLAN_MTU;
2654	ifp->if_capabilities |= IFCAP_TSO4;
2655	ifp->if_capabilities |= IFCAP_JUMBO_MTU;
2656	if (igb_header_split)
2657		ifp->if_capabilities |= IFCAP_LRO;
2658
2659	ifp->if_capenable = ifp->if_capabilities;
2660
2661	/*
2662	 * Tell the upper layer(s) we support long frames.
2663	 */
2664	ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);
2665	ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU;
2666	ifp->if_capenable |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU;
2667
2668	/*
2669	 * Specify the media types supported by this adapter and register
2670	 * callbacks to update media and link information
2671	 */
2672	ifmedia_init(&adapter->media, IFM_IMASK,
2673	    igb_media_change, igb_media_status);
2674	if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
2675	    (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
2676		ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_SX | IFM_FDX,
2677			    0, NULL);
2678		ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_SX, 0, NULL);
2679	} else {
2680		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T, 0, NULL);
2681		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T | IFM_FDX,
2682			    0, NULL);
2683		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX,
2684			    0, NULL);
2685		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
2686			    0, NULL);
2687		if (adapter->hw.phy.type != e1000_phy_ife) {
2688			ifmedia_add(&adapter->media,
2689				IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
2690			ifmedia_add(&adapter->media,
2691				IFM_ETHER | IFM_1000_T, 0, NULL);
2692		}
2693	}
2694	ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
2695	ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
2696}
2697
2698
2699/*
2700 * Manage DMA'able memory.
2701 */
2702static void
2703igb_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
2704{
2705	if (error)
2706		return;
2707	*(bus_addr_t *) arg = segs[0].ds_addr;
2708}
2709
2710static int
2711igb_dma_malloc(struct adapter *adapter, bus_size_t size,
2712        struct igb_dma_alloc *dma, int mapflags)
2713{
2714	int error;
2715
2716	error = bus_dma_tag_create(bus_get_dma_tag(adapter->dev), /* parent */
2717				1, 0,			/* alignment, bounds */
2718				BUS_SPACE_MAXADDR,	/* lowaddr */
2719				BUS_SPACE_MAXADDR,	/* highaddr */
2720				NULL, NULL,		/* filter, filterarg */
2721				size,			/* maxsize */
2722				1,			/* nsegments */
2723				size,			/* maxsegsize */
2724				0,			/* flags */
2725				NULL,			/* lockfunc */
2726				NULL,			/* lockarg */
2727				&dma->dma_tag);
2728	if (error) {
2729		device_printf(adapter->dev,
2730		    "%s: bus_dma_tag_create failed: %d\n",
2731		    __func__, error);
2732		goto fail_0;
2733	}
2734
2735	error = bus_dmamem_alloc(dma->dma_tag, (void**) &dma->dma_vaddr,
2736	    BUS_DMA_NOWAIT, &dma->dma_map);
2737	if (error) {
2738		device_printf(adapter->dev,
2739		    "%s: bus_dmamem_alloc(%ju) failed: %d\n",
2740		    __func__, (uintmax_t)size, error);
2741		goto fail_2;
2742	}
2743
2744	dma->dma_paddr = 0;
2745	error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
2746	    size, igb_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT);
2747	if (error || dma->dma_paddr == 0) {
2748		device_printf(adapter->dev,
2749		    "%s: bus_dmamap_load failed: %d\n",
2750		    __func__, error);
2751		goto fail_3;
2752	}
2753
2754	return (0);
2755
2756fail_3:
2757	bus_dmamap_unload(dma->dma_tag, dma->dma_map);
2758fail_2:
2759	bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
2760	bus_dma_tag_destroy(dma->dma_tag);
2761fail_0:
2762	dma->dma_map = NULL;
2763	dma->dma_tag = NULL;
2764
2765	return (error);
2766}
2767
2768static void
2769igb_dma_free(struct adapter *adapter, struct igb_dma_alloc *dma)
2770{
2771	if (dma->dma_tag == NULL)
2772		return;
2773	if (dma->dma_map != NULL) {
2774		bus_dmamap_sync(dma->dma_tag, dma->dma_map,
2775		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
2776		bus_dmamap_unload(dma->dma_tag, dma->dma_map);
2777		bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
2778		dma->dma_map = NULL;
2779	}
2780	bus_dma_tag_destroy(dma->dma_tag);
2781	dma->dma_tag = NULL;
2782}
2783
2784
2785/*********************************************************************
2786 *
2787 *  Allocate memory for the transmit and receive rings, and then
2788 *  the descriptors associated with each, called only once at attach.
2789 *
2790 **********************************************************************/
2791static int
2792igb_allocate_queues(struct adapter *adapter)
2793{
2794	device_t dev = adapter->dev;
2795	struct tx_ring *txr;
2796	struct rx_ring *rxr;
2797	int rsize, tsize, error = E1000_SUCCESS;
2798	int txconf = 0, rxconf = 0;
2799
2800	/* First allocate the TX ring struct memory */
2801	if (!(adapter->tx_rings =
2802	    (struct tx_ring *) malloc(sizeof(struct tx_ring) *
2803	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2804		device_printf(dev, "Unable to allocate TX ring memory\n");
2805		error = ENOMEM;
2806		goto fail;
2807	}
2808	txr = adapter->tx_rings;
2809
2810	/* Next allocate the RX */
2811	if (!(adapter->rx_rings =
2812	    (struct rx_ring *) malloc(sizeof(struct rx_ring) *
2813	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2814		device_printf(dev, "Unable to allocate RX ring memory\n");
2815		error = ENOMEM;
2816		goto rx_fail;
2817	}
2818	rxr = adapter->rx_rings;
2819
2820	tsize = roundup2(adapter->num_tx_desc *
2821	    sizeof(union e1000_adv_tx_desc), IGB_DBA_ALIGN);
2822	/*
2823	 * Now set up the TX queues, txconf is needed to handle the
2824	 * possibility that things fail midcourse and we need to
2825	 * undo memory gracefully
2826	 */
2827	for (int i = 0; i < adapter->num_queues; i++, txconf++) {
2828		/* Set up some basics */
2829		txr = &adapter->tx_rings[i];
2830		txr->adapter = adapter;
2831		txr->me = i;
2832
2833		/* Initialize the TX lock */
2834		snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
2835		    device_get_nameunit(dev), txr->me);
2836		mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF);
2837
2838		if (igb_dma_malloc(adapter, tsize,
2839			&txr->txdma, BUS_DMA_NOWAIT)) {
2840			device_printf(dev,
2841			    "Unable to allocate TX Descriptor memory\n");
2842			error = ENOMEM;
2843			goto err_tx_desc;
2844		}
2845		txr->tx_base = (struct e1000_tx_desc *)txr->txdma.dma_vaddr;
2846		bzero((void *)txr->tx_base, tsize);
2847
2848        	/* Now allocate transmit buffers for the ring */
2849        	if (igb_allocate_transmit_buffers(txr)) {
2850			device_printf(dev,
2851			    "Critical Failure setting up transmit buffers\n");
2852			error = ENOMEM;
2853			goto err_tx_desc;
2854        	}
2855#if __FreeBSD_version >= 800000
2856		/* Allocate a buf ring */
2857		txr->br = buf_ring_alloc(IGB_BR_SIZE, M_DEVBUF,
2858		    M_WAITOK, &txr->tx_mtx);
2859#endif
2860	}
2861
2862	/*
2863	 * Next the RX queues...
2864	 */
2865	rsize = roundup2(adapter->num_rx_desc *
2866	    sizeof(union e1000_adv_rx_desc), IGB_DBA_ALIGN);
2867	for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
2868		rxr = &adapter->rx_rings[i];
2869		rxr->adapter = adapter;
2870		rxr->me = i;
2871
2872		/* Initialize the RX lock */
2873		snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
2874		    device_get_nameunit(dev), rxr->me);
2875		mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF);
2876
2877		if (igb_dma_malloc(adapter, rsize,
2878			&rxr->rxdma, BUS_DMA_NOWAIT)) {
2879			device_printf(dev,
2880			    "Unable to allocate RX Descriptor memory\n");
2881			error = ENOMEM;
2882			goto err_rx_desc;
2883		}
2884		rxr->rx_base = (union e1000_adv_rx_desc *)rxr->rxdma.dma_vaddr;
2885		bzero((void *)rxr->rx_base, rsize);
2886
2887        	/* Allocate receive buffers for the ring*/
2888		if (igb_allocate_receive_buffers(rxr)) {
2889			device_printf(dev,
2890			    "Critical Failure setting up receive buffers\n");
2891			error = ENOMEM;
2892			goto err_rx_desc;
2893		}
2894	}
2895
2896	return (0);
2897
2898err_rx_desc:
2899	for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
2900		igb_dma_free(adapter, &rxr->rxdma);
2901err_tx_desc:
2902	for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
2903		igb_dma_free(adapter, &txr->txdma);
2904	free(adapter->rx_rings, M_DEVBUF);
2905rx_fail:
2906	free(adapter->tx_rings, M_DEVBUF);
2907fail:
2908	return (error);
2909}
2910
2911/*********************************************************************
2912 *
2913 *  Allocate memory for tx_buffer structures. The tx_buffer stores all
2914 *  the information needed to transmit a packet on the wire. This is
2915 *  called only once at attach, setup is done every reset.
2916 *
2917 **********************************************************************/
2918static int
2919igb_allocate_transmit_buffers(struct tx_ring *txr)
2920{
2921	struct adapter *adapter = txr->adapter;
2922	device_t dev = adapter->dev;
2923	struct igb_tx_buffer *txbuf;
2924	int error, i;
2925
2926	/*
2927	 * Setup DMA descriptor areas.
2928	 */
2929	if ((error = bus_dma_tag_create(bus_get_dma_tag(adapter->dev),
2930			       1, 0,			/* alignment, bounds */
2931			       BUS_SPACE_MAXADDR,	/* lowaddr */
2932			       BUS_SPACE_MAXADDR,	/* highaddr */
2933			       NULL, NULL,		/* filter, filterarg */
2934			       IGB_TSO_SIZE,		/* maxsize */
2935			       IGB_MAX_SCATTER,		/* nsegments */
2936			       IGB_TSO_SEG_SIZE,	/* maxsegsize */
2937			       0,			/* flags */
2938			       NULL,			/* lockfunc */
2939			       NULL,			/* lockfuncarg */
2940			       &txr->txtag))) {
2941		device_printf(dev,"Unable to allocate TX DMA tag\n");
2942		goto fail;
2943	}
2944
2945	if (!(txr->tx_buffers =
2946	    (struct igb_tx_buffer *) malloc(sizeof(struct igb_tx_buffer) *
2947	    adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2948		device_printf(dev, "Unable to allocate tx_buffer memory\n");
2949		error = ENOMEM;
2950		goto fail;
2951	}
2952
2953        /* Create the descriptor buffer dma maps */
2954	txbuf = txr->tx_buffers;
2955	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
2956		error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
2957		if (error != 0) {
2958			device_printf(dev, "Unable to create TX DMA map\n");
2959			goto fail;
2960		}
2961	}
2962
2963	return 0;
2964fail:
2965	/* We free all, it handles case where we are in the middle */
2966	igb_free_transmit_structures(adapter);
2967	return (error);
2968}
2969
2970/*********************************************************************
2971 *
2972 *  Initialize a transmit ring.
2973 *
2974 **********************************************************************/
2975static void
2976igb_setup_transmit_ring(struct tx_ring *txr)
2977{
2978	struct adapter *adapter = txr->adapter;
2979	struct igb_tx_buffer *txbuf;
2980	int i;
2981
2982	/* Clear the old descriptor contents */
2983	bzero((void *)txr->tx_base,
2984	      (sizeof(union e1000_adv_tx_desc)) * adapter->num_tx_desc);
2985	/* Reset indices */
2986	txr->next_avail_desc = 0;
2987	txr->next_to_clean = 0;
2988
2989	/* Free any existing tx buffers. */
2990        txbuf = txr->tx_buffers;
2991	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
2992		if (txbuf->m_head != NULL) {
2993			bus_dmamap_sync(txr->txtag, txbuf->map,
2994			    BUS_DMASYNC_POSTWRITE);
2995			bus_dmamap_unload(txr->txtag, txbuf->map);
2996			m_freem(txbuf->m_head);
2997			txbuf->m_head = NULL;
2998		}
2999		/* clear the watch index */
3000		txbuf->next_eop = -1;
3001        }
3002
3003	/* Set number of descriptors available */
3004	txr->tx_avail = adapter->num_tx_desc;
3005
3006	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3007	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3008
3009}
3010
3011/*********************************************************************
3012 *
3013 *  Initialize all transmit rings.
3014 *
3015 **********************************************************************/
3016static void
3017igb_setup_transmit_structures(struct adapter *adapter)
3018{
3019	struct tx_ring *txr = adapter->tx_rings;
3020
3021	for (int i = 0; i < adapter->num_queues; i++, txr++)
3022		igb_setup_transmit_ring(txr);
3023
3024	return;
3025}
3026
3027/*********************************************************************
3028 *
3029 *  Enable transmit unit.
3030 *
3031 **********************************************************************/
3032static void
3033igb_initialize_transmit_units(struct adapter *adapter)
3034{
3035	struct tx_ring	*txr = adapter->tx_rings;
3036	u32		tctl, txdctl;
3037
3038	 INIT_DEBUGOUT("igb_initialize_transmit_units: begin");
3039
3040	/* Setup Transmit Descriptor Base Settings */
3041	adapter->txd_cmd = E1000_TXD_CMD_IFCS;
3042
3043	/* Setup the Tx Descriptor Rings */
3044	for (int i = 0; i < adapter->num_queues; i++, txr++) {
3045		u64 bus_addr = txr->txdma.dma_paddr;
3046
3047		E1000_WRITE_REG(&adapter->hw, E1000_TDLEN(i),
3048		    adapter->num_tx_desc * sizeof(struct e1000_tx_desc));
3049		E1000_WRITE_REG(&adapter->hw, E1000_TDBAH(i),
3050		    (uint32_t)(bus_addr >> 32));
3051		E1000_WRITE_REG(&adapter->hw, E1000_TDBAL(i),
3052		    (uint32_t)bus_addr);
3053
3054		/* Setup the HW Tx Head and Tail descriptor pointers */
3055		E1000_WRITE_REG(&adapter->hw, E1000_TDT(i), 0);
3056		E1000_WRITE_REG(&adapter->hw, E1000_TDH(i), 0);
3057
3058		HW_DEBUGOUT2("Base = %x, Length = %x\n",
3059		    E1000_READ_REG(&adapter->hw, E1000_TDBAL(i)),
3060		    E1000_READ_REG(&adapter->hw, E1000_TDLEN(i)));
3061
3062		txr->watchdog_check = FALSE;
3063
3064		txdctl = E1000_READ_REG(&adapter->hw, E1000_TXDCTL(i));
3065		txdctl |= E1000_TXDCTL_QUEUE_ENABLE;
3066		E1000_WRITE_REG(&adapter->hw, E1000_TXDCTL(i), txdctl);
3067
3068		/* Default interrupt rate */
3069		E1000_WRITE_REG(&adapter->hw, E1000_EITR(txr->msix),
3070		    igb_ave_latency);
3071	}
3072
3073	/* Program the Transmit Control Register */
3074	tctl = E1000_READ_REG(&adapter->hw, E1000_TCTL);
3075	tctl &= ~E1000_TCTL_CT;
3076	tctl |= (E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN |
3077		   (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT));
3078
3079	e1000_config_collision_dist(&adapter->hw);
3080
3081	/* This write will effectively turn on the transmit unit. */
3082	E1000_WRITE_REG(&adapter->hw, E1000_TCTL, tctl);
3083
3084}
3085
3086/*********************************************************************
3087 *
3088 *  Free all transmit rings.
3089 *
3090 **********************************************************************/
3091static void
3092igb_free_transmit_structures(struct adapter *adapter)
3093{
3094	struct tx_ring *txr = adapter->tx_rings;
3095
3096	for (int i = 0; i < adapter->num_queues; i++, txr++) {
3097		IGB_TX_LOCK(txr);
3098		igb_free_transmit_buffers(txr);
3099		igb_dma_free(adapter, &txr->txdma);
3100		IGB_TX_UNLOCK(txr);
3101		IGB_TX_LOCK_DESTROY(txr);
3102	}
3103	free(adapter->tx_rings, M_DEVBUF);
3104}
3105
3106/*********************************************************************
3107 *
3108 *  Free transmit ring related data structures.
3109 *
3110 **********************************************************************/
3111static void
3112igb_free_transmit_buffers(struct tx_ring *txr)
3113{
3114	struct adapter *adapter = txr->adapter;
3115	struct igb_tx_buffer *tx_buffer;
3116	int             i;
3117
3118	INIT_DEBUGOUT("free_transmit_ring: begin");
3119
3120	if (txr->tx_buffers == NULL)
3121		return;
3122
3123	tx_buffer = txr->tx_buffers;
3124	for (i = 0; i < adapter->num_tx_desc; i++, tx_buffer++) {
3125		if (tx_buffer->m_head != NULL) {
3126			bus_dmamap_sync(txr->txtag, tx_buffer->map,
3127			    BUS_DMASYNC_POSTWRITE);
3128			bus_dmamap_unload(txr->txtag,
3129			    tx_buffer->map);
3130			m_freem(tx_buffer->m_head);
3131			tx_buffer->m_head = NULL;
3132			if (tx_buffer->map != NULL) {
3133				bus_dmamap_destroy(txr->txtag,
3134				    tx_buffer->map);
3135				tx_buffer->map = NULL;
3136			}
3137		} else if (tx_buffer->map != NULL) {
3138			bus_dmamap_unload(txr->txtag,
3139			    tx_buffer->map);
3140			bus_dmamap_destroy(txr->txtag,
3141			    tx_buffer->map);
3142			tx_buffer->map = NULL;
3143		}
3144	}
3145#if __FreeBSD_version >= 800000
3146	if (txr->br != NULL)
3147		buf_ring_free(txr->br, M_DEVBUF);
3148#endif
3149	if (txr->tx_buffers != NULL) {
3150		free(txr->tx_buffers, M_DEVBUF);
3151		txr->tx_buffers = NULL;
3152	}
3153	if (txr->txtag != NULL) {
3154		bus_dma_tag_destroy(txr->txtag);
3155		txr->txtag = NULL;
3156	}
3157	return;
3158}
3159
3160/**********************************************************************
3161 *
3162 *  Setup work for hardware segmentation offload (TSO) on
3163 *  adapters using advanced tx descriptors
3164 *
3165 **********************************************************************/
3166static boolean_t
3167igb_tso_setup(struct tx_ring *txr, struct mbuf *mp, u32 *hdrlen)
3168{
3169	struct adapter *adapter = txr->adapter;
3170	struct e1000_adv_tx_context_desc *TXD;
3171	struct igb_tx_buffer        *tx_buffer;
3172	u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0;
3173	u32 mss_l4len_idx = 0;
3174	u16 vtag = 0;
3175	int ctxd, ehdrlen, ip_hlen, tcp_hlen;
3176	struct ether_vlan_header *eh;
3177	struct ip *ip;
3178	struct tcphdr *th;
3179
3180
3181	/*
3182	 * Determine where frame payload starts.
3183	 * Jump over vlan headers if already present
3184	 */
3185	eh = mtod(mp, struct ether_vlan_header *);
3186	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN))
3187		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3188	else
3189		ehdrlen = ETHER_HDR_LEN;
3190
3191	/* Ensure we have at least the IP+TCP header in the first mbuf. */
3192	if (mp->m_len < ehdrlen + sizeof(struct ip) + sizeof(struct tcphdr))
3193		return FALSE;
3194
3195	/* Only supports IPV4 for now */
3196	ctxd = txr->next_avail_desc;
3197	tx_buffer = &txr->tx_buffers[ctxd];
3198	TXD = (struct e1000_adv_tx_context_desc *) &txr->tx_base[ctxd];
3199
3200	ip = (struct ip *)(mp->m_data + ehdrlen);
3201	if (ip->ip_p != IPPROTO_TCP)
3202                return FALSE;   /* 0 */
3203	ip->ip_sum = 0;
3204	ip_hlen = ip->ip_hl << 2;
3205	th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
3206	th->th_sum = in_pseudo(ip->ip_src.s_addr,
3207	    ip->ip_dst.s_addr, htons(IPPROTO_TCP));
3208	tcp_hlen = th->th_off << 2;
3209	/*
3210	 * Calculate header length, this is used
3211	 * in the transmit desc in igb_xmit
3212	 */
3213	*hdrlen = ehdrlen + ip_hlen + tcp_hlen;
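	/*
	** Illustrative example: for an untagged IPv4/TCP frame with no
	** IP or TCP options this works out to 14 + 20 + 20 = 54 bytes
	** of header preceding the TSO payload.
	*/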
3214
3215	/* VLAN MACLEN IPLEN */
3216	if (mp->m_flags & M_VLANTAG) {
3217		vtag = htole16(mp->m_pkthdr.ether_vtag);
3218		vlan_macip_lens |= (vtag << E1000_ADVTXD_VLAN_SHIFT);
3219	}
3220
3221	vlan_macip_lens |= (ehdrlen << E1000_ADVTXD_MACLEN_SHIFT);
3222	vlan_macip_lens |= ip_hlen;
3223	TXD->vlan_macip_lens |= htole32(vlan_macip_lens);
3224
3225	/* ADV DTYPE TUCMD */
3226	type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
3227	type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP;
3228	type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4;
3229	TXD->type_tucmd_mlhl |= htole32(type_tucmd_mlhl);
3230
3231	/* MSS L4LEN IDX */
3232	mss_l4len_idx |= (mp->m_pkthdr.tso_segsz << E1000_ADVTXD_MSS_SHIFT);
3233	mss_l4len_idx |= (tcp_hlen << E1000_ADVTXD_L4LEN_SHIFT);
3234	/* 82575 needs the queue index added */
3235	if (adapter->hw.mac.type == e1000_82575)
3236		mss_l4len_idx |= txr->me << 4;
3237	TXD->mss_l4len_idx = htole32(mss_l4len_idx);
3238
3239	TXD->seqnum_seed = htole32(0);
3240	tx_buffer->m_head = NULL;
3241	tx_buffer->next_eop = -1;
3242
3243	if (++ctxd == adapter->num_tx_desc)
3244		ctxd = 0;
3245
3246	txr->tx_avail--;
3247	txr->next_avail_desc = ctxd;
3248	return TRUE;
3249}
3250
3251
3252/*********************************************************************
3253 *
3254 *  Context Descriptor setup for VLAN or CSUM
3255 *
3256 **********************************************************************/
3257
3258static bool
3259igb_tx_ctx_setup(struct tx_ring *txr, struct mbuf *mp)
3260{
3261	struct adapter *adapter = txr->adapter;
3262	struct e1000_adv_tx_context_desc *TXD;
3263	struct igb_tx_buffer        *tx_buffer;
3264	u32 vlan_macip_lens, type_tucmd_mlhl, mss_l4len_idx;
3265	struct ether_vlan_header *eh;
3266	struct ip *ip = NULL;
3267	struct ip6_hdr *ip6;
3268	int  ehdrlen, ctxd, ip_hlen = 0;
3269	u16	etype, vtag = 0;
3270	u8	ipproto = 0;
3271	bool	offload = TRUE;
3272
3273	if ((mp->m_pkthdr.csum_flags & CSUM_OFFLOAD) == 0)
3274		offload = FALSE;
3275
3276	vlan_macip_lens = type_tucmd_mlhl = mss_l4len_idx = 0;
3277	ctxd = txr->next_avail_desc;
3278	tx_buffer = &txr->tx_buffers[ctxd];
3279	TXD = (struct e1000_adv_tx_context_desc *) &txr->tx_base[ctxd];
3280
3281	/*
3282	** In advanced descriptors the vlan tag must
3283	** be placed into the context descriptor, thus
3284	** we need to be here just for that setup.
3285	*/
3286	if (mp->m_flags & M_VLANTAG) {
3287		vtag = htole16(mp->m_pkthdr.ether_vtag);
3288		vlan_macip_lens |= (vtag << E1000_ADVTXD_VLAN_SHIFT);
3289	} else if (offload == FALSE)
3290		return FALSE;
3291
3292	/*
3293	 * Determine where frame payload starts.
3294	 * Jump over vlan headers if already present,
3295	 * helpful for QinQ too.
3296	 */
3297	eh = mtod(mp, struct ether_vlan_header *);
3298	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
3299		etype = ntohs(eh->evl_proto);
3300		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3301	} else {
3302		etype = ntohs(eh->evl_encap_proto);
3303		ehdrlen = ETHER_HDR_LEN;
3304	}
3305
3306	/* Set the ether header length */
3307	vlan_macip_lens |= ehdrlen << E1000_ADVTXD_MACLEN_SHIFT;
3308
3309	switch (etype) {
3310		case ETHERTYPE_IP:
3311			ip = (struct ip *)(mp->m_data + ehdrlen);
3312			ip_hlen = ip->ip_hl << 2;
3313			if (mp->m_len < ehdrlen + ip_hlen) {
3314				offload = FALSE;
3315				break;
3316			}
3317			ipproto = ip->ip_p;
3318			type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4;
3319			break;
3320		case ETHERTYPE_IPV6:
3321			ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
3322			ip_hlen = sizeof(struct ip6_hdr);
3323			if (mp->m_len < ehdrlen + ip_hlen)
3324				return (FALSE);
3325			ipproto = ip6->ip6_nxt;
3326			type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV6;
3327			break;
3328		default:
3329			offload = FALSE;
3330			break;
3331	}
3332
3333	vlan_macip_lens |= ip_hlen;
3334	type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
3335
3336	switch (ipproto) {
3337		case IPPROTO_TCP:
3338			if (mp->m_pkthdr.csum_flags & CSUM_TCP)
3339				type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP;
3340			break;
3341		case IPPROTO_UDP:
3342			if (mp->m_pkthdr.csum_flags & CSUM_UDP)
3343				type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_UDP;
3344			break;
3345#if __FreeBSD_version >= 800000
3346		case IPPROTO_SCTP:
3347			if (mp->m_pkthdr.csum_flags & CSUM_SCTP)
3348				type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_SCTP;
3349			break;
3350#endif
3351		default:
3352			offload = FALSE;
3353			break;
3354	}
3355
3356	/* 82575 needs the queue index added */
3357	if (adapter->hw.mac.type == e1000_82575)
3358		mss_l4len_idx = txr->me << 4;
3359
3360	/* Now copy bits into descriptor */
3361	TXD->vlan_macip_lens |= htole32(vlan_macip_lens);
3362	TXD->type_tucmd_mlhl |= htole32(type_tucmd_mlhl);
3363	TXD->seqnum_seed = htole32(0);
3364	TXD->mss_l4len_idx = htole32(mss_l4len_idx);
3365
3366	tx_buffer->m_head = NULL;
3367	tx_buffer->next_eop = -1;
3368
3369	/* We've consumed the first desc, adjust counters */
3370	if (++ctxd == adapter->num_tx_desc)
3371		ctxd = 0;
3372	txr->next_avail_desc = ctxd;
3373	--txr->tx_avail;
3374
3375        return (offload);
3376}
3377
3378
3379/**********************************************************************
3380 *
3381 *  Examine each tx_buffer in the used queue. If the hardware is done
3382 *  processing the packet then free associated resources. The
3383 *  tx_buffer is put back on the free queue.
3384 *
3385 *  A TRUE return means there's work in the ring to clean, FALSE means it's empty.
3386 **********************************************************************/
3387static bool
3388igb_txeof(struct tx_ring *txr)
3389{
3390	struct adapter	*adapter = txr->adapter;
3391        int first, last, done;
3392        struct igb_tx_buffer *tx_buffer;
3393        struct e1000_tx_desc   *tx_desc, *eop_desc;
3394	struct ifnet   *ifp = adapter->ifp;
3395
3396	IGB_TX_LOCK_ASSERT(txr);
3397
3398        if (txr->tx_avail == adapter->num_tx_desc)
3399                return FALSE;
3400
3401        first = txr->next_to_clean;
3402        tx_desc = &txr->tx_base[first];
3403        tx_buffer = &txr->tx_buffers[first];
3404	last = tx_buffer->next_eop;
3405        eop_desc = &txr->tx_base[last];
3406
3407	/*
3408	 * What this does is get the index of the
3409	 * first descriptor AFTER the EOP of the
3410	 * first packet, that way we can do the
3411	 * simple comparison on the inner while loop.
3412	 */
3413	if (++last == adapter->num_tx_desc)
3414 		last = 0;
3415	done = last;
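	/*
	** Small worked example (illustrative indices only): with
	** first = 10 and next_eop = 12, "last" advances to 13 and so
	** does "done"; once the EOP descriptor reports DD the inner
	** loop below releases descriptors 10, 11 and 12 and leaves
	** first (and eventually next_to_clean) at 13.
	*/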
3416
3417        bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3418	    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
3419
3420        while (eop_desc->upper.fields.status & E1000_TXD_STAT_DD) {
3421		/* We clean the range of the packet */
3422		while (first != done) {
3423                	tx_desc->upper.data = 0;
3424                	tx_desc->lower.data = 0;
3425                	tx_desc->buffer_addr = 0;
3426                	++txr->tx_avail;
3427
3428			if (tx_buffer->m_head) {
3429				ifp->if_opackets++;
3430				bus_dmamap_sync(txr->txtag,
3431				    tx_buffer->map,
3432				    BUS_DMASYNC_POSTWRITE);
3433				bus_dmamap_unload(txr->txtag,
3434				    tx_buffer->map);
3435
3436                        	m_freem(tx_buffer->m_head);
3437                        	tx_buffer->m_head = NULL;
3438                	}
3439			tx_buffer->next_eop = -1;
3440			txr->watchdog_time = ticks;
3441
3442	                if (++first == adapter->num_tx_desc)
3443				first = 0;
3444
3445	                tx_buffer = &txr->tx_buffers[first];
3446			tx_desc = &txr->tx_base[first];
3447		}
3448		/* See if we can continue to the next packet */
3449		last = tx_buffer->next_eop;
3450		if (last != -1) {
3451        		eop_desc = &txr->tx_base[last];
3452			/* Get new done point */
3453			if (++last == adapter->num_tx_desc) last = 0;
3454			done = last;
3455		} else
3456			break;
3457        }
3458        bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3459            BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3460
3461        txr->next_to_clean = first;
3462
3463        /*
3464         * If we have enough room, clear IFF_DRV_OACTIVE
3465         * to tell the stack that it is OK to send packets.
3466         */
3467        if (txr->tx_avail > IGB_TX_CLEANUP_THRESHOLD) {
3468                ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
3469		/* All clean, turn off the watchdog */
3470                if (txr->tx_avail == adapter->num_tx_desc) {
3471			txr->watchdog_check = FALSE;
3472			return FALSE;
3473		}
3474        }
3475
3476        return TRUE;
3477}
3478
3479
3480/*********************************************************************
3481 *
3482 *  Refresh mbuf buffers for a range of descriptors
3483 *
3484 **********************************************************************/
3485static int
3486igb_get_buf(struct rx_ring *rxr, int first, int limit)
3487{
3488	struct adapter		*adapter = rxr->adapter;
3489	bus_dma_segment_t	seg[2];
3490	struct igb_rx_buf	*rxbuf;
3491	struct mbuf		*mh, *mp;
3492	bus_dmamap_t		map;
3493	int			i, nsegs, error;
3494
3495	i = first;
3496	while (i != limit) {
3497		rxbuf = &rxr->rx_buffers[i];
3498
3499		if (rxbuf->m_head == NULL) {
3500			mh = m_gethdr(M_DONTWAIT, MT_DATA);
3501			if (mh == NULL)
3502				goto failure;
3503		} else  /* reuse */
3504			mh = rxbuf->m_head;
3505
3506		mh->m_len = MHLEN;
3507		mh->m_flags |= M_PKTHDR;
3508
3509		if (rxbuf->m_pack == NULL) {
3510			mp = m_getjcl(M_DONTWAIT, MT_DATA,
3511			    M_PKTHDR, adapter->rx_mbuf_sz);
3512			if (mp == NULL)
3513				goto failure;
3514			mp->m_len = adapter->rx_mbuf_sz;
3515			mp->m_flags &= ~M_PKTHDR;
3516		} else {	/* reusing */
3517			mp = rxbuf->m_pack;
3518			mp->m_len = adapter->rx_mbuf_sz;
3519			mp->m_flags &= ~M_PKTHDR;
3520		}
3521
3522		/*
3523		** Need to create a chain for the following
3524		** dmamap call at this point.
3525		*/
3526		mh->m_next = mp;
3527		mh->m_pkthdr.len = mh->m_len + mp->m_len;
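		/*
		** Note on the split mapping below (descriptive only): the
		** two-mbuf chain loads into two DMA segments, seg[0] for
		** the MHLEN header mbuf and seg[1] for the cluster, which
		** feed the descriptor's hdr_addr and pkt_addr respectively.
		*/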
3528
3529		/* Get the memory mapping */
3530		error = bus_dmamap_load_mbuf_sg(rxr->rxtag,
3531		    rxr->spare_map, mh, seg, &nsegs, BUS_DMA_NOWAIT);
3532		if (error != 0)
3533			panic("igb_get_buf: dmamap load failure\n");
3534
3535		/* Unload old mapping and update buffer struct */
3536		if (rxbuf->m_head != NULL)
3537			bus_dmamap_unload(rxr->rxtag, rxbuf->map);
3538		map = rxbuf->map;
3539		rxbuf->map = rxr->spare_map;
3540		rxr->spare_map = map;
3541		rxbuf->m_head = mh;
3542		rxbuf->m_pack = mp;
3543		bus_dmamap_sync(rxr->rxtag,
3544		    rxbuf->map, BUS_DMASYNC_PREREAD);
3545
3546		/* Update descriptor */
3547		rxr->rx_base[i].read.hdr_addr = htole64(seg[0].ds_addr);
3548		rxr->rx_base[i].read.pkt_addr = htole64(seg[1].ds_addr);
3549
3550		/* Calculate next index */
3551		if (++i == adapter->num_rx_desc)
3552			i = 0;
3553	}
3554
3555	return (0);
3556
3557failure:
3558	/*
3559	** It's unfortunate to have to panic, but
3560	** with the new design I see no other
3561	** graceful failure mode; this is ONLY
3562	** called in the RX clean path, and the
3563	** old mbuf has been used, it MUST be
3564	** refreshed. This should be avoided by
3565	** proper configuration. -jfv
3566	*/
3567	panic("igb_get_buf: ENOBUFS\n");
3568}
3569
3570/*********************************************************************
3571 *
3572 *  Allocate memory for rx_buffer structures. Since we use one
3573 *  rx_buffer per received packet, the maximum number of rx_buffer's
3574 *  that we'll need is equal to the number of receive descriptors
3575 *  that we've allocated.
3576 *
3577 **********************************************************************/
3578static int
3579igb_allocate_receive_buffers(struct rx_ring *rxr)
3580{
3581	struct	adapter 	*adapter = rxr->adapter;
3582	device_t 		dev = adapter->dev;
3583	struct igb_rx_buf	*rxbuf;
3584	int             	i, bsize, error;
3585
3586	bsize = sizeof(struct igb_rx_buf) * adapter->num_rx_desc;
3587	if (!(rxr->rx_buffers =
3588	    (struct igb_rx_buf *) malloc(bsize,
3589	    M_DEVBUF, M_NOWAIT | M_ZERO))) {
3590		device_printf(dev, "Unable to allocate rx_buffer memory\n");
3591		error = ENOMEM;
3592		goto fail;
3593	}
3594
3595	/*
3596	** The tag is made to accommodate the largest buffer size
3597	** with packet split (hence the two segments), even though
3598	** it may not always be used.
3599	*/
3600	if ((error = bus_dma_tag_create(bus_get_dma_tag(adapter->dev),
3601				   1, 0,		/* alignment, bounds */
3602				   BUS_SPACE_MAXADDR,	/* lowaddr */
3603				   BUS_SPACE_MAXADDR,	/* highaddr */
3604				   NULL, NULL,		/* filter, filterarg */
3605				   MJUM16BYTES,		/* maxsize */
3606				   2,			/* nsegments */
3607				   MJUMPAGESIZE,	/* maxsegsize */
3608				   0,			/* flags */
3609				   NULL,		/* lockfunc */
3610				   NULL,		/* lockfuncarg */
3611				   &rxr->rxtag))) {
3612		device_printf(dev, "Unable to create RX DMA tag\n");
3613		goto fail;
3614	}
3615
3616	/* Create the spare map (used by getbuf) */
3617        error = bus_dmamap_create(rxr->rxtag, BUS_DMA_NOWAIT,
3618	     &rxr->spare_map);
3619	if (error) {
3620		device_printf(dev,
3621		    "%s: bus_dmamap_create header spare failed: %d\n",
3622		    __func__, error);
3623		goto fail;
3624	}
3625
3626	for (i = 0; i < adapter->num_rx_desc; i++) {
3627		rxbuf = &rxr->rx_buffers[i];
3628		error = bus_dmamap_create(rxr->rxtag,
3629		    BUS_DMA_NOWAIT, &rxbuf->map);
3630		if (error) {
3631			device_printf(dev, "Unable to create RX DMA maps\n");
3632			goto fail;
3633		}
3634	}
3635
3636	return (0);
3637
3638fail:
3639	/* Frees all, but can handle partial completion */
3640	igb_free_receive_structures(adapter);
3641	return (error);
3642}
3643
3644/*********************************************************************
3645 *
3646 *  Initialize a receive ring and its buffers.
3647 *
3648 **********************************************************************/
3649static int
3650igb_setup_receive_ring(struct rx_ring *rxr)
3651{
3652	struct	adapter		*adapter;
3653	struct  ifnet		*ifp;
3654	device_t		dev;
3655	struct igb_rx_buf	*rxbuf;
3656	bus_dma_segment_t	seg[2];
3657	struct lro_ctrl		*lro = &rxr->lro;
3658	int			rsize, nsegs, error = 0;
3659
3660	adapter = rxr->adapter;
3661	dev = adapter->dev;
3662	ifp = adapter->ifp;
3663
3664	/* Clear the ring contents */
3665	rsize = roundup2(adapter->num_rx_desc *
3666	    sizeof(union e1000_adv_rx_desc), IGB_DBA_ALIGN);
3667	bzero((void *)rxr->rx_base, rsize);
3668
3669	/*
3670	** Free current RX buffer structures and their mbufs
3671	*/
3672	for (int i = 0; i < adapter->num_rx_desc; i++) {
3673		rxbuf = &rxr->rx_buffers[i];
3674		bus_dmamap_sync(rxr->rxtag, rxbuf->map,
3675		    BUS_DMASYNC_POSTREAD);
3676		bus_dmamap_unload(rxr->rxtag, rxbuf->map);
3677		if (rxbuf->m_head) {
3678			rxbuf->m_head->m_next = rxbuf->m_pack;
3679			m_freem(rxbuf->m_head);
3680		}
3681		rxbuf->m_head = NULL;
3682		rxbuf->m_pack = NULL;
3683	}
3684
3685	/* Now replenish the mbufs */
3686	for (int j = 0; j != adapter->num_rx_desc; ++j) {
3687
3688		rxbuf = &rxr->rx_buffers[j];
3689		rxbuf->m_head = m_gethdr(M_DONTWAIT, MT_DATA);
3690		if (rxbuf->m_head == NULL)
3691			panic("RX ring hdr initialization failed!\n");
3692		rxbuf->m_head->m_len = MHLEN;
3693		rxbuf->m_head->m_flags |= M_PKTHDR;
3694		rxbuf->m_head->m_pkthdr.len = rxbuf->m_head->m_len;
3695
3696		rxbuf->m_pack = m_getjcl(M_DONTWAIT, MT_DATA,
3697		    M_PKTHDR, adapter->rx_mbuf_sz);
3698		if (rxbuf->m_pack == NULL)
3699			panic("RX ring pkt initialization failed!\n");
3700		rxbuf->m_pack->m_len = adapter->rx_mbuf_sz;
3701		rxbuf->m_head->m_next = rxbuf->m_pack;
3702		rxbuf->m_head->m_pkthdr.len += rxbuf->m_pack->m_len;
3703
3704		/* Get the memory mapping */
3705		error = bus_dmamap_load_mbuf_sg(rxr->rxtag,
3706		    rxbuf->map, rxbuf->m_head, seg,
3707		    &nsegs, BUS_DMA_NOWAIT);
3708		if (error != 0)
3709			panic("RX ring dma initialization failed!\n");
3710		bus_dmamap_sync(rxr->rxtag,
3711		    rxbuf->map, BUS_DMASYNC_PREREAD);
3712
3713		/* Update descriptor */
3714		rxr->rx_base[j].read.hdr_addr = htole64(seg[0].ds_addr);
3715		rxr->rx_base[j].read.pkt_addr = htole64(seg[1].ds_addr);
3716	}
3717
3718	/* Setup our descriptor indices */
3719	rxr->next_to_check = 0;
3720	rxr->last_cleaned = 0;
3721	rxr->lro_enabled = FALSE;
3722
3723	if (igb_header_split)
3724		rxr->hdr_split = TRUE;
3725	else
3726		ifp->if_capabilities &= ~IFCAP_LRO;
3727
3728	bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
3729	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3730
3731	/*
3732	** Now set up the LRO interface; note that we
3733	** only do header split when LRO is
3734	** enabled, since the two are rarely
3735	** desirable independently of each other.
3736	*/
3737	if ((ifp->if_capenable & IFCAP_LRO) && (rxr->hdr_split)) {
3738		int err = tcp_lro_init(lro);
3739		if (err)
3740			panic("LRO Initialization failed!\n");
3741		INIT_DEBUGOUT("RX LRO Initialized\n");
3742		rxr->lro_enabled = TRUE;
3743		lro->ifp = adapter->ifp;
3744	}
3745
3746	return (0);
3747#if 0
3748fail:
3749	/*
3750	 * We need to clean up any buffers allocated
3751	 * so far, 'j' is the failing index.
3752	 */
3753	for (int i = 0; i < j; i++) {
3754		rxbuf = &rxr->rx_buffers[i];
3755		if (rxbuf->m_head != NULL) {
3756			bus_dmamap_sync(rxr->rxtag, rxbuf->map,
3757			    BUS_DMASYNC_POSTREAD);
3758			bus_dmamap_unload(rxr->rxtag, rxbuf->map);
3759			m_freem(rxbuf->m_head);
3760			rxbuf->m_head = NULL;
3761		}
3762	}
3763	return (ENOBUFS);
3764#endif
3765}
3766
3767/*********************************************************************
3768 *
3769 *  Initialize all receive rings.
3770 *
3771 **********************************************************************/
3772static int
3773igb_setup_receive_structures(struct adapter *adapter)
3774{
3775	struct rx_ring *rxr = adapter->rx_rings;
3776	int i, j;
3777
3778	for (i = 0; i < adapter->num_queues; i++, rxr++)
3779		if (igb_setup_receive_ring(rxr))
3780			goto fail;
3781
3782	return (0);
3783fail:
3784	/*
3785	 * Free RX buffers allocated so far, we will only handle
3786	 * the rings that completed, the failing case will have
3787	 * cleaned up for itself. The value of 'i' will be the
3788	 * failed ring so we must pre-decrement it.
3789	 */
3790	rxr = adapter->rx_rings;
3791	for (--i; i >= 0; i--, rxr++) {
3792		for (j = 0; j < adapter->num_rx_desc; j++) {
3793			struct igb_rx_buf *rxbuf;
3794			rxbuf = &rxr->rx_buffers[j];
3795			if (rxbuf->m_head != NULL) {
3796				bus_dmamap_sync(rxr->rxtag, rxbuf->map,
3797			  	  BUS_DMASYNC_POSTREAD);
3798				bus_dmamap_unload(rxr->rxtag, rxbuf->map);
3799				m_freem(rxbuf->m_head);
3800				rxbuf->m_head = NULL;
3801			}
3802		}
3803	}
3804
3805	return (ENOBUFS);
3806}
3807
3808/*********************************************************************
3809 *
3810 *  Enable receive unit.
3811 *
3812 **********************************************************************/
3813static void
3814igb_initialize_receive_units(struct adapter *adapter)
3815{
3816	struct rx_ring	*rxr = adapter->rx_rings;
3817	struct ifnet	*ifp = adapter->ifp;
3818	u32		rctl, rxcsum, psize, srrctl = 0;
3819
3820	INIT_DEBUGOUT("igb_initialize_receive_units: begin");
3821
3822	/*
3823	 * Make sure receives are disabled while setting
3824	 * up the descriptor ring
3825	 */
3826	rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
3827	E1000_WRITE_REG(&adapter->hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
3828
3829	/*
3830	** Set up for header split
3831	*/
3832	if (rxr->hdr_split) {
3833		/* Use a standard mbuf for the header */
3834		srrctl |= IGB_HDR_BUF << E1000_SRRCTL_BSIZEHDRSIZE_SHIFT;
3835		srrctl |= E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
3836	} else
3837		srrctl |= E1000_SRRCTL_DESCTYPE_ADV_ONEBUF;
3838
3839	/*
3840	** Set up for jumbo frames
3841	*/
3842	if (ifp->if_mtu > ETHERMTU) {
3843		rctl |= E1000_RCTL_LPE;
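		/* The SRRCTL packet buffer size field is expressed in 1 KB units */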
3844		srrctl |= 4096 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
3845		rctl |= E1000_RCTL_SZ_4096 | E1000_RCTL_BSEX;
3846
3847		/* Set maximum packet len */
3848		psize = adapter->max_frame_size;
3849		/* are we on a vlan? */
3850		if (adapter->ifp->if_vlantrunk != NULL)
3851			psize += VLAN_TAG_SIZE;
3852		E1000_WRITE_REG(&adapter->hw, E1000_RLPML, psize);
3853	} else {
3854		rctl &= ~E1000_RCTL_LPE;
3855		srrctl |= 2048 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
3856		rctl |= E1000_RCTL_SZ_2048;
3857	}
3858
3859	/* Setup the Base and Length of the Rx Descriptor Rings */
3860	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
3861		u64 bus_addr = rxr->rxdma.dma_paddr;
3862		u32 rxdctl;
3863
3864		E1000_WRITE_REG(&adapter->hw, E1000_RDLEN(i),
3865		    adapter->num_rx_desc * sizeof(struct e1000_rx_desc));
3866		E1000_WRITE_REG(&adapter->hw, E1000_RDBAH(i),
3867		    (uint32_t)(bus_addr >> 32));
3868		E1000_WRITE_REG(&adapter->hw, E1000_RDBAL(i),
3869		    (uint32_t)bus_addr);
3870		E1000_WRITE_REG(&adapter->hw, E1000_SRRCTL(i), srrctl);
3871		/* Enable this Queue */
3872		rxdctl = E1000_READ_REG(&adapter->hw, E1000_RXDCTL(i));
3873		rxdctl |= E1000_RXDCTL_QUEUE_ENABLE;
3874		rxdctl &= 0xFFF00000;
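		/* Prefetch, host and write-back thresholds (PTHRESH, HTHRESH, WTHRESH) */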
3875		rxdctl |= IGB_RX_PTHRESH;
3876		rxdctl |= IGB_RX_HTHRESH << 8;
3877		rxdctl |= IGB_RX_WTHRESH << 16;
3878		E1000_WRITE_REG(&adapter->hw, E1000_RXDCTL(i), rxdctl);
3879
3880		/* Initial RX interrupt moderation */
3881		rxr->eitr_setting = igb_ave_latency;
3882		E1000_WRITE_REG(&adapter->hw,
3883		    E1000_EITR(rxr->msix), igb_ave_latency);
3884	}
3885
3886	/*
3887	** Setup for RX MultiQueue
3888	*/
3889	rxcsum = E1000_READ_REG(&adapter->hw, E1000_RXCSUM);
3890	if (adapter->num_queues > 1) {
3891		u32 random[10], mrqc, shift = 0;
3892		union igb_reta {
3893			u32 dword;
3894			u8  bytes[4];
3895		} reta;
3896
3897		arc4rand(&random, sizeof(random), 0);
3898		if (adapter->hw.mac.type == e1000_82575)
3899			shift = 6;
3900		/* Fill the 128-entry RSS redirection table, four entries per RETA register */
3901		for (int i = 0; i < 128; i++) {
3902			reta.bytes[i & 3] =
3903			    (i % adapter->num_queues) << shift;
3904			if ((i & 3) == 3)
3905				E1000_WRITE_REG(&adapter->hw,
3906				    E1000_RETA(i >> 2), reta.dword);
3907		}
3908		/* Now fill in hash table */
3909		mrqc = E1000_MRQC_ENABLE_RSS_4Q;
3910		for (int i = 0; i < 10; i++)
3911			E1000_WRITE_REG_ARRAY(&adapter->hw,
3912			    E1000_RSSRK(0), i, random[i]);
3913
3914		mrqc |= (E1000_MRQC_RSS_FIELD_IPV4 |
3915		    E1000_MRQC_RSS_FIELD_IPV4_TCP);
3916		mrqc |= (E1000_MRQC_RSS_FIELD_IPV6 |
3917		    E1000_MRQC_RSS_FIELD_IPV6_TCP);
3918		mrqc |= (E1000_MRQC_RSS_FIELD_IPV4_UDP |
3919		    E1000_MRQC_RSS_FIELD_IPV6_UDP);
3920		mrqc |= (E1000_MRQC_RSS_FIELD_IPV6_UDP_EX |
3921		    E1000_MRQC_RSS_FIELD_IPV6_TCP_EX);
3922
3923		E1000_WRITE_REG(&adapter->hw, E1000_MRQC, mrqc);
3924
3925		/*
3926		** NOTE: Receive Full-Packet Checksum Offload
3927		** is mutually exclusive with Multiqueue. However
3928		** this is not the same as TCP/IP checksums which
3929		** still work.
3930		*/
3931		rxcsum |= E1000_RXCSUM_PCSD;
3932#if __FreeBSD_version >= 800000
3933		/* For SCTP Offload */
3934		if ((adapter->hw.mac.type == e1000_82576)
3935		    && (ifp->if_capenable & IFCAP_RXCSUM))
3936			rxcsum |= E1000_RXCSUM_CRCOFL;
3937#endif
3938	} else {
3939		/* Non RSS setup */
3940		if (ifp->if_capenable & IFCAP_RXCSUM) {
3941			rxcsum |= E1000_RXCSUM_IPPCSE;
3942#if __FreeBSD_version >= 800000
3943			if (adapter->hw.mac.type == e1000_82576)
3944				rxcsum |= E1000_RXCSUM_CRCOFL;
3945#endif
3946		} else
3947			rxcsum &= ~E1000_RXCSUM_TUOFL;
3948	}
3949	E1000_WRITE_REG(&adapter->hw, E1000_RXCSUM, rxcsum);
3950
3951	/* Setup the Receive Control Register */
3952	rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
3953	rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_LBM_NO |
3954		   E1000_RCTL_RDMTS_HALF |
3955		   (adapter->hw.mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
3956
3957	/* Make sure VLAN Filters are off */
3958	rctl &= ~E1000_RCTL_VFE;
3959	/* Don't store bad packets */
3960	rctl &= ~E1000_RCTL_SBP;
3961
3962	/* Enable Receives */
3963	E1000_WRITE_REG(&adapter->hw, E1000_RCTL, rctl);
3964
3965	/*
3966	 * Setup the HW Rx Head and Tail Descriptor Pointers
3967	 *   - needs to be after enable
3968	 */
3969	for (int i = 0; i < adapter->num_queues; i++) {
3970		E1000_WRITE_REG(&adapter->hw, E1000_RDH(i), 0);
3971		E1000_WRITE_REG(&adapter->hw, E1000_RDT(i),
3972		     adapter->num_rx_desc - 1);
3973	}
3974	return;
3975}
3976
3977/*********************************************************************
3978 *
3979 *  Free receive rings.
3980 *
3981 **********************************************************************/
3982static void
3983igb_free_receive_structures(struct adapter *adapter)
3984{
3985	struct rx_ring *rxr = adapter->rx_rings;
3986
3987	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
3988		struct lro_ctrl	*lro = &rxr->lro;
3989		igb_free_receive_buffers(rxr);
3990		tcp_lro_free(lro);
3991		igb_dma_free(adapter, &rxr->rxdma);
3992	}
3993
3994	free(adapter->rx_rings, M_DEVBUF);
3995}
3996
3997/*********************************************************************
3998 *
3999 *  Free receive ring data structures.
4000 *
4001 **********************************************************************/
4002static void
4003igb_free_receive_buffers(struct rx_ring *rxr)
4004{
4005	struct adapter		*adapter = rxr->adapter;
4006	struct igb_rx_buf	*rx_buffer;
4007
4008	INIT_DEBUGOUT("free_receive_structures: begin");
4009
4010	if (rxr->spare_map) {
4011		bus_dmamap_destroy(rxr->rxtag, rxr->spare_map);
4012		rxr->spare_map = NULL;
4013	}
4014
4015	/* Cleanup any existing buffers */
4016	if (rxr->rx_buffers != NULL) {
4017		rx_buffer = &rxr->rx_buffers[0];
4018		for (int i = 0; i < adapter->num_rx_desc; i++, rx_buffer++) {
4019			if (rx_buffer->m_head != NULL) {
4020				bus_dmamap_sync(rxr->rxtag, rx_buffer->map,
4021				    BUS_DMASYNC_POSTREAD);
4022				bus_dmamap_unload(rxr->rxtag,
4023				    rx_buffer->map);
4024				m_freem(rx_buffer->m_head);
4025				rx_buffer->m_head = NULL;
4026			} else if (rx_buffer->map != NULL)
4027				bus_dmamap_unload(rxr->rxtag,
4028				    rx_buffer->map);
4029			if (rx_buffer->map != NULL) {
4030				bus_dmamap_destroy(rxr->rxtag,
4031				    rx_buffer->map);
4032				rx_buffer->map = NULL;
4033			}
4034		}
4035	}
4036
4037	if (rxr->rx_buffers != NULL) {
4038		free(rxr->rx_buffers, M_DEVBUF);
4039		rxr->rx_buffers = NULL;
4040	}
4041
4042	if (rxr->rxtag != NULL) {
4043		bus_dma_tag_destroy(rxr->rxtag);
4044		rxr->rxtag = NULL;
4045	}
4046}
4047/*********************************************************************
4048 *
4049 *  This routine executes in interrupt context. It replenishes
4050 *  the mbufs in the descriptor and sends data which has been
4051 *  the mbufs in the descriptor ring and sends data which has been
4052 *  DMA'd into host memory to the upper layer.
4053 *  We loop at most count times if count is > 0, or until done if
4054 *  count < 0.
4055 *
4056 *  Return TRUE if more to clean, FALSE otherwise
4057 *********************************************************************/
4058
4059static bool
4060igb_rxeof(struct rx_ring *rxr, int count)
4061{
4062	struct adapter 		*adapter = rxr->adapter;
4063	struct ifnet   		*ifp = adapter->ifp;
4064	struct lro_ctrl		*lro = &rxr->lro;
4065	struct lro_entry	*queued;
4066	int			i, processed = 0;
4067	u32			staterr;
4068	union e1000_adv_rx_desc	*cur;
4069
4070
4071	IGB_RX_LOCK(rxr);
4072	i = rxr->next_to_check;
4073	cur = &rxr->rx_base[i];
4074	staterr = cur->wb.upper.status_error;
4075
4076	if (!(staterr & E1000_RXD_STAT_DD)) {
4077		IGB_RX_UNLOCK(rxr);
4078		return FALSE;
4079	}
4080
4081	/* Sync the ring */
4082	bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4083	    BUS_DMASYNC_POSTREAD);
4084
4085	while ((staterr & E1000_RXD_STAT_DD) && (count != 0) &&
4086	    (ifp->if_drv_flags & IFF_DRV_RUNNING)) {
4087		struct mbuf	*sendmp, *mh, *mp, *nh, *np;
4088		struct igb_rx_buf	*nxtbuf;
4089		u32		ptype;
4090		u16		hlen, plen, hdr, nextp, vtag;
4091		bool		accept_frame, eop, sctp = FALSE;
4092
4093
4094		accept_frame = TRUE;
4095		hlen = plen = nextp = 0;
4096		sendmp = mh = mp = nh = np = NULL;
4097
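		/* Note SCTP packets so the checksum routine can flag CSUM_SCTP_VALID */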
4098		ptype = (le32toh(cur->wb.lower.lo_dword.data) &
4099		    IGB_PKTTYPE_MASK);
4100		if (((ptype & E1000_RXDADV_PKTTYPE_ETQF) == 0) &&
4101		    ((ptype & E1000_RXDADV_PKTTYPE_SCTP) != 0))
4102			sctp = TRUE;
4103
4104		/* Sync the buffers */
4105		bus_dmamap_sync(rxr->rxtag, rxr->rx_buffers[i].map,
4106			    BUS_DMASYNC_POSTREAD);
4107		mh = rxr->rx_buffers[i].m_head;
4108		mp = rxr->rx_buffers[i].m_pack;
4109		vtag = le16toh(cur->wb.upper.vlan);
4110		eop = ((staterr & E1000_RXD_STAT_EOP) != 0);
4111
4112		/* Get the next descriptor we will process */
4113		if (!eop) {
4114			nextp = i + 1;
4115			if (nextp == adapter->num_rx_desc)
4116				nextp = 0;
4117			nxtbuf = &rxr->rx_buffers[nextp];
4118			prefetch(nxtbuf);
4119		}
4120
4121		/*
4122		** The way the hardware is configured to
4123		** split, it will ONLY use the header buffer
4124		** when header split is enabled, otherwise we
4125		** get legacy behavior, ie, both header and
4126		** payload are DMA'd into JUST the payload buffer.
4127		**
4128		** Rather than using the fmp/lmp global pointers
4129		** we now keep the head of a packet chain in the
4130		** m_nextpkt pointer and pass this along from one
4131		** descriptor to the next, until we get EOP.
4132		**
4133		*/
4134		if ((rxr->hdr_split) && (mh->m_nextpkt == NULL)) {
4135			hdr = le16toh(cur->
4136			    wb.lower.lo_dword.hs_rss.hdr_info);
4137			hlen = (hdr & E1000_RXDADV_HDRBUFLEN_MASK) >>
4138			    E1000_RXDADV_HDRBUFLEN_SHIFT;
4139			if (hlen > IGB_HDR_BUF)
4140				hlen = IGB_HDR_BUF;
4141			plen = le16toh(cur->wb.upper.length);
4142			mh->m_len = hlen;
4143			mh->m_flags |= M_PKTHDR;
4144			mh->m_next = NULL;
4145			mh->m_pkthdr.len = mh->m_len;
4146			/* Null this so getbuf replenishes */
4147			rxr->rx_buffers[i].m_head = NULL;
4148			/*
4149			** Get the payload length, this
4150			** Get the payload length; this
4151			** could be zero if it's a small
4152			*/
4153			if (plen) {
4154				mp->m_len = plen;
4155				mp->m_next = NULL;
4156				mp->m_flags &= ~M_PKTHDR;
4157				mh->m_next = mp;
4158				mh->m_pkthdr.len += mp->m_len;
4159				/* Null this so getbuf replenishes */
4160				rxr->rx_buffers[i].m_pack = NULL;
4161				rxr->rx_split_packets++;
4162			}
4163			/* Setup the forward chain */
4164			if (eop == 0) {
4165				nh = rxr->rx_buffers[nextp].m_head;
4166				np = rxr->rx_buffers[nextp].m_pack;
4167				nh->m_nextpkt = mh;
4168				if (plen)
4169					mp->m_next = np;
4170				else
4171					mh->m_next = np;
4172			} else {
4173				sendmp = mh;
4174				if (staterr & E1000_RXD_STAT_VP) {
4175					sendmp->m_pkthdr.ether_vtag = vtag;
4176					sendmp->m_flags |= M_VLANTAG;
4177				}
4178			}
4179		} else {
4180			/*
4181			** Either no header split, or a
4182			** secondary piece of a fragmented
4183			** packet.
4184			*/
4185			mp->m_len = le16toh(cur->wb.upper.length);
4186			rxr->rx_buffers[i].m_pack = NULL;
4187			/* stored head pointer */
4188			sendmp = mh->m_nextpkt;
4189			if (sendmp != NULL) {
4190				sendmp->m_pkthdr.len += mp->m_len;
4191				sendmp->m_nextpkt = NULL;
4192			} else {
4193				/* first desc of a non-ps chain */
4194				sendmp = mp;
4195				sendmp->m_flags |= M_PKTHDR;
4196				sendmp->m_pkthdr.len = mp->m_len;
4197				if (staterr & E1000_RXD_STAT_VP) {
4198					sendmp->m_pkthdr.ether_vtag = vtag;
4199					sendmp->m_flags |= M_VLANTAG;
4200				}
4201			}
4202			/* Carry head forward */
4203			if (eop == 0) {
4204				nh = rxr->rx_buffers[nextp].m_head;
4205				np = rxr->rx_buffers[nextp].m_pack;
4206				nh->m_nextpkt = sendmp;
4207				mp->m_next = np;
4208				sendmp = NULL;
4209			}
4210			mh->m_nextpkt = NULL;
4211		}
4212
4213		if (staterr & E1000_RXDEXT_ERR_FRAME_ERR_MASK)
4214			accept_frame = FALSE;
4215
4216		if (accept_frame) {
4217			++processed;
4218			if (eop) {
4219				--count;
4220				sendmp->m_pkthdr.rcvif = ifp;
4221				ifp->if_ipackets++;
4222				rxr->rx_packets++;
4223				/* capture data for AIM */
4224				rxr->bytes += sendmp->m_pkthdr.len;
4225				rxr->rx_bytes += rxr->bytes;
4226				if (ifp->if_capenable & IFCAP_RXCSUM)
4227					igb_rx_checksum(staterr, sendmp, sctp);
4228				else
4229					sendmp->m_pkthdr.csum_flags = 0;
4230#if __FreeBSD_version >= 800000
4231				/* Get the RSS Hash */
4232				sendmp->m_pkthdr.flowid =
4233				    le32toh(cur->wb.lower.hi_dword.rss);
4234				sendmp->m_flags |= M_FLOWID;
4235#endif
4236			}
4237		} else {
4238			ifp->if_ierrors++;
4239			/* Reuse loaded DMA map and just update mbuf chain */
4240			mh->m_len = MHLEN;
4241			mh->m_flags |= M_PKTHDR;
4242			mh->m_next = NULL;
4243			mp->m_len = mp->m_pkthdr.len = adapter->rx_mbuf_sz;
4244			mp->m_data = mp->m_ext.ext_buf;
4245			if (mp->m_next) { /* Free chain */
4246				sendmp = mp->m_next;
4247				m_free(sendmp);
4248			}
4249			mp->m_next = NULL;
4250			if (adapter->max_frame_size <=
4251			    (MCLBYTES - ETHER_ALIGN))
4252				m_adj(mp, ETHER_ALIGN);
4253			sendmp = NULL;
4254		}
4255		bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4256		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4257
4258		rxr->last_cleaned = i; /* for updating tail */
4259		if (++i == adapter->num_rx_desc)
4260			i = 0;
4261		/* Prefetch next descriptor */
4262		cur = &rxr->rx_base[i];
4263		prefetch(cur);
4264
4265		/*
4266		** Now send up to the stack,
4267		** note that the RX lock is
4268		** held thru this call.
4269		*/
4270		if (sendmp != NULL) {
4271			/*
4272			** Send to the stack if:
4273			**  - LRO not enabled, or
4274			**  - no LRO resources, or
4275			**  - lro enqueue fails
4276			*/
4277			if ((!rxr->lro_enabled) ||
4278			    ((!lro->lro_cnt) || (tcp_lro_rx(lro, sendmp, 0))))
4279				(*ifp->if_input)(ifp, sendmp);
4280		}
4281
4282		/* Replenish every 4 max */
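		/* Advancing RDT hands the refreshed descriptors back to the hardware */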
4283		if (processed == 4) {
4284			igb_get_buf(rxr, rxr->next_to_check, i);
4285			processed = 0;
4286			E1000_WRITE_REG(&adapter->hw,
4287			    E1000_RDT(rxr->me), rxr->last_cleaned);
4288			rxr->next_to_check = i;
4289		}
4290
4291		/* Next iteration */
4292		staterr = cur->wb.upper.status_error;
4293	}
4294
4295	/* Replenish remaining */
4296	if (processed != 0) {
4297		igb_get_buf(rxr, rxr->next_to_check, i);
4298		processed = 0;
4299		E1000_WRITE_REG(&adapter->hw,
4300		    E1000_RDT(rxr->me), rxr->last_cleaned);
4301	}
4302
4303	rxr->next_to_check = i;
4304
4305	/*
4306	 * Flush any outstanding LRO work
4307	 */
4308	while (!SLIST_EMPTY(&lro->lro_active)) {
4309		queued = SLIST_FIRST(&lro->lro_active);
4310		SLIST_REMOVE_HEAD(&lro->lro_active, next);
4311		tcp_lro_flush(lro, queued);
4312	}
4313
4314	IGB_RX_UNLOCK(rxr);
4315
4316	/*
4317	** If we are leaving with more to clean,
4318	** schedule another interrupt.
4319	*/
4320	if (staterr & E1000_RXD_STAT_DD) {
4321		E1000_WRITE_REG(&adapter->hw, E1000_EICS, rxr->eims);
4322		return TRUE;
4323	}
4324
4325	return FALSE;
4326}
4327
4328/*********************************************************************
4329 *
4330 *  Verify that the hardware indicated that the checksum is valid.
4331 *  Inform the stack about the status of checksum so that stack
4332 *  doesn't spend time verifying the checksum.
4333 *
4334 *********************************************************************/
4335static void
4336igb_rx_checksum(u32 staterr, struct mbuf *mp, bool sctp)
4337{
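	/* status_error carries status in the low 16 bits, the error byte in bits 31:24 */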
4338	u16 status = (u16)staterr;
4339	u8  errors = (u8) (staterr >> 24);
4340
4341	/* Ignore Checksum bit is set */
4342	if (status & E1000_RXD_STAT_IXSM) {
4343		mp->m_pkthdr.csum_flags = 0;
4344		return;
4345	}
4346
4347	if (status & E1000_RXD_STAT_IPCS) {
4348		/* Did it pass? */
4349		if (!(errors & E1000_RXD_ERR_IPE)) {
4350			/* IP Checksum Good */
4351			mp->m_pkthdr.csum_flags = CSUM_IP_CHECKED;
4352			mp->m_pkthdr.csum_flags |= CSUM_IP_VALID;
4353		} else
4354			mp->m_pkthdr.csum_flags = 0;
4355	}
4356
4357	if (status & (E1000_RXD_STAT_TCPCS | E1000_RXD_STAT_UDPCS)) {
4358		u16 type = (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
4359#if __FreeBSD_version >= 800000
4360		if (sctp) /* reassign */
4361			type = CSUM_SCTP_VALID;
4362#endif
4363		/* Did it pass? */
4364		if (!(errors & E1000_RXD_ERR_TCPE)) {
4365			mp->m_pkthdr.csum_flags |= type;
4366			if (sctp == FALSE)
4367				mp->m_pkthdr.csum_data = htons(0xffff);
4368		}
4369	}
4370	return;
4371}
4372
4373/*
4374 * This routine is run via a vlan
4375 * config EVENT
4376 */
4377static void
4378igb_register_vlan(void *arg, struct ifnet *ifp, u16 vtag)
4379{
4380	struct adapter	*adapter = ifp->if_softc;
4381	u32		index, bit;
4382
4383	if (ifp->if_softc != arg)	/* Not our event */
4384		return;
4385
4386	if ((vtag == 0) || (vtag > 4095))       /* Invalid */
4387                return;
4388
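	/* VFTA has 128 32-bit entries; vtag bits 11:5 pick the entry, bits 4:0 the bit */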
4389	index = (vtag >> 5) & 0x7F;
4390	bit = vtag & 0x1F;
4391	igb_shadow_vfta[index] |= (1 << bit);
4392	++adapter->num_vlans;
4393	/* Re-init to load the changes */
4394	igb_init(adapter);
4395}
4396
4397/*
4398 * This routine is run via a vlan
4399 * unconfig EVENT
4400 */
4401static void
4402igb_unregister_vlan(void *arg, struct ifnet *ifp, u16 vtag)
4403{
4404	struct adapter	*adapter = ifp->if_softc;
4405	u32		index, bit;
4406
4407	if (ifp->if_softc != arg)
4408		return;
4409
4410	if ((vtag == 0) || (vtag > 4095))       /* Invalid */
4411                return;
4412
4413	index = (vtag >> 5) & 0x7F;
4414	bit = vtag & 0x1F;
4415	igb_shadow_vfta[index] &= ~(1 << bit);
4416	--adapter->num_vlans;
4417	/* Re-init to load the changes */
4418	igb_init(adapter);
4419}
4420
4421static void
4422igb_setup_vlan_hw_support(struct adapter *adapter)
4423{
4424	struct e1000_hw *hw = &adapter->hw;
4425	u32             reg;
4426
4427	/*
4428	** We get here thru init_locked, meaning
4429	** a soft reset; this has already cleared
4430	** the VFTA and other state, so if no
4431	** vlans have been registered do nothing.
4432	*/
4433	if (adapter->num_vlans == 0)
4434                return;
4435
4436	/*
4437	** A soft reset zeroes out the VFTA, so
4438	** we need to repopulate it now.
4439	*/
4440	for (int i = 0; i < IGB_VFTA_SIZE; i++)
4441                if (igb_shadow_vfta[i] != 0)
4442			E1000_WRITE_REG_ARRAY(hw, E1000_VFTA,
4443                            i, igb_shadow_vfta[i]);
4444
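	/* Turn on the VME bit so the hardware processes VLAN tags */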
4445	reg = E1000_READ_REG(hw, E1000_CTRL);
4446	reg |= E1000_CTRL_VME;
4447	E1000_WRITE_REG(hw, E1000_CTRL, reg);
4448
4449	/* Enable the Filter Table */
4450	reg = E1000_READ_REG(hw, E1000_RCTL);
4451	reg &= ~E1000_RCTL_CFIEN;
4452	reg |= E1000_RCTL_VFE;
4453	E1000_WRITE_REG(hw, E1000_RCTL, reg);
4454
4455	/* Update the frame size */
4456	E1000_WRITE_REG(&adapter->hw, E1000_RLPML,
4457	    adapter->max_frame_size + VLAN_TAG_SIZE);
4458}
4459
4460static void
4461igb_enable_intr(struct adapter *adapter)
4462{
4463	/* With RSS set up what to auto clear */
4464	if (adapter->msix_mem) {
4465		E1000_WRITE_REG(&adapter->hw, E1000_EIAC,
4466		    adapter->eims_mask);
4467		E1000_WRITE_REG(&adapter->hw, E1000_EIAM,
4468		    adapter->eims_mask);
4469		E1000_WRITE_REG(&adapter->hw, E1000_EIMS,
4470		    adapter->eims_mask);
4471		E1000_WRITE_REG(&adapter->hw, E1000_IMS,
4472		    E1000_IMS_LSC);
4473	} else {
4474		E1000_WRITE_REG(&adapter->hw, E1000_IMS,
4475		    IMS_ENABLE_MASK);
4476	}
4477	E1000_WRITE_FLUSH(&adapter->hw);
4478
4479	return;
4480}
4481
4482static void
4483igb_disable_intr(struct adapter *adapter)
4484{
4485	if (adapter->msix_mem) {
4486		E1000_WRITE_REG(&adapter->hw, E1000_EIMC, ~0);
4487		E1000_WRITE_REG(&adapter->hw, E1000_EIAC, 0);
4488	}
4489	E1000_WRITE_REG(&adapter->hw, E1000_IMC, ~0);
4490	E1000_WRITE_FLUSH(&adapter->hw);
4491	return;
4492}
4493
4494/*
4495 * Bit of a misnomer: what this really means is
4496 * to enable OS management of the system... i.e.,
4497 * to disable special hardware management features.
4498 */
4499static void
4500igb_init_manageability(struct adapter *adapter)
4501{
4502	if (adapter->has_manage) {
4503		int manc2h = E1000_READ_REG(&adapter->hw, E1000_MANC2H);
4504		int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
4505
4506		/* disable hardware interception of ARP */
4507		manc &= ~(E1000_MANC_ARP_EN);
4508
4509                /* enable receiving management packets to the host */
4510		manc |= E1000_MANC_EN_MNG2HOST;
4511		manc2h |= 1 << 5;  /* Mng Port 623 */
4512		manc2h |= 1 << 6;  /* Mng Port 664 */
4513		E1000_WRITE_REG(&adapter->hw, E1000_MANC2H, manc2h);
4514		E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
4515	}
4516}
4517
4518/*
4519 * Give control back to hardware management
4520 * controller if there is one.
4521 */
4522static void
4523igb_release_manageability(struct adapter *adapter)
4524{
4525	if (adapter->has_manage) {
4526		int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
4527
4528		/* re-enable hardware interception of ARP */
4529		manc |= E1000_MANC_ARP_EN;
4530		manc &= ~E1000_MANC_EN_MNG2HOST;
4531
4532		E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
4533	}
4534}
4535
4536/*
4537 * igb_get_hw_control sets CTRL_EXT:DRV_LOAD bit.
4538 * For ASF and Pass Through versions of f/w this means that
4539 * the driver is loaded.
4540 *
4541 */
4542static void
4543igb_get_hw_control(struct adapter *adapter)
4544{
4545	u32 ctrl_ext;
4546
4547	/* Let firmware know the driver has taken over */
4548	ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4549	E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
4550	    ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
4551}
4552
4553/*
4554 * igb_release_hw_control resets CTRL_EXT:DRV_LOAD bit.
4555 * For ASF and Pass Through versions of f/w this means that the
4556 * driver is no longer loaded.
4557 *
4558 */
4559static void
4560igb_release_hw_control(struct adapter *adapter)
4561{
4562	u32 ctrl_ext;
4563
4564	/* Let firmware take over control of h/w */
4565	ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4566	E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
4567	    ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
4568}
4569
4570static int
4571igb_is_valid_ether_addr(uint8_t *addr)
4572{
4573	char zero_addr[6] = { 0, 0, 0, 0, 0, 0 };
4574
4575	if ((addr[0] & 1) || (!bcmp(addr, zero_addr, ETHER_ADDR_LEN))) {
4576		return (FALSE);
4577	}
4578
4579	return (TRUE);
4580}
4581
4582
4583/*
4584 * Enable PCI Wake On Lan capability
4585 */
4586void
4587igb_enable_wakeup(device_t dev)
4588{
4589	u16     cap, status;
4590	u8      id;
4591
4592	/* First find the capabilities pointer*/
4593	cap = pci_read_config(dev, PCIR_CAP_PTR, 2);
4594	/* Read the PM Capabilities */
4595	id = pci_read_config(dev, cap, 1);
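	/* Bail if power management is not the first capability in the list */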
4596	if (id != PCIY_PMG)     /* Something wrong */
4597		return;
4598	/* OK, we have the power capabilities, so
4599	   now get the status register */
4600	cap += PCIR_POWER_STATUS;
4601	status = pci_read_config(dev, cap, 2);
4602	status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE;
4603	pci_write_config(dev, cap, status, 2);
4604	return;
4605}
4606
4607
4608/**********************************************************************
4609 *
4610 *  Update the board statistics counters.
4611 *
4612 **********************************************************************/
4613static void
4614igb_update_stats_counters(struct adapter *adapter)
4615{
4616	struct ifnet   *ifp;
4617
4618	if (adapter->hw.phy.media_type == e1000_media_type_copper ||
4619	   (E1000_READ_REG(&adapter->hw, E1000_STATUS) & E1000_STATUS_LU)) {
4620		adapter->stats.symerrs += E1000_READ_REG(&adapter->hw, E1000_SYMERRS);
4621		adapter->stats.sec += E1000_READ_REG(&adapter->hw, E1000_SEC);
4622	}
4623	adapter->stats.crcerrs += E1000_READ_REG(&adapter->hw, E1000_CRCERRS);
4624	adapter->stats.mpc += E1000_READ_REG(&adapter->hw, E1000_MPC);
4625	adapter->stats.scc += E1000_READ_REG(&adapter->hw, E1000_SCC);
4626	adapter->stats.ecol += E1000_READ_REG(&adapter->hw, E1000_ECOL);
4627
4628	adapter->stats.mcc += E1000_READ_REG(&adapter->hw, E1000_MCC);
4629	adapter->stats.latecol += E1000_READ_REG(&adapter->hw, E1000_LATECOL);
4630	adapter->stats.colc += E1000_READ_REG(&adapter->hw, E1000_COLC);
4631	adapter->stats.dc += E1000_READ_REG(&adapter->hw, E1000_DC);
4632	adapter->stats.rlec += E1000_READ_REG(&adapter->hw, E1000_RLEC);
4633	adapter->stats.xonrxc += E1000_READ_REG(&adapter->hw, E1000_XONRXC);
4634	adapter->stats.xontxc += E1000_READ_REG(&adapter->hw, E1000_XONTXC);
4635	adapter->stats.xoffrxc += E1000_READ_REG(&adapter->hw, E1000_XOFFRXC);
4636	adapter->stats.xofftxc += E1000_READ_REG(&adapter->hw, E1000_XOFFTXC);
4637	adapter->stats.fcruc += E1000_READ_REG(&adapter->hw, E1000_FCRUC);
4638	adapter->stats.prc64 += E1000_READ_REG(&adapter->hw, E1000_PRC64);
4639	adapter->stats.prc127 += E1000_READ_REG(&adapter->hw, E1000_PRC127);
4640	adapter->stats.prc255 += E1000_READ_REG(&adapter->hw, E1000_PRC255);
4641	adapter->stats.prc511 += E1000_READ_REG(&adapter->hw, E1000_PRC511);
4642	adapter->stats.prc1023 += E1000_READ_REG(&adapter->hw, E1000_PRC1023);
4643	adapter->stats.prc1522 += E1000_READ_REG(&adapter->hw, E1000_PRC1522);
4644	adapter->stats.gprc += E1000_READ_REG(&adapter->hw, E1000_GPRC);
4645	adapter->stats.bprc += E1000_READ_REG(&adapter->hw, E1000_BPRC);
4646	adapter->stats.mprc += E1000_READ_REG(&adapter->hw, E1000_MPRC);
4647	adapter->stats.gptc += E1000_READ_REG(&adapter->hw, E1000_GPTC);
4648
4649	/* For the 64-bit byte counters the low dword must be read first. */
4650	/* Both registers clear on the read of the high dword */
4651
4652	adapter->stats.gorc += E1000_READ_REG(&adapter->hw, E1000_GORCH);
4653	adapter->stats.gotc += E1000_READ_REG(&adapter->hw, E1000_GOTCH);
4654
4655	adapter->stats.rnbc += E1000_READ_REG(&adapter->hw, E1000_RNBC);
4656	adapter->stats.ruc += E1000_READ_REG(&adapter->hw, E1000_RUC);
4657	adapter->stats.rfc += E1000_READ_REG(&adapter->hw, E1000_RFC);
4658	adapter->stats.roc += E1000_READ_REG(&adapter->hw, E1000_ROC);
4659	adapter->stats.rjc += E1000_READ_REG(&adapter->hw, E1000_RJC);
4660
4661	adapter->stats.tor += E1000_READ_REG(&adapter->hw, E1000_TORH);
4662	adapter->stats.tot += E1000_READ_REG(&adapter->hw, E1000_TOTH);
4663
4664	adapter->stats.tpr += E1000_READ_REG(&adapter->hw, E1000_TPR);
4665	adapter->stats.tpt += E1000_READ_REG(&adapter->hw, E1000_TPT);
4666	adapter->stats.ptc64 += E1000_READ_REG(&adapter->hw, E1000_PTC64);
4667	adapter->stats.ptc127 += E1000_READ_REG(&adapter->hw, E1000_PTC127);
4668	adapter->stats.ptc255 += E1000_READ_REG(&adapter->hw, E1000_PTC255);
4669	adapter->stats.ptc511 += E1000_READ_REG(&adapter->hw, E1000_PTC511);
4670	adapter->stats.ptc1023 += E1000_READ_REG(&adapter->hw, E1000_PTC1023);
4671	adapter->stats.ptc1522 += E1000_READ_REG(&adapter->hw, E1000_PTC1522);
4672	adapter->stats.mptc += E1000_READ_REG(&adapter->hw, E1000_MPTC);
4673	adapter->stats.bptc += E1000_READ_REG(&adapter->hw, E1000_BPTC);
4674
4675	adapter->stats.algnerrc +=
4676		E1000_READ_REG(&adapter->hw, E1000_ALGNERRC);
4677	adapter->stats.rxerrc +=
4678		E1000_READ_REG(&adapter->hw, E1000_RXERRC);
4679	adapter->stats.tncrs +=
4680		E1000_READ_REG(&adapter->hw, E1000_TNCRS);
4681	adapter->stats.cexterr +=
4682		E1000_READ_REG(&adapter->hw, E1000_CEXTERR);
4683	adapter->stats.tsctc +=
4684		E1000_READ_REG(&adapter->hw, E1000_TSCTC);
4685	adapter->stats.tsctfc +=
4686		E1000_READ_REG(&adapter->hw, E1000_TSCTFC);
4687	ifp = adapter->ifp;
4688
4689	ifp->if_collisions = adapter->stats.colc;
4690
4691	/* Rx Errors */
4692	ifp->if_ierrors = adapter->dropped_pkts + adapter->stats.rxerrc +
4693	    adapter->stats.crcerrs + adapter->stats.algnerrc +
4694	    adapter->stats.ruc + adapter->stats.roc +
4695	    adapter->stats.mpc + adapter->stats.cexterr;
4696
4697	/* Tx Errors */
4698	ifp->if_oerrors = adapter->stats.ecol +
4699	    adapter->stats.latecol + adapter->watchdog_events;
4700}
4701
4702
4703/**********************************************************************
4704 *
4705 *  This routine is called only when igb_display_debug_stats is enabled.
4706 *  This routine provides a way to take a look at important statistics
4707 *  maintained by the driver and hardware.
4708 *
4709 **********************************************************************/
4710static void
4711igb_print_debug_info(struct adapter *adapter)
4712{
4713	device_t dev = adapter->dev;
4714	struct rx_ring *rxr = adapter->rx_rings;
4715	struct tx_ring *txr = adapter->tx_rings;
4716	uint8_t *hw_addr = adapter->hw.hw_addr;
4717
4718	device_printf(dev, "Adapter hardware address = %p \n", hw_addr);
4719	device_printf(dev, "CTRL = 0x%x RCTL = 0x%x \n",
4720	    E1000_READ_REG(&adapter->hw, E1000_CTRL),
4721	    E1000_READ_REG(&adapter->hw, E1000_RCTL));
4722
4723#if	(DEBUG_HW > 0)  /* Don't output these errors normally */
4724	device_printf(dev, "IMS = 0x%x EIMS = 0x%x \n",
4725	    E1000_READ_REG(&adapter->hw, E1000_IMS),
4726	    E1000_READ_REG(&adapter->hw, E1000_EIMS));
4727#endif
4728
4729	device_printf(dev, "Packet buffer = Tx=%dk Rx=%dk \n",
4730	    ((E1000_READ_REG(&adapter->hw, E1000_PBA) & 0xffff0000) >> 16),
4731	    (E1000_READ_REG(&adapter->hw, E1000_PBA) & 0xffff) );
4732	device_printf(dev, "Flow control watermarks high = %d low = %d\n",
4733	    adapter->hw.fc.high_water,
4734	    adapter->hw.fc.low_water);
4735
4736	for (int i = 0; i < adapter->num_queues; i++, txr++) {
4737		device_printf(dev, "Queue(%d) tdh = %d, tdt = %d\n", i,
4738		    E1000_READ_REG(&adapter->hw, E1000_TDH(i)),
4739		    E1000_READ_REG(&adapter->hw, E1000_TDT(i)));
4740		device_printf(dev, "TX(%d) no descriptors avail event = %lld\n",
4741		    txr->me, (long long)txr->no_desc_avail);
4742		device_printf(dev, "TX(%d) MSIX IRQ Handled = %lld\n", txr->me,
4743		    (long long)txr->tx_irq);
4744		device_printf(dev, "TX(%d) Packets sent = %lld\n", txr->me,
4745		    (long long)txr->tx_packets);
4746	}
4747
4748	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4749		struct lro_ctrl *lro = &rxr->lro;
4750		device_printf(dev, "Queue(%d) rdh = %d, rdt = %d\n", i,
4751		    E1000_READ_REG(&adapter->hw, E1000_RDH(i)),
4752		    E1000_READ_REG(&adapter->hw, E1000_RDT(i)));
4753		device_printf(dev, "RX(%d) Packets received = %lld\n", rxr->me,
4754		    (long long)rxr->rx_packets);
4755		device_printf(dev, "RX(%d) Split Packets = %lld\n", rxr->me,
4756		    (long long)rxr->rx_split_packets);
4757		device_printf(dev, "RX(%d) Byte count = %lld\n", rxr->me,
4758		    (long long)rxr->rx_bytes);
4759		device_printf(dev, "RX(%d) MSIX IRQ Handled = %lld\n", rxr->me,
4760		    (long long)rxr->rx_irq);
4761		device_printf(dev,"RX(%d) LRO Queued= %d\n",
4762		    rxr->me, lro->lro_queued);
4763		device_printf(dev,"RX(%d) LRO Flushed= %d\n",
4764		    rxr->me, lro->lro_flushed);
4765	}
4766
4767	device_printf(dev, "LINK MSIX IRQ Handled = %u\n", adapter->link_irq);
4768
4769	device_printf(dev, "Mbuf defrag failed = %ld\n",
4770	    adapter->mbuf_defrag_failed);
4771	device_printf(dev, "Std mbuf header failed = %ld\n",
4772	    adapter->mbuf_header_failed);
4773	device_printf(dev, "Std mbuf packet failed = %ld\n",
4774	    adapter->mbuf_packet_failed);
4775	device_printf(dev, "Driver dropped packets = %ld\n",
4776	    adapter->dropped_pkts);
4777	device_printf(dev, "Driver tx dma failure in xmit = %ld\n",
4778		adapter->no_tx_dma_setup);
4779}
4780
4781static void
4782igb_print_hw_stats(struct adapter *adapter)
4783{
4784	device_t dev = adapter->dev;
4785
4786	device_printf(dev, "Excessive collisions = %lld\n",
4787	    (long long)adapter->stats.ecol);
4788#if	(DEBUG_HW > 0)  /* Don't output these errors normally */
4789	device_printf(dev, "Symbol errors = %lld\n",
4790	    (long long)adapter->stats.symerrs);
4791#endif
4792	device_printf(dev, "Sequence errors = %lld\n",
4793	    (long long)adapter->stats.sec);
4794	device_printf(dev, "Defer count = %lld\n",
4795	    (long long)adapter->stats.dc);
4796	device_printf(dev, "Missed Packets = %lld\n",
4797	    (long long)adapter->stats.mpc);
4798	device_printf(dev, "Receive No Buffers = %lld\n",
4799	    (long long)adapter->stats.rnbc);
4800	/* RLEC is inaccurate on some hardware, calculate our own. */
4801	device_printf(dev, "Receive Length Errors = %lld\n",
4802	    ((long long)adapter->stats.roc + (long long)adapter->stats.ruc));
4803	device_printf(dev, "Receive errors = %lld\n",
4804	    (long long)adapter->stats.rxerrc);
4805	device_printf(dev, "Crc errors = %lld\n",
4806	    (long long)adapter->stats.crcerrs);
4807	device_printf(dev, "Alignment errors = %lld\n",
4808	    (long long)adapter->stats.algnerrc);
4809	/* On 82575 these are collision counts */
4810	device_printf(dev, "Collision/Carrier extension errors = %lld\n",
4811	    (long long)adapter->stats.cexterr);
4812	device_printf(dev, "RX overruns = %ld\n", adapter->rx_overruns);
4813	device_printf(dev, "watchdog timeouts = %ld\n",
4814	    adapter->watchdog_events);
4815	device_printf(dev, "XON Rcvd = %lld\n",
4816	    (long long)adapter->stats.xonrxc);
4817	device_printf(dev, "XON Xmtd = %lld\n",
4818	    (long long)adapter->stats.xontxc);
4819	device_printf(dev, "XOFF Rcvd = %lld\n",
4820	    (long long)adapter->stats.xoffrxc);
4821	device_printf(dev, "XOFF Xmtd = %lld\n",
4822	    (long long)adapter->stats.xofftxc);
4823	device_printf(dev, "Good Packets Rcvd = %lld\n",
4824	    (long long)adapter->stats.gprc);
4825	device_printf(dev, "Good Packets Xmtd = %lld\n",
4826	    (long long)adapter->stats.gptc);
4827	device_printf(dev, "TSO Contexts Xmtd = %lld\n",
4828	    (long long)adapter->stats.tsctc);
4829	device_printf(dev, "TSO Contexts Failed = %lld\n",
4830	    (long long)adapter->stats.tsctfc);
4831}
4832
4833/**********************************************************************
4834 *
4835 *  This routine provides a way to dump out the adapter eeprom,
4836 *  often a useful debug/service tool. This only dumps the first
4837 *  32 words; the data that matters is within that extent.
4838 *
4839 **********************************************************************/
4840static void
4841igb_print_nvm_info(struct adapter *adapter)
4842{
4843	u16	eeprom_data;
4844	int	i, j, row = 0;
4845
4846	/* It's a bit crude, but it gets the job done */
4847	printf("\nInterface EEPROM Dump:\n");
4848	printf("Offset\n0x0000  ");
4849	for (i = 0, j = 0; i < 32; i++, j++) {
4850		if (j == 8) { /* Make the offset block */
4851			j = 0; ++row;
4852			printf("\n0x00%x0  ",row);
4853		}
4854		e1000_read_nvm(&adapter->hw, i, 1, &eeprom_data);
4855		printf("%04x ", eeprom_data);
4856	}
4857	printf("\n");
4858}
4859
4860static int
4861igb_sysctl_debug_info(SYSCTL_HANDLER_ARGS)
4862{
4863	struct adapter *adapter;
4864	int error;
4865	int result;
4866
4867	result = -1;
4868	error = sysctl_handle_int(oidp, &result, 0, req);
4869
4870	if (error || !req->newptr)
4871		return (error);
4872
4873	if (result == 1) {
4874		adapter = (struct adapter *)arg1;
4875		igb_print_debug_info(adapter);
4876	}
4877	/*
4878	 * This value will cause a hex dump of the
4879	 * first 32 16-bit words of the EEPROM to
4880	 * the screen.
4881	 */
4882	if (result == 2) {
4883		adapter = (struct adapter *)arg1;
4884		igb_print_nvm_info(adapter);
4885        }
4886
4887	return (error);
4888}
4889
4890
4891static int
4892igb_sysctl_stats(SYSCTL_HANDLER_ARGS)
4893{
4894	struct adapter *adapter;
4895	int error;
4896	int result;
4897
4898	result = -1;
4899	error = sysctl_handle_int(oidp, &result, 0, req);
4900
4901	if (error || !req->newptr)
4902		return (error);
4903
4904	if (result == 1) {
4905		adapter = (struct adapter *)arg1;
4906		igb_print_hw_stats(adapter);
4907	}
4908
4909	return (error);
4910}
4911
4912static void
4913igb_add_rx_process_limit(struct adapter *adapter, const char *name,
4914	const char *description, int *limit, int value)
4915{
4916	*limit = value;
4917	SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
4918	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
4919	    OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW, limit, value, description);
4920}
4921
4922#ifdef IGB_IEEE1588
4923/*
4924** igb_hwtstamp_ioctl - control hardware time stamping
4925**
4926** Outgoing time stamping can be enabled and disabled. Play nice and
4927** disable it when requested, although it shouldn't cause any overhead
4928** when no packet needs it. At most one packet in the queue may be
4929** marked for time stamping, otherwise it would be impossible to tell
4930** for sure to which packet the hardware time stamp belongs.
4931**
4932** Incoming time stamping has to be configured via the hardware
4933** filters. Not all combinations are supported, in particular event
4934** type has to be specified. Matching the kind of event packet is
4935** not supported, with the exception of "all V2 events regardless of
4936** level 2 or 4".
4937**
4938*/
4939static int
4940igb_hwtstamp_ioctl(struct adapter *adapter, struct ifreq *ifr)
4941{
4942	struct e1000_hw *hw = &adapter->hw;
4943	struct hwtstamp_ctrl *config;
4944	u32 tsync_tx_ctl_bit = E1000_TSYNCTXCTL_ENABLED;
4945	u32 tsync_rx_ctl_bit = E1000_TSYNCRXCTL_ENABLED;
4946	u32 tsync_rx_ctl_type = 0;
4947	u32 tsync_rx_cfg = 0;
4948	int is_l4 = 0;
4949	int is_l2 = 0;
4950	u16 port = 319; /* PTP */
4951	u32 regval;
4952
4953	config = (struct hwtstamp_ctrl *) ifr->ifr_data;
4954
4955	/* reserved for future extensions */
4956	if (config->flags)
4957		return (EINVAL);
4958
4959	switch (config->tx_type) {
4960	case HWTSTAMP_TX_OFF:
4961		tsync_tx_ctl_bit = 0;
4962		break;
4963	case HWTSTAMP_TX_ON:
4964		tsync_tx_ctl_bit = E1000_TSYNCTXCTL_ENABLED;
4965		break;
4966	default:
4967		return (ERANGE);
4968	}
4969
4970	switch (config->rx_filter) {
4971	case HWTSTAMP_FILTER_NONE:
4972		tsync_rx_ctl_bit = 0;
4973		break;
4974	case HWTSTAMP_FILTER_PTP_V1_L4_EVENT:
4975	case HWTSTAMP_FILTER_PTP_V2_L4_EVENT:
4976	case HWTSTAMP_FILTER_PTP_V2_L2_EVENT:
4977	case HWTSTAMP_FILTER_ALL:
4978		/*
4979		 * register TSYNCRXCFG must be set, therefore it is not
4980		 * possible to time stamp both Sync and Delay_Req messages
4981		 * => fall back to time stamping all packets
4982		 */
4983		tsync_rx_ctl_type = E1000_TSYNCRXCTL_TYPE_ALL;
4984		config->rx_filter = HWTSTAMP_FILTER_ALL;
4985		break;
4986	case HWTSTAMP_FILTER_PTP_V1_L4_SYNC:
4987		tsync_rx_ctl_type = E1000_TSYNCRXCTL_TYPE_L4_V1;
4988		tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_SYNC_MESSAGE;
4989		is_l4 = 1;
4990		break;
4991	case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ:
4992		tsync_rx_ctl_type = E1000_TSYNCRXCTL_TYPE_L4_V1;
4993		tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_DELAY_REQ_MESSAGE;
4994		is_l4 = 1;
4995		break;
4996	case HWTSTAMP_FILTER_PTP_V2_L2_SYNC:
4997	case HWTSTAMP_FILTER_PTP_V2_L4_SYNC:
4998		tsync_rx_ctl_type = E1000_TSYNCRXCTL_TYPE_L2_L4_V2;
4999		tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_SYNC_MESSAGE;
5000		is_l2 = 1;
5001		is_l4 = 1;
5002		config->rx_filter = HWTSTAMP_FILTER_SOME;
5003		break;
5004	case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ:
5005	case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ:
5006		tsync_rx_ctl_type = E1000_TSYNCRXCTL_TYPE_L2_L4_V2;
5007		tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_DELAY_REQ_MESSAGE;
5008		is_l2 = 1;
5009		is_l4 = 1;
5010		config->rx_filter = HWTSTAMP_FILTER_SOME;
5011		break;
5012	case HWTSTAMP_FILTER_PTP_V2_EVENT:
5013	case HWTSTAMP_FILTER_PTP_V2_SYNC:
5014	case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
5015		tsync_rx_ctl_type = E1000_TSYNCRXCTL_TYPE_EVENT_V2;
5016		config->rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT;
5017		is_l2 = 1;
5018		break;
5019	default:
5020		return (ERANGE);
5021	}
5022
5023	/* enable/disable TX */
5024	regval = E1000_READ_REG(hw, E1000_TSYNCTXCTL);
5025	regval = (regval & ~E1000_TSYNCTXCTL_ENABLED) | tsync_tx_ctl_bit;
5026	E1000_WRITE_REG(hw, E1000_TSYNCTXCTL, regval);
5027
5028	/* enable/disable RX, define which PTP packets are time stamped */
5029	regval = E1000_READ_REG(hw, E1000_TSYNCRXCTL);
5030	regval = (regval & ~E1000_TSYNCRXCTL_ENABLED) | tsync_rx_ctl_bit;
5031	regval = (regval & ~0xE) | tsync_rx_ctl_type;
5032	E1000_WRITE_REG(hw, E1000_TSYNCRXCTL, regval);
5033	E1000_WRITE_REG(hw, E1000_TSYNCRXCFG, tsync_rx_cfg);
5034
5035	/*
5036	 * Ethertype Filter Queue Filter[0][15:0] = 0x88F7
5037	 *                                          (Ethertype to filter on)
5038	 * Ethertype Filter Queue Filter[0][26] = 0x1 (Enable filter)
5039	 * Ethertype Filter Queue Filter[0][30] = 0x1 (Enable Timestamping)
5040	 */
5041	E1000_WRITE_REG(hw, E1000_ETQF0, is_l2 ? 0x440088f7 : 0);
5042
5043	/* L4 Queue Filter[0]: only filter by source and destination port */
5044	E1000_WRITE_REG(hw, E1000_SPQF0, htons(port));
5045	E1000_WRITE_REG(hw, E1000_IMIREXT(0), is_l4 ?
5046	     ((1<<12) | (1<<19) /* bypass size and control flags */) : 0);
5047	E1000_WRITE_REG(hw, E1000_IMIR(0), is_l4 ?
5048	     (htons(port)
5049	      | (0<<16) /* immediate interrupt disabled */
5050	      | 0 /* (1<<17) bit cleared: do not bypass
5051		     destination port check */)
5052		: 0);
5053	E1000_WRITE_REG(hw, E1000_FTQF0, is_l4 ?
5054	     (0x11 /* UDP */
5055	      | (1<<15) /* VF not compared */
5056	      | (1<<27) /* Enable Timestamping */
5057	      | (7<<28) /* only source port filter enabled,
5058			   source/target address and protocol
5059			   masked */)
5060	     : ((1<<15) | (15<<28) /* all mask bits set = filter not
5061				      enabled */));
5062
5063	E1000_WRITE_FLUSH(hw);
5064
5065	adapter->hwtstamp_ctrl = config;
5066
5067	/* clear TX/RX time stamp registers, just to be sure */
5068	regval = E1000_READ_REG(hw, E1000_TXSTMPH);
5069	regval = E1000_READ_REG(hw, E1000_RXSTMPH);
5070
5071	return (0);
5072}
5073
5074/*
5075** igb_read_clock - read raw cycle counter (to be used by time counter)
5076*/
5077static cycle_t igb_read_clock(const struct cyclecounter *tc)
5078{
5079       struct igb_adapter *adapter =
5080	struct igb_adapter *adapter =
5081	    container_of(tc, struct igb_adapter, cycles);
5082	struct e1000_hw *hw = &adapter->hw;
5083	u64 stamp;
5084
5085	stamp = E1000_READ_REG(hw, E1000_SYSTIML);
5086	stamp |= (u64)E1000_READ_REG(hw, E1000_SYSTIMH) << 32ULL;
5087
5088	return (stamp);
5089
5090#endif /* IGB_IEEE1588 */
5091