if_igb.c revision 217318
1/******************************************************************************
2
3  Copyright (c) 2001-2010, Intel Corporation
4  All rights reserved.
5
6  Redistribution and use in source and binary forms, with or without
7  modification, are permitted provided that the following conditions are met:
8
9   1. Redistributions of source code must retain the above copyright notice,
10      this list of conditions and the following disclaimer.
11
12   2. Redistributions in binary form must reproduce the above copyright
13      notice, this list of conditions and the following disclaimer in the
14      documentation and/or other materials provided with the distribution.
15
16   3. Neither the name of the Intel Corporation nor the names of its
17      contributors may be used to endorse or promote products derived from
18      this software without specific prior written permission.
19
20  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
24  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30  POSSIBILITY OF SUCH DAMAGE.
31
32******************************************************************************/
33/*$FreeBSD: head/sys/dev/e1000/if_igb.c 217318 2011-01-12 19:53:23Z mdf $*/
34
35
36#ifdef HAVE_KERNEL_OPTION_HEADERS
37#include "opt_device_polling.h"
38#include "opt_inet.h"
39#include "opt_altq.h"
40#endif
41
42#include <sys/param.h>
43#include <sys/systm.h>
44#if __FreeBSD_version >= 800000
45#include <sys/buf_ring.h>
46#endif
47#include <sys/bus.h>
48#include <sys/endian.h>
49#include <sys/kernel.h>
50#include <sys/kthread.h>
51#include <sys/malloc.h>
52#include <sys/mbuf.h>
53#include <sys/module.h>
54#include <sys/rman.h>
55#include <sys/socket.h>
56#include <sys/sockio.h>
57#include <sys/sysctl.h>
58#include <sys/taskqueue.h>
59#include <sys/eventhandler.h>
60#include <sys/pcpu.h>
61#include <sys/smp.h>
62#include <machine/smp.h>
63#include <machine/bus.h>
64#include <machine/resource.h>
65
66#include <net/bpf.h>
67#include <net/ethernet.h>
68#include <net/if.h>
69#include <net/if_arp.h>
70#include <net/if_dl.h>
71#include <net/if_media.h>
72
73#include <net/if_types.h>
74#include <net/if_vlan_var.h>
75
76#include <netinet/in_systm.h>
77#include <netinet/in.h>
78#include <netinet/if_ether.h>
79#include <netinet/ip.h>
80#include <netinet/ip6.h>
81#include <netinet/tcp.h>
82#include <netinet/tcp_lro.h>
83#include <netinet/udp.h>
84
85#include <machine/in_cksum.h>
86#include <dev/led/led.h>
87#include <dev/pci/pcivar.h>
88#include <dev/pci/pcireg.h>
89
90#include "e1000_api.h"
91#include "e1000_82575.h"
92#include "if_igb.h"
93
94/*********************************************************************
95 *  Set this to one to display debug statistics
96 *********************************************************************/
97int	igb_display_debug_stats = 0;
98
99/*********************************************************************
100 *  Driver version:
101 *********************************************************************/
102char igb_driver_version[] = "version - 2.0.7";
103
104
105/*********************************************************************
106 *  PCI Device ID Table
107 *
108 *  Used by probe to select devices to load on
109 *  Last field stores an index into e1000_strings
110 *  Last entry must be all 0s
111 *
112 *  { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
113 *********************************************************************/
114
115static igb_vendor_info_t igb_vendor_info_array[] =
116{
117	{ 0x8086, E1000_DEV_ID_82575EB_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
118	{ 0x8086, E1000_DEV_ID_82575EB_FIBER_SERDES,
119						PCI_ANY_ID, PCI_ANY_ID, 0},
120	{ 0x8086, E1000_DEV_ID_82575GB_QUAD_COPPER,
121						PCI_ANY_ID, PCI_ANY_ID, 0},
122	{ 0x8086, E1000_DEV_ID_82576,		PCI_ANY_ID, PCI_ANY_ID, 0},
123	{ 0x8086, E1000_DEV_ID_82576_NS,	PCI_ANY_ID, PCI_ANY_ID, 0},
124	{ 0x8086, E1000_DEV_ID_82576_NS_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
125	{ 0x8086, E1000_DEV_ID_82576_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
126	{ 0x8086, E1000_DEV_ID_82576_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
127	{ 0x8086, E1000_DEV_ID_82576_SERDES_QUAD,
128						PCI_ANY_ID, PCI_ANY_ID, 0},
129	{ 0x8086, E1000_DEV_ID_82576_QUAD_COPPER,
130						PCI_ANY_ID, PCI_ANY_ID, 0},
131	{ 0x8086, E1000_DEV_ID_82576_QUAD_COPPER_ET2,
132						PCI_ANY_ID, PCI_ANY_ID, 0},
133	{ 0x8086, E1000_DEV_ID_82576_VF,	PCI_ANY_ID, PCI_ANY_ID, 0},
134	{ 0x8086, E1000_DEV_ID_82580_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
135	{ 0x8086, E1000_DEV_ID_82580_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
136	{ 0x8086, E1000_DEV_ID_82580_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
137	{ 0x8086, E1000_DEV_ID_82580_SGMII,	PCI_ANY_ID, PCI_ANY_ID, 0},
138	{ 0x8086, E1000_DEV_ID_82580_COPPER_DUAL,
139						PCI_ANY_ID, PCI_ANY_ID, 0},
140	{ 0x8086, E1000_DEV_ID_82580_QUAD_FIBER,
141						PCI_ANY_ID, PCI_ANY_ID, 0},
142	{ 0x8086, E1000_DEV_ID_DH89XXCC_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
143	{ 0x8086, E1000_DEV_ID_DH89XXCC_SGMII,	PCI_ANY_ID, PCI_ANY_ID, 0},
144	/* required last entry */
145	{ 0, 0, 0, 0, 0}
146};
147
148/*********************************************************************
149 *  Table of branding strings for all supported NICs.
150 *********************************************************************/
151
152static char *igb_strings[] = {
153	"Intel(R) PRO/1000 Network Connection"
154};
155
156/*********************************************************************
157 *  Function prototypes
158 *********************************************************************/
159static int	igb_probe(device_t);
160static int	igb_attach(device_t);
161static int	igb_detach(device_t);
162static int	igb_shutdown(device_t);
163static int	igb_suspend(device_t);
164static int	igb_resume(device_t);
165static void	igb_start(struct ifnet *);
166static void	igb_start_locked(struct tx_ring *, struct ifnet *ifp);
167#if __FreeBSD_version >= 800000
168static int	igb_mq_start(struct ifnet *, struct mbuf *);
169static int	igb_mq_start_locked(struct ifnet *,
170		    struct tx_ring *, struct mbuf *);
171static void	igb_qflush(struct ifnet *);
172#endif
173static int	igb_ioctl(struct ifnet *, u_long, caddr_t);
174static void	igb_init(void *);
175static void	igb_init_locked(struct adapter *);
176static void	igb_stop(void *);
177static void	igb_media_status(struct ifnet *, struct ifmediareq *);
178static int	igb_media_change(struct ifnet *);
179static void	igb_identify_hardware(struct adapter *);
180static int	igb_allocate_pci_resources(struct adapter *);
181static int	igb_allocate_msix(struct adapter *);
182static int	igb_allocate_legacy(struct adapter *);
183static int	igb_setup_msix(struct adapter *);
184static void	igb_free_pci_resources(struct adapter *);
185static void	igb_local_timer(void *);
186static void	igb_reset(struct adapter *);
187static int	igb_setup_interface(device_t, struct adapter *);
188static int	igb_allocate_queues(struct adapter *);
189static void	igb_configure_queues(struct adapter *);
190
191static int	igb_allocate_transmit_buffers(struct tx_ring *);
192static void	igb_setup_transmit_structures(struct adapter *);
193static void	igb_setup_transmit_ring(struct tx_ring *);
194static void	igb_initialize_transmit_units(struct adapter *);
195static void	igb_free_transmit_structures(struct adapter *);
196static void	igb_free_transmit_buffers(struct tx_ring *);
197
198static int	igb_allocate_receive_buffers(struct rx_ring *);
199static int	igb_setup_receive_structures(struct adapter *);
200static int	igb_setup_receive_ring(struct rx_ring *);
201static void	igb_initialize_receive_units(struct adapter *);
202static void	igb_free_receive_structures(struct adapter *);
203static void	igb_free_receive_buffers(struct rx_ring *);
204static void	igb_free_receive_ring(struct rx_ring *);
205
206static void	igb_enable_intr(struct adapter *);
207static void	igb_disable_intr(struct adapter *);
208static void	igb_update_stats_counters(struct adapter *);
209static bool	igb_txeof(struct tx_ring *);
210
211static __inline	void igb_rx_discard(struct rx_ring *, int);
212static __inline void igb_rx_input(struct rx_ring *,
213		    struct ifnet *, struct mbuf *, u32);
214
215static bool	igb_rxeof(struct igb_queue *, int, int *);
216static void	igb_rx_checksum(u32, struct mbuf *, u32);
217static int	igb_tx_ctx_setup(struct tx_ring *, struct mbuf *);
218static bool	igb_tso_setup(struct tx_ring *, struct mbuf *, u32 *);
219static void	igb_set_promisc(struct adapter *);
220static void	igb_disable_promisc(struct adapter *);
221static void	igb_set_multi(struct adapter *);
222static void	igb_update_link_status(struct adapter *);
223static void	igb_refresh_mbufs(struct rx_ring *, int);
224
225static void	igb_register_vlan(void *, struct ifnet *, u16);
226static void	igb_unregister_vlan(void *, struct ifnet *, u16);
227static void	igb_setup_vlan_hw_support(struct adapter *);
228
229static int	igb_xmit(struct tx_ring *, struct mbuf **);
230static int	igb_dma_malloc(struct adapter *, bus_size_t,
231		    struct igb_dma_alloc *, int);
232static void	igb_dma_free(struct adapter *, struct igb_dma_alloc *);
233static int	igb_sysctl_nvm_info(SYSCTL_HANDLER_ARGS);
234static void	igb_print_nvm_info(struct adapter *);
235static int 	igb_is_valid_ether_addr(u8 *);
236static void     igb_add_hw_stats(struct adapter *);
237
238static void	igb_vf_init_stats(struct adapter *);
239static void	igb_update_vf_stats_counters(struct adapter *);
240
241/* Management and WOL Support */
242static void	igb_init_manageability(struct adapter *);
243static void	igb_release_manageability(struct adapter *);
244static void     igb_get_hw_control(struct adapter *);
245static void     igb_release_hw_control(struct adapter *);
246static void     igb_enable_wakeup(device_t);
247static void     igb_led_func(void *, int);
248
249static int	igb_irq_fast(void *);
250static void	igb_add_rx_process_limit(struct adapter *, const char *,
251		    const char *, int *, int);
252static void	igb_handle_que(void *context, int pending);
253static void	igb_handle_link(void *context, int pending);
254
255/* These are MSIX only irq handlers */
256static void	igb_msix_que(void *);
257static void	igb_msix_link(void *);
258
259#ifdef DEVICE_POLLING
260static poll_handler_t igb_poll;
261#endif /* POLLING */
262
263/*********************************************************************
264 *  FreeBSD Device Interface Entry Points
265 *********************************************************************/
266
267static device_method_t igb_methods[] = {
268	/* Device interface */
269	DEVMETHOD(device_probe, igb_probe),
270	DEVMETHOD(device_attach, igb_attach),
271	DEVMETHOD(device_detach, igb_detach),
272	DEVMETHOD(device_shutdown, igb_shutdown),
273	DEVMETHOD(device_suspend, igb_suspend),
274	DEVMETHOD(device_resume, igb_resume),
275	{0, 0}
276};
277
278static driver_t igb_driver = {
279	"igb", igb_methods, sizeof(struct adapter),
280};
281
282static devclass_t igb_devclass;
283DRIVER_MODULE(igb, pci, igb_driver, igb_devclass, 0, 0);
284MODULE_DEPEND(igb, pci, 1, 1, 1);
285MODULE_DEPEND(igb, ether, 1, 1, 1);
286
287/*********************************************************************
288 *  Tunable default values.
289 *********************************************************************/
290
291/* Descriptor defaults */
292static int igb_rxd = IGB_DEFAULT_RXD;
293static int igb_txd = IGB_DEFAULT_TXD;
294TUNABLE_INT("hw.igb.rxd", &igb_rxd);
295TUNABLE_INT("hw.igb.txd", &igb_txd);
296
297/*
298** AIM: Adaptive Interrupt Moderation
299** which means that the interrupt rate
300** is varied over time based on the
301** traffic for that interrupt vector
302*/
303static int igb_enable_aim = TRUE;
304TUNABLE_INT("hw.igb.enable_aim", &igb_enable_aim);
305
306/*
307 * MSIX should be the default for best performance,
308 * but this allows it to be forced off for testing.
309 */
310static int igb_enable_msix = 1;
311TUNABLE_INT("hw.igb.enable_msix", &igb_enable_msix);
312
313/*
314** Tuneable Interrupt rate
315*/
316static int igb_max_interrupt_rate = 8000;
317TUNABLE_INT("hw.igb.max_interrupt_rate", &igb_max_interrupt_rate);
318
319/*
320** Header split causes the packet header to
321** be dma'd to a seperate mbuf from the payload.
322** this can have memory alignment benefits. But
323** another plus is that small packets often fit
324** into the header and thus use no cluster. Its
325** a very workload dependent type feature.
326*/
327static bool igb_header_split = FALSE;
328TUNABLE_INT("hw.igb.hdr_split", &igb_header_split);
329
330/*
331** This will autoconfigure based on
332** the number of CPUs if left at 0.
333*/
334static int igb_num_queues = 0;
335TUNABLE_INT("hw.igb.num_queues", &igb_num_queues);
336
337/* How many packets rxeof tries to clean at a time */
338static int igb_rx_process_limit = 100;
339TUNABLE_INT("hw.igb.rx_process_limit", &igb_rx_process_limit);
340
341/* Flow control setting - default to FULL */
342static int igb_fc_setting = e1000_fc_full;
343TUNABLE_INT("hw.igb.fc_setting", &igb_fc_setting);
344
345/*********************************************************************
346 *  Device identification routine
347 *
348 *  igb_probe determines if the driver should be loaded on
349 *  adapter based on PCI vendor/device id of the adapter.
350 *
351 *  return BUS_PROBE_DEFAULT on success, positive on failure
352 *********************************************************************/
353
354static int
355igb_probe(device_t dev)
356{
357	char		adapter_name[60];
358	uint16_t	pci_vendor_id = 0;
359	uint16_t	pci_device_id = 0;
360	uint16_t	pci_subvendor_id = 0;
361	uint16_t	pci_subdevice_id = 0;
362	igb_vendor_info_t *ent;
363
364	INIT_DEBUGOUT("igb_probe: begin");
365
366	pci_vendor_id = pci_get_vendor(dev);
367	if (pci_vendor_id != IGB_VENDOR_ID)
368		return (ENXIO);
369
370	pci_device_id = pci_get_device(dev);
371	pci_subvendor_id = pci_get_subvendor(dev);
372	pci_subdevice_id = pci_get_subdevice(dev);
373
374	ent = igb_vendor_info_array;
375	while (ent->vendor_id != 0) {
376		if ((pci_vendor_id == ent->vendor_id) &&
377		    (pci_device_id == ent->device_id) &&
378
379		    ((pci_subvendor_id == ent->subvendor_id) ||
380		    (ent->subvendor_id == PCI_ANY_ID)) &&
381
382		    ((pci_subdevice_id == ent->subdevice_id) ||
383		    (ent->subdevice_id == PCI_ANY_ID))) {
384			sprintf(adapter_name, "%s %s",
385				igb_strings[ent->index],
386				igb_driver_version);
387			device_set_desc_copy(dev, adapter_name);
388			return (BUS_PROBE_DEFAULT);
389		}
390		ent++;
391	}
392
393	return (ENXIO);
394}
395
396/*********************************************************************
397 *  Device initialization routine
398 *
399 *  The attach entry point is called when the driver is being loaded.
400 *  This routine identifies the type of hardware, allocates all resources
401 *  and initializes the hardware.
402 *
403 *  return 0 on success, positive on failure
404 *********************************************************************/
405
406static int
407igb_attach(device_t dev)
408{
409	struct adapter	*adapter;
410	int		error = 0;
411	u16		eeprom_data;
412
413	INIT_DEBUGOUT("igb_attach: begin");
414
415	adapter = device_get_softc(dev);
416	adapter->dev = adapter->osdep.dev = dev;
417	IGB_CORE_LOCK_INIT(adapter, device_get_nameunit(dev));
418
419	/* SYSCTL stuff */
420	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
421	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
422	    OID_AUTO, "nvm", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
423	    igb_sysctl_nvm_info, "I", "NVM Information");
424
425	SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
426	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
427	    OID_AUTO, "flow_control", CTLTYPE_INT|CTLFLAG_RW,
428	    &igb_fc_setting, 0, "Flow Control");
429
430	SYSCTL_ADD_INT(device_get_sysctl_ctx(dev),
431	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
432	    OID_AUTO, "enable_aim", CTLTYPE_INT|CTLFLAG_RW,
433	    &igb_enable_aim, 1, "Interrupt Moderation");
434
435	callout_init_mtx(&adapter->timer, &adapter->core_mtx, 0);
436
437	/* Determine hardware and mac info */
438	igb_identify_hardware(adapter);
439
440	/* Setup PCI resources */
441	if (igb_allocate_pci_resources(adapter)) {
442		device_printf(dev, "Allocation of PCI resources failed\n");
443		error = ENXIO;
444		goto err_pci;
445	}
446
447	/* Do Shared Code initialization */
448	if (e1000_setup_init_funcs(&adapter->hw, TRUE)) {
449		device_printf(dev, "Setup of Shared code failed\n");
450		error = ENXIO;
451		goto err_pci;
452	}
453
454	e1000_get_bus_info(&adapter->hw);
455
456	/* Sysctls for limiting the amount of work done in the taskqueue */
457	igb_add_rx_process_limit(adapter, "rx_processing_limit",
458	    "max number of rx packets to process", &adapter->rx_process_limit,
459	    igb_rx_process_limit);
460
461	/*
462	 * Validate number of transmit and receive descriptors. It
463	 * must not exceed hardware maximum, and must be multiple
464	 * of E1000_DBA_ALIGN.
465	 */
466	if (((igb_txd * sizeof(struct e1000_tx_desc)) % IGB_DBA_ALIGN) != 0 ||
467	    (igb_txd > IGB_MAX_TXD) || (igb_txd < IGB_MIN_TXD)) {
468		device_printf(dev, "Using %d TX descriptors instead of %d!\n",
469		    IGB_DEFAULT_TXD, igb_txd);
470		adapter->num_tx_desc = IGB_DEFAULT_TXD;
471	} else
472		adapter->num_tx_desc = igb_txd;
473	if (((igb_rxd * sizeof(struct e1000_rx_desc)) % IGB_DBA_ALIGN) != 0 ||
474	    (igb_rxd > IGB_MAX_RXD) || (igb_rxd < IGB_MIN_RXD)) {
475		device_printf(dev, "Using %d RX descriptors instead of %d!\n",
476		    IGB_DEFAULT_RXD, igb_rxd);
477		adapter->num_rx_desc = IGB_DEFAULT_RXD;
478	} else
479		adapter->num_rx_desc = igb_rxd;
480
481	adapter->hw.mac.autoneg = DO_AUTO_NEG;
482	adapter->hw.phy.autoneg_wait_to_complete = FALSE;
483	adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
484
485	/* Copper options */
486	if (adapter->hw.phy.media_type == e1000_media_type_copper) {
487		adapter->hw.phy.mdix = AUTO_ALL_MODES;
488		adapter->hw.phy.disable_polarity_correction = FALSE;
489		adapter->hw.phy.ms_type = IGB_MASTER_SLAVE;
490	}
491
492	/*
493	 * Set the frame limits assuming
494	 * standard ethernet sized frames.
495	 */
496	adapter->max_frame_size = ETHERMTU + ETHER_HDR_LEN + ETHERNET_FCS_SIZE;
497	adapter->min_frame_size = ETH_ZLEN + ETHERNET_FCS_SIZE;
498
499	/*
500	** Allocate and Setup Queues
501	*/
502	if (igb_allocate_queues(adapter)) {
503		error = ENOMEM;
504		goto err_pci;
505	}
506
507	/* Allocate the appropriate stats memory */
508	if (adapter->hw.mac.type == e1000_vfadapt) {
509		adapter->stats =
510		    (struct e1000_vf_stats *)malloc(sizeof \
511		    (struct e1000_vf_stats), M_DEVBUF, M_NOWAIT | M_ZERO);
512		igb_vf_init_stats(adapter);
513	} else
514		adapter->stats =
515		    (struct e1000_hw_stats *)malloc(sizeof \
516		    (struct e1000_hw_stats), M_DEVBUF, M_NOWAIT | M_ZERO);
517	if (adapter->stats == NULL) {
518		device_printf(dev, "Can not allocate stats memory\n");
519		error = ENOMEM;
520		goto err_late;
521	}
522
523	/* Allocate multicast array memory. */
524	adapter->mta = malloc(sizeof(u8) * ETH_ADDR_LEN *
525	    MAX_NUM_MULTICAST_ADDRESSES, M_DEVBUF, M_NOWAIT);
526	if (adapter->mta == NULL) {
527		device_printf(dev, "Can not allocate multicast setup array\n");
528		error = ENOMEM;
529		goto err_late;
530	}
531
532	/*
533	** Start from a known state, this is
534	** important in reading the nvm and
535	** mac from that.
536	*/
537	e1000_reset_hw(&adapter->hw);
538
539	/* Make sure we have a good EEPROM before we read from it */
540	if (e1000_validate_nvm_checksum(&adapter->hw) < 0) {
541		/*
542		** Some PCI-E parts fail the first check due to
543		** the link being in sleep state, call it again,
544		** if it fails a second time its a real issue.
545		*/
546		if (e1000_validate_nvm_checksum(&adapter->hw) < 0) {
547			device_printf(dev,
548			    "The EEPROM Checksum Is Not Valid\n");
549			error = EIO;
550			goto err_late;
551		}
552	}
553
554	/*
555	** Copy the permanent MAC address out of the EEPROM
556	*/
557	if (e1000_read_mac_addr(&adapter->hw) < 0) {
558		device_printf(dev, "EEPROM read error while reading MAC"
559		    " address\n");
560		error = EIO;
561		goto err_late;
562	}
563	/* Check its sanity */
564	if (!igb_is_valid_ether_addr(adapter->hw.mac.addr)) {
565		device_printf(dev, "Invalid MAC address\n");
566		error = EIO;
567		goto err_late;
568	}
569
570	/*
571	** Configure Interrupts
572	*/
573	if ((adapter->msix > 1) && (igb_enable_msix))
574		error = igb_allocate_msix(adapter);
575	else /* MSI or Legacy */
576		error = igb_allocate_legacy(adapter);
577	if (error)
578		goto err_late;
579
580	/* Setup OS specific network interface */
581	if (igb_setup_interface(dev, adapter) != 0)
582		goto err_late;
583
584	/* Now get a good starting state */
585	igb_reset(adapter);
586
587	/* Initialize statistics */
588	igb_update_stats_counters(adapter);
589
590	adapter->hw.mac.get_link_status = 1;
591	igb_update_link_status(adapter);
592
593	/* Indicate SOL/IDER usage */
594	if (e1000_check_reset_block(&adapter->hw))
595		device_printf(dev,
596		    "PHY reset is blocked due to SOL/IDER session.\n");
597
598	/* Determine if we have to control management hardware */
599	adapter->has_manage = e1000_enable_mng_pass_thru(&adapter->hw);
600
601	/*
602	 * Setup Wake-on-Lan
603	 */
604	/* APME bit in EEPROM is mapped to WUC.APME */
605	eeprom_data = E1000_READ_REG(&adapter->hw, E1000_WUC) & E1000_WUC_APME;
606	if (eeprom_data)
607		adapter->wol = E1000_WUFC_MAG;
608
609	/* Register for VLAN events */
610	adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
611	     igb_register_vlan, adapter, EVENTHANDLER_PRI_FIRST);
612	adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
613	     igb_unregister_vlan, adapter, EVENTHANDLER_PRI_FIRST);
614
615	igb_add_hw_stats(adapter);
616
617	/* Tell the stack that the interface is not active */
618	adapter->ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
619
620	adapter->led_dev = led_create(igb_led_func, adapter,
621	    device_get_nameunit(dev));
622
623	INIT_DEBUGOUT("igb_attach: end");
624
625	return (0);
626
627err_late:
628	igb_free_transmit_structures(adapter);
629	igb_free_receive_structures(adapter);
630	igb_release_hw_control(adapter);
631	if (adapter->ifp != NULL)
632		if_free(adapter->ifp);
633err_pci:
634	igb_free_pci_resources(adapter);
635	free(adapter->mta, M_DEVBUF);
636	IGB_CORE_LOCK_DESTROY(adapter);
637
638	return (error);
639}
640
641/*********************************************************************
642 *  Device removal routine
643 *
644 *  The detach entry point is called when the driver is being removed.
645 *  This routine stops the adapter and deallocates all the resources
646 *  that were allocated for driver operation.
647 *
648 *  return 0 on success, positive on failure
649 *********************************************************************/
650
651static int
652igb_detach(device_t dev)
653{
654	struct adapter	*adapter = device_get_softc(dev);
655	struct ifnet	*ifp = adapter->ifp;
656
657	INIT_DEBUGOUT("igb_detach: begin");
658
659	/* Make sure VLANS are not using driver */
660	if (adapter->ifp->if_vlantrunk != NULL) {
661		device_printf(dev,"Vlan in use, detach first\n");
662		return (EBUSY);
663	}
664
665	if (adapter->led_dev != NULL)
666		led_destroy(adapter->led_dev);
667
668#ifdef DEVICE_POLLING
669	if (ifp->if_capenable & IFCAP_POLLING)
670		ether_poll_deregister(ifp);
671#endif
672
673	IGB_CORE_LOCK(adapter);
674	adapter->in_detach = 1;
675	igb_stop(adapter);
676	IGB_CORE_UNLOCK(adapter);
677
678	e1000_phy_hw_reset(&adapter->hw);
679
680	/* Give control back to firmware */
681	igb_release_manageability(adapter);
682	igb_release_hw_control(adapter);
683
684	if (adapter->wol) {
685		E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
686		E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
687		igb_enable_wakeup(dev);
688	}
689
690	/* Unregister VLAN events */
691	if (adapter->vlan_attach != NULL)
692		EVENTHANDLER_DEREGISTER(vlan_config, adapter->vlan_attach);
693	if (adapter->vlan_detach != NULL)
694		EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach);
695
696	ether_ifdetach(adapter->ifp);
697
698	callout_drain(&adapter->timer);
699
700	igb_free_pci_resources(adapter);
701	bus_generic_detach(dev);
702	if_free(ifp);
703
704	igb_free_transmit_structures(adapter);
705	igb_free_receive_structures(adapter);
706	free(adapter->mta, M_DEVBUF);
707
708	IGB_CORE_LOCK_DESTROY(adapter);
709
710	return (0);
711}
712
713/*********************************************************************
714 *
715 *  Shutdown entry point
716 *
717 **********************************************************************/
718
719static int
720igb_shutdown(device_t dev)
721{
722	return igb_suspend(dev);
723}
724
725/*
726 * Suspend/resume device methods.
727 */
728static int
729igb_suspend(device_t dev)
730{
731	struct adapter *adapter = device_get_softc(dev);
732
733	IGB_CORE_LOCK(adapter);
734
735	igb_stop(adapter);
736
737        igb_release_manageability(adapter);
738	igb_release_hw_control(adapter);
739
740        if (adapter->wol) {
741                E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
742                E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
743                igb_enable_wakeup(dev);
744        }
745
746	IGB_CORE_UNLOCK(adapter);
747
748	return bus_generic_suspend(dev);
749}
750
751static int
752igb_resume(device_t dev)
753{
754	struct adapter *adapter = device_get_softc(dev);
755	struct ifnet *ifp = adapter->ifp;
756
757	IGB_CORE_LOCK(adapter);
758	igb_init_locked(adapter);
759	igb_init_manageability(adapter);
760
761	if ((ifp->if_flags & IFF_UP) &&
762	    (ifp->if_drv_flags & IFF_DRV_RUNNING))
763		igb_start(ifp);
764
765	IGB_CORE_UNLOCK(adapter);
766
767	return bus_generic_resume(dev);
768}
769
770
771/*********************************************************************
772 *  Transmit entry point
773 *
774 *  igb_start is called by the stack to initiate a transmit.
775 *  The driver will remain in this routine as long as there are
776 *  packets to transmit and transmit resources are available.
777 *  In case resources are not available stack is notified and
778 *  the packet is requeued.
779 **********************************************************************/
780
781static void
782igb_start_locked(struct tx_ring *txr, struct ifnet *ifp)
783{
784	struct adapter	*adapter = ifp->if_softc;
785	struct mbuf	*m_head;
786
787	IGB_TX_LOCK_ASSERT(txr);
788
789	if ((ifp->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
790	    IFF_DRV_RUNNING)
791		return;
792	if (!adapter->link_active)
793		return;
794
795	/* Call cleanup if number of TX descriptors low */
796	if (txr->tx_avail <= IGB_TX_CLEANUP_THRESHOLD)
797		igb_txeof(txr);
798
799	while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
800		if (txr->tx_avail <= IGB_TX_OP_THRESHOLD) {
801			ifp->if_drv_flags |= IFF_DRV_OACTIVE;
802			break;
803		}
804		IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
805		if (m_head == NULL)
806			break;
807		/*
808		 *  Encapsulation can modify our pointer, and or make it
809		 *  NULL on failure.  In that event, we can't requeue.
810		 */
811		if (igb_xmit(txr, &m_head)) {
812			if (m_head == NULL)
813				break;
814			ifp->if_drv_flags |= IFF_DRV_OACTIVE;
815			IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
816			break;
817		}
818
819		/* Send a copy of the frame to the BPF listener */
820		ETHER_BPF_MTAP(ifp, m_head);
821
822		/* Set watchdog on */
823		txr->watchdog_time = ticks;
824		txr->queue_status = IGB_QUEUE_WORKING;
825	}
826}
827
828/*
829 * Legacy TX driver routine, called from the
830 * stack, always uses tx[0], and spins for it.
831 * Should not be used with multiqueue tx
832 */
833static void
834igb_start(struct ifnet *ifp)
835{
836	struct adapter	*adapter = ifp->if_softc;
837	struct tx_ring	*txr = adapter->tx_rings;
838
839	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
840		IGB_TX_LOCK(txr);
841		igb_start_locked(txr, ifp);
842		IGB_TX_UNLOCK(txr);
843	}
844	return;
845}
846
847#if __FreeBSD_version >= 800000
848/*
849** Multiqueue Transmit driver
850**
851*/
852static int
853igb_mq_start(struct ifnet *ifp, struct mbuf *m)
854{
855	struct adapter		*adapter = ifp->if_softc;
856	struct igb_queue	*que;
857	struct tx_ring		*txr;
858	int 			i = 0, err = 0;
859
860	/* Which queue to use */
861	if ((m->m_flags & M_FLOWID) != 0)
862		i = m->m_pkthdr.flowid % adapter->num_queues;
863
864	txr = &adapter->tx_rings[i];
865	que = &adapter->queues[i];
866
867	if (IGB_TX_TRYLOCK(txr)) {
868		err = igb_mq_start_locked(ifp, txr, m);
869		IGB_TX_UNLOCK(txr);
870	} else {
871		err = drbr_enqueue(ifp, txr->br, m);
872		taskqueue_enqueue(que->tq, &que->que_task);
873	}
874
875	return (err);
876}
877
878static int
879igb_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr, struct mbuf *m)
880{
881	struct adapter  *adapter = txr->adapter;
882        struct mbuf     *next;
883        int             err = 0, enq;
884
885	IGB_TX_LOCK_ASSERT(txr);
886
887	if ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) !=
888	    IFF_DRV_RUNNING || adapter->link_active == 0) {
889		if (m != NULL)
890			err = drbr_enqueue(ifp, txr->br, m);
891		return (err);
892	}
893
894	/* Call cleanup if number of TX descriptors low */
895	if (txr->tx_avail <= IGB_TX_CLEANUP_THRESHOLD)
896		igb_txeof(txr);
897
898	enq = 0;
899	if (m == NULL) {
900		next = drbr_dequeue(ifp, txr->br);
901	} else if (drbr_needs_enqueue(ifp, txr->br)) {
902		if ((err = drbr_enqueue(ifp, txr->br, m)) != 0)
903			return (err);
904		next = drbr_dequeue(ifp, txr->br);
905	} else
906		next = m;
907
908	/* Process the queue */
909	while (next != NULL) {
910		if ((err = igb_xmit(txr, &next)) != 0) {
911			if (next != NULL)
912				err = drbr_enqueue(ifp, txr->br, next);
913			break;
914		}
915		enq++;
916		drbr_stats_update(ifp, next->m_pkthdr.len, next->m_flags);
917		ETHER_BPF_MTAP(ifp, next);
918		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
919			break;
920		if (txr->tx_avail <= IGB_TX_OP_THRESHOLD) {
921			ifp->if_drv_flags |= IFF_DRV_OACTIVE;
922			break;
923		}
924		next = drbr_dequeue(ifp, txr->br);
925	}
926	if (enq > 0) {
927		/* Set the watchdog */
928		txr->queue_status = IGB_QUEUE_WORKING;
929		txr->watchdog_time = ticks;
930	}
931	return (err);
932}
933
934/*
935** Flush all ring buffers
936*/
937static void
938igb_qflush(struct ifnet *ifp)
939{
940	struct adapter	*adapter = ifp->if_softc;
941	struct tx_ring	*txr = adapter->tx_rings;
942	struct mbuf	*m;
943
944	for (int i = 0; i < adapter->num_queues; i++, txr++) {
945		IGB_TX_LOCK(txr);
946		while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
947			m_freem(m);
948		IGB_TX_UNLOCK(txr);
949	}
950	if_qflush(ifp);
951}
952#endif /* __FreeBSD_version >= 800000 */
953
954/*********************************************************************
955 *  Ioctl entry point
956 *
957 *  igb_ioctl is called when the user wants to configure the
958 *  interface.
959 *
960 *  return 0 on success, positive on failure
961 **********************************************************************/
962
963static int
964igb_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
965{
966	struct adapter	*adapter = ifp->if_softc;
967	struct ifreq *ifr = (struct ifreq *)data;
968#ifdef INET
969	struct ifaddr *ifa = (struct ifaddr *)data;
970#endif
971	int error = 0;
972
973	if (adapter->in_detach)
974		return (error);
975
976	switch (command) {
977	case SIOCSIFADDR:
978#ifdef INET
979		if (ifa->ifa_addr->sa_family == AF_INET) {
980			/*
981			 * XXX
982			 * Since resetting hardware takes a very long time
983			 * and results in link renegotiation we only
984			 * initialize the hardware only when it is absolutely
985			 * required.
986			 */
987			ifp->if_flags |= IFF_UP;
988			if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) {
989				IGB_CORE_LOCK(adapter);
990				igb_init_locked(adapter);
991				IGB_CORE_UNLOCK(adapter);
992			}
993			if (!(ifp->if_flags & IFF_NOARP))
994				arp_ifinit(ifp, ifa);
995		} else
996#endif
997			error = ether_ioctl(ifp, command, data);
998		break;
999	case SIOCSIFMTU:
1000	    {
1001		int max_frame_size;
1002
1003		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)");
1004
1005		IGB_CORE_LOCK(adapter);
1006		max_frame_size = 9234;
1007		if (ifr->ifr_mtu > max_frame_size - ETHER_HDR_LEN -
1008		    ETHER_CRC_LEN) {
1009			IGB_CORE_UNLOCK(adapter);
1010			error = EINVAL;
1011			break;
1012		}
1013
1014		ifp->if_mtu = ifr->ifr_mtu;
1015		adapter->max_frame_size =
1016		    ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
1017		igb_init_locked(adapter);
1018		IGB_CORE_UNLOCK(adapter);
1019		break;
1020	    }
1021	case SIOCSIFFLAGS:
1022		IOCTL_DEBUGOUT("ioctl rcv'd:\
1023		    SIOCSIFFLAGS (Set Interface Flags)");
1024		IGB_CORE_LOCK(adapter);
1025		if (ifp->if_flags & IFF_UP) {
1026			if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) {
1027				if ((ifp->if_flags ^ adapter->if_flags) &
1028				    (IFF_PROMISC | IFF_ALLMULTI)) {
1029					igb_disable_promisc(adapter);
1030					igb_set_promisc(adapter);
1031				}
1032			} else
1033				igb_init_locked(adapter);
1034		} else
1035			if (ifp->if_drv_flags & IFF_DRV_RUNNING)
1036				igb_stop(adapter);
1037		adapter->if_flags = ifp->if_flags;
1038		IGB_CORE_UNLOCK(adapter);
1039		break;
1040	case SIOCADDMULTI:
1041	case SIOCDELMULTI:
1042		IOCTL_DEBUGOUT("ioctl rcv'd: SIOC(ADD|DEL)MULTI");
1043		if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1044			IGB_CORE_LOCK(adapter);
1045			igb_disable_intr(adapter);
1046			igb_set_multi(adapter);
1047#ifdef DEVICE_POLLING
1048			if (!(ifp->if_capenable & IFCAP_POLLING))
1049#endif
1050				igb_enable_intr(adapter);
1051			IGB_CORE_UNLOCK(adapter);
1052		}
1053		break;
1054	case SIOCSIFMEDIA:
1055		/*
1056		** As the speed/duplex settings are being
1057		** changed, we need toreset the PHY.
1058		*/
1059		adapter->hw.phy.reset_disable = FALSE;
1060		/* Check SOL/IDER usage */
1061		IGB_CORE_LOCK(adapter);
1062		if (e1000_check_reset_block(&adapter->hw)) {
1063			IGB_CORE_UNLOCK(adapter);
1064			device_printf(adapter->dev, "Media change is"
1065			    " blocked due to SOL/IDER session.\n");
1066			break;
1067		}
1068		IGB_CORE_UNLOCK(adapter);
1069	case SIOCGIFMEDIA:
1070		IOCTL_DEBUGOUT("ioctl rcv'd: \
1071		    SIOCxIFMEDIA (Get/Set Interface Media)");
1072		error = ifmedia_ioctl(ifp, ifr, &adapter->media, command);
1073		break;
1074	case SIOCSIFCAP:
1075	    {
1076		int mask, reinit;
1077
1078		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFCAP (Set Capabilities)");
1079		reinit = 0;
1080		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
1081#ifdef DEVICE_POLLING
1082		if (mask & IFCAP_POLLING) {
1083			if (ifr->ifr_reqcap & IFCAP_POLLING) {
1084				error = ether_poll_register(igb_poll, ifp);
1085				if (error)
1086					return (error);
1087				IGB_CORE_LOCK(adapter);
1088				igb_disable_intr(adapter);
1089				ifp->if_capenable |= IFCAP_POLLING;
1090				IGB_CORE_UNLOCK(adapter);
1091			} else {
1092				error = ether_poll_deregister(ifp);
1093				/* Enable interrupt even in error case */
1094				IGB_CORE_LOCK(adapter);
1095				igb_enable_intr(adapter);
1096				ifp->if_capenable &= ~IFCAP_POLLING;
1097				IGB_CORE_UNLOCK(adapter);
1098			}
1099		}
1100#endif
1101		if (mask & IFCAP_HWCSUM) {
1102			ifp->if_capenable ^= IFCAP_HWCSUM;
1103			reinit = 1;
1104		}
1105		if (mask & IFCAP_TSO4) {
1106			ifp->if_capenable ^= IFCAP_TSO4;
1107			reinit = 1;
1108		}
1109		if (mask & IFCAP_VLAN_HWTAGGING) {
1110			ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
1111			reinit = 1;
1112		}
1113		if (mask & IFCAP_VLAN_HWFILTER) {
1114			ifp->if_capenable ^= IFCAP_VLAN_HWFILTER;
1115			reinit = 1;
1116		}
1117		if (mask & IFCAP_LRO) {
1118			ifp->if_capenable ^= IFCAP_LRO;
1119			reinit = 1;
1120		}
1121		if (reinit && (ifp->if_drv_flags & IFF_DRV_RUNNING))
1122			igb_init(adapter);
1123		VLAN_CAPABILITIES(ifp);
1124		break;
1125	    }
1126
1127	default:
1128		error = ether_ioctl(ifp, command, data);
1129		break;
1130	}
1131
1132	return (error);
1133}
1134
1135
1136/*********************************************************************
1137 *  Init entry point
1138 *
1139 *  This routine is used in two ways. It is used by the stack as
1140 *  init entry point in network interface structure. It is also used
1141 *  by the driver as a hw/sw initialization routine to get to a
1142 *  consistent state.
1143 *
1144 *  return 0 on success, positive on failure
1145 **********************************************************************/
1146
1147static void
1148igb_init_locked(struct adapter *adapter)
1149{
1150	struct ifnet	*ifp = adapter->ifp;
1151	device_t	dev = adapter->dev;
1152
1153	INIT_DEBUGOUT("igb_init: begin");
1154
1155	IGB_CORE_LOCK_ASSERT(adapter);
1156
1157	igb_disable_intr(adapter);
1158	callout_stop(&adapter->timer);
1159
1160	/* Get the latest mac address, User can use a LAA */
1161        bcopy(IF_LLADDR(adapter->ifp), adapter->hw.mac.addr,
1162              ETHER_ADDR_LEN);
1163
1164	/* Put the address into the Receive Address Array */
1165	e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);
1166
1167	igb_reset(adapter);
1168	igb_update_link_status(adapter);
1169
1170	E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);
1171
1172	/* Set hardware offload abilities */
1173	ifp->if_hwassist = 0;
1174	if (ifp->if_capenable & IFCAP_TXCSUM) {
1175		ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
1176#if __FreeBSD_version >= 800000
1177		if (adapter->hw.mac.type == e1000_82576)
1178			ifp->if_hwassist |= CSUM_SCTP;
1179#endif
1180	}
1181
1182	if (ifp->if_capenable & IFCAP_TSO4)
1183		ifp->if_hwassist |= CSUM_TSO;
1184
1185	/* Configure for OS presence */
1186	igb_init_manageability(adapter);
1187
1188	/* Prepare transmit descriptors and buffers */
1189	igb_setup_transmit_structures(adapter);
1190	igb_initialize_transmit_units(adapter);
1191
1192	/* Setup Multicast table */
1193	igb_set_multi(adapter);
1194
1195	/*
1196	** Figure out the desired mbuf pool
1197	** for doing jumbo/packetsplit
1198	*/
1199	if (adapter->max_frame_size <= 2048)
1200		adapter->rx_mbuf_sz = MCLBYTES;
1201	else if (adapter->max_frame_size <= 4096)
1202		adapter->rx_mbuf_sz = MJUMPAGESIZE;
1203	else
1204		adapter->rx_mbuf_sz = MJUM9BYTES;
1205
1206	/* Prepare receive descriptors and buffers */
1207	if (igb_setup_receive_structures(adapter)) {
1208		device_printf(dev, "Could not setup receive structures\n");
1209		return;
1210	}
1211	igb_initialize_receive_units(adapter);
1212
1213        /* Use real VLAN Filter support? */
1214	if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) {
1215		if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
1216			/* Use real VLAN Filter support */
1217			igb_setup_vlan_hw_support(adapter);
1218		else {
1219			u32 ctrl;
1220			ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
1221			ctrl |= E1000_CTRL_VME;
1222			E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
1223		}
1224	}
1225
1226	/* Don't lose promiscuous settings */
1227	igb_set_promisc(adapter);
1228
1229	ifp->if_drv_flags |= IFF_DRV_RUNNING;
1230	ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
1231
1232	callout_reset(&adapter->timer, hz, igb_local_timer, adapter);
1233	e1000_clear_hw_cntrs_base_generic(&adapter->hw);
1234
1235	if (adapter->msix > 1) /* Set up queue routing */
1236		igb_configure_queues(adapter);
1237
1238	/* this clears any pending interrupts */
1239	E1000_READ_REG(&adapter->hw, E1000_ICR);
1240#ifdef DEVICE_POLLING
1241	/*
1242	 * Only enable interrupts if we are not polling, make sure
1243	 * they are off otherwise.
1244	 */
1245	if (ifp->if_capenable & IFCAP_POLLING)
1246		igb_disable_intr(adapter);
1247	else
1248#endif /* DEVICE_POLLING */
1249	{
1250	igb_enable_intr(adapter);
1251	E1000_WRITE_REG(&adapter->hw, E1000_ICS, E1000_ICS_LSC);
1252	}
1253
1254	/* Don't reset the phy next time init gets called */
1255	adapter->hw.phy.reset_disable = TRUE;
1256}
1257
1258static void
1259igb_init(void *arg)
1260{
1261	struct adapter *adapter = arg;
1262
1263	IGB_CORE_LOCK(adapter);
1264	igb_init_locked(adapter);
1265	IGB_CORE_UNLOCK(adapter);
1266}
1267
1268
1269static void
1270igb_handle_que(void *context, int pending)
1271{
1272	struct igb_queue *que = context;
1273	struct adapter *adapter = que->adapter;
1274	struct tx_ring *txr = que->txr;
1275	struct ifnet	*ifp = adapter->ifp;
1276
1277	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1278		bool	more;
1279
1280		more = igb_rxeof(que, -1, NULL);
1281
1282		IGB_TX_LOCK(txr);
1283		if (igb_txeof(txr))
1284			more = TRUE;
1285#if __FreeBSD_version >= 800000
1286		if (!drbr_empty(ifp, txr->br))
1287			igb_mq_start_locked(ifp, txr, NULL);
1288#else
1289		if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1290			igb_start_locked(txr, ifp);
1291#endif
1292		IGB_TX_UNLOCK(txr);
1293		if (more) {
1294			taskqueue_enqueue(que->tq, &que->que_task);
1295			return;
1296		}
1297	}
1298
1299#ifdef DEVICE_POLLING
1300	if (ifp->if_capenable & IFCAP_POLLING)
1301		return;
1302#endif
1303	/* Reenable this interrupt */
1304	if (que->eims)
1305		E1000_WRITE_REG(&adapter->hw, E1000_EIMS, que->eims);
1306	else
1307		igb_enable_intr(adapter);
1308}
1309
1310/* Deal with link in a sleepable context */
1311static void
1312igb_handle_link(void *context, int pending)
1313{
1314	struct adapter *adapter = context;
1315
1316	adapter->hw.mac.get_link_status = 1;
1317	igb_update_link_status(adapter);
1318}
1319
1320/*********************************************************************
1321 *
1322 *  MSI/Legacy Deferred
1323 *  Interrupt Service routine
1324 *
1325 *********************************************************************/
1326static int
1327igb_irq_fast(void *arg)
1328{
1329	struct adapter		*adapter = arg;
1330	struct igb_queue	*que = adapter->queues;
1331	u32			reg_icr;
1332
1333
1334	reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1335
1336	/* Hot eject?  */
1337	if (reg_icr == 0xffffffff)
1338		return FILTER_STRAY;
1339
1340	/* Definitely not our interrupt.  */
1341	if (reg_icr == 0x0)
1342		return FILTER_STRAY;
1343
1344	if ((reg_icr & E1000_ICR_INT_ASSERTED) == 0)
1345		return FILTER_STRAY;
1346
1347	/*
1348	 * Mask interrupts until the taskqueue is finished running.  This is
1349	 * cheap, just assume that it is needed.  This also works around the
1350	 * MSI message reordering errata on certain systems.
1351	 */
1352	igb_disable_intr(adapter);
1353	taskqueue_enqueue(que->tq, &que->que_task);
1354
1355	/* Link status change */
1356	if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC))
1357		taskqueue_enqueue(que->tq, &adapter->link_task);
1358
1359	if (reg_icr & E1000_ICR_RXO)
1360		adapter->rx_overruns++;
1361	return FILTER_HANDLED;
1362}
1363
1364#ifdef DEVICE_POLLING
1365/*********************************************************************
1366 *
1367 *  Legacy polling routine : if using this code you MUST be sure that
1368 *  multiqueue is not defined, ie, set igb_num_queues to 1.
1369 *
1370 *********************************************************************/
1371#if __FreeBSD_version >= 800000
1372#define POLL_RETURN_COUNT(a) (a)
1373static int
1374#else
1375#define POLL_RETURN_COUNT(a)
1376static void
1377#endif
1378igb_poll(struct ifnet *ifp, enum poll_cmd cmd, int count)
1379{
1380	struct adapter		*adapter = ifp->if_softc;
1381	struct igb_queue	*que = adapter->queues;
1382	struct tx_ring		*txr = adapter->tx_rings;
1383	u32			reg_icr, rx_done = 0;
1384	u32			loop = IGB_MAX_LOOP;
1385	bool			more;
1386
1387	IGB_CORE_LOCK(adapter);
1388	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
1389		IGB_CORE_UNLOCK(adapter);
1390		return POLL_RETURN_COUNT(rx_done);
1391	}
1392
1393	if (cmd == POLL_AND_CHECK_STATUS) {
1394		reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1395		/* Link status change */
1396		if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC))
1397			igb_handle_link(adapter, 0);
1398
1399		if (reg_icr & E1000_ICR_RXO)
1400			adapter->rx_overruns++;
1401	}
1402	IGB_CORE_UNLOCK(adapter);
1403
1404	igb_rxeof(que, count, &rx_done);
1405
1406	IGB_TX_LOCK(txr);
1407	do {
1408		more = igb_txeof(txr);
1409	} while (loop-- && more);
1410#if __FreeBSD_version >= 800000
1411	if (!drbr_empty(ifp, txr->br))
1412		igb_mq_start_locked(ifp, txr, NULL);
1413#else
1414	if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1415		igb_start_locked(txr, ifp);
1416#endif
1417	IGB_TX_UNLOCK(txr);
1418	return POLL_RETURN_COUNT(rx_done);
1419}
1420#endif /* DEVICE_POLLING */
1421
1422/*********************************************************************
1423 *
1424 *  MSIX TX Interrupt Service routine
1425 *
1426 **********************************************************************/
1427static void
1428igb_msix_que(void *arg)
1429{
1430	struct igb_queue *que = arg;
1431	struct adapter *adapter = que->adapter;
1432	struct tx_ring *txr = que->txr;
1433	struct rx_ring *rxr = que->rxr;
1434	u32		newitr = 0;
1435	bool		more_tx, more_rx;
1436
1437	E1000_WRITE_REG(&adapter->hw, E1000_EIMC, que->eims);
1438	++que->irqs;
1439
1440	IGB_TX_LOCK(txr);
1441	more_tx = igb_txeof(txr);
1442	IGB_TX_UNLOCK(txr);
1443
1444	more_rx = igb_rxeof(que, adapter->rx_process_limit, NULL);
1445
1446	if (igb_enable_aim == FALSE)
1447		goto no_calc;
1448	/*
1449	** Do Adaptive Interrupt Moderation:
1450        **  - Write out last calculated setting
1451	**  - Calculate based on average size over
1452	**    the last interval.
1453	*/
1454        if (que->eitr_setting)
1455                E1000_WRITE_REG(&adapter->hw,
1456                    E1000_EITR(que->msix), que->eitr_setting);
1457
1458        que->eitr_setting = 0;
1459
1460        /* Idle, do nothing */
1461        if ((txr->bytes == 0) && (rxr->bytes == 0))
1462                goto no_calc;
1463
1464        /* Used half Default if sub-gig */
1465        if (adapter->link_speed != 1000)
1466                newitr = IGB_DEFAULT_ITR / 2;
1467        else {
1468		if ((txr->bytes) && (txr->packets))
1469                	newitr = txr->bytes/txr->packets;
1470		if ((rxr->bytes) && (rxr->packets))
1471			newitr = max(newitr,
1472			    (rxr->bytes / rxr->packets));
1473                newitr += 24; /* account for hardware frame, crc */
1474		/* set an upper boundary */
1475		newitr = min(newitr, 3000);
1476		/* Be nice to the mid range */
1477                if ((newitr > 300) && (newitr < 1200))
1478                        newitr = (newitr / 3);
1479                else
1480                        newitr = (newitr / 2);
1481        }
1482        newitr &= 0x7FFC;  /* Mask invalid bits */
1483        if (adapter->hw.mac.type == e1000_82575)
1484                newitr |= newitr << 16;
1485        else
1486                newitr |= E1000_EITR_CNT_IGNR;
1487
1488        /* save for next interrupt */
1489        que->eitr_setting = newitr;
1490
1491        /* Reset state */
1492        txr->bytes = 0;
1493        txr->packets = 0;
1494        rxr->bytes = 0;
1495        rxr->packets = 0;
1496
1497no_calc:
1498	/* Schedule a clean task if needed*/
1499	if (more_tx || more_rx)
1500		taskqueue_enqueue(que->tq, &que->que_task);
1501	else
1502		/* Reenable this interrupt */
1503		E1000_WRITE_REG(&adapter->hw, E1000_EIMS, que->eims);
1504	return;
1505}
1506
1507
1508/*********************************************************************
1509 *
1510 *  MSIX Link Interrupt Service routine
1511 *
1512 **********************************************************************/
1513
1514static void
1515igb_msix_link(void *arg)
1516{
1517	struct adapter	*adapter = arg;
1518	u32       	icr;
1519
1520	++adapter->link_irq;
1521	icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1522	if (!(icr & E1000_ICR_LSC))
1523		goto spurious;
1524	igb_handle_link(adapter, 0);
1525
1526spurious:
1527	/* Rearm */
1528	E1000_WRITE_REG(&adapter->hw, E1000_IMS, E1000_IMS_LSC);
1529	E1000_WRITE_REG(&adapter->hw, E1000_EIMS, adapter->link_mask);
1530	return;
1531}
1532
1533
1534/*********************************************************************
1535 *
1536 *  Media Ioctl callback
1537 *
1538 *  This routine is called whenever the user queries the status of
1539 *  the interface using ifconfig.
1540 *
1541 **********************************************************************/
1542static void
1543igb_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
1544{
1545	struct adapter *adapter = ifp->if_softc;
1546	u_char fiber_type = IFM_1000_SX;
1547
1548	INIT_DEBUGOUT("igb_media_status: begin");
1549
1550	IGB_CORE_LOCK(adapter);
1551	igb_update_link_status(adapter);
1552
1553	ifmr->ifm_status = IFM_AVALID;
1554	ifmr->ifm_active = IFM_ETHER;
1555
1556	if (!adapter->link_active) {
1557		IGB_CORE_UNLOCK(adapter);
1558		return;
1559	}
1560
1561	ifmr->ifm_status |= IFM_ACTIVE;
1562
1563	if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
1564	    (adapter->hw.phy.media_type == e1000_media_type_internal_serdes))
1565		ifmr->ifm_active |= fiber_type | IFM_FDX;
1566	else {
1567		switch (adapter->link_speed) {
1568		case 10:
1569			ifmr->ifm_active |= IFM_10_T;
1570			break;
1571		case 100:
1572			ifmr->ifm_active |= IFM_100_TX;
1573			break;
1574		case 1000:
1575			ifmr->ifm_active |= IFM_1000_T;
1576			break;
1577		}
1578		if (adapter->link_duplex == FULL_DUPLEX)
1579			ifmr->ifm_active |= IFM_FDX;
1580		else
1581			ifmr->ifm_active |= IFM_HDX;
1582	}
1583	IGB_CORE_UNLOCK(adapter);
1584}
1585
1586/*********************************************************************
1587 *
1588 *  Media Ioctl callback
1589 *
1590 *  This routine is called when the user changes speed/duplex using
1591 *  media/mediopt option with ifconfig.
1592 *
1593 **********************************************************************/
1594static int
1595igb_media_change(struct ifnet *ifp)
1596{
1597	struct adapter *adapter = ifp->if_softc;
1598	struct ifmedia  *ifm = &adapter->media;
1599
1600	INIT_DEBUGOUT("igb_media_change: begin");
1601
1602	if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
1603		return (EINVAL);
1604
1605	IGB_CORE_LOCK(adapter);
1606	switch (IFM_SUBTYPE(ifm->ifm_media)) {
1607	case IFM_AUTO:
1608		adapter->hw.mac.autoneg = DO_AUTO_NEG;
1609		adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
1610		break;
1611	case IFM_1000_LX:
1612	case IFM_1000_SX:
1613	case IFM_1000_T:
1614		adapter->hw.mac.autoneg = DO_AUTO_NEG;
1615		adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
1616		break;
1617	case IFM_100_TX:
1618		adapter->hw.mac.autoneg = FALSE;
1619		adapter->hw.phy.autoneg_advertised = 0;
1620		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1621			adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_FULL;
1622		else
1623			adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_HALF;
1624		break;
1625	case IFM_10_T:
1626		adapter->hw.mac.autoneg = FALSE;
1627		adapter->hw.phy.autoneg_advertised = 0;
1628		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1629			adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_FULL;
1630		else
1631			adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_HALF;
1632		break;
1633	default:
1634		device_printf(adapter->dev, "Unsupported media type\n");
1635	}
1636
1637	igb_init_locked(adapter);
1638	IGB_CORE_UNLOCK(adapter);
1639
1640	return (0);
1641}
1642
1643
1644/*********************************************************************
1645 *
1646 *  This routine maps the mbufs to Advanced TX descriptors.
1647 *  used by the 82575 adapter.
1648 *
1649 **********************************************************************/
1650
1651static int
1652igb_xmit(struct tx_ring *txr, struct mbuf **m_headp)
1653{
1654	struct adapter		*adapter = txr->adapter;
1655	bus_dma_segment_t	segs[IGB_MAX_SCATTER];
1656	bus_dmamap_t		map;
1657	struct igb_tx_buffer	*tx_buffer, *tx_buffer_mapped;
1658	union e1000_adv_tx_desc	*txd = NULL;
1659	struct mbuf		*m_head;
1660	u32			olinfo_status = 0, cmd_type_len = 0;
1661	int			nsegs, i, j, error, first, last = 0;
1662	u32			hdrlen = 0;
1663
1664	m_head = *m_headp;
1665
1666
1667	/* Set basic descriptor constants */
1668	cmd_type_len |= E1000_ADVTXD_DTYP_DATA;
1669	cmd_type_len |= E1000_ADVTXD_DCMD_IFCS | E1000_ADVTXD_DCMD_DEXT;
1670	if (m_head->m_flags & M_VLANTAG)
1671		cmd_type_len |= E1000_ADVTXD_DCMD_VLE;
1672
1673        /*
1674         * Force a cleanup if number of TX descriptors
1675         * available hits the threshold
1676         */
1677	if (txr->tx_avail <= IGB_TX_CLEANUP_THRESHOLD) {
1678		igb_txeof(txr);
1679		/* Now do we at least have a minimal? */
1680		if (txr->tx_avail <= IGB_TX_OP_THRESHOLD) {
1681			txr->no_desc_avail++;
1682			return (ENOBUFS);
1683		}
1684	}
1685
1686	/*
1687         * Map the packet for DMA.
1688	 *
1689	 * Capture the first descriptor index,
1690	 * this descriptor will have the index
1691	 * of the EOP which is the only one that
1692	 * now gets a DONE bit writeback.
1693	 */
1694	first = txr->next_avail_desc;
1695	tx_buffer = &txr->tx_buffers[first];
1696	tx_buffer_mapped = tx_buffer;
1697	map = tx_buffer->map;
1698
1699	error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
1700	    *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
1701
1702	if (error == EFBIG) {
1703		struct mbuf *m;
1704
1705		m = m_defrag(*m_headp, M_DONTWAIT);
1706		if (m == NULL) {
1707			adapter->mbuf_defrag_failed++;
1708			m_freem(*m_headp);
1709			*m_headp = NULL;
1710			return (ENOBUFS);
1711		}
1712		*m_headp = m;
1713
1714		/* Try it again */
1715		error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
1716		    *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
1717
1718		if (error == ENOMEM) {
1719			adapter->no_tx_dma_setup++;
1720			return (error);
1721		} else if (error != 0) {
1722			adapter->no_tx_dma_setup++;
1723			m_freem(*m_headp);
1724			*m_headp = NULL;
1725			return (error);
1726		}
1727	} else if (error == ENOMEM) {
1728		adapter->no_tx_dma_setup++;
1729		return (error);
1730	} else if (error != 0) {
1731		adapter->no_tx_dma_setup++;
1732		m_freem(*m_headp);
1733		*m_headp = NULL;
1734		return (error);
1735	}
1736
1737	/* Check again to be sure we have enough descriptors */
1738        if (nsegs > (txr->tx_avail - 2)) {
1739                txr->no_desc_avail++;
1740		bus_dmamap_unload(txr->txtag, map);
1741		return (ENOBUFS);
1742        }
1743	m_head = *m_headp;
1744
1745        /*
1746         * Set up the context descriptor:
1747         * used when any hardware offload is done.
1748	 * This includes CSUM, VLAN, and TSO. It
1749	 * will use the first descriptor.
1750         */
1751        if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
1752		if (igb_tso_setup(txr, m_head, &hdrlen)) {
1753			cmd_type_len |= E1000_ADVTXD_DCMD_TSE;
1754			olinfo_status |= E1000_TXD_POPTS_IXSM << 8;
1755			olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
1756		} else
1757			return (ENXIO);
1758	} else if (igb_tx_ctx_setup(txr, m_head))
1759		olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
1760
1761	/* Calculate payload length */
1762	olinfo_status |= ((m_head->m_pkthdr.len - hdrlen)
1763	    << E1000_ADVTXD_PAYLEN_SHIFT);
1764
1765	/* 82575 needs the queue index added */
1766	if (adapter->hw.mac.type == e1000_82575)
1767		olinfo_status |= txr->me << 4;
1768
1769	/* Set up our transmit descriptors */
1770	i = txr->next_avail_desc;
1771	for (j = 0; j < nsegs; j++) {
1772		bus_size_t seg_len;
1773		bus_addr_t seg_addr;
1774
1775		tx_buffer = &txr->tx_buffers[i];
1776		txd = (union e1000_adv_tx_desc *)&txr->tx_base[i];
1777		seg_addr = segs[j].ds_addr;
1778		seg_len  = segs[j].ds_len;
1779
1780		txd->read.buffer_addr = htole64(seg_addr);
1781		txd->read.cmd_type_len = htole32(cmd_type_len | seg_len);
1782		txd->read.olinfo_status = htole32(olinfo_status);
1783		last = i;
1784		if (++i == adapter->num_tx_desc)
1785			i = 0;
1786		tx_buffer->m_head = NULL;
1787		tx_buffer->next_eop = -1;
1788	}
1789
1790	txr->next_avail_desc = i;
1791	txr->tx_avail -= nsegs;
1792
1793        tx_buffer->m_head = m_head;
1794	tx_buffer_mapped->map = tx_buffer->map;
1795	tx_buffer->map = map;
1796        bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);
1797
1798        /*
1799         * Last Descriptor of Packet
1800	 * needs End Of Packet (EOP)
1801	 * and Report Status (RS)
1802         */
1803        txd->read.cmd_type_len |=
1804	    htole32(E1000_ADVTXD_DCMD_EOP | E1000_ADVTXD_DCMD_RS);
1805	/*
1806	 * Keep track in the first buffer which
1807	 * descriptor will be written back
1808	 */
1809	tx_buffer = &txr->tx_buffers[first];
1810	tx_buffer->next_eop = last;
1811	txr->watchdog_time = ticks;
1812
1813	/*
1814	 * Advance the Transmit Descriptor Tail (TDT), this tells the E1000
1815	 * that this frame is available to transmit.
1816	 */
1817	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
1818	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1819	E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), i);
1820	++txr->tx_packets;
1821
1822	return (0);
1823
1824}
1825
1826static void
1827igb_set_promisc(struct adapter *adapter)
1828{
1829	struct ifnet	*ifp = adapter->ifp;
1830	struct e1000_hw *hw = &adapter->hw;
1831	u32		reg;
1832
1833	if (hw->mac.type == e1000_vfadapt) {
1834		e1000_promisc_set_vf(hw, e1000_promisc_enabled);
1835		return;
1836	}
1837
1838	reg = E1000_READ_REG(hw, E1000_RCTL);
1839	if (ifp->if_flags & IFF_PROMISC) {
1840		reg |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
1841		E1000_WRITE_REG(hw, E1000_RCTL, reg);
1842	} else if (ifp->if_flags & IFF_ALLMULTI) {
1843		reg |= E1000_RCTL_MPE;
1844		reg &= ~E1000_RCTL_UPE;
1845		E1000_WRITE_REG(hw, E1000_RCTL, reg);
1846	}
1847}
1848
1849static void
1850igb_disable_promisc(struct adapter *adapter)
1851{
1852	struct e1000_hw *hw = &adapter->hw;
1853	u32		reg;
1854
1855	if (hw->mac.type == e1000_vfadapt) {
1856		e1000_promisc_set_vf(hw, e1000_promisc_disabled);
1857		return;
1858	}
1859	reg = E1000_READ_REG(hw, E1000_RCTL);
1860	reg &=  (~E1000_RCTL_UPE);
1861	reg &=  (~E1000_RCTL_MPE);
1862	E1000_WRITE_REG(hw, E1000_RCTL, reg);
1863}
1864
1865
1866/*********************************************************************
1867 *  Multicast Update
1868 *
1869 *  This routine is called whenever multicast address list is updated.
1870 *
1871 **********************************************************************/
1872
1873static void
1874igb_set_multi(struct adapter *adapter)
1875{
1876	struct ifnet	*ifp = adapter->ifp;
1877	struct ifmultiaddr *ifma;
1878	u32 reg_rctl = 0;
1879	u8  *mta;
1880
1881	int mcnt = 0;
1882
1883	IOCTL_DEBUGOUT("igb_set_multi: begin");
1884
1885	mta = adapter->mta;
1886	bzero(mta, sizeof(uint8_t) * ETH_ADDR_LEN *
1887	    MAX_NUM_MULTICAST_ADDRESSES);
1888
1889#if __FreeBSD_version < 800000
1890	IF_ADDR_LOCK(ifp);
1891#else
1892	if_maddr_rlock(ifp);
1893#endif
1894	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
1895		if (ifma->ifma_addr->sa_family != AF_LINK)
1896			continue;
1897
1898		if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
1899			break;
1900
1901		bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
1902		    &mta[mcnt * ETH_ADDR_LEN], ETH_ADDR_LEN);
1903		mcnt++;
1904	}
1905#if __FreeBSD_version < 800000
1906	IF_ADDR_UNLOCK(ifp);
1907#else
1908	if_maddr_runlock(ifp);
1909#endif
1910
1911	if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) {
1912		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
1913		reg_rctl |= E1000_RCTL_MPE;
1914		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
1915	} else
1916		e1000_update_mc_addr_list(&adapter->hw, mta, mcnt);
1917}
1918
1919
1920/*********************************************************************
1921 *  Timer routine:
1922 *  	This routine checks for link status,
1923 *	updates statistics, and does the watchdog.
1924 *
1925 **********************************************************************/
1926
1927static void
1928igb_local_timer(void *arg)
1929{
1930	struct adapter		*adapter = arg;
1931	device_t		dev = adapter->dev;
1932	struct tx_ring		*txr = adapter->tx_rings;
1933
1934
1935	IGB_CORE_LOCK_ASSERT(adapter);
1936
1937	igb_update_link_status(adapter);
1938	igb_update_stats_counters(adapter);
1939
1940	/*
1941	** If flow control has paused us since last checking
1942	** it invalidates the watchdog timing, so dont run it.
1943	*/
1944	if (adapter->pause_frames) {
1945		adapter->pause_frames = 0;
1946		goto out;
1947	}
1948
1949        /*
1950        ** Watchdog: check for time since any descriptor was cleaned
1951        */
1952	for (int i = 0; i < adapter->num_queues; i++, txr++)
1953		if (txr->queue_status == IGB_QUEUE_HUNG)
1954			goto timeout;
1955out:
1956	callout_reset(&adapter->timer, hz, igb_local_timer, adapter);
1957	return;
1958
1959timeout:
1960	device_printf(adapter->dev, "Watchdog timeout -- resetting\n");
1961	device_printf(dev,"Queue(%d) tdh = %d, hw tdt = %d\n", txr->me,
1962            E1000_READ_REG(&adapter->hw, E1000_TDH(txr->me)),
1963            E1000_READ_REG(&adapter->hw, E1000_TDT(txr->me)));
1964	device_printf(dev,"TX(%d) desc avail = %d,"
1965            "Next TX to Clean = %d\n",
1966            txr->me, txr->tx_avail, txr->next_to_clean);
1967	adapter->ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
1968	adapter->watchdog_events++;
1969	igb_init_locked(adapter);
1970}
1971
1972static void
1973igb_update_link_status(struct adapter *adapter)
1974{
1975	struct e1000_hw *hw = &adapter->hw;
1976	struct ifnet *ifp = adapter->ifp;
1977	device_t dev = adapter->dev;
1978	struct tx_ring *txr = adapter->tx_rings;
1979	u32 link_check = 0;
1980
1981	/* Get the cached link value or read for real */
1982        switch (hw->phy.media_type) {
1983        case e1000_media_type_copper:
1984                if (hw->mac.get_link_status) {
1985			/* Do the work to read phy */
1986                        e1000_check_for_link(hw);
1987                        link_check = !hw->mac.get_link_status;
1988                } else
1989                        link_check = TRUE;
1990                break;
1991        case e1000_media_type_fiber:
1992                e1000_check_for_link(hw);
1993                link_check = (E1000_READ_REG(hw, E1000_STATUS) &
1994                                 E1000_STATUS_LU);
1995                break;
1996        case e1000_media_type_internal_serdes:
1997                e1000_check_for_link(hw);
1998                link_check = adapter->hw.mac.serdes_has_link;
1999                break;
2000	/* VF device is type_unknown */
2001        case e1000_media_type_unknown:
2002                e1000_check_for_link(hw);
2003		link_check = !hw->mac.get_link_status;
2004		/* Fall thru */
2005        default:
2006                break;
2007        }
2008
2009	/* Now we check if a transition has happened */
2010	if (link_check && (adapter->link_active == 0)) {
2011		e1000_get_speed_and_duplex(&adapter->hw,
2012		    &adapter->link_speed, &adapter->link_duplex);
2013		if (bootverbose)
2014			device_printf(dev, "Link is up %d Mbps %s\n",
2015			    adapter->link_speed,
2016			    ((adapter->link_duplex == FULL_DUPLEX) ?
2017			    "Full Duplex" : "Half Duplex"));
2018		adapter->link_active = 1;
2019		ifp->if_baudrate = adapter->link_speed * 1000000;
2020		/* This can sleep */
2021		if_link_state_change(ifp, LINK_STATE_UP);
2022	} else if (!link_check && (adapter->link_active == 1)) {
2023		ifp->if_baudrate = adapter->link_speed = 0;
2024		adapter->link_duplex = 0;
2025		if (bootverbose)
2026			device_printf(dev, "Link is Down\n");
2027		adapter->link_active = 0;
2028		/* This can sleep */
2029		if_link_state_change(ifp, LINK_STATE_DOWN);
2030		/* Turn off watchdogs */
2031		for (int i = 0; i < adapter->num_queues; i++, txr++)
2032			txr->queue_status = IGB_QUEUE_IDLE;
2033	}
2034}
2035
2036/*********************************************************************
2037 *
2038 *  This routine disables all traffic on the adapter by issuing a
2039 *  global reset on the MAC and deallocates TX/RX buffers.
2040 *
2041 **********************************************************************/
2042
2043static void
2044igb_stop(void *arg)
2045{
2046	struct adapter	*adapter = arg;
2047	struct ifnet	*ifp = adapter->ifp;
2048	struct tx_ring *txr = adapter->tx_rings;
2049
2050	IGB_CORE_LOCK_ASSERT(adapter);
2051
2052	INIT_DEBUGOUT("igb_stop: begin");
2053
2054	igb_disable_intr(adapter);
2055
2056	callout_stop(&adapter->timer);
2057
2058	/* Tell the stack that the interface is no longer active */
2059	ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
2060
2061	/* Unarm watchdog timer. */
2062	for (int i = 0; i < adapter->num_queues; i++, txr++) {
2063		IGB_TX_LOCK(txr);
2064		txr->queue_status = IGB_QUEUE_IDLE;
2065		IGB_TX_UNLOCK(txr);
2066	}
2067
2068	e1000_reset_hw(&adapter->hw);
2069	E1000_WRITE_REG(&adapter->hw, E1000_WUC, 0);
2070
2071	e1000_led_off(&adapter->hw);
2072	e1000_cleanup_led(&adapter->hw);
2073}
2074
2075
2076/*********************************************************************
2077 *
2078 *  Determine hardware revision.
2079 *
2080 **********************************************************************/
2081static void
2082igb_identify_hardware(struct adapter *adapter)
2083{
2084	device_t dev = adapter->dev;
2085
2086	/* Make sure our PCI config space has the necessary stuff set */
2087	adapter->hw.bus.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2);
2088	if (!((adapter->hw.bus.pci_cmd_word & PCIM_CMD_BUSMASTEREN) &&
2089	    (adapter->hw.bus.pci_cmd_word & PCIM_CMD_MEMEN))) {
2090		INIT_DEBUGOUT("Memory Access and/or Bus Master "
2091		    "bits were not set!\n");
2092		adapter->hw.bus.pci_cmd_word |=
2093		(PCIM_CMD_BUSMASTEREN | PCIM_CMD_MEMEN);
2094		pci_write_config(dev, PCIR_COMMAND,
2095		    adapter->hw.bus.pci_cmd_word, 2);
2096	}
2097
2098	/* Save off the information about this board */
2099	adapter->hw.vendor_id = pci_get_vendor(dev);
2100	adapter->hw.device_id = pci_get_device(dev);
2101	adapter->hw.revision_id = pci_read_config(dev, PCIR_REVID, 1);
2102	adapter->hw.subsystem_vendor_id =
2103	    pci_read_config(dev, PCIR_SUBVEND_0, 2);
2104	adapter->hw.subsystem_device_id =
2105	    pci_read_config(dev, PCIR_SUBDEV_0, 2);
2106
2107	/* Set MAC type early for PCI setup */
2108	e1000_set_mac_type(&adapter->hw);
2109}
2110
2111static int
2112igb_allocate_pci_resources(struct adapter *adapter)
2113{
2114	device_t	dev = adapter->dev;
2115	int		rid;
2116
2117	rid = PCIR_BAR(0);
2118	adapter->pci_mem = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
2119	    &rid, RF_ACTIVE);
2120	if (adapter->pci_mem == NULL) {
2121		device_printf(dev, "Unable to allocate bus resource: memory\n");
2122		return (ENXIO);
2123	}
2124	adapter->osdep.mem_bus_space_tag =
2125	    rman_get_bustag(adapter->pci_mem);
2126	adapter->osdep.mem_bus_space_handle =
2127	    rman_get_bushandle(adapter->pci_mem);
2128	adapter->hw.hw_addr = (u8 *)&adapter->osdep.mem_bus_space_handle;
2129
2130	adapter->num_queues = 1; /* Defaults for Legacy or MSI */
2131
2132	/* This will setup either MSI/X or MSI */
2133	adapter->msix = igb_setup_msix(adapter);
2134	adapter->hw.back = &adapter->osdep;
2135
2136	return (0);
2137}
2138
2139/*********************************************************************
2140 *
2141 *  Setup the Legacy or MSI Interrupt handler
2142 *
2143 **********************************************************************/
2144static int
2145igb_allocate_legacy(struct adapter *adapter)
2146{
2147	device_t		dev = adapter->dev;
2148	struct igb_queue	*que = adapter->queues;
2149	int			error, rid = 0;
2150
2151	/* Turn off all interrupts */
2152	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2153
2154	/* MSI RID is 1 */
2155	if (adapter->msix == 1)
2156		rid = 1;
2157
2158	/* We allocate a single interrupt resource */
2159	adapter->res = bus_alloc_resource_any(dev,
2160	    SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2161	if (adapter->res == NULL) {
2162		device_printf(dev, "Unable to allocate bus resource: "
2163		    "interrupt\n");
2164		return (ENXIO);
2165	}
2166
2167	/*
2168	 * Try allocating a fast interrupt and the associated deferred
2169	 * processing contexts.
2170	 */
2171	TASK_INIT(&que->que_task, 0, igb_handle_que, que);
2172	/* Make tasklet for deferred link handling */
2173	TASK_INIT(&adapter->link_task, 0, igb_handle_link, adapter);
2174	que->tq = taskqueue_create_fast("igb_taskq", M_NOWAIT,
2175	    taskqueue_thread_enqueue, &que->tq);
2176	taskqueue_start_threads(&que->tq, 1, PI_NET, "%s taskq",
2177	    device_get_nameunit(adapter->dev));
2178	if ((error = bus_setup_intr(dev, adapter->res,
2179	    INTR_TYPE_NET | INTR_MPSAFE, igb_irq_fast, NULL,
2180	    adapter, &adapter->tag)) != 0) {
2181		device_printf(dev, "Failed to register fast interrupt "
2182			    "handler: %d\n", error);
2183		taskqueue_free(que->tq);
2184		que->tq = NULL;
2185		return (error);
2186	}
2187
2188	return (0);
2189}
2190
2191
2192/*********************************************************************
2193 *
2194 *  Setup the MSIX Queue Interrupt handlers:
2195 *
2196 **********************************************************************/
2197static int
2198igb_allocate_msix(struct adapter *adapter)
2199{
2200	device_t		dev = adapter->dev;
2201	struct igb_queue	*que = adapter->queues;
2202	int			error, rid, vector = 0;
2203
2204
2205	for (int i = 0; i < adapter->num_queues; i++, vector++, que++) {
2206		rid = vector +1;
2207		que->res = bus_alloc_resource_any(dev,
2208		    SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2209		if (que->res == NULL) {
2210			device_printf(dev,
2211			    "Unable to allocate bus resource: "
2212			    "MSIX Queue Interrupt\n");
2213			return (ENXIO);
2214		}
2215		error = bus_setup_intr(dev, que->res,
2216	    	    INTR_TYPE_NET | INTR_MPSAFE, NULL,
2217		    igb_msix_que, que, &que->tag);
2218		if (error) {
2219			que->res = NULL;
2220			device_printf(dev, "Failed to register Queue handler");
2221			return (error);
2222		}
2223#if __FreeBSD_version >= 800504
2224		bus_describe_intr(dev, que->res, que->tag, "que %d", i);
2225#endif
2226		que->msix = vector;
2227		if (adapter->hw.mac.type == e1000_82575)
2228			que->eims = E1000_EICR_TX_QUEUE0 << i;
2229		else
2230			que->eims = 1 << vector;
2231		/*
2232		** Bind the msix vector, and thus the
2233		** rings to the corresponding cpu.
2234		*/
2235		if (adapter->num_queues > 1)
2236			bus_bind_intr(dev, que->res, i);
2237		/* Make tasklet for deferred handling */
2238		TASK_INIT(&que->que_task, 0, igb_handle_que, que);
2239		que->tq = taskqueue_create_fast("igb_que", M_NOWAIT,
2240		    taskqueue_thread_enqueue, &que->tq);
2241		taskqueue_start_threads(&que->tq, 1, PI_NET, "%s que",
2242		    device_get_nameunit(adapter->dev));
2243	}
2244
2245	/* And Link */
2246	rid = vector + 1;
2247	adapter->res = bus_alloc_resource_any(dev,
2248	    SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2249	if (adapter->res == NULL) {
2250		device_printf(dev,
2251		    "Unable to allocate bus resource: "
2252		    "MSIX Link Interrupt\n");
2253		return (ENXIO);
2254	}
2255	if ((error = bus_setup_intr(dev, adapter->res,
2256	    INTR_TYPE_NET | INTR_MPSAFE, NULL,
2257	    igb_msix_link, adapter, &adapter->tag)) != 0) {
2258		device_printf(dev, "Failed to register Link handler");
2259		return (error);
2260	}
2261#if __FreeBSD_version >= 800504
2262	bus_describe_intr(dev, adapter->res, adapter->tag, "link");
2263#endif
2264	adapter->linkvec = vector;
2265
2266	return (0);
2267}
2268
2269
2270static void
2271igb_configure_queues(struct adapter *adapter)
2272{
2273	struct	e1000_hw	*hw = &adapter->hw;
2274	struct	igb_queue	*que;
2275	u32			tmp, ivar = 0, newitr = 0;
2276
2277	/* First turn on RSS capability */
2278	if (adapter->hw.mac.type > e1000_82575)
2279		E1000_WRITE_REG(hw, E1000_GPIE,
2280		    E1000_GPIE_MSIX_MODE | E1000_GPIE_EIAME |
2281		    E1000_GPIE_PBA | E1000_GPIE_NSICR);
2282
2283	/* Turn on MSIX */
2284	switch (adapter->hw.mac.type) {
2285	case e1000_82580:
2286	case e1000_vfadapt:
2287		/* RX entries */
2288		for (int i = 0; i < adapter->num_queues; i++) {
2289			u32 index = i >> 1;
2290			ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2291			que = &adapter->queues[i];
2292			if (i & 1) {
2293				ivar &= 0xFF00FFFF;
2294				ivar |= (que->msix | E1000_IVAR_VALID) << 16;
2295			} else {
2296				ivar &= 0xFFFFFF00;
2297				ivar |= que->msix | E1000_IVAR_VALID;
2298			}
2299			E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2300		}
2301		/* TX entries */
2302		for (int i = 0; i < adapter->num_queues; i++) {
2303			u32 index = i >> 1;
2304			ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2305			que = &adapter->queues[i];
2306			if (i & 1) {
2307				ivar &= 0x00FFFFFF;
2308				ivar |= (que->msix | E1000_IVAR_VALID) << 24;
2309			} else {
2310				ivar &= 0xFFFF00FF;
2311				ivar |= (que->msix | E1000_IVAR_VALID) << 8;
2312			}
2313			E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2314			adapter->eims_mask |= que->eims;
2315		}
2316
2317		/* And for the link interrupt */
2318		ivar = (adapter->linkvec | E1000_IVAR_VALID) << 8;
2319		adapter->link_mask = 1 << adapter->linkvec;
2320		adapter->eims_mask |= adapter->link_mask;
2321		E1000_WRITE_REG(hw, E1000_IVAR_MISC, ivar);
2322		break;
2323	case e1000_82576:
2324		/* RX entries */
2325		for (int i = 0; i < adapter->num_queues; i++) {
2326			u32 index = i & 0x7; /* Each IVAR has two entries */
2327			ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2328			que = &adapter->queues[i];
2329			if (i < 8) {
2330				ivar &= 0xFFFFFF00;
2331				ivar |= que->msix | E1000_IVAR_VALID;
2332			} else {
2333				ivar &= 0xFF00FFFF;
2334				ivar |= (que->msix | E1000_IVAR_VALID) << 16;
2335			}
2336			E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2337			adapter->eims_mask |= que->eims;
2338		}
2339		/* TX entries */
2340		for (int i = 0; i < adapter->num_queues; i++) {
2341			u32 index = i & 0x7; /* Each IVAR has two entries */
2342			ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2343			que = &adapter->queues[i];
2344			if (i < 8) {
2345				ivar &= 0xFFFF00FF;
2346				ivar |= (que->msix | E1000_IVAR_VALID) << 8;
2347			} else {
2348				ivar &= 0x00FFFFFF;
2349				ivar |= (que->msix | E1000_IVAR_VALID) << 24;
2350			}
2351			E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2352			adapter->eims_mask |= que->eims;
2353		}
2354
2355		/* And for the link interrupt */
2356		ivar = (adapter->linkvec | E1000_IVAR_VALID) << 8;
2357		adapter->link_mask = 1 << adapter->linkvec;
2358		adapter->eims_mask |= adapter->link_mask;
2359		E1000_WRITE_REG(hw, E1000_IVAR_MISC, ivar);
2360		break;
2361
2362	case e1000_82575:
2363                /* enable MSI-X support*/
2364		tmp = E1000_READ_REG(hw, E1000_CTRL_EXT);
2365                tmp |= E1000_CTRL_EXT_PBA_CLR;
2366                /* Auto-Mask interrupts upon ICR read. */
2367                tmp |= E1000_CTRL_EXT_EIAME;
2368                tmp |= E1000_CTRL_EXT_IRCA;
2369                E1000_WRITE_REG(hw, E1000_CTRL_EXT, tmp);
2370
2371		/* Queues */
2372		for (int i = 0; i < adapter->num_queues; i++) {
2373			que = &adapter->queues[i];
2374			tmp = E1000_EICR_RX_QUEUE0 << i;
2375			tmp |= E1000_EICR_TX_QUEUE0 << i;
2376			que->eims = tmp;
2377			E1000_WRITE_REG_ARRAY(hw, E1000_MSIXBM(0),
2378			    i, que->eims);
2379			adapter->eims_mask |= que->eims;
2380		}
2381
2382		/* Link */
2383		E1000_WRITE_REG(hw, E1000_MSIXBM(adapter->linkvec),
2384		    E1000_EIMS_OTHER);
2385		adapter->link_mask |= E1000_EIMS_OTHER;
2386		adapter->eims_mask |= adapter->link_mask;
2387	default:
2388		break;
2389	}
2390
2391	/* Set the starting interrupt rate */
2392	if (igb_max_interrupt_rate > 0)
2393		newitr = (4000000 / igb_max_interrupt_rate) & 0x7FFC;
2394
2395        if (hw->mac.type == e1000_82575)
2396                newitr |= newitr << 16;
2397        else
2398                newitr |= E1000_EITR_CNT_IGNR;
2399
2400	for (int i = 0; i < adapter->num_queues; i++) {
2401		que = &adapter->queues[i];
2402		E1000_WRITE_REG(hw, E1000_EITR(que->msix), newitr);
2403	}
2404
2405	return;
2406}
2407
2408
2409static void
2410igb_free_pci_resources(struct adapter *adapter)
2411{
2412	struct		igb_queue *que = adapter->queues;
2413	device_t	dev = adapter->dev;
2414	int		rid;
2415
2416	/*
2417	** There is a slight possibility of a failure mode
2418	** in attach that will result in entering this function
2419	** before interrupt resources have been initialized, and
2420	** in that case we do not want to execute the loops below
2421	** We can detect this reliably by the state of the adapter
2422	** res pointer.
2423	*/
2424	if (adapter->res == NULL)
2425		goto mem;
2426
2427	/*
2428	 * First release all the interrupt resources:
2429	 */
2430	for (int i = 0; i < adapter->num_queues; i++, que++) {
2431		rid = que->msix + 1;
2432		if (que->tag != NULL) {
2433			bus_teardown_intr(dev, que->res, que->tag);
2434			que->tag = NULL;
2435		}
2436		if (que->res != NULL)
2437			bus_release_resource(dev,
2438			    SYS_RES_IRQ, rid, que->res);
2439	}
2440
2441	/* Clean the Legacy or Link interrupt last */
2442	if (adapter->linkvec) /* we are doing MSIX */
2443		rid = adapter->linkvec + 1;
2444	else
2445		(adapter->msix != 0) ? (rid = 1):(rid = 0);
2446
2447	if (adapter->tag != NULL) {
2448		bus_teardown_intr(dev, adapter->res, adapter->tag);
2449		adapter->tag = NULL;
2450	}
2451	if (adapter->res != NULL)
2452		bus_release_resource(dev, SYS_RES_IRQ, rid, adapter->res);
2453
2454mem:
2455	if (adapter->msix)
2456		pci_release_msi(dev);
2457
2458	if (adapter->msix_mem != NULL)
2459		bus_release_resource(dev, SYS_RES_MEMORY,
2460		    PCIR_BAR(IGB_MSIX_BAR), adapter->msix_mem);
2461
2462	if (adapter->pci_mem != NULL)
2463		bus_release_resource(dev, SYS_RES_MEMORY,
2464		    PCIR_BAR(0), adapter->pci_mem);
2465
2466}
2467
2468/*
2469 * Setup Either MSI/X or MSI
2470 */
2471static int
2472igb_setup_msix(struct adapter *adapter)
2473{
2474	device_t dev = adapter->dev;
2475	int rid, want, queues, msgs;
2476
2477	/* tuneable override */
2478	if (igb_enable_msix == 0)
2479		goto msi;
2480
2481	/* First try MSI/X */
2482	rid = PCIR_BAR(IGB_MSIX_BAR);
2483	adapter->msix_mem = bus_alloc_resource_any(dev,
2484	    SYS_RES_MEMORY, &rid, RF_ACTIVE);
2485       	if (!adapter->msix_mem) {
2486		/* May not be enabled */
2487		device_printf(adapter->dev,
2488		    "Unable to map MSIX table \n");
2489		goto msi;
2490	}
2491
2492	msgs = pci_msix_count(dev);
2493	if (msgs == 0) { /* system has msix disabled */
2494		bus_release_resource(dev, SYS_RES_MEMORY,
2495		    PCIR_BAR(IGB_MSIX_BAR), adapter->msix_mem);
2496		adapter->msix_mem = NULL;
2497		goto msi;
2498	}
2499
2500	/* Figure out a reasonable auto config value */
2501	queues = (mp_ncpus > (msgs-1)) ? (msgs-1) : mp_ncpus;
2502
2503	/* Manual override */
2504	if (igb_num_queues != 0)
2505		queues = igb_num_queues;
2506	if (queues > 8)  /* max queues */
2507		queues = 8;
2508
2509	/* Can have max of 4 queues on 82575 */
2510	if ((adapter->hw.mac.type == e1000_82575) && (queues > 4))
2511		queues = 4;
2512
2513	/* Limit the VF adapter to one queue */
2514	if (adapter->hw.mac.type == e1000_vfadapt)
2515		queues = 1;
2516
2517	/*
2518	** One vector (RX/TX pair) per queue
2519	** plus an additional for Link interrupt
2520	*/
2521	want = queues + 1;
2522	if (msgs >= want)
2523		msgs = want;
2524	else {
2525               	device_printf(adapter->dev,
2526		    "MSIX Configuration Problem, "
2527		    "%d vectors configured, but %d queues wanted!\n",
2528		    msgs, want);
2529		return (ENXIO);
2530	}
2531	if ((msgs) && pci_alloc_msix(dev, &msgs) == 0) {
2532               	device_printf(adapter->dev,
2533		    "Using MSIX interrupts with %d vectors\n", msgs);
2534		adapter->num_queues = queues;
2535		return (msgs);
2536	}
2537msi:
2538       	msgs = pci_msi_count(dev);
2539       	if (msgs == 1 && pci_alloc_msi(dev, &msgs) == 0)
2540               	device_printf(adapter->dev,"Using MSI interrupt\n");
2541	return (msgs);
2542}
2543
2544/*********************************************************************
2545 *
2546 *  Set up an fresh starting state
2547 *
2548 **********************************************************************/
2549static void
2550igb_reset(struct adapter *adapter)
2551{
2552	device_t	dev = adapter->dev;
2553	struct e1000_hw *hw = &adapter->hw;
2554	struct e1000_fc_info *fc = &hw->fc;
2555	struct ifnet	*ifp = adapter->ifp;
2556	u32		pba = 0;
2557	u16		hwm;
2558
2559	INIT_DEBUGOUT("igb_reset: begin");
2560
2561	/* Let the firmware know the OS is in control */
2562	igb_get_hw_control(adapter);
2563
2564	/*
2565	 * Packet Buffer Allocation (PBA)
2566	 * Writing PBA sets the receive portion of the buffer
2567	 * the remainder is used for the transmit buffer.
2568	 */
2569	switch (hw->mac.type) {
2570	case e1000_82575:
2571		pba = E1000_PBA_32K;
2572		break;
2573	case e1000_82576:
2574	case e1000_vfadapt:
2575		pba = E1000_PBA_64K;
2576		break;
2577	case e1000_82580:
2578		pba = E1000_PBA_35K;
2579	default:
2580		break;
2581	}
2582
2583	/* Special needs in case of Jumbo frames */
2584	if ((hw->mac.type == e1000_82575) && (ifp->if_mtu > ETHERMTU)) {
2585		u32 tx_space, min_tx, min_rx;
2586		pba = E1000_READ_REG(hw, E1000_PBA);
2587		tx_space = pba >> 16;
2588		pba &= 0xffff;
2589		min_tx = (adapter->max_frame_size +
2590		    sizeof(struct e1000_tx_desc) - ETHERNET_FCS_SIZE) * 2;
2591		min_tx = roundup2(min_tx, 1024);
2592		min_tx >>= 10;
2593                min_rx = adapter->max_frame_size;
2594                min_rx = roundup2(min_rx, 1024);
2595                min_rx >>= 10;
2596		if (tx_space < min_tx &&
2597		    ((min_tx - tx_space) < pba)) {
2598			pba = pba - (min_tx - tx_space);
2599			/*
2600                         * if short on rx space, rx wins
2601                         * and must trump tx adjustment
2602			 */
2603                        if (pba < min_rx)
2604                                pba = min_rx;
2605		}
2606		E1000_WRITE_REG(hw, E1000_PBA, pba);
2607	}
2608
2609	INIT_DEBUGOUT1("igb_init: pba=%dK",pba);
2610
2611	/*
2612	 * These parameters control the automatic generation (Tx) and
2613	 * response (Rx) to Ethernet PAUSE frames.
2614	 * - High water mark should allow for at least two frames to be
2615	 *   received after sending an XOFF.
2616	 * - Low water mark works best when it is very near the high water mark.
2617	 *   This allows the receiver to restart by sending XON when it has
2618	 *   drained a bit.
2619	 */
2620	hwm = min(((pba << 10) * 9 / 10),
2621	    ((pba << 10) - 2 * adapter->max_frame_size));
2622
2623	if (hw->mac.type < e1000_82576) {
2624		fc->high_water = hwm & 0xFFF8;  /* 8-byte granularity */
2625		fc->low_water = fc->high_water - 8;
2626	} else {
2627		fc->high_water = hwm & 0xFFF0;  /* 16-byte granularity */
2628		fc->low_water = fc->high_water - 16;
2629	}
2630
2631	fc->pause_time = IGB_FC_PAUSE_TIME;
2632	fc->send_xon = TRUE;
2633
2634	/* Set Flow control, use the tunable location if sane */
2635	if ((igb_fc_setting >= 0) && (igb_fc_setting < 4))
2636		fc->requested_mode = igb_fc_setting;
2637	else
2638		fc->requested_mode = e1000_fc_none;
2639
2640	fc->current_mode = fc->requested_mode;
2641
2642	/* Issue a global reset */
2643	e1000_reset_hw(hw);
2644	E1000_WRITE_REG(hw, E1000_WUC, 0);
2645
2646	if (e1000_init_hw(hw) < 0)
2647		device_printf(dev, "Hardware Initialization Failed\n");
2648
2649	if (hw->mac.type == e1000_82580) {
2650		u32 reg;
2651
2652		hwm = (pba << 10) - (2 * adapter->max_frame_size);
2653		/*
2654		 * 0x80000000 - enable DMA COAL
2655		 * 0x10000000 - use L0s as low power
2656		 * 0x20000000 - use L1 as low power
2657		 * X << 16 - exit dma coal when rx data exceeds X kB
2658		 * Y - upper limit to stay in dma coal in units of 32usecs
2659		 */
2660		E1000_WRITE_REG(hw, E1000_DMACR,
2661		    0xA0000006 | ((hwm << 6) & 0x00FF0000));
2662
2663		/* set hwm to PBA -  2 * max frame size */
2664		E1000_WRITE_REG(hw, E1000_FCRTC, hwm);
2665		/*
2666		 * This sets the time to wait before requesting transition to
2667		 * low power state to number of usecs needed to receive 1 512
2668		 * byte frame at gigabit line rate
2669		 */
2670		E1000_WRITE_REG(hw, E1000_DMCTLX, 4);
2671
2672		/* free space in tx packet buffer to wake from DMA coal */
2673		E1000_WRITE_REG(hw, E1000_DMCTXTH,
2674		    (20480 - (2 * adapter->max_frame_size)) >> 6);
2675
2676		/* make low power state decision controlled by DMA coal */
2677		reg = E1000_READ_REG(hw, E1000_PCIEMISC);
2678		E1000_WRITE_REG(hw, E1000_PCIEMISC,
2679		    reg | E1000_PCIEMISC_LX_DECISION);
2680	}
2681
2682	E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);
2683	e1000_get_phy_info(hw);
2684	e1000_check_for_link(hw);
2685	return;
2686}
2687
2688/*********************************************************************
2689 *
2690 *  Setup networking device structure and register an interface.
2691 *
2692 **********************************************************************/
2693static int
2694igb_setup_interface(device_t dev, struct adapter *adapter)
2695{
2696	struct ifnet   *ifp;
2697
2698	INIT_DEBUGOUT("igb_setup_interface: begin");
2699
2700	ifp = adapter->ifp = if_alloc(IFT_ETHER);
2701	if (ifp == NULL) {
2702		device_printf(dev, "can not allocate ifnet structure\n");
2703		return (-1);
2704	}
2705	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
2706	ifp->if_mtu = ETHERMTU;
2707	ifp->if_init =  igb_init;
2708	ifp->if_softc = adapter;
2709	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
2710	ifp->if_ioctl = igb_ioctl;
2711	ifp->if_start = igb_start;
2712#if __FreeBSD_version >= 800000
2713	ifp->if_transmit = igb_mq_start;
2714	ifp->if_qflush = igb_qflush;
2715#endif
2716	IFQ_SET_MAXLEN(&ifp->if_snd, adapter->num_tx_desc - 1);
2717	ifp->if_snd.ifq_drv_maxlen = adapter->num_tx_desc - 1;
2718	IFQ_SET_READY(&ifp->if_snd);
2719
2720	ether_ifattach(ifp, adapter->hw.mac.addr);
2721
2722	ifp->if_capabilities = ifp->if_capenable = 0;
2723
2724	ifp->if_capabilities = IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM;
2725	ifp->if_capabilities |= IFCAP_TSO4;
2726	ifp->if_capabilities |= IFCAP_JUMBO_MTU;
2727	ifp->if_capenable = ifp->if_capabilities;
2728
2729	/* Don't enable LRO by default */
2730	ifp->if_capabilities |= IFCAP_LRO;
2731
2732#ifdef DEVICE_POLLING
2733	ifp->if_capabilities |= IFCAP_POLLING;
2734#endif
2735
2736	/*
2737	 * Tell the upper layer(s) we
2738	 * support full VLAN capability.
2739	 */
2740	ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);
2741	ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU;
2742	ifp->if_capenable |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU;
2743
2744	/*
2745	** Dont turn this on by default, if vlans are
2746	** created on another pseudo device (eg. lagg)
2747	** then vlan events are not passed thru, breaking
2748	** operation, but with HW FILTER off it works. If
2749	** using vlans directly on the em driver you can
2750	** enable this and get full hardware tag filtering.
2751	*/
2752	ifp->if_capabilities |= IFCAP_VLAN_HWFILTER;
2753
2754	/*
2755	 * Specify the media types supported by this adapter and register
2756	 * callbacks to update media and link information
2757	 */
2758	ifmedia_init(&adapter->media, IFM_IMASK,
2759	    igb_media_change, igb_media_status);
2760	if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
2761	    (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
2762		ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_SX | IFM_FDX,
2763			    0, NULL);
2764		ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_SX, 0, NULL);
2765	} else {
2766		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T, 0, NULL);
2767		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T | IFM_FDX,
2768			    0, NULL);
2769		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX,
2770			    0, NULL);
2771		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
2772			    0, NULL);
2773		if (adapter->hw.phy.type != e1000_phy_ife) {
2774			ifmedia_add(&adapter->media,
2775				IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
2776			ifmedia_add(&adapter->media,
2777				IFM_ETHER | IFM_1000_T, 0, NULL);
2778		}
2779	}
2780	ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
2781	ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
2782	return (0);
2783}
2784
2785
2786/*
2787 * Manage DMA'able memory.
2788 */
2789static void
2790igb_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
2791{
2792	if (error)
2793		return;
2794	*(bus_addr_t *) arg = segs[0].ds_addr;
2795}
2796
2797static int
2798igb_dma_malloc(struct adapter *adapter, bus_size_t size,
2799        struct igb_dma_alloc *dma, int mapflags)
2800{
2801	int error;
2802
2803	error = bus_dma_tag_create(bus_get_dma_tag(adapter->dev), /* parent */
2804				IGB_DBA_ALIGN, 0,	/* alignment, bounds */
2805				BUS_SPACE_MAXADDR,	/* lowaddr */
2806				BUS_SPACE_MAXADDR,	/* highaddr */
2807				NULL, NULL,		/* filter, filterarg */
2808				size,			/* maxsize */
2809				1,			/* nsegments */
2810				size,			/* maxsegsize */
2811				0,			/* flags */
2812				NULL,			/* lockfunc */
2813				NULL,			/* lockarg */
2814				&dma->dma_tag);
2815	if (error) {
2816		device_printf(adapter->dev,
2817		    "%s: bus_dma_tag_create failed: %d\n",
2818		    __func__, error);
2819		goto fail_0;
2820	}
2821
2822	error = bus_dmamem_alloc(dma->dma_tag, (void**) &dma->dma_vaddr,
2823	    BUS_DMA_NOWAIT, &dma->dma_map);
2824	if (error) {
2825		device_printf(adapter->dev,
2826		    "%s: bus_dmamem_alloc(%ju) failed: %d\n",
2827		    __func__, (uintmax_t)size, error);
2828		goto fail_2;
2829	}
2830
2831	dma->dma_paddr = 0;
2832	error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
2833	    size, igb_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT);
2834	if (error || dma->dma_paddr == 0) {
2835		device_printf(adapter->dev,
2836		    "%s: bus_dmamap_load failed: %d\n",
2837		    __func__, error);
2838		goto fail_3;
2839	}
2840
2841	return (0);
2842
2843fail_3:
2844	bus_dmamap_unload(dma->dma_tag, dma->dma_map);
2845fail_2:
2846	bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
2847	bus_dma_tag_destroy(dma->dma_tag);
2848fail_0:
2849	dma->dma_map = NULL;
2850	dma->dma_tag = NULL;
2851
2852	return (error);
2853}
2854
2855static void
2856igb_dma_free(struct adapter *adapter, struct igb_dma_alloc *dma)
2857{
2858	if (dma->dma_tag == NULL)
2859		return;
2860	if (dma->dma_map != NULL) {
2861		bus_dmamap_sync(dma->dma_tag, dma->dma_map,
2862		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
2863		bus_dmamap_unload(dma->dma_tag, dma->dma_map);
2864		bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
2865		dma->dma_map = NULL;
2866	}
2867	bus_dma_tag_destroy(dma->dma_tag);
2868	dma->dma_tag = NULL;
2869}
2870
2871
2872/*********************************************************************
2873 *
2874 *  Allocate memory for the transmit and receive rings, and then
2875 *  the descriptors associated with each, called only once at attach.
2876 *
2877 **********************************************************************/
2878static int
2879igb_allocate_queues(struct adapter *adapter)
2880{
2881	device_t dev = adapter->dev;
2882	struct igb_queue	*que = NULL;
2883	struct tx_ring		*txr = NULL;
2884	struct rx_ring		*rxr = NULL;
2885	int rsize, tsize, error = E1000_SUCCESS;
2886	int txconf = 0, rxconf = 0;
2887
2888	/* First allocate the top level queue structs */
2889	if (!(adapter->queues =
2890	    (struct igb_queue *) malloc(sizeof(struct igb_queue) *
2891	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2892		device_printf(dev, "Unable to allocate queue memory\n");
2893		error = ENOMEM;
2894		goto fail;
2895	}
2896
2897	/* Next allocate the TX ring struct memory */
2898	if (!(adapter->tx_rings =
2899	    (struct tx_ring *) malloc(sizeof(struct tx_ring) *
2900	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2901		device_printf(dev, "Unable to allocate TX ring memory\n");
2902		error = ENOMEM;
2903		goto tx_fail;
2904	}
2905
2906	/* Now allocate the RX */
2907	if (!(adapter->rx_rings =
2908	    (struct rx_ring *) malloc(sizeof(struct rx_ring) *
2909	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2910		device_printf(dev, "Unable to allocate RX ring memory\n");
2911		error = ENOMEM;
2912		goto rx_fail;
2913	}
2914
2915	tsize = roundup2(adapter->num_tx_desc *
2916	    sizeof(union e1000_adv_tx_desc), IGB_DBA_ALIGN);
2917	/*
2918	 * Now set up the TX queues, txconf is needed to handle the
2919	 * possibility that things fail midcourse and we need to
2920	 * undo memory gracefully
2921	 */
2922	for (int i = 0; i < adapter->num_queues; i++, txconf++) {
2923		/* Set up some basics */
2924		txr = &adapter->tx_rings[i];
2925		txr->adapter = adapter;
2926		txr->me = i;
2927
2928		/* Initialize the TX lock */
2929		snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
2930		    device_get_nameunit(dev), txr->me);
2931		mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF);
2932
2933		if (igb_dma_malloc(adapter, tsize,
2934			&txr->txdma, BUS_DMA_NOWAIT)) {
2935			device_printf(dev,
2936			    "Unable to allocate TX Descriptor memory\n");
2937			error = ENOMEM;
2938			goto err_tx_desc;
2939		}
2940		txr->tx_base = (struct e1000_tx_desc *)txr->txdma.dma_vaddr;
2941		bzero((void *)txr->tx_base, tsize);
2942
2943        	/* Now allocate transmit buffers for the ring */
2944        	if (igb_allocate_transmit_buffers(txr)) {
2945			device_printf(dev,
2946			    "Critical Failure setting up transmit buffers\n");
2947			error = ENOMEM;
2948			goto err_tx_desc;
2949        	}
2950#if __FreeBSD_version >= 800000
2951		/* Allocate a buf ring */
2952		txr->br = buf_ring_alloc(IGB_BR_SIZE, M_DEVBUF,
2953		    M_WAITOK, &txr->tx_mtx);
2954#endif
2955	}
2956
2957	/*
2958	 * Next the RX queues...
2959	 */
2960	rsize = roundup2(adapter->num_rx_desc *
2961	    sizeof(union e1000_adv_rx_desc), IGB_DBA_ALIGN);
2962	for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
2963		rxr = &adapter->rx_rings[i];
2964		rxr->adapter = adapter;
2965		rxr->me = i;
2966
2967		/* Initialize the RX lock */
2968		snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
2969		    device_get_nameunit(dev), txr->me);
2970		mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF);
2971
2972		if (igb_dma_malloc(adapter, rsize,
2973			&rxr->rxdma, BUS_DMA_NOWAIT)) {
2974			device_printf(dev,
2975			    "Unable to allocate RxDescriptor memory\n");
2976			error = ENOMEM;
2977			goto err_rx_desc;
2978		}
2979		rxr->rx_base = (union e1000_adv_rx_desc *)rxr->rxdma.dma_vaddr;
2980		bzero((void *)rxr->rx_base, rsize);
2981
2982        	/* Allocate receive buffers for the ring*/
2983		if (igb_allocate_receive_buffers(rxr)) {
2984			device_printf(dev,
2985			    "Critical Failure setting up receive buffers\n");
2986			error = ENOMEM;
2987			goto err_rx_desc;
2988		}
2989	}
2990
2991	/*
2992	** Finally set up the queue holding structs
2993	*/
2994	for (int i = 0; i < adapter->num_queues; i++) {
2995		que = &adapter->queues[i];
2996		que->adapter = adapter;
2997		que->txr = &adapter->tx_rings[i];
2998		que->rxr = &adapter->rx_rings[i];
2999	}
3000
3001	return (0);
3002
3003err_rx_desc:
3004	for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
3005		igb_dma_free(adapter, &rxr->rxdma);
3006err_tx_desc:
3007	for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
3008		igb_dma_free(adapter, &txr->txdma);
3009	free(adapter->rx_rings, M_DEVBUF);
3010rx_fail:
3011#if __FreeBSD_version >= 800000
3012	buf_ring_free(txr->br, M_DEVBUF);
3013#endif
3014	free(adapter->tx_rings, M_DEVBUF);
3015tx_fail:
3016	free(adapter->queues, M_DEVBUF);
3017fail:
3018	return (error);
3019}
3020
3021/*********************************************************************
3022 *
3023 *  Allocate memory for tx_buffer structures. The tx_buffer stores all
3024 *  the information needed to transmit a packet on the wire. This is
3025 *  called only once at attach, setup is done every reset.
3026 *
3027 **********************************************************************/
3028static int
3029igb_allocate_transmit_buffers(struct tx_ring *txr)
3030{
3031	struct adapter *adapter = txr->adapter;
3032	device_t dev = adapter->dev;
3033	struct igb_tx_buffer *txbuf;
3034	int error, i;
3035
3036	/*
3037	 * Setup DMA descriptor areas.
3038	 */
3039	if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
3040			       1, 0,			/* alignment, bounds */
3041			       BUS_SPACE_MAXADDR,	/* lowaddr */
3042			       BUS_SPACE_MAXADDR,	/* highaddr */
3043			       NULL, NULL,		/* filter, filterarg */
3044			       IGB_TSO_SIZE,		/* maxsize */
3045			       IGB_MAX_SCATTER,		/* nsegments */
3046			       PAGE_SIZE,		/* maxsegsize */
3047			       0,			/* flags */
3048			       NULL,			/* lockfunc */
3049			       NULL,			/* lockfuncarg */
3050			       &txr->txtag))) {
3051		device_printf(dev,"Unable to allocate TX DMA tag\n");
3052		goto fail;
3053	}
3054
3055	if (!(txr->tx_buffers =
3056	    (struct igb_tx_buffer *) malloc(sizeof(struct igb_tx_buffer) *
3057	    adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3058		device_printf(dev, "Unable to allocate tx_buffer memory\n");
3059		error = ENOMEM;
3060		goto fail;
3061	}
3062
3063        /* Create the descriptor buffer dma maps */
3064	txbuf = txr->tx_buffers;
3065	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3066		error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
3067		if (error != 0) {
3068			device_printf(dev, "Unable to create TX DMA map\n");
3069			goto fail;
3070		}
3071	}
3072
3073	return 0;
3074fail:
3075	/* We free all, it handles case where we are in the middle */
3076	igb_free_transmit_structures(adapter);
3077	return (error);
3078}
3079
3080/*********************************************************************
3081 *
3082 *  Initialize a transmit ring.
3083 *
3084 **********************************************************************/
3085static void
3086igb_setup_transmit_ring(struct tx_ring *txr)
3087{
3088	struct adapter *adapter = txr->adapter;
3089	struct igb_tx_buffer *txbuf;
3090	int i;
3091
3092	/* Clear the old descriptor contents */
3093	IGB_TX_LOCK(txr);
3094	bzero((void *)txr->tx_base,
3095	      (sizeof(union e1000_adv_tx_desc)) * adapter->num_tx_desc);
3096	/* Reset indices */
3097	txr->next_avail_desc = 0;
3098	txr->next_to_clean = 0;
3099
3100	/* Free any existing tx buffers. */
3101        txbuf = txr->tx_buffers;
3102	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3103		if (txbuf->m_head != NULL) {
3104			bus_dmamap_sync(txr->txtag, txbuf->map,
3105			    BUS_DMASYNC_POSTWRITE);
3106			bus_dmamap_unload(txr->txtag, txbuf->map);
3107			m_freem(txbuf->m_head);
3108			txbuf->m_head = NULL;
3109		}
3110		/* clear the watch index */
3111		txbuf->next_eop = -1;
3112        }
3113
3114	/* Set number of descriptors available */
3115	txr->tx_avail = adapter->num_tx_desc;
3116
3117	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3118	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3119	IGB_TX_UNLOCK(txr);
3120}
3121
3122/*********************************************************************
3123 *
3124 *  Initialize all transmit rings.
3125 *
3126 **********************************************************************/
3127static void
3128igb_setup_transmit_structures(struct adapter *adapter)
3129{
3130	struct tx_ring *txr = adapter->tx_rings;
3131
3132	for (int i = 0; i < adapter->num_queues; i++, txr++)
3133		igb_setup_transmit_ring(txr);
3134
3135	return;
3136}
3137
3138/*********************************************************************
3139 *
3140 *  Enable transmit unit.
3141 *
3142 **********************************************************************/
3143static void
3144igb_initialize_transmit_units(struct adapter *adapter)
3145{
3146	struct tx_ring	*txr = adapter->tx_rings;
3147	struct e1000_hw *hw = &adapter->hw;
3148	u32		tctl, txdctl;
3149
3150	INIT_DEBUGOUT("igb_initialize_transmit_units: begin");
3151	tctl = txdctl = 0;
3152
3153	/* Setup the Tx Descriptor Rings */
3154	for (int i = 0; i < adapter->num_queues; i++, txr++) {
3155		u64 bus_addr = txr->txdma.dma_paddr;
3156
3157		E1000_WRITE_REG(hw, E1000_TDLEN(i),
3158		    adapter->num_tx_desc * sizeof(struct e1000_tx_desc));
3159		E1000_WRITE_REG(hw, E1000_TDBAH(i),
3160		    (uint32_t)(bus_addr >> 32));
3161		E1000_WRITE_REG(hw, E1000_TDBAL(i),
3162		    (uint32_t)bus_addr);
3163
3164		/* Setup the HW Tx Head and Tail descriptor pointers */
3165		E1000_WRITE_REG(hw, E1000_TDT(i), 0);
3166		E1000_WRITE_REG(hw, E1000_TDH(i), 0);
3167
3168		HW_DEBUGOUT2("Base = %x, Length = %x\n",
3169		    E1000_READ_REG(hw, E1000_TDBAL(i)),
3170		    E1000_READ_REG(hw, E1000_TDLEN(i)));
3171
3172		txr->queue_status = IGB_QUEUE_IDLE;
3173
3174		txdctl |= IGB_TX_PTHRESH;
3175		txdctl |= IGB_TX_HTHRESH << 8;
3176		txdctl |= IGB_TX_WTHRESH << 16;
3177		txdctl |= E1000_TXDCTL_QUEUE_ENABLE;
3178		E1000_WRITE_REG(hw, E1000_TXDCTL(i), txdctl);
3179	}
3180
3181	if (adapter->hw.mac.type == e1000_vfadapt)
3182		return;
3183
3184	e1000_config_collision_dist(hw);
3185
3186	/* Program the Transmit Control Register */
3187	tctl = E1000_READ_REG(hw, E1000_TCTL);
3188	tctl &= ~E1000_TCTL_CT;
3189	tctl |= (E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN |
3190		   (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT));
3191
3192	/* This write will effectively turn on the transmit unit. */
3193	E1000_WRITE_REG(hw, E1000_TCTL, tctl);
3194}
3195
3196/*********************************************************************
3197 *
3198 *  Free all transmit rings.
3199 *
3200 **********************************************************************/
3201static void
3202igb_free_transmit_structures(struct adapter *adapter)
3203{
3204	struct tx_ring *txr = adapter->tx_rings;
3205
3206	for (int i = 0; i < adapter->num_queues; i++, txr++) {
3207		IGB_TX_LOCK(txr);
3208		igb_free_transmit_buffers(txr);
3209		igb_dma_free(adapter, &txr->txdma);
3210		IGB_TX_UNLOCK(txr);
3211		IGB_TX_LOCK_DESTROY(txr);
3212	}
3213	free(adapter->tx_rings, M_DEVBUF);
3214}
3215
3216/*********************************************************************
3217 *
3218 *  Free transmit ring related data structures.
3219 *
3220 **********************************************************************/
3221static void
3222igb_free_transmit_buffers(struct tx_ring *txr)
3223{
3224	struct adapter *adapter = txr->adapter;
3225	struct igb_tx_buffer *tx_buffer;
3226	int             i;
3227
3228	INIT_DEBUGOUT("free_transmit_ring: begin");
3229
3230	if (txr->tx_buffers == NULL)
3231		return;
3232
3233	tx_buffer = txr->tx_buffers;
3234	for (i = 0; i < adapter->num_tx_desc; i++, tx_buffer++) {
3235		if (tx_buffer->m_head != NULL) {
3236			bus_dmamap_sync(txr->txtag, tx_buffer->map,
3237			    BUS_DMASYNC_POSTWRITE);
3238			bus_dmamap_unload(txr->txtag,
3239			    tx_buffer->map);
3240			m_freem(tx_buffer->m_head);
3241			tx_buffer->m_head = NULL;
3242			if (tx_buffer->map != NULL) {
3243				bus_dmamap_destroy(txr->txtag,
3244				    tx_buffer->map);
3245				tx_buffer->map = NULL;
3246			}
3247		} else if (tx_buffer->map != NULL) {
3248			bus_dmamap_unload(txr->txtag,
3249			    tx_buffer->map);
3250			bus_dmamap_destroy(txr->txtag,
3251			    tx_buffer->map);
3252			tx_buffer->map = NULL;
3253		}
3254	}
3255#if __FreeBSD_version >= 800000
3256	if (txr->br != NULL)
3257		buf_ring_free(txr->br, M_DEVBUF);
3258#endif
3259	if (txr->tx_buffers != NULL) {
3260		free(txr->tx_buffers, M_DEVBUF);
3261		txr->tx_buffers = NULL;
3262	}
3263	if (txr->txtag != NULL) {
3264		bus_dma_tag_destroy(txr->txtag);
3265		txr->txtag = NULL;
3266	}
3267	return;
3268}
3269
3270/**********************************************************************
3271 *
3272 *  Setup work for hardware segmentation offload (TSO)
3273 *
3274 **********************************************************************/
3275static boolean_t
3276igb_tso_setup(struct tx_ring *txr, struct mbuf *mp, u32 *hdrlen)
3277{
3278	struct adapter *adapter = txr->adapter;
3279	struct e1000_adv_tx_context_desc *TXD;
3280	struct igb_tx_buffer        *tx_buffer;
3281	u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0;
3282	u32 mss_l4len_idx = 0;
3283	u16 vtag = 0;
3284	int ctxd, ehdrlen, ip_hlen, tcp_hlen;
3285	struct ether_vlan_header *eh;
3286	struct ip *ip;
3287	struct tcphdr *th;
3288
3289
3290	/*
3291	 * Determine where frame payload starts.
3292	 * Jump over vlan headers if already present
3293	 */
3294	eh = mtod(mp, struct ether_vlan_header *);
3295	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN))
3296		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3297	else
3298		ehdrlen = ETHER_HDR_LEN;
3299
3300	/* Ensure we have at least the IP+TCP header in the first mbuf. */
3301	if (mp->m_len < ehdrlen + sizeof(struct ip) + sizeof(struct tcphdr))
3302		return FALSE;
3303
3304	/* Only supports IPV4 for now */
3305	ctxd = txr->next_avail_desc;
3306	tx_buffer = &txr->tx_buffers[ctxd];
3307	TXD = (struct e1000_adv_tx_context_desc *) &txr->tx_base[ctxd];
3308
3309	ip = (struct ip *)(mp->m_data + ehdrlen);
3310	if (ip->ip_p != IPPROTO_TCP)
3311                return FALSE;   /* 0 */
3312	ip->ip_sum = 0;
3313	ip_hlen = ip->ip_hl << 2;
3314	th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
3315	th->th_sum = in_pseudo(ip->ip_src.s_addr,
3316	    ip->ip_dst.s_addr, htons(IPPROTO_TCP));
3317	tcp_hlen = th->th_off << 2;
3318	/*
3319	 * Calculate header length, this is used
3320	 * in the transmit desc in igb_xmit
3321	 */
3322	*hdrlen = ehdrlen + ip_hlen + tcp_hlen;
3323
3324	/* VLAN MACLEN IPLEN */
3325	if (mp->m_flags & M_VLANTAG) {
3326		vtag = htole16(mp->m_pkthdr.ether_vtag);
3327		vlan_macip_lens |= (vtag << E1000_ADVTXD_VLAN_SHIFT);
3328	}
3329
3330	vlan_macip_lens |= (ehdrlen << E1000_ADVTXD_MACLEN_SHIFT);
3331	vlan_macip_lens |= ip_hlen;
3332	TXD->vlan_macip_lens |= htole32(vlan_macip_lens);
3333
3334	/* ADV DTYPE TUCMD */
3335	type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
3336	type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP;
3337	type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4;
3338	TXD->type_tucmd_mlhl |= htole32(type_tucmd_mlhl);
3339
3340	/* MSS L4LEN IDX */
3341	mss_l4len_idx |= (mp->m_pkthdr.tso_segsz << E1000_ADVTXD_MSS_SHIFT);
3342	mss_l4len_idx |= (tcp_hlen << E1000_ADVTXD_L4LEN_SHIFT);
3343	/* 82575 needs the queue index added */
3344	if (adapter->hw.mac.type == e1000_82575)
3345		mss_l4len_idx |= txr->me << 4;
3346	TXD->mss_l4len_idx = htole32(mss_l4len_idx);
3347
3348	TXD->seqnum_seed = htole32(0);
3349	tx_buffer->m_head = NULL;
3350	tx_buffer->next_eop = -1;
3351
3352	if (++ctxd == adapter->num_tx_desc)
3353		ctxd = 0;
3354
3355	txr->tx_avail--;
3356	txr->next_avail_desc = ctxd;
3357	return TRUE;
3358}
3359
3360
3361/*********************************************************************
3362 *
3363 *  Context Descriptor setup for VLAN or CSUM
3364 *
3365 **********************************************************************/
3366
3367static bool
3368igb_tx_ctx_setup(struct tx_ring *txr, struct mbuf *mp)
3369{
3370	struct adapter *adapter = txr->adapter;
3371	struct e1000_adv_tx_context_desc *TXD;
3372	struct igb_tx_buffer        *tx_buffer;
3373	u32 vlan_macip_lens, type_tucmd_mlhl, mss_l4len_idx;
3374	struct ether_vlan_header *eh;
3375	struct ip *ip = NULL;
3376	struct ip6_hdr *ip6;
3377	int  ehdrlen, ctxd, ip_hlen = 0;
3378	u16	etype, vtag = 0;
3379	u8	ipproto = 0;
3380	bool	offload = TRUE;
3381
3382	if ((mp->m_pkthdr.csum_flags & CSUM_OFFLOAD) == 0)
3383		offload = FALSE;
3384
3385	vlan_macip_lens = type_tucmd_mlhl = mss_l4len_idx = 0;
3386	ctxd = txr->next_avail_desc;
3387	tx_buffer = &txr->tx_buffers[ctxd];
3388	TXD = (struct e1000_adv_tx_context_desc *) &txr->tx_base[ctxd];
3389
3390	/*
3391	** In advanced descriptors the vlan tag must
3392	** be placed into the context descriptor, thus
3393	** we need to be here just for that setup.
3394	*/
3395	if (mp->m_flags & M_VLANTAG) {
3396		vtag = htole16(mp->m_pkthdr.ether_vtag);
3397		vlan_macip_lens |= (vtag << E1000_ADVTXD_VLAN_SHIFT);
3398	} else if (offload == FALSE)
3399		return FALSE;
3400
3401	/*
3402	 * Determine where frame payload starts.
3403	 * Jump over vlan headers if already present,
3404	 * helpful for QinQ too.
3405	 */
3406	eh = mtod(mp, struct ether_vlan_header *);
3407	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
3408		etype = ntohs(eh->evl_proto);
3409		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3410	} else {
3411		etype = ntohs(eh->evl_encap_proto);
3412		ehdrlen = ETHER_HDR_LEN;
3413	}
3414
3415	/* Set the ether header length */
3416	vlan_macip_lens |= ehdrlen << E1000_ADVTXD_MACLEN_SHIFT;
3417
3418	switch (etype) {
3419		case ETHERTYPE_IP:
3420			ip = (struct ip *)(mp->m_data + ehdrlen);
3421			ip_hlen = ip->ip_hl << 2;
3422			if (mp->m_len < ehdrlen + ip_hlen) {
3423				offload = FALSE;
3424				break;
3425			}
3426			ipproto = ip->ip_p;
3427			type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4;
3428			break;
3429		case ETHERTYPE_IPV6:
3430			ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
3431			ip_hlen = sizeof(struct ip6_hdr);
3432			ipproto = ip6->ip6_nxt;
3433			type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV6;
3434			break;
3435		default:
3436			offload = FALSE;
3437			break;
3438	}
3439
3440	vlan_macip_lens |= ip_hlen;
3441	type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
3442
3443	switch (ipproto) {
3444		case IPPROTO_TCP:
3445			if (mp->m_pkthdr.csum_flags & CSUM_TCP)
3446				type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP;
3447			break;
3448		case IPPROTO_UDP:
3449			if (mp->m_pkthdr.csum_flags & CSUM_UDP)
3450				type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_UDP;
3451			break;
3452#if __FreeBSD_version >= 800000
3453		case IPPROTO_SCTP:
3454			if (mp->m_pkthdr.csum_flags & CSUM_SCTP)
3455				type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_SCTP;
3456			break;
3457#endif
3458		default:
3459			offload = FALSE;
3460			break;
3461	}
3462
3463	/* 82575 needs the queue index added */
3464	if (adapter->hw.mac.type == e1000_82575)
3465		mss_l4len_idx = txr->me << 4;
3466
3467	/* Now copy bits into descriptor */
3468	TXD->vlan_macip_lens |= htole32(vlan_macip_lens);
3469	TXD->type_tucmd_mlhl |= htole32(type_tucmd_mlhl);
3470	TXD->seqnum_seed = htole32(0);
3471	TXD->mss_l4len_idx = htole32(mss_l4len_idx);
3472
3473	tx_buffer->m_head = NULL;
3474	tx_buffer->next_eop = -1;
3475
3476	/* We've consumed the first desc, adjust counters */
3477	if (++ctxd == adapter->num_tx_desc)
3478		ctxd = 0;
3479	txr->next_avail_desc = ctxd;
3480	--txr->tx_avail;
3481
3482        return (offload);
3483}
3484
3485
3486/**********************************************************************
3487 *
3488 *  Examine each tx_buffer in the used queue. If the hardware is done
3489 *  processing the packet then free associated resources. The
3490 *  tx_buffer is put back on the free queue.
3491 *
3492 *  TRUE return means there's work in the ring to clean, FALSE its empty.
3493 **********************************************************************/
3494static bool
3495igb_txeof(struct tx_ring *txr)
3496{
3497	struct adapter	*adapter = txr->adapter;
3498        int first, last, done, processed;
3499        struct igb_tx_buffer *tx_buffer;
3500        struct e1000_tx_desc   *tx_desc, *eop_desc;
3501	struct ifnet   *ifp = adapter->ifp;
3502
3503	IGB_TX_LOCK_ASSERT(txr);
3504
3505        if (txr->tx_avail == adapter->num_tx_desc) {
3506		txr->queue_status = IGB_QUEUE_IDLE;
3507                return FALSE;
3508	}
3509
3510	processed = 0;
3511        first = txr->next_to_clean;
3512        tx_desc = &txr->tx_base[first];
3513        tx_buffer = &txr->tx_buffers[first];
3514	last = tx_buffer->next_eop;
3515        eop_desc = &txr->tx_base[last];
3516
3517	/*
3518	 * What this does is get the index of the
3519	 * first descriptor AFTER the EOP of the
3520	 * first packet, that way we can do the
3521	 * simple comparison on the inner while loop.
3522	 */
3523	if (++last == adapter->num_tx_desc)
3524 		last = 0;
3525	done = last;
3526
3527        bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3528            BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
3529
3530        while (eop_desc->upper.fields.status & E1000_TXD_STAT_DD) {
3531		/* We clean the range of the packet */
3532		while (first != done) {
3533                	tx_desc->upper.data = 0;
3534                	tx_desc->lower.data = 0;
3535                	tx_desc->buffer_addr = 0;
3536                	++txr->tx_avail;
3537			++processed;
3538
3539			if (tx_buffer->m_head) {
3540				txr->bytes +=
3541				    tx_buffer->m_head->m_pkthdr.len;
3542				bus_dmamap_sync(txr->txtag,
3543				    tx_buffer->map,
3544				    BUS_DMASYNC_POSTWRITE);
3545				bus_dmamap_unload(txr->txtag,
3546				    tx_buffer->map);
3547
3548                        	m_freem(tx_buffer->m_head);
3549                        	tx_buffer->m_head = NULL;
3550                	}
3551			tx_buffer->next_eop = -1;
3552			txr->watchdog_time = ticks;
3553
3554	                if (++first == adapter->num_tx_desc)
3555				first = 0;
3556
3557	                tx_buffer = &txr->tx_buffers[first];
3558			tx_desc = &txr->tx_base[first];
3559		}
3560		++txr->packets;
3561		++ifp->if_opackets;
3562		/* See if we can continue to the next packet */
3563		last = tx_buffer->next_eop;
3564		if (last != -1) {
3565        		eop_desc = &txr->tx_base[last];
3566			/* Get new done point */
3567			if (++last == adapter->num_tx_desc) last = 0;
3568			done = last;
3569		} else
3570			break;
3571        }
3572        bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3573            BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3574
3575        txr->next_to_clean = first;
3576
3577	/*
3578	** Watchdog calculation, we know there's
3579	** work outstanding or the first return
3580	** would have been taken, so none processed
3581	** for too long indicates a hang.
3582	*/
3583	if ((!processed) && ((ticks - txr->watchdog_time) > IGB_WATCHDOG))
3584		txr->queue_status = IGB_QUEUE_HUNG;
3585
3586        /*
3587         * If we have enough room, clear IFF_DRV_OACTIVE
3588         * to tell the stack that it is OK to send packets.
3589         */
3590        if (txr->tx_avail > IGB_TX_CLEANUP_THRESHOLD) {
3591                ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
3592		/* All clean, turn off the watchdog */
3593                if (txr->tx_avail == adapter->num_tx_desc) {
3594			txr->queue_status = IGB_QUEUE_IDLE;
3595			return (FALSE);
3596		}
3597        }
3598
3599	return (TRUE);
3600}
3601
3602
3603/*********************************************************************
3604 *
3605 *  Refresh mbuf buffers for RX descriptor rings
3606 *   - now keeps its own state so discards due to resource
3607 *     exhaustion are unnecessary, if an mbuf cannot be obtained
3608 *     it just returns, keeping its placeholder, thus it can simply
3609 *     be recalled to try again.
3610 *
3611 **********************************************************************/
3612static void
3613igb_refresh_mbufs(struct rx_ring *rxr, int limit)
3614{
3615	struct adapter		*adapter = rxr->adapter;
3616	bus_dma_segment_t	hseg[1];
3617	bus_dma_segment_t	pseg[1];
3618	struct igb_rx_buf	*rxbuf;
3619	struct mbuf		*mh, *mp;
3620	int			i, nsegs, error, cleaned;
3621
3622	i = rxr->next_to_refresh;
3623	cleaned = -1; /* Signify no completions */
3624	while (i != limit) {
3625		rxbuf = &rxr->rx_buffers[i];
3626		/* No hdr mbuf used with header split off */
3627		if (rxr->hdr_split == FALSE)
3628			goto no_split;
3629		if (rxbuf->m_head == NULL) {
3630			mh = m_gethdr(M_DONTWAIT, MT_DATA);
3631			if (mh == NULL)
3632				goto update;
3633		} else
3634			mh = rxbuf->m_head;
3635
3636		mh->m_pkthdr.len = mh->m_len = MHLEN;
3637		mh->m_len = MHLEN;
3638		mh->m_flags |= M_PKTHDR;
3639		/* Get the memory mapping */
3640		error = bus_dmamap_load_mbuf_sg(rxr->htag,
3641		    rxbuf->hmap, mh, hseg, &nsegs, BUS_DMA_NOWAIT);
3642		if (error != 0) {
3643			printf("Refresh mbufs: hdr dmamap load"
3644			    " failure - %d\n", error);
3645			m_free(mh);
3646			rxbuf->m_head = NULL;
3647			goto update;
3648		}
3649		rxbuf->m_head = mh;
3650		bus_dmamap_sync(rxr->htag, rxbuf->hmap,
3651		    BUS_DMASYNC_PREREAD);
3652		rxr->rx_base[i].read.hdr_addr =
3653		    htole64(hseg[0].ds_addr);
3654no_split:
3655		if (rxbuf->m_pack == NULL) {
3656			mp = m_getjcl(M_DONTWAIT, MT_DATA,
3657			    M_PKTHDR, adapter->rx_mbuf_sz);
3658			if (mp == NULL)
3659				goto update;
3660		} else
3661			mp = rxbuf->m_pack;
3662
3663		mp->m_pkthdr.len = mp->m_len = adapter->rx_mbuf_sz;
3664		/* Get the memory mapping */
3665		error = bus_dmamap_load_mbuf_sg(rxr->ptag,
3666		    rxbuf->pmap, mp, pseg, &nsegs, BUS_DMA_NOWAIT);
3667		if (error != 0) {
3668			printf("Refresh mbufs: payload dmamap load"
3669			    " failure - %d\n", error);
3670			m_free(mp);
3671			rxbuf->m_pack = NULL;
3672			goto update;
3673		}
3674		rxbuf->m_pack = mp;
3675		bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
3676		    BUS_DMASYNC_PREREAD);
3677		rxr->rx_base[i].read.pkt_addr =
3678		    htole64(pseg[0].ds_addr);
3679
3680		cleaned = i;
3681		/* Calculate next index */
3682		if (++i == adapter->num_rx_desc)
3683			i = 0;
3684		/* This is the work marker for refresh */
3685		rxr->next_to_refresh = i;
3686	}
3687update:
3688	if (cleaned != -1) /* If we refreshed some, bump tail */
3689		E1000_WRITE_REG(&adapter->hw,
3690		    E1000_RDT(rxr->me), cleaned);
3691	return;
3692}
3693
3694
3695/*********************************************************************
3696 *
3697 *  Allocate memory for rx_buffer structures. Since we use one
3698 *  rx_buffer per received packet, the maximum number of rx_buffer's
3699 *  that we'll need is equal to the number of receive descriptors
3700 *  that we've allocated.
3701 *
3702 **********************************************************************/
3703static int
3704igb_allocate_receive_buffers(struct rx_ring *rxr)
3705{
3706	struct	adapter 	*adapter = rxr->adapter;
3707	device_t 		dev = adapter->dev;
3708	struct igb_rx_buf	*rxbuf;
3709	int             	i, bsize, error;
3710
3711	bsize = sizeof(struct igb_rx_buf) * adapter->num_rx_desc;
3712	if (!(rxr->rx_buffers =
3713	    (struct igb_rx_buf *) malloc(bsize,
3714	    M_DEVBUF, M_NOWAIT | M_ZERO))) {
3715		device_printf(dev, "Unable to allocate rx_buffer memory\n");
3716		error = ENOMEM;
3717		goto fail;
3718	}
3719
3720	if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
3721				   1, 0,		/* alignment, bounds */
3722				   BUS_SPACE_MAXADDR,	/* lowaddr */
3723				   BUS_SPACE_MAXADDR,	/* highaddr */
3724				   NULL, NULL,		/* filter, filterarg */
3725				   MSIZE,		/* maxsize */
3726				   1,			/* nsegments */
3727				   MSIZE,		/* maxsegsize */
3728				   0,			/* flags */
3729				   NULL,		/* lockfunc */
3730				   NULL,		/* lockfuncarg */
3731				   &rxr->htag))) {
3732		device_printf(dev, "Unable to create RX DMA tag\n");
3733		goto fail;
3734	}
3735
3736	if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
3737				   1, 0,		/* alignment, bounds */
3738				   BUS_SPACE_MAXADDR,	/* lowaddr */
3739				   BUS_SPACE_MAXADDR,	/* highaddr */
3740				   NULL, NULL,		/* filter, filterarg */
3741				   MJUM9BYTES,		/* maxsize */
3742				   1,			/* nsegments */
3743				   MJUM9BYTES,		/* maxsegsize */
3744				   0,			/* flags */
3745				   NULL,		/* lockfunc */
3746				   NULL,		/* lockfuncarg */
3747				   &rxr->ptag))) {
3748		device_printf(dev, "Unable to create RX payload DMA tag\n");
3749		goto fail;
3750	}
3751
3752	for (i = 0; i < adapter->num_rx_desc; i++) {
3753		rxbuf = &rxr->rx_buffers[i];
3754		error = bus_dmamap_create(rxr->htag,
3755		    BUS_DMA_NOWAIT, &rxbuf->hmap);
3756		if (error) {
3757			device_printf(dev,
3758			    "Unable to create RX head DMA maps\n");
3759			goto fail;
3760		}
3761		error = bus_dmamap_create(rxr->ptag,
3762		    BUS_DMA_NOWAIT, &rxbuf->pmap);
3763		if (error) {
3764			device_printf(dev,
3765			    "Unable to create RX packet DMA maps\n");
3766			goto fail;
3767		}
3768	}
3769
3770	return (0);
3771
3772fail:
3773	/* Frees all, but can handle partial completion */
3774	igb_free_receive_structures(adapter);
3775	return (error);
3776}
3777
3778
3779static void
3780igb_free_receive_ring(struct rx_ring *rxr)
3781{
3782	struct	adapter		*adapter;
3783	struct igb_rx_buf	*rxbuf;
3784	int i;
3785
3786	adapter = rxr->adapter;
3787	for (i = 0; i < adapter->num_rx_desc; i++) {
3788		rxbuf = &rxr->rx_buffers[i];
3789		if (rxbuf->m_head != NULL) {
3790			bus_dmamap_sync(rxr->htag, rxbuf->hmap,
3791			    BUS_DMASYNC_POSTREAD);
3792			bus_dmamap_unload(rxr->htag, rxbuf->hmap);
3793			rxbuf->m_head->m_flags |= M_PKTHDR;
3794			m_freem(rxbuf->m_head);
3795		}
3796		if (rxbuf->m_pack != NULL) {
3797			bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
3798			    BUS_DMASYNC_POSTREAD);
3799			bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
3800			rxbuf->m_pack->m_flags |= M_PKTHDR;
3801			m_freem(rxbuf->m_pack);
3802		}
3803		rxbuf->m_head = NULL;
3804		rxbuf->m_pack = NULL;
3805	}
3806}
3807
3808
3809/*********************************************************************
3810 *
3811 *  Initialize a receive ring and its buffers.
3812 *
3813 **********************************************************************/
3814static int
3815igb_setup_receive_ring(struct rx_ring *rxr)
3816{
3817	struct	adapter		*adapter;
3818	struct  ifnet		*ifp;
3819	device_t		dev;
3820	struct igb_rx_buf	*rxbuf;
3821	bus_dma_segment_t	pseg[1], hseg[1];
3822	struct lro_ctrl		*lro = &rxr->lro;
3823	int			rsize, nsegs, error = 0;
3824
3825	adapter = rxr->adapter;
3826	dev = adapter->dev;
3827	ifp = adapter->ifp;
3828
3829	/* Clear the ring contents */
3830	IGB_RX_LOCK(rxr);
3831	rsize = roundup2(adapter->num_rx_desc *
3832	    sizeof(union e1000_adv_rx_desc), IGB_DBA_ALIGN);
3833	bzero((void *)rxr->rx_base, rsize);
3834
3835	/*
3836	** Free current RX buffer structures and their mbufs
3837	*/
3838	igb_free_receive_ring(rxr);
3839
3840	/* Configure for header split? */
3841	if (igb_header_split)
3842		rxr->hdr_split = TRUE;
3843
3844        /* Now replenish the ring mbufs */
3845	for (int j = 0; j < adapter->num_rx_desc; ++j) {
3846		struct mbuf	*mh, *mp;
3847
3848		rxbuf = &rxr->rx_buffers[j];
3849		if (rxr->hdr_split == FALSE)
3850			goto skip_head;
3851
3852		/* First the header */
3853		rxbuf->m_head = m_gethdr(M_DONTWAIT, MT_DATA);
3854		if (rxbuf->m_head == NULL) {
3855			error = ENOBUFS;
3856                        goto fail;
3857		}
3858		m_adj(rxbuf->m_head, ETHER_ALIGN);
3859		mh = rxbuf->m_head;
3860		mh->m_len = mh->m_pkthdr.len = MHLEN;
3861		mh->m_flags |= M_PKTHDR;
3862		/* Get the memory mapping */
3863		error = bus_dmamap_load_mbuf_sg(rxr->htag,
3864		    rxbuf->hmap, rxbuf->m_head, hseg,
3865		    &nsegs, BUS_DMA_NOWAIT);
3866		if (error != 0) /* Nothing elegant to do here */
3867                        goto fail;
3868		bus_dmamap_sync(rxr->htag,
3869		    rxbuf->hmap, BUS_DMASYNC_PREREAD);
3870		/* Update descriptor */
3871		rxr->rx_base[j].read.hdr_addr = htole64(hseg[0].ds_addr);
3872
3873skip_head:
3874		/* Now the payload cluster */
3875		rxbuf->m_pack = m_getjcl(M_DONTWAIT, MT_DATA,
3876		    M_PKTHDR, adapter->rx_mbuf_sz);
3877		if (rxbuf->m_pack == NULL) {
3878			error = ENOBUFS;
3879                        goto fail;
3880		}
3881		mp = rxbuf->m_pack;
3882		mp->m_pkthdr.len = mp->m_len = adapter->rx_mbuf_sz;
3883		/* Get the memory mapping */
3884		error = bus_dmamap_load_mbuf_sg(rxr->ptag,
3885		    rxbuf->pmap, mp, pseg,
3886		    &nsegs, BUS_DMA_NOWAIT);
3887		if (error != 0)
3888                        goto fail;
3889		bus_dmamap_sync(rxr->ptag,
3890		    rxbuf->pmap, BUS_DMASYNC_PREREAD);
3891		/* Update descriptor */
3892		rxr->rx_base[j].read.pkt_addr = htole64(pseg[0].ds_addr);
3893        }
3894
3895	/* Setup our descriptor indices */
3896	rxr->next_to_check = 0;
3897	rxr->next_to_refresh = 0;
3898	rxr->lro_enabled = FALSE;
3899	rxr->rx_split_packets = 0;
3900	rxr->rx_bytes = 0;
3901
3902	rxr->fmp = NULL;
3903	rxr->lmp = NULL;
3904	rxr->discard = FALSE;
3905
3906	bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
3907	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3908
3909	/*
3910	** Now set up the LRO interface, we
3911	** also only do head split when LRO
3912	** is enabled, since so often they
3913	** are undesireable in similar setups.
3914	*/
3915	if (ifp->if_capenable & IFCAP_LRO) {
3916		error = tcp_lro_init(lro);
3917		if (error) {
3918			device_printf(dev, "LRO Initialization failed!\n");
3919			goto fail;
3920		}
3921		INIT_DEBUGOUT("RX LRO Initialized\n");
3922		rxr->lro_enabled = TRUE;
3923		lro->ifp = adapter->ifp;
3924	}
3925
3926	IGB_RX_UNLOCK(rxr);
3927	return (0);
3928
3929fail:
3930	igb_free_receive_ring(rxr);
3931	IGB_RX_UNLOCK(rxr);
3932	return (error);
3933}
3934
3935/*********************************************************************
3936 *
3937 *  Initialize all receive rings.
3938 *
3939 **********************************************************************/
3940static int
3941igb_setup_receive_structures(struct adapter *adapter)
3942{
3943	struct rx_ring *rxr = adapter->rx_rings;
3944	int i;
3945
3946	for (i = 0; i < adapter->num_queues; i++, rxr++)
3947		if (igb_setup_receive_ring(rxr))
3948			goto fail;
3949
3950	return (0);
3951fail:
3952	/*
3953	 * Free RX buffers allocated so far, we will only handle
3954	 * the rings that completed, the failing case will have
3955	 * cleaned up for itself. 'i' is the endpoint.
3956	 */
3957	for (int j = 0; j > i; ++j) {
3958		rxr = &adapter->rx_rings[i];
3959		IGB_RX_LOCK(rxr);
3960		igb_free_receive_ring(rxr);
3961		IGB_RX_UNLOCK(rxr);
3962	}
3963
3964	return (ENOBUFS);
3965}
3966
3967/*********************************************************************
3968 *
3969 *  Enable receive unit.
3970 *
3971 **********************************************************************/
3972static void
3973igb_initialize_receive_units(struct adapter *adapter)
3974{
3975	struct rx_ring	*rxr = adapter->rx_rings;
3976	struct ifnet	*ifp = adapter->ifp;
3977	struct e1000_hw *hw = &adapter->hw;
3978	u32		rctl, rxcsum, psize, srrctl = 0;
3979
3980	INIT_DEBUGOUT("igb_initialize_receive_unit: begin");
3981
3982	/*
3983	 * Make sure receives are disabled while setting
3984	 * up the descriptor ring
3985	 */
3986	rctl = E1000_READ_REG(hw, E1000_RCTL);
3987	E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
3988
3989	/*
3990	** Set up for header split
3991	*/
3992	if (rxr->hdr_split) {
3993		/* Use a standard mbuf for the header */
3994		srrctl |= IGB_HDR_BUF << E1000_SRRCTL_BSIZEHDRSIZE_SHIFT;
3995		srrctl |= E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
3996	} else
3997		srrctl |= E1000_SRRCTL_DESCTYPE_ADV_ONEBUF;
3998
3999	/*
4000	** Set up for jumbo frames
4001	*/
4002	if (ifp->if_mtu > ETHERMTU) {
4003		rctl |= E1000_RCTL_LPE;
4004		if (adapter->rx_mbuf_sz == MJUMPAGESIZE) {
4005			srrctl |= 4096 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
4006			rctl |= E1000_RCTL_SZ_4096 | E1000_RCTL_BSEX;
4007		} else if (adapter->rx_mbuf_sz > MJUMPAGESIZE) {
4008			srrctl |= 8192 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
4009			rctl |= E1000_RCTL_SZ_8192 | E1000_RCTL_BSEX;
4010		}
4011		/* Set maximum packet len */
4012		psize = adapter->max_frame_size;
4013		/* are we on a vlan? */
4014		if (adapter->ifp->if_vlantrunk != NULL)
4015			psize += VLAN_TAG_SIZE;
4016		E1000_WRITE_REG(&adapter->hw, E1000_RLPML, psize);
4017	} else {
4018		rctl &= ~E1000_RCTL_LPE;
4019		srrctl |= 2048 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
4020		rctl |= E1000_RCTL_SZ_2048;
4021	}
4022
4023	/* Setup the Base and Length of the Rx Descriptor Rings */
4024	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4025		u64 bus_addr = rxr->rxdma.dma_paddr;
4026		u32 rxdctl;
4027
4028		E1000_WRITE_REG(hw, E1000_RDLEN(i),
4029		    adapter->num_rx_desc * sizeof(struct e1000_rx_desc));
4030		E1000_WRITE_REG(hw, E1000_RDBAH(i),
4031		    (uint32_t)(bus_addr >> 32));
4032		E1000_WRITE_REG(hw, E1000_RDBAL(i),
4033		    (uint32_t)bus_addr);
4034		E1000_WRITE_REG(hw, E1000_SRRCTL(i), srrctl);
4035		/* Enable this Queue */
4036		rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(i));
4037		rxdctl |= E1000_RXDCTL_QUEUE_ENABLE;
4038		rxdctl &= 0xFFF00000;
4039		rxdctl |= IGB_RX_PTHRESH;
4040		rxdctl |= IGB_RX_HTHRESH << 8;
4041		rxdctl |= IGB_RX_WTHRESH << 16;
4042		E1000_WRITE_REG(hw, E1000_RXDCTL(i), rxdctl);
4043	}
4044
4045	/*
4046	** Setup for RX MultiQueue
4047	*/
4048	rxcsum = E1000_READ_REG(hw, E1000_RXCSUM);
4049	if (adapter->num_queues >1) {
4050		u32 random[10], mrqc, shift = 0;
4051		union igb_reta {
4052			u32 dword;
4053			u8  bytes[4];
4054		} reta;
4055
4056		arc4rand(&random, sizeof(random), 0);
4057		if (adapter->hw.mac.type == e1000_82575)
4058			shift = 6;
4059		/* Warning FM follows */
4060		for (int i = 0; i < 128; i++) {
4061			reta.bytes[i & 3] =
4062			    (i % adapter->num_queues) << shift;
4063			if ((i & 3) == 3)
4064				E1000_WRITE_REG(hw,
4065				    E1000_RETA(i >> 2), reta.dword);
4066		}
4067		/* Now fill in hash table */
4068		mrqc = E1000_MRQC_ENABLE_RSS_4Q;
4069		for (int i = 0; i < 10; i++)
4070			E1000_WRITE_REG_ARRAY(hw,
4071			    E1000_RSSRK(0), i, random[i]);
4072
4073		mrqc |= (E1000_MRQC_RSS_FIELD_IPV4 |
4074		    E1000_MRQC_RSS_FIELD_IPV4_TCP);
4075		mrqc |= (E1000_MRQC_RSS_FIELD_IPV6 |
4076		    E1000_MRQC_RSS_FIELD_IPV6_TCP);
4077		mrqc |=( E1000_MRQC_RSS_FIELD_IPV4_UDP |
4078		    E1000_MRQC_RSS_FIELD_IPV6_UDP);
4079		mrqc |=( E1000_MRQC_RSS_FIELD_IPV6_UDP_EX |
4080		    E1000_MRQC_RSS_FIELD_IPV6_TCP_EX);
4081
4082		E1000_WRITE_REG(hw, E1000_MRQC, mrqc);
4083
4084		/*
4085		** NOTE: Receive Full-Packet Checksum Offload
4086		** is mutually exclusive with Multiqueue. However
4087		** this is not the same as TCP/IP checksums which
4088		** still work.
4089		*/
4090		rxcsum |= E1000_RXCSUM_PCSD;
4091#if __FreeBSD_version >= 800000
4092		/* For SCTP Offload */
4093		if ((hw->mac.type == e1000_82576)
4094		    && (ifp->if_capenable & IFCAP_RXCSUM))
4095			rxcsum |= E1000_RXCSUM_CRCOFL;
4096#endif
4097	} else {
4098		/* Non RSS setup */
4099		if (ifp->if_capenable & IFCAP_RXCSUM) {
4100			rxcsum |= E1000_RXCSUM_IPPCSE;
4101#if __FreeBSD_version >= 800000
4102			if (adapter->hw.mac.type == e1000_82576)
4103				rxcsum |= E1000_RXCSUM_CRCOFL;
4104#endif
4105		} else
4106			rxcsum &= ~E1000_RXCSUM_TUOFL;
4107	}
4108	E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum);
4109
4110	/* Setup the Receive Control Register */
4111	rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
4112	rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_LBM_NO |
4113		   E1000_RCTL_RDMTS_HALF |
4114		   (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
4115	/* Strip CRC bytes. */
4116	rctl |= E1000_RCTL_SECRC;
4117	/* Make sure VLAN Filters are off */
4118	rctl &= ~E1000_RCTL_VFE;
4119	/* Don't store bad packets */
4120	rctl &= ~E1000_RCTL_SBP;
4121
4122	/* Enable Receives */
4123	E1000_WRITE_REG(hw, E1000_RCTL, rctl);
4124
4125	/*
4126	 * Setup the HW Rx Head and Tail Descriptor Pointers
4127	 *   - needs to be after enable
4128	 */
4129	for (int i = 0; i < adapter->num_queues; i++) {
4130		E1000_WRITE_REG(hw, E1000_RDH(i), 0);
4131		E1000_WRITE_REG(hw, E1000_RDT(i),
4132		     adapter->num_rx_desc - 1);
4133	}
4134	return;
4135}
4136
4137/*********************************************************************
4138 *
4139 *  Free receive rings.
4140 *
4141 **********************************************************************/
4142static void
4143igb_free_receive_structures(struct adapter *adapter)
4144{
4145	struct rx_ring *rxr = adapter->rx_rings;
4146
4147	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4148		struct lro_ctrl	*lro = &rxr->lro;
4149		igb_free_receive_buffers(rxr);
4150		tcp_lro_free(lro);
4151		igb_dma_free(adapter, &rxr->rxdma);
4152	}
4153
4154	free(adapter->rx_rings, M_DEVBUF);
4155}
4156
4157/*********************************************************************
4158 *
4159 *  Free receive ring data structures.
4160 *
4161 **********************************************************************/
4162static void
4163igb_free_receive_buffers(struct rx_ring *rxr)
4164{
4165	struct adapter		*adapter = rxr->adapter;
4166	struct igb_rx_buf	*rxbuf;
4167	int i;
4168
4169	INIT_DEBUGOUT("free_receive_structures: begin");
4170
4171	/* Cleanup any existing buffers */
4172	if (rxr->rx_buffers != NULL) {
4173		for (i = 0; i < adapter->num_rx_desc; i++) {
4174			rxbuf = &rxr->rx_buffers[i];
4175			if (rxbuf->m_head != NULL) {
4176				bus_dmamap_sync(rxr->htag, rxbuf->hmap,
4177				    BUS_DMASYNC_POSTREAD);
4178				bus_dmamap_unload(rxr->htag, rxbuf->hmap);
4179				rxbuf->m_head->m_flags |= M_PKTHDR;
4180				m_freem(rxbuf->m_head);
4181			}
4182			if (rxbuf->m_pack != NULL) {
4183				bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
4184				    BUS_DMASYNC_POSTREAD);
4185				bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
4186				rxbuf->m_pack->m_flags |= M_PKTHDR;
4187				m_freem(rxbuf->m_pack);
4188			}
4189			rxbuf->m_head = NULL;
4190			rxbuf->m_pack = NULL;
4191			if (rxbuf->hmap != NULL) {
4192				bus_dmamap_destroy(rxr->htag, rxbuf->hmap);
4193				rxbuf->hmap = NULL;
4194			}
4195			if (rxbuf->pmap != NULL) {
4196				bus_dmamap_destroy(rxr->ptag, rxbuf->pmap);
4197				rxbuf->pmap = NULL;
4198			}
4199		}
4200		if (rxr->rx_buffers != NULL) {
4201			free(rxr->rx_buffers, M_DEVBUF);
4202			rxr->rx_buffers = NULL;
4203		}
4204	}
4205
4206	if (rxr->htag != NULL) {
4207		bus_dma_tag_destroy(rxr->htag);
4208		rxr->htag = NULL;
4209	}
4210	if (rxr->ptag != NULL) {
4211		bus_dma_tag_destroy(rxr->ptag);
4212		rxr->ptag = NULL;
4213	}
4214}
4215
4216static __inline void
4217igb_rx_discard(struct rx_ring *rxr, int i)
4218{
4219	struct igb_rx_buf	*rbuf;
4220
4221	rbuf = &rxr->rx_buffers[i];
4222
4223	/* Partially received? Free the chain */
4224	if (rxr->fmp != NULL) {
4225		rxr->fmp->m_flags |= M_PKTHDR;
4226		m_freem(rxr->fmp);
4227		rxr->fmp = NULL;
4228		rxr->lmp = NULL;
4229	}
4230
4231	/*
4232	** With advanced descriptors the writeback
4233	** clobbers the buffer addrs, so its easier
4234	** to just free the existing mbufs and take
4235	** the normal refresh path to get new buffers
4236	** and mapping.
4237	*/
4238	if (rbuf->m_head) {
4239		m_free(rbuf->m_head);
4240		rbuf->m_head = NULL;
4241	}
4242
4243	if (rbuf->m_pack) {
4244		m_free(rbuf->m_pack);
4245		rbuf->m_pack = NULL;
4246	}
4247
4248	return;
4249}
4250
4251static __inline void
4252igb_rx_input(struct rx_ring *rxr, struct ifnet *ifp, struct mbuf *m, u32 ptype)
4253{
4254
4255	/*
4256	 * ATM LRO is only for IPv4/TCP packets and TCP checksum of the packet
4257	 * should be computed by hardware. Also it should not have VLAN tag in
4258	 * ethernet header.
4259	 */
4260	if (rxr->lro_enabled &&
4261	    (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 &&
4262	    (ptype & E1000_RXDADV_PKTTYPE_ETQF) == 0 &&
4263	    (ptype & (E1000_RXDADV_PKTTYPE_IPV4 | E1000_RXDADV_PKTTYPE_TCP)) ==
4264	    (E1000_RXDADV_PKTTYPE_IPV4 | E1000_RXDADV_PKTTYPE_TCP) &&
4265	    (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) ==
4266	    (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) {
4267		/*
4268		 * Send to the stack if:
4269		 **  - LRO not enabled, or
4270		 **  - no LRO resources, or
4271		 **  - lro enqueue fails
4272		 */
4273		if (rxr->lro.lro_cnt != 0)
4274			if (tcp_lro_rx(&rxr->lro, m, 0) == 0)
4275				return;
4276	}
4277	IGB_RX_UNLOCK(rxr);
4278	(*ifp->if_input)(ifp, m);
4279	IGB_RX_LOCK(rxr);
4280}
4281
4282/*********************************************************************
4283 *
4284 *  This routine executes in interrupt context. It replenishes
4285 *  the mbufs in the descriptor and sends data which has been
4286 *  dma'ed into host memory to upper layer.
4287 *
4288 *  We loop at most count times if count is > 0, or until done if
4289 *  count < 0.
4290 *
4291 *  Return TRUE if more to clean, FALSE otherwise
4292 *********************************************************************/
4293static bool
4294igb_rxeof(struct igb_queue *que, int count, int *done)
4295{
4296	struct adapter		*adapter = que->adapter;
4297	struct rx_ring		*rxr = que->rxr;
4298	struct ifnet		*ifp = adapter->ifp;
4299	struct lro_ctrl		*lro = &rxr->lro;
4300	struct lro_entry	*queued;
4301	int			i, processed = 0, rxdone = 0;
4302	u32			ptype, staterr = 0;
4303	union e1000_adv_rx_desc	*cur;
4304
4305	IGB_RX_LOCK(rxr);
4306	/* Sync the ring. */
4307	bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4308	    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
4309
4310	/* Main clean loop */
4311	for (i = rxr->next_to_check; count != 0;) {
4312		struct mbuf		*sendmp, *mh, *mp;
4313		struct igb_rx_buf	*rxbuf;
4314		u16			hlen, plen, hdr, vtag;
4315		bool			eop = FALSE;
4316
4317		cur = &rxr->rx_base[i];
4318		staterr = le32toh(cur->wb.upper.status_error);
4319		if ((staterr & E1000_RXD_STAT_DD) == 0)
4320			break;
4321		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
4322			break;
4323		count--;
4324		sendmp = mh = mp = NULL;
4325		cur->wb.upper.status_error = 0;
4326		rxbuf = &rxr->rx_buffers[i];
4327		plen = le16toh(cur->wb.upper.length);
4328		ptype = le32toh(cur->wb.lower.lo_dword.data) & IGB_PKTTYPE_MASK;
4329		vtag = le16toh(cur->wb.upper.vlan);
4330		hdr = le16toh(cur->wb.lower.lo_dword.hs_rss.hdr_info);
4331		eop = ((staterr & E1000_RXD_STAT_EOP) == E1000_RXD_STAT_EOP);
4332
4333		/* Make sure all segments of a bad packet are discarded */
4334		if (((staterr & E1000_RXDEXT_ERR_FRAME_ERR_MASK) != 0) ||
4335		    (rxr->discard)) {
4336			ifp->if_ierrors++;
4337			++rxr->rx_discarded;
4338			if (!eop) /* Catch subsequent segs */
4339				rxr->discard = TRUE;
4340			else
4341				rxr->discard = FALSE;
4342			igb_rx_discard(rxr, i);
4343			goto next_desc;
4344		}
4345
4346		/*
4347		** The way the hardware is configured to
4348		** split, it will ONLY use the header buffer
4349		** when header split is enabled, otherwise we
4350		** get normal behavior, ie, both header and
4351		** payload are DMA'd into the payload buffer.
4352		**
4353		** The fmp test is to catch the case where a
4354		** packet spans multiple descriptors, in that
4355		** case only the first header is valid.
4356		*/
4357		if (rxr->hdr_split && rxr->fmp == NULL) {
4358			hlen = (hdr & E1000_RXDADV_HDRBUFLEN_MASK) >>
4359			    E1000_RXDADV_HDRBUFLEN_SHIFT;
4360			if (hlen > IGB_HDR_BUF)
4361				hlen = IGB_HDR_BUF;
4362			mh = rxr->rx_buffers[i].m_head;
4363			mh->m_len = hlen;
4364			/* clear buf pointer for refresh */
4365			rxbuf->m_head = NULL;
4366			/*
4367			** Get the payload length, this
4368			** could be zero if its a small
4369			** packet.
4370			*/
4371			if (plen > 0) {
4372				mp = rxr->rx_buffers[i].m_pack;
4373				mp->m_len = plen;
4374				mh->m_next = mp;
4375				/* clear buf pointer */
4376				rxbuf->m_pack = NULL;
4377				rxr->rx_split_packets++;
4378			}
4379		} else {
4380			/*
4381			** Either no header split, or a
4382			** secondary piece of a fragmented
4383			** split packet.
4384			*/
4385			mh = rxr->rx_buffers[i].m_pack;
4386			mh->m_len = plen;
4387			/* clear buf info for refresh */
4388			rxbuf->m_pack = NULL;
4389		}
4390
4391		++processed; /* So we know when to refresh */
4392
4393		/* Initial frame - setup */
4394		if (rxr->fmp == NULL) {
4395			mh->m_pkthdr.len = mh->m_len;
4396			/* Save the head of the chain */
4397			rxr->fmp = mh;
4398			rxr->lmp = mh;
4399			if (mp != NULL) {
4400				/* Add payload if split */
4401				mh->m_pkthdr.len += mp->m_len;
4402				rxr->lmp = mh->m_next;
4403			}
4404		} else {
4405			/* Chain mbuf's together */
4406			rxr->lmp->m_next = mh;
4407			rxr->lmp = rxr->lmp->m_next;
4408			rxr->fmp->m_pkthdr.len += mh->m_len;
4409		}
4410
4411		if (eop) {
4412			rxr->fmp->m_pkthdr.rcvif = ifp;
4413			ifp->if_ipackets++;
4414			rxr->rx_packets++;
4415			/* capture data for AIM */
4416			rxr->packets++;
4417			rxr->bytes += rxr->fmp->m_pkthdr.len;
4418			rxr->rx_bytes += rxr->fmp->m_pkthdr.len;
4419
4420			if ((ifp->if_capenable & IFCAP_RXCSUM) != 0)
4421				igb_rx_checksum(staterr, rxr->fmp, ptype);
4422
4423			if ((ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 &&
4424			    (staterr & E1000_RXD_STAT_VP) != 0) {
4425				rxr->fmp->m_pkthdr.ether_vtag = vtag;
4426				rxr->fmp->m_flags |= M_VLANTAG;
4427			}
4428#if __FreeBSD_version >= 800000
4429			rxr->fmp->m_pkthdr.flowid = que->msix;
4430			rxr->fmp->m_flags |= M_FLOWID;
4431#endif
4432			sendmp = rxr->fmp;
4433			/* Make sure to set M_PKTHDR. */
4434			sendmp->m_flags |= M_PKTHDR;
4435			rxr->fmp = NULL;
4436			rxr->lmp = NULL;
4437		}
4438
4439next_desc:
4440		bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4441		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4442
4443		/* Advance our pointers to the next descriptor. */
4444		if (++i == adapter->num_rx_desc)
4445			i = 0;
4446		/*
4447		** Send to the stack or LRO
4448		*/
4449		if (sendmp != NULL) {
4450			rxr->next_to_check = i;
4451			igb_rx_input(rxr, ifp, sendmp, ptype);
4452			i = rxr->next_to_check;
4453			rxdone++;
4454		}
4455
4456		/* Every 8 descriptors we go to refresh mbufs */
4457		if (processed == 8) {
4458                        igb_refresh_mbufs(rxr, i);
4459                        processed = 0;
4460		}
4461	}
4462
4463	/* Catch any remainders */
4464	if (processed != 0) {
4465		igb_refresh_mbufs(rxr, i);
4466		processed = 0;
4467	}
4468
4469	rxr->next_to_check = i;
4470
4471	/*
4472	 * Flush any outstanding LRO work
4473	 */
4474	while ((queued = SLIST_FIRST(&lro->lro_active)) != NULL) {
4475		SLIST_REMOVE_HEAD(&lro->lro_active, next);
4476		tcp_lro_flush(lro, queued);
4477	}
4478
4479	IGB_RX_UNLOCK(rxr);
4480
4481	if (done != NULL)
4482		*done = rxdone;
4483
4484	/*
4485	** We still have cleaning to do?
4486	** Schedule another interrupt if so.
4487	*/
4488	if ((staterr & E1000_RXD_STAT_DD) != 0)
4489		return (TRUE);
4490
4491	return (FALSE);
4492}
4493
4494/*********************************************************************
4495 *
4496 *  Verify that the hardware indicated that the checksum is valid.
4497 *  Inform the stack about the status of checksum so that stack
4498 *  doesn't spend time verifying the checksum.
4499 *
4500 *********************************************************************/
4501static void
4502igb_rx_checksum(u32 staterr, struct mbuf *mp, u32 ptype)
4503{
4504	u16 status = (u16)staterr;
4505	u8  errors = (u8) (staterr >> 24);
4506	int sctp;
4507
4508	/* Ignore Checksum bit is set */
4509	if (status & E1000_RXD_STAT_IXSM) {
4510		mp->m_pkthdr.csum_flags = 0;
4511		return;
4512	}
4513
4514	if ((ptype & E1000_RXDADV_PKTTYPE_ETQF) == 0 &&
4515	    (ptype & E1000_RXDADV_PKTTYPE_SCTP) != 0)
4516		sctp = 1;
4517	else
4518		sctp = 0;
4519	if (status & E1000_RXD_STAT_IPCS) {
4520		/* Did it pass? */
4521		if (!(errors & E1000_RXD_ERR_IPE)) {
4522			/* IP Checksum Good */
4523			mp->m_pkthdr.csum_flags = CSUM_IP_CHECKED;
4524			mp->m_pkthdr.csum_flags |= CSUM_IP_VALID;
4525		} else
4526			mp->m_pkthdr.csum_flags = 0;
4527	}
4528
4529	if (status & (E1000_RXD_STAT_TCPCS | E1000_RXD_STAT_UDPCS)) {
4530		u16 type = (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
4531#if __FreeBSD_version >= 800000
4532		if (sctp) /* reassign */
4533			type = CSUM_SCTP_VALID;
4534#endif
4535		/* Did it pass? */
4536		if (!(errors & E1000_RXD_ERR_TCPE)) {
4537			mp->m_pkthdr.csum_flags |= type;
4538			if (sctp == 0)
4539				mp->m_pkthdr.csum_data = htons(0xffff);
4540		}
4541	}
4542	return;
4543}
4544
4545/*
4546 * This routine is run via an vlan
4547 * config EVENT
4548 */
4549static void
4550igb_register_vlan(void *arg, struct ifnet *ifp, u16 vtag)
4551{
4552	struct adapter	*adapter = ifp->if_softc;
4553	u32		index, bit;
4554
4555	if (ifp->if_softc !=  arg)   /* Not our event */
4556		return;
4557
4558	if ((vtag == 0) || (vtag > 4095))       /* Invalid */
4559                return;
4560
4561	IGB_CORE_LOCK(adapter);
4562	index = (vtag >> 5) & 0x7F;
4563	bit = vtag & 0x1F;
4564	adapter->shadow_vfta[index] |= (1 << bit);
4565	++adapter->num_vlans;
4566	/* Re-init to load the changes */
4567	if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
4568		igb_init_locked(adapter);
4569	IGB_CORE_UNLOCK(adapter);
4570}
4571
4572/*
4573 * This routine is run via an vlan
4574 * unconfig EVENT
4575 */
4576static void
4577igb_unregister_vlan(void *arg, struct ifnet *ifp, u16 vtag)
4578{
4579	struct adapter	*adapter = ifp->if_softc;
4580	u32		index, bit;
4581
4582	if (ifp->if_softc !=  arg)
4583		return;
4584
4585	if ((vtag == 0) || (vtag > 4095))       /* Invalid */
4586                return;
4587
4588	IGB_CORE_LOCK(adapter);
4589	index = (vtag >> 5) & 0x7F;
4590	bit = vtag & 0x1F;
4591	adapter->shadow_vfta[index] &= ~(1 << bit);
4592	--adapter->num_vlans;
4593	/* Re-init to load the changes */
4594	if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
4595		igb_init_locked(adapter);
4596	IGB_CORE_UNLOCK(adapter);
4597}
4598
4599static void
4600igb_setup_vlan_hw_support(struct adapter *adapter)
4601{
4602	struct e1000_hw *hw = &adapter->hw;
4603	u32             reg;
4604
4605	/*
4606	** We get here thru init_locked, meaning
4607	** a soft reset, this has already cleared
4608	** the VFTA and other state, so if there
4609	** have been no vlan's registered do nothing.
4610	*/
4611	if (adapter->num_vlans == 0)
4612                return;
4613
4614	/*
4615	** A soft reset zero's out the VFTA, so
4616	** we need to repopulate it now.
4617	*/
4618	for (int i = 0; i < IGB_VFTA_SIZE; i++)
4619                if (adapter->shadow_vfta[i] != 0) {
4620			if (hw->mac.type == e1000_vfadapt)
4621				e1000_vfta_set_vf(hw,
4622				    adapter->shadow_vfta[i], TRUE);
4623			else
4624				E1000_WRITE_REG_ARRAY(hw, E1000_VFTA,
4625                           	 i, adapter->shadow_vfta[i]);
4626		}
4627
4628	if (hw->mac.type == e1000_vfadapt)
4629		e1000_rlpml_set_vf(hw,
4630		    adapter->max_frame_size + VLAN_TAG_SIZE);
4631	else {
4632		reg = E1000_READ_REG(hw, E1000_CTRL);
4633		reg |= E1000_CTRL_VME;
4634		E1000_WRITE_REG(hw, E1000_CTRL, reg);
4635
4636		/* Enable the Filter Table */
4637		reg = E1000_READ_REG(hw, E1000_RCTL);
4638		reg &= ~E1000_RCTL_CFIEN;
4639		reg |= E1000_RCTL_VFE;
4640		E1000_WRITE_REG(hw, E1000_RCTL, reg);
4641
4642		/* Update the frame size */
4643		E1000_WRITE_REG(&adapter->hw, E1000_RLPML,
4644		    adapter->max_frame_size + VLAN_TAG_SIZE);
4645	}
4646}
4647
4648static void
4649igb_enable_intr(struct adapter *adapter)
4650{
4651	/* With RSS set up what to auto clear */
4652	if (adapter->msix_mem) {
4653		E1000_WRITE_REG(&adapter->hw, E1000_EIAC,
4654		    adapter->eims_mask);
4655		E1000_WRITE_REG(&adapter->hw, E1000_EIAM,
4656		    adapter->eims_mask);
4657		E1000_WRITE_REG(&adapter->hw, E1000_EIMS,
4658		    adapter->eims_mask);
4659		E1000_WRITE_REG(&adapter->hw, E1000_IMS,
4660		    E1000_IMS_LSC);
4661	} else {
4662		E1000_WRITE_REG(&adapter->hw, E1000_IMS,
4663		    IMS_ENABLE_MASK);
4664	}
4665	E1000_WRITE_FLUSH(&adapter->hw);
4666
4667	return;
4668}
4669
4670static void
4671igb_disable_intr(struct adapter *adapter)
4672{
4673	if (adapter->msix_mem) {
4674		E1000_WRITE_REG(&adapter->hw, E1000_EIMC, ~0);
4675		E1000_WRITE_REG(&adapter->hw, E1000_EIAC, 0);
4676	}
4677	E1000_WRITE_REG(&adapter->hw, E1000_IMC, ~0);
4678	E1000_WRITE_FLUSH(&adapter->hw);
4679	return;
4680}
4681
4682/*
4683 * Bit of a misnomer, what this really means is
4684 * to enable OS management of the system... aka
4685 * to disable special hardware management features
4686 */
4687static void
4688igb_init_manageability(struct adapter *adapter)
4689{
4690	if (adapter->has_manage) {
4691		int manc2h = E1000_READ_REG(&adapter->hw, E1000_MANC2H);
4692		int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
4693
4694		/* disable hardware interception of ARP */
4695		manc &= ~(E1000_MANC_ARP_EN);
4696
4697                /* enable receiving management packets to the host */
4698		manc |= E1000_MANC_EN_MNG2HOST;
4699		manc2h |= 1 << 5;  /* Mng Port 623 */
4700		manc2h |= 1 << 6;  /* Mng Port 664 */
4701		E1000_WRITE_REG(&adapter->hw, E1000_MANC2H, manc2h);
4702		E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
4703	}
4704}
4705
4706/*
4707 * Give control back to hardware management
4708 * controller if there is one.
4709 */
4710static void
4711igb_release_manageability(struct adapter *adapter)
4712{
4713	if (adapter->has_manage) {
4714		int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
4715
4716		/* re-enable hardware interception of ARP */
4717		manc |= E1000_MANC_ARP_EN;
4718		manc &= ~E1000_MANC_EN_MNG2HOST;
4719
4720		E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
4721	}
4722}
4723
4724/*
4725 * igb_get_hw_control sets CTRL_EXT:DRV_LOAD bit.
4726 * For ASF and Pass Through versions of f/w this means that
4727 * the driver is loaded.
4728 *
4729 */
4730static void
4731igb_get_hw_control(struct adapter *adapter)
4732{
4733	u32 ctrl_ext;
4734
4735	if (adapter->hw.mac.type == e1000_vfadapt)
4736		return;
4737
4738	/* Let firmware know the driver has taken over */
4739	ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4740	E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
4741	    ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
4742}
4743
4744/*
4745 * igb_release_hw_control resets CTRL_EXT:DRV_LOAD bit.
4746 * For ASF and Pass Through versions of f/w this means that the
4747 * driver is no longer loaded.
4748 *
4749 */
4750static void
4751igb_release_hw_control(struct adapter *adapter)
4752{
4753	u32 ctrl_ext;
4754
4755	if (adapter->hw.mac.type == e1000_vfadapt)
4756		return;
4757
4758	/* Let firmware taken over control of h/w */
4759	ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4760	E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
4761	    ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
4762}
4763
4764static int
4765igb_is_valid_ether_addr(uint8_t *addr)
4766{
4767	char zero_addr[6] = { 0, 0, 0, 0, 0, 0 };
4768
4769	if ((addr[0] & 1) || (!bcmp(addr, zero_addr, ETHER_ADDR_LEN))) {
4770		return (FALSE);
4771	}
4772
4773	return (TRUE);
4774}
4775
4776
4777/*
4778 * Enable PCI Wake On Lan capability
4779 */
4780static void
4781igb_enable_wakeup(device_t dev)
4782{
4783	u16     cap, status;
4784	u8      id;
4785
4786	/* First find the capabilities pointer*/
4787	cap = pci_read_config(dev, PCIR_CAP_PTR, 2);
4788	/* Read the PM Capabilities */
4789	id = pci_read_config(dev, cap, 1);
4790	if (id != PCIY_PMG)     /* Something wrong */
4791		return;
4792	/* OK, we have the power capabilities, so
4793	   now get the status register */
4794	cap += PCIR_POWER_STATUS;
4795	status = pci_read_config(dev, cap, 2);
4796	status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE;
4797	pci_write_config(dev, cap, status, 2);
4798	return;
4799}
4800
4801static void
4802igb_led_func(void *arg, int onoff)
4803{
4804	struct adapter	*adapter = arg;
4805
4806	IGB_CORE_LOCK(adapter);
4807	if (onoff) {
4808		e1000_setup_led(&adapter->hw);
4809		e1000_led_on(&adapter->hw);
4810	} else {
4811		e1000_led_off(&adapter->hw);
4812		e1000_cleanup_led(&adapter->hw);
4813	}
4814	IGB_CORE_UNLOCK(adapter);
4815}
4816
4817/**********************************************************************
4818 *
4819 *  Update the board statistics counters.
4820 *
4821 **********************************************************************/
4822static void
4823igb_update_stats_counters(struct adapter *adapter)
4824{
4825	struct ifnet		*ifp;
4826        struct e1000_hw		*hw = &adapter->hw;
4827	struct e1000_hw_stats	*stats;
4828
4829	/*
4830	** The virtual function adapter has only a
4831	** small controlled set of stats, do only
4832	** those and return.
4833	*/
4834	if (adapter->hw.mac.type == e1000_vfadapt) {
4835		igb_update_vf_stats_counters(adapter);
4836		return;
4837	}
4838
4839	stats = (struct e1000_hw_stats	*)adapter->stats;
4840
4841	if(adapter->hw.phy.media_type == e1000_media_type_copper ||
4842	   (E1000_READ_REG(hw, E1000_STATUS) & E1000_STATUS_LU)) {
4843		stats->symerrs +=
4844		    E1000_READ_REG(hw,E1000_SYMERRS);
4845		stats->sec += E1000_READ_REG(hw, E1000_SEC);
4846	}
4847
4848	stats->crcerrs += E1000_READ_REG(hw, E1000_CRCERRS);
4849	stats->mpc += E1000_READ_REG(hw, E1000_MPC);
4850	stats->scc += E1000_READ_REG(hw, E1000_SCC);
4851	stats->ecol += E1000_READ_REG(hw, E1000_ECOL);
4852
4853	stats->mcc += E1000_READ_REG(hw, E1000_MCC);
4854	stats->latecol += E1000_READ_REG(hw, E1000_LATECOL);
4855	stats->colc += E1000_READ_REG(hw, E1000_COLC);
4856	stats->dc += E1000_READ_REG(hw, E1000_DC);
4857	stats->rlec += E1000_READ_REG(hw, E1000_RLEC);
4858	stats->xonrxc += E1000_READ_REG(hw, E1000_XONRXC);
4859	stats->xontxc += E1000_READ_REG(hw, E1000_XONTXC);
4860	/*
4861	** For watchdog management we need to know if we have been
4862	** paused during the last interval, so capture that here.
4863	*/
4864        adapter->pause_frames = E1000_READ_REG(&adapter->hw, E1000_XOFFRXC);
4865        stats->xoffrxc += adapter->pause_frames;
4866	stats->xofftxc += E1000_READ_REG(hw, E1000_XOFFTXC);
4867	stats->fcruc += E1000_READ_REG(hw, E1000_FCRUC);
4868	stats->prc64 += E1000_READ_REG(hw, E1000_PRC64);
4869	stats->prc127 += E1000_READ_REG(hw, E1000_PRC127);
4870	stats->prc255 += E1000_READ_REG(hw, E1000_PRC255);
4871	stats->prc511 += E1000_READ_REG(hw, E1000_PRC511);
4872	stats->prc1023 += E1000_READ_REG(hw, E1000_PRC1023);
4873	stats->prc1522 += E1000_READ_REG(hw, E1000_PRC1522);
4874	stats->gprc += E1000_READ_REG(hw, E1000_GPRC);
4875	stats->bprc += E1000_READ_REG(hw, E1000_BPRC);
4876	stats->mprc += E1000_READ_REG(hw, E1000_MPRC);
4877	stats->gptc += E1000_READ_REG(hw, E1000_GPTC);
4878
4879	/* For the 64-bit byte counters the low dword must be read first. */
4880	/* Both registers clear on the read of the high dword */
4881
4882	stats->gorc += E1000_READ_REG(hw, E1000_GORCL) +
4883	    ((u64)E1000_READ_REG(hw, E1000_GORCH) << 32);
4884	stats->gotc += E1000_READ_REG(hw, E1000_GOTCL) +
4885	    ((u64)E1000_READ_REG(hw, E1000_GOTCH) << 32);
4886
4887	stats->rnbc += E1000_READ_REG(hw, E1000_RNBC);
4888	stats->ruc += E1000_READ_REG(hw, E1000_RUC);
4889	stats->rfc += E1000_READ_REG(hw, E1000_RFC);
4890	stats->roc += E1000_READ_REG(hw, E1000_ROC);
4891	stats->rjc += E1000_READ_REG(hw, E1000_RJC);
4892
4893	stats->tor += E1000_READ_REG(hw, E1000_TORH);
4894	stats->tot += E1000_READ_REG(hw, E1000_TOTH);
4895
4896	stats->tpr += E1000_READ_REG(hw, E1000_TPR);
4897	stats->tpt += E1000_READ_REG(hw, E1000_TPT);
4898	stats->ptc64 += E1000_READ_REG(hw, E1000_PTC64);
4899	stats->ptc127 += E1000_READ_REG(hw, E1000_PTC127);
4900	stats->ptc255 += E1000_READ_REG(hw, E1000_PTC255);
4901	stats->ptc511 += E1000_READ_REG(hw, E1000_PTC511);
4902	stats->ptc1023 += E1000_READ_REG(hw, E1000_PTC1023);
4903	stats->ptc1522 += E1000_READ_REG(hw, E1000_PTC1522);
4904	stats->mptc += E1000_READ_REG(hw, E1000_MPTC);
4905	stats->bptc += E1000_READ_REG(hw, E1000_BPTC);
4906
4907	/* Interrupt Counts */
4908
4909	stats->iac += E1000_READ_REG(hw, E1000_IAC);
4910	stats->icrxptc += E1000_READ_REG(hw, E1000_ICRXPTC);
4911	stats->icrxatc += E1000_READ_REG(hw, E1000_ICRXATC);
4912	stats->ictxptc += E1000_READ_REG(hw, E1000_ICTXPTC);
4913	stats->ictxatc += E1000_READ_REG(hw, E1000_ICTXATC);
4914	stats->ictxqec += E1000_READ_REG(hw, E1000_ICTXQEC);
4915	stats->ictxqmtc += E1000_READ_REG(hw, E1000_ICTXQMTC);
4916	stats->icrxdmtc += E1000_READ_REG(hw, E1000_ICRXDMTC);
4917	stats->icrxoc += E1000_READ_REG(hw, E1000_ICRXOC);
4918
4919	/* Host to Card Statistics */
4920
4921	stats->cbtmpc += E1000_READ_REG(hw, E1000_CBTMPC);
4922	stats->htdpmc += E1000_READ_REG(hw, E1000_HTDPMC);
4923	stats->cbrdpc += E1000_READ_REG(hw, E1000_CBRDPC);
4924	stats->cbrmpc += E1000_READ_REG(hw, E1000_CBRMPC);
4925	stats->rpthc += E1000_READ_REG(hw, E1000_RPTHC);
4926	stats->hgptc += E1000_READ_REG(hw, E1000_HGPTC);
4927	stats->htcbdpc += E1000_READ_REG(hw, E1000_HTCBDPC);
4928	stats->hgorc += (E1000_READ_REG(hw, E1000_HGORCL) +
4929	    ((u64)E1000_READ_REG(hw, E1000_HGORCH) << 32));
4930	stats->hgotc += (E1000_READ_REG(hw, E1000_HGOTCL) +
4931	    ((u64)E1000_READ_REG(hw, E1000_HGOTCH) << 32));
4932	stats->lenerrs += E1000_READ_REG(hw, E1000_LENERRS);
4933	stats->scvpc += E1000_READ_REG(hw, E1000_SCVPC);
4934	stats->hrmpc += E1000_READ_REG(hw, E1000_HRMPC);
4935
4936	stats->algnerrc += E1000_READ_REG(hw, E1000_ALGNERRC);
4937	stats->rxerrc += E1000_READ_REG(hw, E1000_RXERRC);
4938	stats->tncrs += E1000_READ_REG(hw, E1000_TNCRS);
4939	stats->cexterr += E1000_READ_REG(hw, E1000_CEXTERR);
4940	stats->tsctc += E1000_READ_REG(hw, E1000_TSCTC);
4941	stats->tsctfc += E1000_READ_REG(hw, E1000_TSCTFC);
4942
4943	ifp = adapter->ifp;
4944	ifp->if_collisions = stats->colc;
4945
4946	/* Rx Errors */
4947	ifp->if_ierrors = adapter->dropped_pkts + stats->rxerrc +
4948	    stats->crcerrs + stats->algnerrc +
4949	    stats->ruc + stats->roc + stats->mpc + stats->cexterr;
4950
4951	/* Tx Errors */
4952	ifp->if_oerrors = stats->ecol +
4953	    stats->latecol + adapter->watchdog_events;
4954
4955	/* Driver specific counters */
4956	adapter->device_control = E1000_READ_REG(hw, E1000_CTRL);
4957	adapter->rx_control = E1000_READ_REG(hw, E1000_RCTL);
4958	adapter->int_mask = E1000_READ_REG(hw, E1000_IMS);
4959	adapter->eint_mask = E1000_READ_REG(hw, E1000_EIMS);
4960	adapter->packet_buf_alloc_tx =
4961	    ((E1000_READ_REG(hw, E1000_PBA) & 0xffff0000) >> 16);
4962	adapter->packet_buf_alloc_rx =
4963	    (E1000_READ_REG(hw, E1000_PBA) & 0xffff);
4964}
4965
4966
4967/**********************************************************************
4968 *
4969 *  Initialize the VF board statistics counters.
4970 *
4971 **********************************************************************/
4972static void
4973igb_vf_init_stats(struct adapter *adapter)
4974{
4975        struct e1000_hw *hw = &adapter->hw;
4976	struct e1000_vf_stats	*stats;
4977
4978	stats = (struct e1000_vf_stats	*)adapter->stats;
4979	if (stats == NULL)
4980		return;
4981        stats->last_gprc = E1000_READ_REG(hw, E1000_VFGPRC);
4982        stats->last_gorc = E1000_READ_REG(hw, E1000_VFGORC);
4983        stats->last_gptc = E1000_READ_REG(hw, E1000_VFGPTC);
4984        stats->last_gotc = E1000_READ_REG(hw, E1000_VFGOTC);
4985        stats->last_mprc = E1000_READ_REG(hw, E1000_VFMPRC);
4986}
4987
4988/**********************************************************************
4989 *
4990 *  Update the VF board statistics counters.
4991 *
4992 **********************************************************************/
4993static void
4994igb_update_vf_stats_counters(struct adapter *adapter)
4995{
4996	struct e1000_hw *hw = &adapter->hw;
4997	struct e1000_vf_stats	*stats;
4998
4999	if (adapter->link_speed == 0)
5000		return;
5001
5002	stats = (struct e1000_vf_stats	*)adapter->stats;
5003
5004	UPDATE_VF_REG(E1000_VFGPRC,
5005	    stats->last_gprc, stats->gprc);
5006	UPDATE_VF_REG(E1000_VFGORC,
5007	    stats->last_gorc, stats->gorc);
5008	UPDATE_VF_REG(E1000_VFGPTC,
5009	    stats->last_gptc, stats->gptc);
5010	UPDATE_VF_REG(E1000_VFGOTC,
5011	    stats->last_gotc, stats->gotc);
5012	UPDATE_VF_REG(E1000_VFMPRC,
5013	    stats->last_mprc, stats->mprc);
5014}
5015
5016/* Export a single 32-bit register via a read-only sysctl. */
5017static int
5018igb_sysctl_reg_handler(SYSCTL_HANDLER_ARGS)
5019{
5020	struct adapter *adapter;
5021	u_int val;
5022
5023	adapter = oidp->oid_arg1;
5024	val = E1000_READ_REG(&adapter->hw, oidp->oid_arg2);
5025	return (sysctl_handle_int(oidp, &val, 0, req));
5026}
5027
5028/*
5029**  Tuneable interrupt rate handler
5030*/
5031static int
5032igb_sysctl_interrupt_rate_handler(SYSCTL_HANDLER_ARGS)
5033{
5034	struct igb_queue	*que = ((struct igb_queue *)oidp->oid_arg1);
5035	int			error;
5036	u32			reg, usec, rate;
5037
5038	reg = E1000_READ_REG(&que->adapter->hw, E1000_EITR(que->msix));
5039	usec = ((reg & 0x7FFC) >> 2);
5040	if (usec > 0)
5041		rate = 1000000 / usec;
5042	else
5043		rate = 0;
5044	error = sysctl_handle_int(oidp, &rate, 0, req);
5045	if (error || !req->newptr)
5046		return error;
5047	return 0;
5048}
5049
5050/*
5051 * Add sysctl variables, one per statistic, to the system.
5052 */
5053static void
5054igb_add_hw_stats(struct adapter *adapter)
5055{
5056	device_t dev = adapter->dev;
5057
5058	struct tx_ring *txr = adapter->tx_rings;
5059	struct rx_ring *rxr = adapter->rx_rings;
5060
5061	struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev);
5062	struct sysctl_oid *tree = device_get_sysctl_tree(dev);
5063	struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree);
5064	struct e1000_hw_stats *stats = adapter->stats;
5065
5066	struct sysctl_oid *stat_node, *queue_node, *int_node, *host_node;
5067	struct sysctl_oid_list *stat_list, *queue_list, *int_list, *host_list;
5068
5069#define QUEUE_NAME_LEN 32
5070	char namebuf[QUEUE_NAME_LEN];
5071
5072	/* Driver Statistics */
5073	SYSCTL_ADD_INT(ctx, child, OID_AUTO, "link_irq",
5074			CTLFLAG_RD, &adapter->link_irq, 0,
5075			"Link MSIX IRQ Handled");
5076	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "dropped",
5077			CTLFLAG_RD, &adapter->dropped_pkts,
5078			"Driver dropped packets");
5079	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_dma_fail",
5080			CTLFLAG_RD, &adapter->no_tx_dma_setup,
5081			"Driver tx dma failure in xmit");
5082	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_overruns",
5083			CTLFLAG_RD, &adapter->rx_overruns,
5084			"RX overruns");
5085	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "watchdog_timeouts",
5086			CTLFLAG_RD, &adapter->watchdog_events,
5087			"Watchdog timeouts");
5088
5089	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "device_control",
5090			CTLFLAG_RD, &adapter->device_control,
5091			"Device Control Register");
5092	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_control",
5093			CTLFLAG_RD, &adapter->rx_control,
5094			"Receiver Control Register");
5095	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "interrupt_mask",
5096			CTLFLAG_RD, &adapter->int_mask,
5097			"Interrupt Mask");
5098	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "extended_int_mask",
5099			CTLFLAG_RD, &adapter->eint_mask,
5100			"Extended Interrupt Mask");
5101	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_buf_alloc",
5102			CTLFLAG_RD, &adapter->packet_buf_alloc_tx,
5103			"Transmit Buffer Packet Allocation");
5104	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_buf_alloc",
5105			CTLFLAG_RD, &adapter->packet_buf_alloc_rx,
5106			"Receive Buffer Packet Allocation");
5107	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_high_water",
5108			CTLFLAG_RD, &adapter->hw.fc.high_water, 0,
5109			"Flow Control High Watermark");
5110	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_low_water",
5111			CTLFLAG_RD, &adapter->hw.fc.low_water, 0,
5112			"Flow Control Low Watermark");
5113
5114	for (int i = 0; i < adapter->num_queues; i++, rxr++, txr++) {
5115		struct lro_ctrl *lro = &rxr->lro;
5116
5117		snprintf(namebuf, QUEUE_NAME_LEN, "queue%d", i);
5118		queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
5119					    CTLFLAG_RD, NULL, "Queue Name");
5120		queue_list = SYSCTL_CHILDREN(queue_node);
5121
5122		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "interrupt_rate",
5123				CTLFLAG_RD, &adapter->queues[i],
5124				sizeof(&adapter->queues[i]),
5125				igb_sysctl_interrupt_rate_handler,
5126				"IU", "Interrupt Rate");
5127
5128		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_head",
5129				CTLFLAG_RD, adapter, E1000_TDH(txr->me),
5130				igb_sysctl_reg_handler, "IU",
5131 				"Transmit Descriptor Head");
5132		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_tail",
5133				CTLFLAG_RD, adapter, E1000_TDT(txr->me),
5134				igb_sysctl_reg_handler, "IU",
5135 				"Transmit Descriptor Tail");
5136		SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "no_desc_avail",
5137				CTLFLAG_RD, &txr->no_desc_avail,
5138				"Queue No Descriptor Available");
5139		SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "tx_packets",
5140				CTLFLAG_RD, &txr->tx_packets,
5141				"Queue Packets Transmitted");
5142
5143		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_head",
5144				CTLFLAG_RD, adapter, E1000_RDH(rxr->me),
5145				igb_sysctl_reg_handler, "IU",
5146				"Receive Descriptor Head");
5147		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_tail",
5148				CTLFLAG_RD, adapter, E1000_RDT(rxr->me),
5149				igb_sysctl_reg_handler, "IU",
5150				"Receive Descriptor Tail");
5151		SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "rx_packets",
5152				CTLFLAG_RD, &rxr->rx_packets,
5153				"Queue Packets Received");
5154		SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "rx_bytes",
5155				CTLFLAG_RD, &rxr->rx_bytes,
5156				"Queue Bytes Received");
5157		SYSCTL_ADD_INT(ctx, queue_list, OID_AUTO, "lro_queued",
5158				CTLFLAG_RD, &lro->lro_queued, 0,
5159				"LRO Queued");
5160		SYSCTL_ADD_INT(ctx, queue_list, OID_AUTO, "lro_flushed",
5161				CTLFLAG_RD, &lro->lro_flushed, 0,
5162				"LRO Flushed");
5163	}
5164
5165	/* MAC stats get their own sub node */
5166
5167	stat_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "mac_stats",
5168				    CTLFLAG_RD, NULL, "MAC Statistics");
5169	stat_list = SYSCTL_CHILDREN(stat_node);
5170
5171	/*
5172	** VF adapter has a very limited set of stats
5173	** since its not managing the metal, so to speak.
5174	*/
5175	if (adapter->hw.mac.type == e1000_vfadapt) {
5176	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd",
5177			CTLFLAG_RD, &stats->gprc,
5178			"Good Packets Received");
5179	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
5180			CTLFLAG_RD, &stats->gptc,
5181			"Good Packets Transmitted");
5182 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd",
5183 			CTLFLAG_RD, &stats->gorc,
5184 			"Good Octets Received");
5185 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_txd",
5186 			CTLFLAG_RD, &stats->gotc,
5187 			"Good Octets Transmitted");
5188	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd",
5189			CTLFLAG_RD, &stats->mprc,
5190			"Multicast Packets Received");
5191		return;
5192	}
5193
5194	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "excess_coll",
5195			CTLFLAG_RD, &stats->ecol,
5196			"Excessive collisions");
5197	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "single_coll",
5198			CTLFLAG_RD, &stats->scc,
5199			"Single collisions");
5200	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "multiple_coll",
5201			CTLFLAG_RD, &stats->mcc,
5202			"Multiple collisions");
5203	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "late_coll",
5204			CTLFLAG_RD, &stats->latecol,
5205			"Late collisions");
5206	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "collision_count",
5207			CTLFLAG_RD, &stats->colc,
5208			"Collision Count");
5209	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "symbol_errors",
5210			CTLFLAG_RD, &stats->symerrs,
5211			"Symbol Errors");
5212	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "sequence_errors",
5213			CTLFLAG_RD, &stats->sec,
5214			"Sequence Errors");
5215	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "defer_count",
5216			CTLFLAG_RD, &stats->dc,
5217			"Defer Count");
5218	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "missed_packets",
5219			CTLFLAG_RD, &stats->mpc,
5220			"Missed Packets");
5221	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_no_buff",
5222			CTLFLAG_RD, &stats->rnbc,
5223			"Receive No Buffers");
5224	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_undersize",
5225			CTLFLAG_RD, &stats->ruc,
5226			"Receive Undersize");
5227	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_fragmented",
5228			CTLFLAG_RD, &stats->rfc,
5229			"Fragmented Packets Received ");
5230	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_oversize",
5231			CTLFLAG_RD, &stats->roc,
5232			"Oversized Packets Received");
5233	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_jabber",
5234			CTLFLAG_RD, &stats->rjc,
5235			"Recevied Jabber");
5236	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_errs",
5237			CTLFLAG_RD, &stats->rxerrc,
5238			"Receive Errors");
5239	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "crc_errs",
5240			CTLFLAG_RD, &stats->crcerrs,
5241			"CRC errors");
5242	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "alignment_errs",
5243			CTLFLAG_RD, &stats->algnerrc,
5244			"Alignment Errors");
5245	/* On 82575 these are collision counts */
5246	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "coll_ext_errs",
5247			CTLFLAG_RD, &stats->cexterr,
5248			"Collision/Carrier extension errors");
5249	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_recvd",
5250			CTLFLAG_RD, &stats->xonrxc,
5251			"XON Received");
5252	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_txd",
5253			CTLFLAG_RD, &stats->xontxc,
5254			"XON Transmitted");
5255	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_recvd",
5256			CTLFLAG_RD, &stats->xoffrxc,
5257			"XOFF Received");
5258	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_txd",
5259			CTLFLAG_RD, &stats->xofftxc,
5260			"XOFF Transmitted");
5261	/* Packet Reception Stats */
5262	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_recvd",
5263			CTLFLAG_RD, &stats->tpr,
5264			"Total Packets Received ");
5265	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd",
5266			CTLFLAG_RD, &stats->gprc,
5267			"Good Packets Received");
5268	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_recvd",
5269			CTLFLAG_RD, &stats->bprc,
5270			"Broadcast Packets Received");
5271	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd",
5272			CTLFLAG_RD, &stats->mprc,
5273			"Multicast Packets Received");
5274	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_64",
5275			CTLFLAG_RD, &stats->prc64,
5276			"64 byte frames received ");
5277	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_65_127",
5278			CTLFLAG_RD, &stats->prc127,
5279			"65-127 byte frames received");
5280	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_128_255",
5281			CTLFLAG_RD, &stats->prc255,
5282			"128-255 byte frames received");
5283	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_256_511",
5284			CTLFLAG_RD, &stats->prc511,
5285			"256-511 byte frames received");
5286	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_512_1023",
5287			CTLFLAG_RD, &stats->prc1023,
5288			"512-1023 byte frames received");
5289	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_1024_1522",
5290			CTLFLAG_RD, &stats->prc1522,
5291			"1023-1522 byte frames received");
5292 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd",
5293 			CTLFLAG_RD, &stats->gorc,
5294 			"Good Octets Received");
5295
5296	/* Packet Transmission Stats */
5297 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_txd",
5298 			CTLFLAG_RD, &stats->gotc,
5299 			"Good Octets Transmitted");
5300	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_txd",
5301			CTLFLAG_RD, &stats->tpt,
5302			"Total Packets Transmitted");
5303	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
5304			CTLFLAG_RD, &stats->gptc,
5305			"Good Packets Transmitted");
5306	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_txd",
5307			CTLFLAG_RD, &stats->bptc,
5308			"Broadcast Packets Transmitted");
5309	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_txd",
5310			CTLFLAG_RD, &stats->mptc,
5311			"Multicast Packets Transmitted");
5312	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_64",
5313			CTLFLAG_RD, &stats->ptc64,
5314			"64 byte frames transmitted ");
5315	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_65_127",
5316			CTLFLAG_RD, &stats->ptc127,
5317			"65-127 byte frames transmitted");
5318	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_128_255",
5319			CTLFLAG_RD, &stats->ptc255,
5320			"128-255 byte frames transmitted");
5321	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_256_511",
5322			CTLFLAG_RD, &stats->ptc511,
5323			"256-511 byte frames transmitted");
5324	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_512_1023",
5325			CTLFLAG_RD, &stats->ptc1023,
5326			"512-1023 byte frames transmitted");
5327	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_1024_1522",
5328			CTLFLAG_RD, &stats->ptc1522,
5329			"1024-1522 byte frames transmitted");
5330	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tso_txd",
5331			CTLFLAG_RD, &stats->tsctc,
5332			"TSO Contexts Transmitted");
5333	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tso_ctx_fail",
5334			CTLFLAG_RD, &stats->tsctfc,
5335			"TSO Contexts Failed");
5336
5337
5338	/* Interrupt Stats */
5339
5340	int_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "interrupts",
5341				    CTLFLAG_RD, NULL, "Interrupt Statistics");
5342	int_list = SYSCTL_CHILDREN(int_node);
5343
5344	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "asserts",
5345			CTLFLAG_RD, &stats->iac,
5346			"Interrupt Assertion Count");
5347
5348	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_pkt_timer",
5349			CTLFLAG_RD, &stats->icrxptc,
5350			"Interrupt Cause Rx Pkt Timer Expire Count");
5351
5352	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_abs_timer",
5353			CTLFLAG_RD, &stats->icrxatc,
5354			"Interrupt Cause Rx Abs Timer Expire Count");
5355
5356	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_pkt_timer",
5357			CTLFLAG_RD, &stats->ictxptc,
5358			"Interrupt Cause Tx Pkt Timer Expire Count");
5359
5360	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_abs_timer",
5361			CTLFLAG_RD, &stats->ictxatc,
5362			"Interrupt Cause Tx Abs Timer Expire Count");
5363
5364	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_queue_empty",
5365			CTLFLAG_RD, &stats->ictxqec,
5366			"Interrupt Cause Tx Queue Empty Count");
5367
5368	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_queue_min_thresh",
5369			CTLFLAG_RD, &stats->ictxqmtc,
5370			"Interrupt Cause Tx Queue Min Thresh Count");
5371
5372	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_desc_min_thresh",
5373			CTLFLAG_RD, &stats->icrxdmtc,
5374			"Interrupt Cause Rx Desc Min Thresh Count");
5375
5376	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_overrun",
5377			CTLFLAG_RD, &stats->icrxoc,
5378			"Interrupt Cause Receiver Overrun Count");
5379
5380	/* Host to Card Stats */
5381
5382	host_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "host",
5383				    CTLFLAG_RD, NULL,
5384				    "Host to Card Statistics");
5385
5386	host_list = SYSCTL_CHILDREN(host_node);
5387
5388	SYSCTL_ADD_UQUAD(ctx, host_list, OID_AUTO, "breaker_tx_pkt",
5389			CTLFLAG_RD, &stats->cbtmpc,
5390			"Circuit Breaker Tx Packet Count");
5391
5392	SYSCTL_ADD_UQUAD(ctx, host_list, OID_AUTO, "host_tx_pkt_discard",
5393			CTLFLAG_RD, &stats->htdpmc,
5394			"Host Transmit Discarded Packets");
5395
5396	SYSCTL_ADD_UQUAD(ctx, host_list, OID_AUTO, "rx_pkt",
5397			CTLFLAG_RD, &stats->rpthc,
5398			"Rx Packets To Host");
5399
5400	SYSCTL_ADD_UQUAD(ctx, host_list, OID_AUTO, "breaker_rx_pkts",
5401			CTLFLAG_RD, &stats->cbrmpc,
5402			"Circuit Breaker Rx Packet Count");
5403
5404	SYSCTL_ADD_UQUAD(ctx, host_list, OID_AUTO, "breaker_rx_pkt_drop",
5405			CTLFLAG_RD, &stats->cbrdpc,
5406			"Circuit Breaker Rx Dropped Count");
5407
5408	SYSCTL_ADD_UQUAD(ctx, host_list, OID_AUTO, "tx_good_pkt",
5409			CTLFLAG_RD, &stats->hgptc,
5410			"Host Good Packets Tx Count");
5411
5412	SYSCTL_ADD_UQUAD(ctx, host_list, OID_AUTO, "breaker_tx_pkt_drop",
5413			CTLFLAG_RD, &stats->htcbdpc,
5414			"Host Tx Circuit Breaker Dropped Count");
5415
5416	SYSCTL_ADD_UQUAD(ctx, host_list, OID_AUTO, "rx_good_bytes",
5417			CTLFLAG_RD, &stats->hgorc,
5418			"Host Good Octets Received Count");
5419
5420	SYSCTL_ADD_UQUAD(ctx, host_list, OID_AUTO, "tx_good_bytes",
5421			CTLFLAG_RD, &stats->hgotc,
5422			"Host Good Octets Transmit Count");
5423
5424	SYSCTL_ADD_UQUAD(ctx, host_list, OID_AUTO, "length_errors",
5425			CTLFLAG_RD, &stats->lenerrs,
5426			"Length Errors");
5427
5428	SYSCTL_ADD_UQUAD(ctx, host_list, OID_AUTO, "serdes_violation_pkt",
5429			CTLFLAG_RD, &stats->scvpc,
5430			"SerDes/SGMII Code Violation Pkt Count");
5431
5432	SYSCTL_ADD_UQUAD(ctx, host_list, OID_AUTO, "header_redir_missed",
5433			CTLFLAG_RD, &stats->hrmpc,
5434			"Header Redirection Missed Packet Count");
5435}
5436
5437
5438/**********************************************************************
5439 *
5440 *  This routine provides a way to dump out the adapter eeprom,
5441 *  often a useful debug/service tool. This only dumps the first
5442 *  32 words, stuff that matters is in that extent.
5443 *
5444 **********************************************************************/
5445static int
5446igb_sysctl_nvm_info(SYSCTL_HANDLER_ARGS)
5447{
5448	struct adapter *adapter;
5449	int error;
5450	int result;
5451
5452	result = -1;
5453	error = sysctl_handle_int(oidp, &result, 0, req);
5454
5455	if (error || !req->newptr)
5456		return (error);
5457
5458	/*
5459	 * This value will cause a hex dump of the
5460	 * first 32 16-bit words of the EEPROM to
5461	 * the screen.
5462	 */
5463	if (result == 1) {
5464		adapter = (struct adapter *)arg1;
5465		igb_print_nvm_info(adapter);
5466        }
5467
5468	return (error);
5469}
5470
5471static void
5472igb_print_nvm_info(struct adapter *adapter)
5473{
5474	u16	eeprom_data;
5475	int	i, j, row = 0;
5476
5477	/* Its a bit crude, but it gets the job done */
5478	printf("\nInterface EEPROM Dump:\n");
5479	printf("Offset\n0x0000  ");
5480	for (i = 0, j = 0; i < 32; i++, j++) {
5481		if (j == 8) { /* Make the offset block */
5482			j = 0; ++row;
5483			printf("\n0x00%x0  ",row);
5484		}
5485		e1000_read_nvm(&adapter->hw, i, 1, &eeprom_data);
5486		printf("%04x ", eeprom_data);
5487	}
5488	printf("\n");
5489}
5490
5491static void
5492igb_add_rx_process_limit(struct adapter *adapter, const char *name,
5493	const char *description, int *limit, int value)
5494{
5495	*limit = value;
5496	SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
5497	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
5498	    OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW, limit, value, description);
5499}
5500