/******************************************************************************

  Copyright (c) 2001-2011, Intel Corporation
  All rights reserved.

  Redistribution and use in source and binary forms, with or without
  modification, are permitted provided that the following conditions are met:

   1. Redistributions of source code must retain the above copyright notice,
      this list of conditions and the following disclaimer.

   2. Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.

   3. Neither the name of the Intel Corporation nor the names of its
      contributors may be used to endorse or promote products derived from
      this software without specific prior written permission.

  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  POSSIBILITY OF SUCH DAMAGE.

******************************************************************************/
/*$FreeBSD: head/sys/dev/e1000/if_igb.c 228281 2011-12-05 15:33:13Z luigi $*/


#ifdef HAVE_KERNEL_OPTION_HEADERS
#include "opt_device_polling.h"
#include "opt_inet.h"
#include "opt_inet6.h"
#include "opt_altq.h"
#endif

#include <sys/param.h>
#include <sys/systm.h>
#if __FreeBSD_version >= 800000
#include <sys/buf_ring.h>
#endif
#include <sys/bus.h>
#include <sys/endian.h>
#include <sys/kernel.h>
#include <sys/kthread.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/module.h>
#include <sys/rman.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/sysctl.h>
#include <sys/taskqueue.h>
#include <sys/eventhandler.h>
#include <sys/pcpu.h>
#include <sys/smp.h>
#include <machine/smp.h>
#include <machine/bus.h>
#include <machine/resource.h>

#include <net/bpf.h>
#include <net/ethernet.h>
#include <net/if.h>
#include <net/if_arp.h>
#include <net/if_dl.h>
#include <net/if_media.h>

#include <net/if_types.h>
#include <net/if_vlan_var.h>

#include <netinet/in_systm.h>
#include <netinet/in.h>
#include <netinet/if_ether.h>
#include <netinet/ip.h>
#include <netinet/ip6.h>
#include <netinet/tcp.h>
#include <netinet/tcp_lro.h>
#include <netinet/udp.h>

#include <machine/in_cksum.h>
#include <dev/led/led.h>
#include <dev/pci/pcivar.h>
#include <dev/pci/pcireg.h>

#include "e1000_api.h"
#include "e1000_82575.h"
#include "if_igb.h"

/*********************************************************************
 *  Set this to one to display debug statistics
 *********************************************************************/
int	igb_display_debug_stats = 0;

/*********************************************************************
 *  Driver version:
 *********************************************************************/
char igb_driver_version[] = "version - 2.2.5";


/*********************************************************************
 *  PCI Device ID Table
 *
 *  Used by probe to select the devices to load on.
 *  The last field stores an index into e1000_strings.
 *  The last entry must be all 0s.
 *
 *  { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
 *********************************************************************/

static igb_vendor_info_t igb_vendor_info_array[] =
{
	{ 0x8086, E1000_DEV_ID_82575EB_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82575EB_FIBER_SERDES,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82575GB_QUAD_COPPER,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82576,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82576_NS,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82576_NS_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82576_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82576_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82576_SERDES_QUAD,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82576_QUAD_COPPER,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82576_QUAD_COPPER_ET2,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82576_VF,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82580_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82580_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82580_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82580_SGMII,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82580_COPPER_DUAL,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82580_QUAD_FIBER,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_DH89XXCC_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_DH89XXCC_SGMII,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_DH89XXCC_SFP,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_DH89XXCC_BACKPLANE,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_I350_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_I350_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_I350_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_I350_SGMII,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_I350_VF,		PCI_ANY_ID, PCI_ANY_ID, 0},
	/* required last entry */
	{ 0, 0, 0, 0, 0}
};

/*********************************************************************
 *  Table of branding strings for all supported NICs.
 *********************************************************************/

static char *igb_strings[] = {
	"Intel(R) PRO/1000 Network Connection"
};

/*********************************************************************
 *  Function prototypes
 *********************************************************************/
static int	igb_probe(device_t);
static int	igb_attach(device_t);
static int	igb_detach(device_t);
static int	igb_shutdown(device_t);
static int	igb_suspend(device_t);
static int	igb_resume(device_t);
#if __FreeBSD_version >= 800000
static int	igb_mq_start(struct ifnet *, struct mbuf *);
static int	igb_mq_start_locked(struct ifnet *,
		    struct tx_ring *, struct mbuf *);
static void	igb_qflush(struct ifnet *);
static void	igb_deferred_mq_start(void *, int);
#else
static void	igb_start(struct ifnet *);
static void	igb_start_locked(struct tx_ring *, struct ifnet *ifp);
#endif
static int	igb_ioctl(struct ifnet *, u_long, caddr_t);
static void	igb_init(void *);
static void	igb_init_locked(struct adapter *);
static void	igb_stop(void *);
static void	igb_media_status(struct ifnet *, struct ifmediareq *);
static int	igb_media_change(struct ifnet *);
static void	igb_identify_hardware(struct adapter *);
static int	igb_allocate_pci_resources(struct adapter *);
static int	igb_allocate_msix(struct adapter *);
static int	igb_allocate_legacy(struct adapter *);
static int	igb_setup_msix(struct adapter *);
static void	igb_free_pci_resources(struct adapter *);
static void	igb_local_timer(void *);
static void	igb_reset(struct adapter *);
static int	igb_setup_interface(device_t, struct adapter *);
static int	igb_allocate_queues(struct adapter *);
static void	igb_configure_queues(struct adapter *);

static int	igb_allocate_transmit_buffers(struct tx_ring *);
static void	igb_setup_transmit_structures(struct adapter *);
static void	igb_setup_transmit_ring(struct tx_ring *);
static void	igb_initialize_transmit_units(struct adapter *);
static void	igb_free_transmit_structures(struct adapter *);
static void	igb_free_transmit_buffers(struct tx_ring *);

static int	igb_allocate_receive_buffers(struct rx_ring *);
static int	igb_setup_receive_structures(struct adapter *);
static int	igb_setup_receive_ring(struct rx_ring *);
static void	igb_initialize_receive_units(struct adapter *);
static void	igb_free_receive_structures(struct adapter *);
static void	igb_free_receive_buffers(struct rx_ring *);
static void	igb_free_receive_ring(struct rx_ring *);

static void	igb_enable_intr(struct adapter *);
static void	igb_disable_intr(struct adapter *);
static void	igb_update_stats_counters(struct adapter *);
static bool	igb_txeof(struct tx_ring *);

static __inline	void igb_rx_discard(struct rx_ring *, int);
static __inline void igb_rx_input(struct rx_ring *,
		    struct ifnet *, struct mbuf *, u32);

static bool	igb_rxeof(struct igb_queue *, int, int *);
static void	igb_rx_checksum(u32, struct mbuf *, u32);
static int	igb_tx_ctx_setup(struct tx_ring *, struct mbuf *);
static bool	igb_tso_setup(struct tx_ring *, struct mbuf *, u32 *);
static void	igb_set_promisc(struct adapter *);
static void	igb_disable_promisc(struct adapter *);
static void	igb_set_multi(struct adapter *);
static void	igb_update_link_status(struct adapter *);
static void	igb_refresh_mbufs(struct rx_ring *, int);

static void	igb_register_vlan(void *, struct ifnet *, u16);
static void	igb_unregister_vlan(void *, struct ifnet *, u16);
static void	igb_setup_vlan_hw_support(struct adapter *);

static int	igb_xmit(struct tx_ring *, struct mbuf **);
static int	igb_dma_malloc(struct adapter *, bus_size_t,
		    struct igb_dma_alloc *, int);
static void	igb_dma_free(struct adapter *, struct igb_dma_alloc *);
static int	igb_sysctl_nvm_info(SYSCTL_HANDLER_ARGS);
static void	igb_print_nvm_info(struct adapter *);
static int	igb_is_valid_ether_addr(u8 *);
static void	igb_add_hw_stats(struct adapter *);

static void	igb_vf_init_stats(struct adapter *);
static void	igb_update_vf_stats_counters(struct adapter *);

/* Management and WOL Support */
static void	igb_init_manageability(struct adapter *);
static void	igb_release_manageability(struct adapter *);
static void	igb_get_hw_control(struct adapter *);
static void	igb_release_hw_control(struct adapter *);
static void	igb_enable_wakeup(device_t);
static void	igb_led_func(void *, int);

static int	igb_irq_fast(void *);
static void	igb_msix_que(void *);
static void	igb_msix_link(void *);
static void	igb_handle_que(void *context, int pending);
static void	igb_handle_link(void *context, int pending);

static void	igb_set_sysctl_value(struct adapter *, const char *,
		    const char *, int *, int);
static int	igb_set_flowcntl(SYSCTL_HANDLER_ARGS);
static int	igb_sysctl_dmac(SYSCTL_HANDLER_ARGS);

#ifdef DEVICE_POLLING
static poll_handler_t igb_poll;
#endif /* DEVICE_POLLING */

/*********************************************************************
 *  FreeBSD Device Interface Entry Points
 *********************************************************************/

static device_method_t igb_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe, igb_probe),
	DEVMETHOD(device_attach, igb_attach),
	DEVMETHOD(device_detach, igb_detach),
	DEVMETHOD(device_shutdown, igb_shutdown),
	DEVMETHOD(device_suspend, igb_suspend),
	DEVMETHOD(device_resume, igb_resume),
	{0, 0}
};

static driver_t igb_driver = {
	"igb", igb_methods, sizeof(struct adapter),
};

static devclass_t igb_devclass;
DRIVER_MODULE(igb, pci, igb_driver, igb_devclass, 0, 0);
MODULE_DEPEND(igb, pci, 1, 1, 1);
MODULE_DEPEND(igb, ether, 1, 1, 1);

/*********************************************************************
 *  Tunable default values.
 *********************************************************************/

static SYSCTL_NODE(_hw, OID_AUTO, igb, CTLFLAG_RD, 0, "IGB driver parameters");

/* Descriptor defaults */
static int igb_rxd = IGB_DEFAULT_RXD;
static int igb_txd = IGB_DEFAULT_TXD;
TUNABLE_INT("hw.igb.rxd", &igb_rxd);
TUNABLE_INT("hw.igb.txd", &igb_txd);
SYSCTL_INT(_hw_igb, OID_AUTO, rxd, CTLFLAG_RDTUN, &igb_rxd, 0,
    "Number of receive descriptors per queue");
SYSCTL_INT(_hw_igb, OID_AUTO, txd, CTLFLAG_RDTUN, &igb_txd, 0,
    "Number of transmit descriptors per queue");

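/*
 * Usage note: since the descriptor counts are registered with
 * TUNABLE_INT and CTLFLAG_RDTUN, they are boot-time loader tunables,
 * set from /boot/loader.conf, e.g.:
 *
 *	hw.igb.rxd="2048"
 *	hw.igb.txd="2048"
 *
 * Knobs created with CTLFLAG_RW (such as hw.igb.enable_aim below) can
 * also be changed at runtime with sysctl(8).
 */
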
/*
** AIM: Adaptive Interrupt Moderation,
** which means that the interrupt rate
** is varied over time based on the
** traffic for that interrupt vector.
*/
static int igb_enable_aim = TRUE;
TUNABLE_INT("hw.igb.enable_aim", &igb_enable_aim);
SYSCTL_INT(_hw_igb, OID_AUTO, enable_aim, CTLFLAG_RW, &igb_enable_aim, 0,
    "Enable adaptive interrupt moderation");
/*
 * MSIX should be the default for best performance,
 * but this allows it to be forced off for testing.
 */
static int igb_enable_msix = 1;
TUNABLE_INT("hw.igb.enable_msix", &igb_enable_msix);
SYSCTL_INT(_hw_igb, OID_AUTO, enable_msix, CTLFLAG_RDTUN, &igb_enable_msix, 0,
    "Enable MSI-X interrupts");

/*
** Tunable interrupt rate
*/
static int igb_max_interrupt_rate = 8000;
TUNABLE_INT("hw.igb.max_interrupt_rate", &igb_max_interrupt_rate);
SYSCTL_INT(_hw_igb, OID_AUTO, max_interrupt_rate, CTLFLAG_RDTUN,
    &igb_max_interrupt_rate, 0, "Maximum interrupts per second");

/*
** Header split causes the packet header to
** be dma'd to a separate mbuf from the payload.
** This can have memory alignment benefits, and
** another plus is that small packets often fit
** into the header and thus use no cluster. It's
** a very workload-dependent feature.
*/
static bool igb_header_split = FALSE;
TUNABLE_INT("hw.igb.hdr_split", &igb_header_split);
SYSCTL_INT(_hw_igb, OID_AUTO, header_split, CTLFLAG_RDTUN, &igb_header_split, 0,
    "Enable receive mbuf header split");

/*
** This will autoconfigure based on
** the number of CPUs if left at 0.
*/
static int igb_num_queues = 0;
TUNABLE_INT("hw.igb.num_queues", &igb_num_queues);
SYSCTL_INT(_hw_igb, OID_AUTO, num_queues, CTLFLAG_RDTUN, &igb_num_queues, 0,
    "Number of queues to configure, 0 indicates autoconfigure");

/* How many packets rxeof tries to clean at a time */
static int igb_rx_process_limit = 100;
TUNABLE_INT("hw.igb.rx_process_limit", &igb_rx_process_limit);
SYSCTL_INT(_hw_igb, OID_AUTO, rx_process_limit, CTLFLAG_RDTUN,
    &igb_rx_process_limit, 0,
    "Maximum number of received packets to process at a time, -1 means unlimited");

#ifdef DEV_NETMAP	/* see ixgbe.c for details */
#include <dev/netmap/if_igb_netmap.h>
#endif /* DEV_NETMAP */
/*********************************************************************
 *  Device identification routine
 *
 *  igb_probe determines if the driver should be loaded on
 *  an adapter based on its PCI vendor/device ID.
 *
 *  return BUS_PROBE_DEFAULT on success, positive on failure
 *********************************************************************/

static int
igb_probe(device_t dev)
{
	char		adapter_name[60];
	uint16_t	pci_vendor_id = 0;
	uint16_t	pci_device_id = 0;
	uint16_t	pci_subvendor_id = 0;
	uint16_t	pci_subdevice_id = 0;
	igb_vendor_info_t *ent;

	INIT_DEBUGOUT("igb_probe: begin");

	pci_vendor_id = pci_get_vendor(dev);
	if (pci_vendor_id != IGB_VENDOR_ID)
		return (ENXIO);

	pci_device_id = pci_get_device(dev);
	pci_subvendor_id = pci_get_subvendor(dev);
	pci_subdevice_id = pci_get_subdevice(dev);

	ent = igb_vendor_info_array;
	while (ent->vendor_id != 0) {
		if ((pci_vendor_id == ent->vendor_id) &&
		    (pci_device_id == ent->device_id) &&
		    ((pci_subvendor_id == ent->subvendor_id) ||
		    (ent->subvendor_id == PCI_ANY_ID)) &&
		    ((pci_subdevice_id == ent->subdevice_id) ||
		    (ent->subdevice_id == PCI_ANY_ID))) {
			sprintf(adapter_name, "%s %s",
				igb_strings[ent->index],
				igb_driver_version);
			device_set_desc_copy(dev, adapter_name);
			return (BUS_PROBE_DEFAULT);
		}
		ent++;
	}

	return (ENXIO);
}

/*********************************************************************
 *  Device initialization routine
 *
 *  The attach entry point is called when the driver is being loaded.
 *  This routine identifies the type of hardware, allocates all resources
 *  and initializes the hardware.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/

static int
igb_attach(device_t dev)
{
	struct adapter	*adapter;
	int		error = 0;
	u16		eeprom_data;

	INIT_DEBUGOUT("igb_attach: begin");

	if (resource_disabled("igb", device_get_unit(dev))) {
		device_printf(dev, "Disabled by device hint\n");
		return (ENXIO);
	}

	adapter = device_get_softc(dev);
	adapter->dev = adapter->osdep.dev = dev;
	IGB_CORE_LOCK_INIT(adapter, device_get_nameunit(dev));

	/* SYSCTL stuff */
	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
	    OID_AUTO, "nvm", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
	    igb_sysctl_nvm_info, "I", "NVM Information");

	igb_set_sysctl_value(adapter, "enable_aim",
	    "Interrupt Moderation", &adapter->enable_aim,
	    igb_enable_aim);

	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
	    OID_AUTO, "fc", CTLTYPE_INT|CTLFLAG_RW,
	    adapter, 0, igb_set_flowcntl, "I", "Flow Control");

	callout_init_mtx(&adapter->timer, &adapter->core_mtx, 0);

	/* Determine hardware and mac info */
	igb_identify_hardware(adapter);

	/* Setup PCI resources */
	if (igb_allocate_pci_resources(adapter)) {
		device_printf(dev, "Allocation of PCI resources failed\n");
		error = ENXIO;
		goto err_pci;
	}

	/* Do Shared Code initialization */
	if (e1000_setup_init_funcs(&adapter->hw, TRUE)) {
		device_printf(dev, "Setup of Shared code failed\n");
		error = ENXIO;
		goto err_pci;
	}

	e1000_get_bus_info(&adapter->hw);

	/* Sysctl for limiting the amount of work done in the taskqueue */
	igb_set_sysctl_value(adapter, "rx_processing_limit",
	    "max number of rx packets to process",
	    &adapter->rx_process_limit, igb_rx_process_limit);

	/*
	 * Validate the number of transmit and receive descriptors. It
	 * must not exceed the hardware maximum, and the ring size must
	 * be a multiple of IGB_DBA_ALIGN.
	 */
	if (((igb_txd * sizeof(struct e1000_tx_desc)) % IGB_DBA_ALIGN) != 0 ||
	    (igb_txd > IGB_MAX_TXD) || (igb_txd < IGB_MIN_TXD)) {
		device_printf(dev, "Using %d TX descriptors instead of %d!\n",
		    IGB_DEFAULT_TXD, igb_txd);
		adapter->num_tx_desc = IGB_DEFAULT_TXD;
	} else
		adapter->num_tx_desc = igb_txd;
	if (((igb_rxd * sizeof(struct e1000_rx_desc)) % IGB_DBA_ALIGN) != 0 ||
	    (igb_rxd > IGB_MAX_RXD) || (igb_rxd < IGB_MIN_RXD)) {
		device_printf(dev, "Using %d RX descriptors instead of %d!\n",
		    IGB_DEFAULT_RXD, igb_rxd);
		adapter->num_rx_desc = IGB_DEFAULT_RXD;
	} else
		adapter->num_rx_desc = igb_rxd;
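	/*
	 * Worked example (assuming the 16-byte descriptor size and the
	 * 128-byte IGB_DBA_ALIGN from this driver's headers): the ring
	 * length must then be a multiple of 128 / 16 = 8 descriptors,
	 * so hw.igb.txd="1024" passes the check above, while a value
	 * like 1030 is rejected and falls back to IGB_DEFAULT_TXD.
	 */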

	adapter->hw.mac.autoneg = DO_AUTO_NEG;
	adapter->hw.phy.autoneg_wait_to_complete = FALSE;
	adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;

	/* Copper options */
	if (adapter->hw.phy.media_type == e1000_media_type_copper) {
		adapter->hw.phy.mdix = AUTO_ALL_MODES;
		adapter->hw.phy.disable_polarity_correction = FALSE;
		adapter->hw.phy.ms_type = IGB_MASTER_SLAVE;
	}

	/*
	 * Set the frame limits assuming
	 * standard ethernet sized frames.
	 */
	adapter->max_frame_size = ETHERMTU + ETHER_HDR_LEN + ETHERNET_FCS_SIZE;
	adapter->min_frame_size = ETH_ZLEN + ETHERNET_FCS_SIZE;

	/*
	** Allocate and Setup Queues
	*/
	if (igb_allocate_queues(adapter)) {
		error = ENOMEM;
		goto err_pci;
	}

	/* Allocate the appropriate stats memory */
	if (adapter->vf_ifp) {
		adapter->stats = (struct e1000_vf_stats *)malloc(
		    sizeof(struct e1000_vf_stats), M_DEVBUF,
		    M_NOWAIT | M_ZERO);
		igb_vf_init_stats(adapter);
	} else
		adapter->stats = (struct e1000_hw_stats *)malloc(
		    sizeof(struct e1000_hw_stats), M_DEVBUF,
		    M_NOWAIT | M_ZERO);
	if (adapter->stats == NULL) {
		device_printf(dev, "Can not allocate stats memory\n");
		error = ENOMEM;
		goto err_late;
	}

	/* Allocate multicast array memory. */
	adapter->mta = malloc(sizeof(u8) * ETH_ADDR_LEN *
	    MAX_NUM_MULTICAST_ADDRESSES, M_DEVBUF, M_NOWAIT);
	if (adapter->mta == NULL) {
		device_printf(dev, "Can not allocate multicast setup array\n");
		error = ENOMEM;
		goto err_late;
	}

	/* Some adapter-specific advanced features */
	if (adapter->hw.mac.type >= e1000_i350) {
		SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
		    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
		    OID_AUTO, "dmac", CTLTYPE_INT|CTLFLAG_RW,
		    adapter, 0, igb_sysctl_dmac, "I", "DMA Coalesce");
		igb_set_sysctl_value(adapter, "eee_disabled",
		    "disable Energy Efficient Ethernet",
		    &adapter->hw.dev_spec._82575.eee_disable,
		    TRUE);
		e1000_set_eee_i350(&adapter->hw);
	}

	/*
	** Start from a known state: this is
	** important for reading the NVM and
	** MAC address from it.
	*/
	e1000_reset_hw(&adapter->hw);

	/* Make sure we have a good EEPROM before we read from it */
	if (e1000_validate_nvm_checksum(&adapter->hw) < 0) {
		/*
		** Some PCI-E parts fail the first check due to
		** the link being in a sleep state; call it again,
		** if it fails a second time it's a real issue.
		*/
		if (e1000_validate_nvm_checksum(&adapter->hw) < 0) {
			device_printf(dev,
			    "The EEPROM Checksum Is Not Valid\n");
			error = EIO;
			goto err_late;
		}
	}

	/*
	** Copy the permanent MAC address out of the EEPROM
	*/
	if (e1000_read_mac_addr(&adapter->hw) < 0) {
		device_printf(dev, "EEPROM read error while reading MAC"
		    " address\n");
		error = EIO;
		goto err_late;
	}
	/* Check its sanity */
	if (!igb_is_valid_ether_addr(adapter->hw.mac.addr)) {
		device_printf(dev, "Invalid MAC address\n");
		error = EIO;
		goto err_late;
	}

	/*
	** Configure Interrupts
	*/
	if ((adapter->msix > 1) && (igb_enable_msix))
		error = igb_allocate_msix(adapter);
	else /* MSI or Legacy */
		error = igb_allocate_legacy(adapter);
	if (error)
		goto err_late;

	/* Setup OS specific network interface */
	if (igb_setup_interface(dev, adapter) != 0)
		goto err_late;

	/* Now get a good starting state */
	igb_reset(adapter);

	/* Initialize statistics */
	igb_update_stats_counters(adapter);

	adapter->hw.mac.get_link_status = 1;
	igb_update_link_status(adapter);

	/* Indicate SOL/IDER usage */
	if (e1000_check_reset_block(&adapter->hw))
		device_printf(dev,
		    "PHY reset is blocked due to SOL/IDER session.\n");

	/* Determine if we have to control management hardware */
	adapter->has_manage = e1000_enable_mng_pass_thru(&adapter->hw);

	/*
	 * Setup Wake-on-Lan
	 */
	/* APME bit in EEPROM is mapped to WUC.APME */
	eeprom_data = E1000_READ_REG(&adapter->hw, E1000_WUC) & E1000_WUC_APME;
	if (eeprom_data)
		adapter->wol = E1000_WUFC_MAG;

	/* Register for VLAN events */
	adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
	     igb_register_vlan, adapter, EVENTHANDLER_PRI_FIRST);
	adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
	     igb_unregister_vlan, adapter, EVENTHANDLER_PRI_FIRST);

	igb_add_hw_stats(adapter);

	/* Tell the stack that the interface is not active */
	adapter->ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);

	adapter->led_dev = led_create(igb_led_func, adapter,
	    device_get_nameunit(dev));

#ifdef DEV_NETMAP
	igb_netmap_attach(adapter);
#endif /* DEV_NETMAP */
	INIT_DEBUGOUT("igb_attach: end");

	return (0);

err_late:
	igb_detach(dev);
	igb_free_transmit_structures(adapter);
	igb_free_receive_structures(adapter);
	igb_release_hw_control(adapter);
	if (adapter->ifp != NULL)
		if_free(adapter->ifp);
err_pci:
	igb_free_pci_resources(adapter);
	free(adapter->mta, M_DEVBUF);
	IGB_CORE_LOCK_DESTROY(adapter);

	return (error);
}

/*********************************************************************
 *  Device removal routine
 *
 *  The detach entry point is called when the driver is being removed.
 *  This routine stops the adapter and deallocates all the resources
 *  that were allocated for driver operation.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/

static int
igb_detach(device_t dev)
{
	struct adapter	*adapter = device_get_softc(dev);
	struct ifnet	*ifp = adapter->ifp;

	INIT_DEBUGOUT("igb_detach: begin");

	/* Make sure VLANs are not still using the driver */
	if (adapter->ifp->if_vlantrunk != NULL) {
		device_printf(dev, "Vlan in use, detach first\n");
		return (EBUSY);
	}

	ether_ifdetach(adapter->ifp);

	if (adapter->led_dev != NULL)
		led_destroy(adapter->led_dev);

#ifdef DEVICE_POLLING
	if (ifp->if_capenable & IFCAP_POLLING)
		ether_poll_deregister(ifp);
#endif

	IGB_CORE_LOCK(adapter);
	adapter->in_detach = 1;
	igb_stop(adapter);
	IGB_CORE_UNLOCK(adapter);

	e1000_phy_hw_reset(&adapter->hw);

	/* Give control back to firmware */
	igb_release_manageability(adapter);
	igb_release_hw_control(adapter);

	if (adapter->wol) {
		E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
		E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
		igb_enable_wakeup(dev);
	}

	/* Unregister VLAN events */
	if (adapter->vlan_attach != NULL)
		EVENTHANDLER_DEREGISTER(vlan_config, adapter->vlan_attach);
	if (adapter->vlan_detach != NULL)
		EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach);

	callout_drain(&adapter->timer);

#ifdef DEV_NETMAP
	netmap_detach(adapter->ifp);
#endif /* DEV_NETMAP */
	igb_free_pci_resources(adapter);
	bus_generic_detach(dev);
	if_free(ifp);

	igb_free_transmit_structures(adapter);
	igb_free_receive_structures(adapter);
	if (adapter->mta != NULL)
		free(adapter->mta, M_DEVBUF);

	IGB_CORE_LOCK_DESTROY(adapter);

	return (0);
}

/*********************************************************************
 *
 *  Shutdown entry point
 *
 **********************************************************************/

static int
igb_shutdown(device_t dev)
{
	return igb_suspend(dev);
}

/*
 * Suspend/resume device methods.
 */
static int
igb_suspend(device_t dev)
{
	struct adapter *adapter = device_get_softc(dev);

	IGB_CORE_LOCK(adapter);

	igb_stop(adapter);

	igb_release_manageability(adapter);
	igb_release_hw_control(adapter);

	if (adapter->wol) {
		E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
		E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
		igb_enable_wakeup(dev);
	}

	IGB_CORE_UNLOCK(adapter);

	return bus_generic_suspend(dev);
}

static int
igb_resume(device_t dev)
{
	struct adapter *adapter = device_get_softc(dev);
	struct ifnet *ifp = adapter->ifp;
#if __FreeBSD_version >= 800000
	struct tx_ring *txr = adapter->tx_rings;
#endif

	IGB_CORE_LOCK(adapter);
	igb_init_locked(adapter);
	igb_init_manageability(adapter);

	if ((ifp->if_flags & IFF_UP) &&
	    (ifp->if_drv_flags & IFF_DRV_RUNNING)) {
#if __FreeBSD_version < 800000
		igb_start(ifp);
#else
		for (int i = 0; i < adapter->num_queues; i++, txr++) {
			IGB_TX_LOCK(txr);
			if (!drbr_empty(ifp, txr->br))
				igb_mq_start_locked(ifp, txr, NULL);
			IGB_TX_UNLOCK(txr);
		}
#endif
	}

	IGB_CORE_UNLOCK(adapter);

	return bus_generic_resume(dev);
}


#if __FreeBSD_version < 800000
/*********************************************************************
 *  Transmit entry point
 *
 *  igb_start is called by the stack to initiate a transmit.
 *  The driver will remain in this routine as long as there are
 *  packets to transmit and transmit resources are available.
 *  In case resources are not available, the stack is notified and
 *  the packet is requeued.
 **********************************************************************/

static void
igb_start_locked(struct tx_ring *txr, struct ifnet *ifp)
{
	struct adapter	*adapter = ifp->if_softc;
	struct mbuf	*m_head;

	IGB_TX_LOCK_ASSERT(txr);

	if ((ifp->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
	    IFF_DRV_RUNNING)
		return;
	if (!adapter->link_active)
		return;

	/* Call cleanup if number of TX descriptors low */
	if (txr->tx_avail <= IGB_TX_CLEANUP_THRESHOLD)
		igb_txeof(txr);

	while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
		if (txr->tx_avail <= IGB_MAX_SCATTER) {
			ifp->if_drv_flags |= IFF_DRV_OACTIVE;
			break;
		}
		IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
		if (m_head == NULL)
			break;
		/*
		 *  Encapsulation can modify our pointer, and/or make it
		 *  NULL on failure.  In that event, we can't requeue.
		 */
		if (igb_xmit(txr, &m_head)) {
			if (m_head == NULL)
				break;
			ifp->if_drv_flags |= IFF_DRV_OACTIVE;
			IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
			break;
		}

		/* Send a copy of the frame to the BPF listener */
		ETHER_BPF_MTAP(ifp, m_head);

		/* Set watchdog on */
		txr->watchdog_time = ticks;
		txr->queue_status = IGB_QUEUE_WORKING;
	}
}

/*
 * Legacy TX driver routine, called from the
 * stack; it always uses tx[0] and spins for the lock.
 * It should not be used with multiqueue tx enabled.
 */
static void
igb_start(struct ifnet *ifp)
{
	struct adapter	*adapter = ifp->if_softc;
	struct tx_ring	*txr = adapter->tx_rings;

	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
		IGB_TX_LOCK(txr);
		igb_start_locked(txr, ifp);
		IGB_TX_UNLOCK(txr);
	}
	return;
}

#else /* __FreeBSD_version >= 800000 */
/*
** Multiqueue Transmit driver
**
*/
static int
igb_mq_start(struct ifnet *ifp, struct mbuf *m)
{
	struct adapter		*adapter = ifp->if_softc;
	struct igb_queue	*que;
	struct tx_ring		*txr;
	int			i = 0, err = 0;

	/* Which queue to use */
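	/*
	 * If the stack set M_FLOWID, flowid carries a flow hash (e.g.
	 * one computed by RSS on receive), so taking it modulo
	 * num_queues keeps all packets of a flow on the same ring and
	 * preserves per-flow ordering; otherwise queue 0 is used.
	 */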
	if ((m->m_flags & M_FLOWID) != 0)
		i = m->m_pkthdr.flowid % adapter->num_queues;

	txr = &adapter->tx_rings[i];
	que = &adapter->queues[i];

	if (IGB_TX_TRYLOCK(txr)) {
		err = igb_mq_start_locked(ifp, txr, m);
		IGB_TX_UNLOCK(txr);
	} else {
		err = drbr_enqueue(ifp, txr->br, m);
		taskqueue_enqueue(que->tq, &txr->txq_task);
	}

	return (err);
}

static int
igb_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr, struct mbuf *m)
{
	struct adapter	*adapter = txr->adapter;
	struct mbuf	*next;
	int		err = 0, enq;

	IGB_TX_LOCK_ASSERT(txr);

	if ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) !=
	    IFF_DRV_RUNNING || adapter->link_active == 0) {
		if (m != NULL)
			err = drbr_enqueue(ifp, txr->br, m);
		return (err);
	}

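	/*
	 * Ordering note: a new mbuf is pushed through the buf_ring
	 * whenever packets are already queued ahead of it; it is only
	 * sent directly when nothing is waiting, which preserves the
	 * order in which the stack handed frames down.
	 */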
	enq = 0;
	if (m == NULL) {
		next = drbr_dequeue(ifp, txr->br);
	} else if (drbr_needs_enqueue(ifp, txr->br)) {
		if ((err = drbr_enqueue(ifp, txr->br, m)) != 0)
			return (err);
		next = drbr_dequeue(ifp, txr->br);
	} else
		next = m;

	/* Process the queue */
	while (next != NULL) {
		if ((err = igb_xmit(txr, &next)) != 0) {
			if (next != NULL)
				err = drbr_enqueue(ifp, txr->br, next);
			break;
		}
		enq++;
		drbr_stats_update(ifp, next->m_pkthdr.len, next->m_flags);
		ETHER_BPF_MTAP(ifp, next);
		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
			break;
		if (txr->tx_avail <= IGB_TX_CLEANUP_THRESHOLD)
			igb_txeof(txr);
		if (txr->tx_avail <= IGB_MAX_SCATTER) {
			ifp->if_drv_flags |= IFF_DRV_OACTIVE;
			break;
		}
		next = drbr_dequeue(ifp, txr->br);
	}
	if (enq > 0) {
		/* Set the watchdog */
		txr->queue_status = IGB_QUEUE_WORKING;
		txr->watchdog_time = ticks;
	}
	return (err);
}

/*
 * Called from a taskqueue to drain queued transmit packets.
 */
static void
igb_deferred_mq_start(void *arg, int pending)
{
	struct tx_ring *txr = arg;
	struct adapter *adapter = txr->adapter;
	struct ifnet *ifp = adapter->ifp;

	IGB_TX_LOCK(txr);
	if (!drbr_empty(ifp, txr->br))
		igb_mq_start_locked(ifp, txr, NULL);
	IGB_TX_UNLOCK(txr);
}

/*
** Flush all ring buffers
*/
static void
igb_qflush(struct ifnet *ifp)
{
	struct adapter	*adapter = ifp->if_softc;
	struct tx_ring	*txr = adapter->tx_rings;
	struct mbuf	*m;

	for (int i = 0; i < adapter->num_queues; i++, txr++) {
		IGB_TX_LOCK(txr);
		while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
			m_freem(m);
		IGB_TX_UNLOCK(txr);
	}
	if_qflush(ifp);
}
#endif /* __FreeBSD_version < 800000 */

/*********************************************************************
 *  Ioctl entry point
 *
 *  igb_ioctl is called when the user wants to configure the
 *  interface.
 *
 *  return 0 on success, positive on failure
 **********************************************************************/

static int
igb_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
{
	struct adapter	*adapter = ifp->if_softc;
	struct ifreq	*ifr = (struct ifreq *)data;
#if defined(INET) || defined(INET6)
	struct ifaddr	*ifa = (struct ifaddr *)data;
#endif
	bool		avoid_reset = FALSE;
	int		error = 0;

	if (adapter->in_detach)
		return (error);

	switch (command) {
	case SIOCSIFADDR:
#ifdef INET
		if (ifa->ifa_addr->sa_family == AF_INET)
			avoid_reset = TRUE;
#endif
#ifdef INET6
		if (ifa->ifa_addr->sa_family == AF_INET6)
			avoid_reset = TRUE;
#endif
		/*
		** Calling init results in link renegotiation,
		** so we avoid doing it when possible.
		*/
		if (avoid_reset) {
			ifp->if_flags |= IFF_UP;
			if (!(ifp->if_drv_flags & IFF_DRV_RUNNING))
				igb_init(adapter);
#ifdef INET
			if (!(ifp->if_flags & IFF_NOARP))
				arp_ifinit(ifp, ifa);
#endif
		} else
			error = ether_ioctl(ifp, command, data);
		break;
	case SIOCSIFMTU:
	    {
		int max_frame_size;

		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)");

		IGB_CORE_LOCK(adapter);
		max_frame_size = 9234;
		if (ifr->ifr_mtu > max_frame_size - ETHER_HDR_LEN -
		    ETHER_CRC_LEN) {
			IGB_CORE_UNLOCK(adapter);
			error = EINVAL;
			break;
		}

		ifp->if_mtu = ifr->ifr_mtu;
		adapter->max_frame_size =
		    ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
		igb_init_locked(adapter);
		IGB_CORE_UNLOCK(adapter);
		break;
	    }
	case SIOCSIFFLAGS:
		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFFLAGS (Set Interface Flags)");
		IGB_CORE_LOCK(adapter);
		if (ifp->if_flags & IFF_UP) {
			if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) {
				if ((ifp->if_flags ^ adapter->if_flags) &
				    (IFF_PROMISC | IFF_ALLMULTI)) {
					igb_disable_promisc(adapter);
					igb_set_promisc(adapter);
				}
			} else
				igb_init_locked(adapter);
		} else
			if (ifp->if_drv_flags & IFF_DRV_RUNNING)
				igb_stop(adapter);
		adapter->if_flags = ifp->if_flags;
		IGB_CORE_UNLOCK(adapter);
		break;
	case SIOCADDMULTI:
	case SIOCDELMULTI:
		IOCTL_DEBUGOUT("ioctl rcv'd: SIOC(ADD|DEL)MULTI");
		if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
			IGB_CORE_LOCK(adapter);
			igb_disable_intr(adapter);
			igb_set_multi(adapter);
#ifdef DEVICE_POLLING
			if (!(ifp->if_capenable & IFCAP_POLLING))
#endif
				igb_enable_intr(adapter);
			IGB_CORE_UNLOCK(adapter);
		}
		break;
	case SIOCSIFMEDIA:
		/*
		** As the speed/duplex settings are being
		** changed, we need to reset the PHY.
		*/
		adapter->hw.phy.reset_disable = FALSE;
		/* Check SOL/IDER usage */
		IGB_CORE_LOCK(adapter);
		if (e1000_check_reset_block(&adapter->hw)) {
			IGB_CORE_UNLOCK(adapter);
			device_printf(adapter->dev, "Media change is"
			    " blocked due to SOL/IDER session.\n");
			break;
		}
		IGB_CORE_UNLOCK(adapter);
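		/* FALLTHROUGH: SIOCSIFMEDIA is also handled by ifmedia_ioctl() below */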
	case SIOCGIFMEDIA:
		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCxIFMEDIA (Get/Set Interface Media)");
		error = ifmedia_ioctl(ifp, ifr, &adapter->media, command);
		break;
	case SIOCSIFCAP:
	    {
		int mask, reinit;

		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFCAP (Set Capabilities)");
		reinit = 0;
		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
#ifdef DEVICE_POLLING
		if (mask & IFCAP_POLLING) {
			if (ifr->ifr_reqcap & IFCAP_POLLING) {
				error = ether_poll_register(igb_poll, ifp);
				if (error)
					return (error);
				IGB_CORE_LOCK(adapter);
				igb_disable_intr(adapter);
				ifp->if_capenable |= IFCAP_POLLING;
				IGB_CORE_UNLOCK(adapter);
			} else {
				error = ether_poll_deregister(ifp);
				/* Enable interrupt even in error case */
				IGB_CORE_LOCK(adapter);
				igb_enable_intr(adapter);
				ifp->if_capenable &= ~IFCAP_POLLING;
				IGB_CORE_UNLOCK(adapter);
			}
		}
#endif
		if (mask & IFCAP_HWCSUM) {
			ifp->if_capenable ^= IFCAP_HWCSUM;
			reinit = 1;
		}
		if (mask & IFCAP_TSO4) {
			ifp->if_capenable ^= IFCAP_TSO4;
			reinit = 1;
		}
		if (mask & IFCAP_VLAN_HWTAGGING) {
			ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
			reinit = 1;
		}
		if (mask & IFCAP_VLAN_HWFILTER) {
			ifp->if_capenable ^= IFCAP_VLAN_HWFILTER;
			reinit = 1;
		}
		if (mask & IFCAP_VLAN_HWTSO) {
			ifp->if_capenable ^= IFCAP_VLAN_HWTSO;
			reinit = 1;
		}
		if (mask & IFCAP_LRO) {
			ifp->if_capenable ^= IFCAP_LRO;
			reinit = 1;
		}
		if (reinit && (ifp->if_drv_flags & IFF_DRV_RUNNING))
			igb_init(adapter);
		VLAN_CAPABILITIES(ifp);
		break;
	    }

	default:
		error = ether_ioctl(ifp, command, data);
		break;
	}

	return (error);
}


/*********************************************************************
 *  Init entry point
 *
 *  This routine is used in two ways. It is used by the stack as
 *  an init entry point in the network interface structure. It is
 *  also used by the driver as a hw/sw initialization routine to
 *  get to a consistent state.
 **********************************************************************/

static void
igb_init_locked(struct adapter *adapter)
{
	struct ifnet	*ifp = adapter->ifp;
	device_t	dev = adapter->dev;

	INIT_DEBUGOUT("igb_init: begin");

	IGB_CORE_LOCK_ASSERT(adapter);

	igb_disable_intr(adapter);
	callout_stop(&adapter->timer);

	/* Get the latest mac address; the user can use a LAA */
	bcopy(IF_LLADDR(adapter->ifp), adapter->hw.mac.addr,
	    ETHER_ADDR_LEN);

	/* Put the address into the Receive Address Array */
	e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);

	igb_reset(adapter);
	igb_update_link_status(adapter);

	E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);

	/* Set hardware offload abilities */
	ifp->if_hwassist = 0;
	if (ifp->if_capenable & IFCAP_TXCSUM) {
		ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
#if __FreeBSD_version >= 800000
		if (adapter->hw.mac.type == e1000_82576)
			ifp->if_hwassist |= CSUM_SCTP;
#endif
	}

	if (ifp->if_capenable & IFCAP_TSO4)
		ifp->if_hwassist |= CSUM_TSO;

	/* Configure for OS presence */
	igb_init_manageability(adapter);

	/* Prepare transmit descriptors and buffers */
	igb_setup_transmit_structures(adapter);
	igb_initialize_transmit_units(adapter);

	/* Setup Multicast table */
	igb_set_multi(adapter);

	/*
	** Figure out the desired mbuf pool
	** for doing jumbo/packetsplit
	*/
	if (adapter->max_frame_size <= 2048)
		adapter->rx_mbuf_sz = MCLBYTES;
	else if (adapter->max_frame_size <= 4096)
		adapter->rx_mbuf_sz = MJUMPAGESIZE;
	else
		adapter->rx_mbuf_sz = MJUM9BYTES;
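	/*
	 * For example, the default 1500-byte MTU gives a max_frame_size
	 * of 1518 (MTU + Ethernet header + FCS) and uses standard 2K
	 * clusters, while a 9000-byte jumbo MTU gives 9018 and selects
	 * 9K (MJUM9BYTES) clusters.
	 */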

	/* Prepare receive descriptors and buffers */
	if (igb_setup_receive_structures(adapter)) {
		device_printf(dev, "Could not setup receive structures\n");
		return;
	}
	igb_initialize_receive_units(adapter);

	/* Enable VLAN support */
	if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING)
		igb_setup_vlan_hw_support(adapter);

	/* Don't lose promiscuous settings */
	igb_set_promisc(adapter);

	ifp->if_drv_flags |= IFF_DRV_RUNNING;
	ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;

	callout_reset(&adapter->timer, hz, igb_local_timer, adapter);
	e1000_clear_hw_cntrs_base_generic(&adapter->hw);

	if (adapter->msix > 1) /* Set up queue routing */
		igb_configure_queues(adapter);

	/* this clears any pending interrupts */
	E1000_READ_REG(&adapter->hw, E1000_ICR);
#ifdef DEVICE_POLLING
	/*
	 * Only enable interrupts if we are not polling, make sure
	 * they are off otherwise.
	 */
	if (ifp->if_capenable & IFCAP_POLLING)
		igb_disable_intr(adapter);
	else
#endif /* DEVICE_POLLING */
	{
		igb_enable_intr(adapter);
		E1000_WRITE_REG(&adapter->hw, E1000_ICS, E1000_ICS_LSC);
	}

	/* Set Energy Efficient Ethernet */
	e1000_set_eee_i350(&adapter->hw);

	/* Don't reset the phy next time init gets called */
	adapter->hw.phy.reset_disable = TRUE;
}

static void
igb_init(void *arg)
{
	struct adapter *adapter = arg;

	IGB_CORE_LOCK(adapter);
	igb_init_locked(adapter);
	IGB_CORE_UNLOCK(adapter);
}


static void
igb_handle_que(void *context, int pending)
{
	struct igb_queue *que = context;
	struct adapter *adapter = que->adapter;
	struct tx_ring *txr = que->txr;
	struct ifnet	*ifp = adapter->ifp;

	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
		bool	more;

		more = igb_rxeof(que, -1, NULL);

		IGB_TX_LOCK(txr);
		if (igb_txeof(txr))
			more = TRUE;
#if __FreeBSD_version >= 800000
		if (!drbr_empty(ifp, txr->br))
			igb_mq_start_locked(ifp, txr, NULL);
#else
		igb_start_locked(txr, ifp);
#endif
		IGB_TX_UNLOCK(txr);
		if (more || (ifp->if_drv_flags & IFF_DRV_OACTIVE)) {
			taskqueue_enqueue(que->tq, &que->que_task);
			return;
		}
	}

#ifdef DEVICE_POLLING
	if (ifp->if_capenable & IFCAP_POLLING)
		return;
#endif
	/* Reenable this interrupt */
	if (que->eims)
		E1000_WRITE_REG(&adapter->hw, E1000_EIMS, que->eims);
	else
		igb_enable_intr(adapter);
}

/* Deal with link in a sleepable context */
static void
igb_handle_link(void *context, int pending)
{
	struct adapter *adapter = context;

	adapter->hw.mac.get_link_status = 1;
	igb_update_link_status(adapter);
}

/*********************************************************************
 *
 *  MSI/Legacy Deferred
 *  Interrupt Service routine
 *
 *********************************************************************/
static int
igb_irq_fast(void *arg)
{
	struct adapter		*adapter = arg;
	struct igb_queue	*que = adapter->queues;
	u32			reg_icr;

	reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);

	/* Hot eject? */
	if (reg_icr == 0xffffffff)
		return FILTER_STRAY;

	/* Definitely not our interrupt. */
	if (reg_icr == 0x0)
		return FILTER_STRAY;

	if ((reg_icr & E1000_ICR_INT_ASSERTED) == 0)
		return FILTER_STRAY;

	/*
	 * Mask interrupts until the taskqueue is finished running.  This is
	 * cheap, just assume that it is needed.  This also works around the
	 * MSI message reordering errata on certain systems.
	 */
	igb_disable_intr(adapter);
	taskqueue_enqueue(que->tq, &que->que_task);

	/* Link status change */
	if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC))
		taskqueue_enqueue(que->tq, &adapter->link_task);

	if (reg_icr & E1000_ICR_RXO)
		adapter->rx_overruns++;
	return FILTER_HANDLED;
}

#ifdef DEVICE_POLLING
/*********************************************************************
 *
 *  Legacy polling routine: if using this code you MUST be sure that
 *  multiqueue is not defined, i.e., set igb_num_queues to 1.
 *
 *********************************************************************/
#if __FreeBSD_version >= 800000
#define POLL_RETURN_COUNT(a) (a)
static int
#else
#define POLL_RETURN_COUNT(a)
static void
#endif
igb_poll(struct ifnet *ifp, enum poll_cmd cmd, int count)
{
	struct adapter		*adapter = ifp->if_softc;
	struct igb_queue	*que = adapter->queues;
	struct tx_ring		*txr = adapter->tx_rings;
	u32			reg_icr, rx_done = 0;
	u32			loop = IGB_MAX_LOOP;
	bool			more;

	IGB_CORE_LOCK(adapter);
	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
		IGB_CORE_UNLOCK(adapter);
		return POLL_RETURN_COUNT(rx_done);
	}

	if (cmd == POLL_AND_CHECK_STATUS) {
		reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
		/* Link status change */
		if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC))
			igb_handle_link(adapter, 0);

		if (reg_icr & E1000_ICR_RXO)
			adapter->rx_overruns++;
	}
	IGB_CORE_UNLOCK(adapter);

	igb_rxeof(que, count, &rx_done);

	IGB_TX_LOCK(txr);
	do {
		more = igb_txeof(txr);
	} while (loop-- && more);
#if __FreeBSD_version >= 800000
	if (!drbr_empty(ifp, txr->br))
		igb_mq_start_locked(ifp, txr, NULL);
#else
	igb_start_locked(txr, ifp);
#endif
	IGB_TX_UNLOCK(txr);
	return POLL_RETURN_COUNT(rx_done);
}
#endif /* DEVICE_POLLING */

/*********************************************************************
 *
 *  MSIX Que Interrupt Service routine
 *
 **********************************************************************/
static void
igb_msix_que(void *arg)
{
	struct igb_queue *que = arg;
	struct adapter *adapter = que->adapter;
	struct tx_ring *txr = que->txr;
	struct rx_ring *rxr = que->rxr;
	u32		newitr = 0;
	bool		more_tx, more_rx;

	E1000_WRITE_REG(&adapter->hw, E1000_EIMC, que->eims);
	++que->irqs;

	IGB_TX_LOCK(txr);
	more_tx = igb_txeof(txr);
	IGB_TX_UNLOCK(txr);

	more_rx = igb_rxeof(que, adapter->rx_process_limit, NULL);

	if (adapter->enable_aim == FALSE)
		goto no_calc;
	/*
	** Do Adaptive Interrupt Moderation:
	**  - Write out the last calculated setting
	**  - Calculate a new setting based on the average
	**    frame size over the last interval.
	*/
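	/*
	 * Worked example: if the last interval moved 1,500,000 bytes in
	 * 1,000 packets, the average frame is 1500 bytes; adding 24 for
	 * frame overhead gives 1524, which is capped at 3000 and scaled
	 * (here 1524 / 2 = 762) before being masked into an EITR
	 * interval, so larger average frames yield a lower interrupt
	 * rate.
	 */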
	if (que->eitr_setting)
		E1000_WRITE_REG(&adapter->hw,
		    E1000_EITR(que->msix), que->eitr_setting);

	que->eitr_setting = 0;

	/* Idle, do nothing */
	if ((txr->bytes == 0) && (rxr->bytes == 0))
		goto no_calc;

	/* Use half the default rate if below gigabit speed */
	if (adapter->link_speed != 1000)
		newitr = IGB_DEFAULT_ITR / 2;
	else {
		if ((txr->bytes) && (txr->packets))
			newitr = txr->bytes/txr->packets;
		if ((rxr->bytes) && (rxr->packets))
			newitr = max(newitr,
			    (rxr->bytes / rxr->packets));
		newitr += 24; /* account for hardware frame, crc */
		/* set an upper boundary */
		newitr = min(newitr, 3000);
		/* Be nice to the mid range */
		if ((newitr > 300) && (newitr < 1200))
			newitr = (newitr / 3);
		else
			newitr = (newitr / 2);
	}
	newitr &= 0x7FFC;  /* Mask invalid bits */
	if (adapter->hw.mac.type == e1000_82575)
		newitr |= newitr << 16;
	else
		newitr |= E1000_EITR_CNT_IGNR;

	/* save for next interrupt */
	que->eitr_setting = newitr;

	/* Reset state */
	txr->bytes = 0;
	txr->packets = 0;
	rxr->bytes = 0;
	rxr->packets = 0;

no_calc:
	/* Schedule a clean task if needed */
	if (more_tx || more_rx ||
	    (adapter->ifp->if_drv_flags & IFF_DRV_OACTIVE))
		taskqueue_enqueue(que->tq, &que->que_task);
	else
		/* Reenable this interrupt */
		E1000_WRITE_REG(&adapter->hw, E1000_EIMS, que->eims);
	return;
}


/*********************************************************************
 *
 *  MSIX Link Interrupt Service routine
 *
 **********************************************************************/

static void
igb_msix_link(void *arg)
{
	struct adapter	*adapter = arg;
	u32		icr;

	++adapter->link_irq;
	icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
	if (!(icr & E1000_ICR_LSC))
		goto spurious;
	igb_handle_link(adapter, 0);

spurious:
	/* Rearm */
	E1000_WRITE_REG(&adapter->hw, E1000_IMS, E1000_IMS_LSC);
	E1000_WRITE_REG(&adapter->hw, E1000_EIMS, adapter->link_mask);
	return;
}


/*********************************************************************
 *
 *  Media Ioctl callback
 *
 *  This routine is called whenever the user queries the status of
 *  the interface using ifconfig.
 *
 **********************************************************************/
static void
igb_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
{
	struct adapter *adapter = ifp->if_softc;
	u_char fiber_type = IFM_1000_SX;

	INIT_DEBUGOUT("igb_media_status: begin");

	IGB_CORE_LOCK(adapter);
	igb_update_link_status(adapter);

	ifmr->ifm_status = IFM_AVALID;
	ifmr->ifm_active = IFM_ETHER;

	if (!adapter->link_active) {
		IGB_CORE_UNLOCK(adapter);
		return;
	}

	ifmr->ifm_status |= IFM_ACTIVE;

	if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
	    (adapter->hw.phy.media_type == e1000_media_type_internal_serdes))
		ifmr->ifm_active |= fiber_type | IFM_FDX;
	else {
		switch (adapter->link_speed) {
		case 10:
			ifmr->ifm_active |= IFM_10_T;
			break;
		case 100:
			ifmr->ifm_active |= IFM_100_TX;
			break;
		case 1000:
			ifmr->ifm_active |= IFM_1000_T;
			break;
		}
		if (adapter->link_duplex == FULL_DUPLEX)
			ifmr->ifm_active |= IFM_FDX;
		else
			ifmr->ifm_active |= IFM_HDX;
	}
	IGB_CORE_UNLOCK(adapter);
}

/*********************************************************************
 *
 *  Media Ioctl callback
 *
 *  This routine is called when the user changes speed/duplex using
 *  the media/mediaopt option with ifconfig.
 *
 **********************************************************************/
static int
igb_media_change(struct ifnet *ifp)
{
	struct adapter *adapter = ifp->if_softc;
	struct ifmedia  *ifm = &adapter->media;

	INIT_DEBUGOUT("igb_media_change: begin");

	if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
		return (EINVAL);

	IGB_CORE_LOCK(adapter);
	switch (IFM_SUBTYPE(ifm->ifm_media)) {
	case IFM_AUTO:
		adapter->hw.mac.autoneg = DO_AUTO_NEG;
		adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
		break;
	case IFM_1000_LX:
	case IFM_1000_SX:
	case IFM_1000_T:
		adapter->hw.mac.autoneg = DO_AUTO_NEG;
		adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
		break;
	case IFM_100_TX:
		adapter->hw.mac.autoneg = FALSE;
		adapter->hw.phy.autoneg_advertised = 0;
		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
			adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_FULL;
		else
			adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_HALF;
		break;
	case IFM_10_T:
		adapter->hw.mac.autoneg = FALSE;
		adapter->hw.phy.autoneg_advertised = 0;
		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
			adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_FULL;
		else
			adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_HALF;
		break;
	default:
		device_printf(adapter->dev, "Unsupported media type\n");
	}

	igb_init_locked(adapter);
	IGB_CORE_UNLOCK(adapter);

	return (0);
}


/*********************************************************************
 *
 *  This routine maps the mbufs to Advanced TX descriptors,
 *  used by the 82575 adapter.
 *
 **********************************************************************/

static int
igb_xmit(struct tx_ring *txr, struct mbuf **m_headp)
{
	struct adapter		*adapter = txr->adapter;
	bus_dma_segment_t	segs[IGB_MAX_SCATTER];
	bus_dmamap_t		map;
	struct igb_tx_buffer	*tx_buffer, *tx_buffer_mapped;
	union e1000_adv_tx_desc	*txd = NULL;
	struct mbuf		*m_head;
	u32			olinfo_status = 0, cmd_type_len = 0;
	int			nsegs, i, j, error, first, last = 0;
	u32			hdrlen = 0;

	m_head = *m_headp;

	/* Set basic descriptor constants */
	cmd_type_len |= E1000_ADVTXD_DTYP_DATA;
	cmd_type_len |= E1000_ADVTXD_DCMD_IFCS | E1000_ADVTXD_DCMD_DEXT;
	if (m_head->m_flags & M_VLANTAG)
		cmd_type_len |= E1000_ADVTXD_DCMD_VLE;

	/*
	 * Map the packet for DMA.
	 *
	 * Capture the first descriptor index,
	 * this descriptor will have the index
	 * of the EOP which is the only one that
	 * now gets a DONE bit writeback.
	 */
	first = txr->next_avail_desc;
	tx_buffer = &txr->tx_buffers[first];
	tx_buffer_mapped = tx_buffer;
	map = tx_buffer->map;

	error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
	    *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);

	if (error == EFBIG) {
		struct mbuf *m;

		m = m_defrag(*m_headp, M_DONTWAIT);
		if (m == NULL) {
			adapter->mbuf_defrag_failed++;
			m_freem(*m_headp);
			*m_headp = NULL;
			return (ENOBUFS);
		}
		*m_headp = m;

		/* Try it again */
		error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
		    *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);

		if (error == ENOMEM) {
			adapter->no_tx_dma_setup++;
			return (error);
		} else if (error != 0) {
			adapter->no_tx_dma_setup++;
			m_freem(*m_headp);
			*m_headp = NULL;
			return (error);
		}
	} else if (error == ENOMEM) {
		adapter->no_tx_dma_setup++;
		return (error);
	} else if (error != 0) {
		adapter->no_tx_dma_setup++;
		m_freem(*m_headp);
		*m_headp = NULL;
		return (error);
	}
1806
	/*
	 * Check again to be sure we have enough descriptors;
	 * keep a little headroom for the context descriptor
	 * that offload setup may consume below.
	 */
	if (nsegs > (txr->tx_avail - 2)) {
		txr->no_desc_avail++;
		bus_dmamap_unload(txr->txtag, map);
		return (ENOBUFS);
	}
1813	m_head = *m_headp;
1814
	/*
	 * Set up the context descriptor:
	 * used when any hardware offload is done.
	 * This includes CSUM, VLAN, and TSO.  It
	 * will use the first descriptor.
	 */
1821        if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
1822		if (igb_tso_setup(txr, m_head, &hdrlen)) {
1823			cmd_type_len |= E1000_ADVTXD_DCMD_TSE;
1824			olinfo_status |= E1000_TXD_POPTS_IXSM << 8;
1825			olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
1826		} else
1827			return (ENXIO);
1828	} else if (igb_tx_ctx_setup(txr, m_head))
1829		olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
1830
1831	/* Calculate payload length */
1832	olinfo_status |= ((m_head->m_pkthdr.len - hdrlen)
1833	    << E1000_ADVTXD_PAYLEN_SHIFT);
1834
1835	/* 82575 needs the queue index added */
1836	if (adapter->hw.mac.type == e1000_82575)
1837		olinfo_status |= txr->me << 4;
1838
1839	/* Set up our transmit descriptors */
1840	i = txr->next_avail_desc;
1841	for (j = 0; j < nsegs; j++) {
1842		bus_size_t seg_len;
1843		bus_addr_t seg_addr;
1844
1845		tx_buffer = &txr->tx_buffers[i];
1846		txd = (union e1000_adv_tx_desc *)&txr->tx_base[i];
1847		seg_addr = segs[j].ds_addr;
1848		seg_len  = segs[j].ds_len;
1849
1850		txd->read.buffer_addr = htole64(seg_addr);
1851		txd->read.cmd_type_len = htole32(cmd_type_len | seg_len);
1852		txd->read.olinfo_status = htole32(olinfo_status);
1853		last = i;
1854		if (++i == adapter->num_tx_desc)
1855			i = 0;
1856		tx_buffer->m_head = NULL;
1857		tx_buffer->next_eop = -1;
1858	}
1859
1860	txr->next_avail_desc = i;
1861	txr->tx_avail -= nsegs;
1862
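	/*
	 * Park the loaded dmamap on the buffer holding the mbuf (the
	 * last one) and hand its spare map back to the first buffer,
	 * so a map always travels with the packet it describes.
	 */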
	tx_buffer->m_head = m_head;
	tx_buffer_mapped->map = tx_buffer->map;
	tx_buffer->map = map;
	bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);
1867
	/*
	 * The last descriptor of the packet needs
	 * End Of Packet (EOP) and Report Status (RS).
	 */
	txd->read.cmd_type_len |=
	    htole32(E1000_ADVTXD_DCMD_EOP | E1000_ADVTXD_DCMD_RS);
1875	/*
1876	 * Keep track in the first buffer which
1877	 * descriptor will be written back
1878	 */
1879	tx_buffer = &txr->tx_buffers[first];
1880	tx_buffer->next_eop = last;
1881	txr->watchdog_time = ticks;
1882
1883	/*
1884	 * Advance the Transmit Descriptor Tail (TDT), this tells the E1000
1885	 * that this frame is available to transmit.
1886	 */
1887	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
1888	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1889	E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), i);
1890	++txr->tx_packets;
1891
	return (0);
}
1895
1896static void
1897igb_set_promisc(struct adapter *adapter)
1898{
1899	struct ifnet	*ifp = adapter->ifp;
1900	struct e1000_hw *hw = &adapter->hw;
1901	u32		reg;
1902
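	/*
	 * A VF cannot write RCTL directly; promiscuous mode is
	 * requested from the PF through the mailbox interface.
	 */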
1903	if (adapter->vf_ifp) {
1904		e1000_promisc_set_vf(hw, e1000_promisc_enabled);
1905		return;
1906	}
1907
1908	reg = E1000_READ_REG(hw, E1000_RCTL);
1909	if (ifp->if_flags & IFF_PROMISC) {
1910		reg |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
1911		E1000_WRITE_REG(hw, E1000_RCTL, reg);
1912	} else if (ifp->if_flags & IFF_ALLMULTI) {
1913		reg |= E1000_RCTL_MPE;
1914		reg &= ~E1000_RCTL_UPE;
1915		E1000_WRITE_REG(hw, E1000_RCTL, reg);
1916	}
1917}
1918
1919static void
1920igb_disable_promisc(struct adapter *adapter)
1921{
1922	struct e1000_hw *hw = &adapter->hw;
1923	u32		reg;
1924
1925	if (adapter->vf_ifp) {
1926		e1000_promisc_set_vf(hw, e1000_promisc_disabled);
1927		return;
1928	}
1929	reg = E1000_READ_REG(hw, E1000_RCTL);
1930	reg &=  (~E1000_RCTL_UPE);
1931	reg &=  (~E1000_RCTL_MPE);
1932	E1000_WRITE_REG(hw, E1000_RCTL, reg);
1933}
1934
1935
1936/*********************************************************************
1937 *  Multicast Update
1938 *
 *  This routine is called whenever the multicast address list is updated.
1940 *
1941 **********************************************************************/
1942
1943static void
1944igb_set_multi(struct adapter *adapter)
1945{
1946	struct ifnet	*ifp = adapter->ifp;
1947	struct ifmultiaddr *ifma;
1948	u32 reg_rctl = 0;
	u8	*mta;
	int	mcnt = 0;
1952
1953	IOCTL_DEBUGOUT("igb_set_multi: begin");
1954
1955	mta = adapter->mta;
1956	bzero(mta, sizeof(uint8_t) * ETH_ADDR_LEN *
1957	    MAX_NUM_MULTICAST_ADDRESSES);
1958
1959#if __FreeBSD_version < 800000
1960	IF_ADDR_LOCK(ifp);
1961#else
1962	if_maddr_rlock(ifp);
1963#endif
1964	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
1965		if (ifma->ifma_addr->sa_family != AF_LINK)
1966			continue;
1967
1968		if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
1969			break;
1970
1971		bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
1972		    &mta[mcnt * ETH_ADDR_LEN], ETH_ADDR_LEN);
1973		mcnt++;
1974	}
1975#if __FreeBSD_version < 800000
1976	IF_ADDR_UNLOCK(ifp);
1977#else
1978	if_maddr_runlock(ifp);
1979#endif
1980
1981	if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) {
1982		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
1983		reg_rctl |= E1000_RCTL_MPE;
1984		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
1985	} else
1986		e1000_update_mc_addr_list(&adapter->hw, mta, mcnt);
1987}
1988
1989
1990/*********************************************************************
1991 *  Timer routine:
1992 *  	This routine checks for link status,
1993 *	updates statistics, and does the watchdog.
1994 *
1995 **********************************************************************/
1996
1997static void
1998igb_local_timer(void *arg)
1999{
2000	struct adapter		*adapter = arg;
2001	device_t		dev = adapter->dev;
2002	struct tx_ring		*txr = adapter->tx_rings;
2003
2004
2005	IGB_CORE_LOCK_ASSERT(adapter);
2006
2007	igb_update_link_status(adapter);
2008	igb_update_stats_counters(adapter);
2009
2010	/*
	** If flow control has paused us since we last checked,
	** it invalidates the watchdog timing, so don't run it.
2013	*/
2014	if (adapter->pause_frames) {
2015		adapter->pause_frames = 0;
2016		goto out;
2017	}
2018
2019        /*
2020        ** Watchdog: check for time since any descriptor was cleaned
2021        */
2022	for (int i = 0; i < adapter->num_queues; i++, txr++)
2023		if (txr->queue_status == IGB_QUEUE_HUNG)
2024			goto timeout;
2025out:
2026	callout_reset(&adapter->timer, hz, igb_local_timer, adapter);
2027#ifndef DEVICE_POLLING
2028	/* Schedule all queue interrupts - deadlock protection */
2029	E1000_WRITE_REG(&adapter->hw, E1000_EICS, adapter->que_mask);
2030#endif
2031	return;
2032
2033timeout:
	device_printf(dev, "Watchdog timeout -- resetting\n");
	device_printf(dev, "Queue(%d) tdh = %d, hw tdt = %d\n", txr->me,
	    E1000_READ_REG(&adapter->hw, E1000_TDH(txr->me)),
	    E1000_READ_REG(&adapter->hw, E1000_TDT(txr->me)));
	device_printf(dev, "TX(%d) desc avail = %d, "
	    "Next TX to Clean = %d\n",
	    txr->me, txr->tx_avail, txr->next_to_clean);
2041	adapter->ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2042	adapter->watchdog_events++;
2043	igb_init_locked(adapter);
2044}
2045
2046static void
2047igb_update_link_status(struct adapter *adapter)
2048{
2049	struct e1000_hw *hw = &adapter->hw;
2050	struct ifnet *ifp = adapter->ifp;
2051	device_t dev = adapter->dev;
2052	struct tx_ring *txr = adapter->tx_rings;
2053	u32 link_check, thstat, ctrl;
2054
2055	link_check = thstat = ctrl = 0;
2056
2057	/* Get the cached link value or read for real */
	switch (hw->phy.media_type) {
	case e1000_media_type_copper:
		if (hw->mac.get_link_status) {
			/* Do the work to read phy */
			e1000_check_for_link(hw);
			link_check = !hw->mac.get_link_status;
		} else
			link_check = TRUE;
		break;
	case e1000_media_type_fiber:
		e1000_check_for_link(hw);
		link_check = (E1000_READ_REG(hw, E1000_STATUS) &
				 E1000_STATUS_LU);
		break;
	case e1000_media_type_internal_serdes:
		e1000_check_for_link(hw);
		link_check = adapter->hw.mac.serdes_has_link;
		break;
	/* VF devices report media type unknown */
	case e1000_media_type_unknown:
		e1000_check_for_link(hw);
		link_check = !hw->mac.get_link_status;
		/* FALLTHROUGH */
	default:
		break;
	}
2084
2085	/* Check for thermal downshift or shutdown */
2086	if (hw->mac.type == e1000_i350) {
2087		thstat = E1000_READ_REG(hw, E1000_THSTAT);
2088		ctrl = E1000_READ_REG(hw, E1000_CTRL_EXT);
2089	}
2090
2091	/* Now we check if a transition has happened */
2092	if (link_check && (adapter->link_active == 0)) {
2093		e1000_get_speed_and_duplex(&adapter->hw,
2094		    &adapter->link_speed, &adapter->link_duplex);
2095		if (bootverbose)
2096			device_printf(dev, "Link is up %d Mbps %s\n",
2097			    adapter->link_speed,
2098			    ((adapter->link_duplex == FULL_DUPLEX) ?
2099			    "Full Duplex" : "Half Duplex"));
2100		adapter->link_active = 1;
2101		ifp->if_baudrate = adapter->link_speed * 1000000;
2102		if ((ctrl & E1000_CTRL_EXT_LINK_MODE_GMII) &&
2103		    (thstat & E1000_THSTAT_LINK_THROTTLE))
2104			device_printf(dev, "Link: thermal downshift\n");
2105		/* This can sleep */
2106		if_link_state_change(ifp, LINK_STATE_UP);
2107	} else if (!link_check && (adapter->link_active == 1)) {
2108		ifp->if_baudrate = adapter->link_speed = 0;
2109		adapter->link_duplex = 0;
2110		if (bootverbose)
2111			device_printf(dev, "Link is Down\n");
2112		if ((ctrl & E1000_CTRL_EXT_LINK_MODE_GMII) &&
2113		    (thstat & E1000_THSTAT_PWR_DOWN))
2114			device_printf(dev, "Link: thermal shutdown\n");
2115		adapter->link_active = 0;
2116		/* This can sleep */
2117		if_link_state_change(ifp, LINK_STATE_DOWN);
2118		/* Turn off watchdogs */
2119		for (int i = 0; i < adapter->num_queues; i++, txr++)
2120			txr->queue_status = IGB_QUEUE_IDLE;
2121	}
2122}
2123
2124/*********************************************************************
2125 *
2126 *  This routine disables all traffic on the adapter by issuing a
2127 *  global reset on the MAC and deallocates TX/RX buffers.
2128 *
2129 **********************************************************************/
2130
2131static void
2132igb_stop(void *arg)
2133{
2134	struct adapter	*adapter = arg;
2135	struct ifnet	*ifp = adapter->ifp;
2136	struct tx_ring *txr = adapter->tx_rings;
2137
2138	IGB_CORE_LOCK_ASSERT(adapter);
2139
2140	INIT_DEBUGOUT("igb_stop: begin");
2141
2142	igb_disable_intr(adapter);
2143
2144	callout_stop(&adapter->timer);
2145
2146	/* Tell the stack that the interface is no longer active */
2147	ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
2148
2149	/* Unarm watchdog timer. */
2150	for (int i = 0; i < adapter->num_queues; i++, txr++) {
2151		IGB_TX_LOCK(txr);
2152		txr->queue_status = IGB_QUEUE_IDLE;
2153		IGB_TX_UNLOCK(txr);
2154	}
2155
2156	e1000_reset_hw(&adapter->hw);
2157	E1000_WRITE_REG(&adapter->hw, E1000_WUC, 0);
2158
2159	e1000_led_off(&adapter->hw);
2160	e1000_cleanup_led(&adapter->hw);
2161}
2162
2163
2164/*********************************************************************
2165 *
2166 *  Determine hardware revision.
2167 *
2168 **********************************************************************/
2169static void
2170igb_identify_hardware(struct adapter *adapter)
2171{
2172	device_t dev = adapter->dev;
2173
2174	/* Make sure our PCI config space has the necessary stuff set */
2175	adapter->hw.bus.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2);
2176	if (!((adapter->hw.bus.pci_cmd_word & PCIM_CMD_BUSMASTEREN) &&
2177	    (adapter->hw.bus.pci_cmd_word & PCIM_CMD_MEMEN))) {
2178		INIT_DEBUGOUT("Memory Access and/or Bus Master "
2179		    "bits were not set!\n");
2180		adapter->hw.bus.pci_cmd_word |=
2181		(PCIM_CMD_BUSMASTEREN | PCIM_CMD_MEMEN);
2182		pci_write_config(dev, PCIR_COMMAND,
2183		    adapter->hw.bus.pci_cmd_word, 2);
2184	}
2185
2186	/* Save off the information about this board */
2187	adapter->hw.vendor_id = pci_get_vendor(dev);
2188	adapter->hw.device_id = pci_get_device(dev);
2189	adapter->hw.revision_id = pci_read_config(dev, PCIR_REVID, 1);
2190	adapter->hw.subsystem_vendor_id =
2191	    pci_read_config(dev, PCIR_SUBVEND_0, 2);
2192	adapter->hw.subsystem_device_id =
2193	    pci_read_config(dev, PCIR_SUBDEV_0, 2);
2194
2195	/* Set MAC type early for PCI setup */
2196	e1000_set_mac_type(&adapter->hw);
2197
2198	/* Are we a VF device? */
2199	if ((adapter->hw.mac.type == e1000_vfadapt) ||
2200	    (adapter->hw.mac.type == e1000_vfadapt_i350))
2201		adapter->vf_ifp = 1;
2202	else
2203		adapter->vf_ifp = 0;
2204}
2205
2206static int
2207igb_allocate_pci_resources(struct adapter *adapter)
2208{
2209	device_t	dev = adapter->dev;
2210	int		rid;
2211
2212	rid = PCIR_BAR(0);
2213	adapter->pci_mem = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
2214	    &rid, RF_ACTIVE);
2215	if (adapter->pci_mem == NULL) {
2216		device_printf(dev, "Unable to allocate bus resource: memory\n");
2217		return (ENXIO);
2218	}
2219	adapter->osdep.mem_bus_space_tag =
2220	    rman_get_bustag(adapter->pci_mem);
2221	adapter->osdep.mem_bus_space_handle =
2222	    rman_get_bushandle(adapter->pci_mem);
2223	adapter->hw.hw_addr = (u8 *)&adapter->osdep.mem_bus_space_handle;
2224
2225	adapter->num_queues = 1; /* Defaults for Legacy or MSI */
2226
2227	/* This will setup either MSI/X or MSI */
2228	adapter->msix = igb_setup_msix(adapter);
2229	adapter->hw.back = &adapter->osdep;
2230
2231	return (0);
2232}
2233
2234/*********************************************************************
2235 *
2236 *  Setup the Legacy or MSI Interrupt handler
2237 *
2238 **********************************************************************/
2239static int
2240igb_allocate_legacy(struct adapter *adapter)
2241{
2242	device_t		dev = adapter->dev;
2243	struct igb_queue	*que = adapter->queues;
2244	struct tx_ring		*txr = adapter->tx_rings;
2245	int			error, rid = 0;
2246
2247	/* Turn off all interrupts */
2248	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2249
2250	/* MSI RID is 1 */
2251	if (adapter->msix == 1)
2252		rid = 1;
2253
2254	/* We allocate a single interrupt resource */
2255	adapter->res = bus_alloc_resource_any(dev,
2256	    SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2257	if (adapter->res == NULL) {
2258		device_printf(dev, "Unable to allocate bus resource: "
2259		    "interrupt\n");
2260		return (ENXIO);
2261	}
2262
2263#if __FreeBSD_version >= 800000
2264	TASK_INIT(&txr->txq_task, 0, igb_deferred_mq_start, txr);
2265#endif
2266
2267	/*
2268	 * Try allocating a fast interrupt and the associated deferred
2269	 * processing contexts.
2270	 */
2271	TASK_INIT(&que->que_task, 0, igb_handle_que, que);
2272	/* Make tasklet for deferred link handling */
2273	TASK_INIT(&adapter->link_task, 0, igb_handle_link, adapter);
2274	que->tq = taskqueue_create_fast("igb_taskq", M_NOWAIT,
2275	    taskqueue_thread_enqueue, &que->tq);
2276	taskqueue_start_threads(&que->tq, 1, PI_NET, "%s taskq",
2277	    device_get_nameunit(adapter->dev));
2278	if ((error = bus_setup_intr(dev, adapter->res,
2279	    INTR_TYPE_NET | INTR_MPSAFE, igb_irq_fast, NULL,
2280	    adapter, &adapter->tag)) != 0) {
2281		device_printf(dev, "Failed to register fast interrupt "
2282			    "handler: %d\n", error);
2283		taskqueue_free(que->tq);
2284		que->tq = NULL;
2285		return (error);
2286	}
2287
2288	return (0);
2289}
2290
2291
2292/*********************************************************************
2293 *
2294 *  Setup the MSIX Queue Interrupt handlers:
2295 *
2296 **********************************************************************/
2297static int
2298igb_allocate_msix(struct adapter *adapter)
2299{
2300	device_t		dev = adapter->dev;
2301	struct igb_queue	*que = adapter->queues;
2302	int			error, rid, vector = 0;
2303
2304
2305	for (int i = 0; i < adapter->num_queues; i++, vector++, que++) {
		rid = vector + 1;
2307		que->res = bus_alloc_resource_any(dev,
2308		    SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2309		if (que->res == NULL) {
2310			device_printf(dev,
2311			    "Unable to allocate bus resource: "
2312			    "MSIX Queue Interrupt\n");
2313			return (ENXIO);
2314		}
2315		error = bus_setup_intr(dev, que->res,
2316	    	    INTR_TYPE_NET | INTR_MPSAFE, NULL,
2317		    igb_msix_que, que, &que->tag);
2318		if (error) {
2319			que->res = NULL;
			device_printf(dev, "Failed to register Queue handler\n");
2321			return (error);
2322		}
2323#if __FreeBSD_version >= 800504
2324		bus_describe_intr(dev, que->res, que->tag, "que %d", i);
2325#endif
2326		que->msix = vector;
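		/*
		 * EIMS bits: the 82575 uses per-queue TX/RX cause bits,
		 * while later MACs use one bit per MSIX vector.
		 */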
2327		if (adapter->hw.mac.type == e1000_82575)
2328			que->eims = E1000_EICR_TX_QUEUE0 << i;
2329		else
2330			que->eims = 1 << vector;
2331		/*
2332		** Bind the msix vector, and thus the
2333		** rings to the corresponding cpu.
2334		*/
2335		if (adapter->num_queues > 1)
2336			bus_bind_intr(dev, que->res, i);
2337#if __FreeBSD_version >= 800000
2338		TASK_INIT(&que->txr->txq_task, 0, igb_deferred_mq_start,
2339		    que->txr);
2340#endif
2341		/* Make tasklet for deferred handling */
2342		TASK_INIT(&que->que_task, 0, igb_handle_que, que);
2343		que->tq = taskqueue_create("igb_que", M_NOWAIT,
2344		    taskqueue_thread_enqueue, &que->tq);
2345		taskqueue_start_threads(&que->tq, 1, PI_NET, "%s que",
2346		    device_get_nameunit(adapter->dev));
2347	}
2348
2349	/* And Link */
2350	rid = vector + 1;
2351	adapter->res = bus_alloc_resource_any(dev,
2352	    SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2353	if (adapter->res == NULL) {
2354		device_printf(dev,
2355		    "Unable to allocate bus resource: "
2356		    "MSIX Link Interrupt\n");
2357		return (ENXIO);
2358	}
2359	if ((error = bus_setup_intr(dev, adapter->res,
2360	    INTR_TYPE_NET | INTR_MPSAFE, NULL,
2361	    igb_msix_link, adapter, &adapter->tag)) != 0) {
		device_printf(dev, "Failed to register Link handler\n");
2363		return (error);
2364	}
2365#if __FreeBSD_version >= 800504
2366	bus_describe_intr(dev, adapter->res, adapter->tag, "link");
2367#endif
2368	adapter->linkvec = vector;
2369
2370	return (0);
2371}
2372
2373
2374static void
2375igb_configure_queues(struct adapter *adapter)
2376{
2377	struct	e1000_hw	*hw = &adapter->hw;
2378	struct	igb_queue	*que;
2379	u32			tmp, ivar = 0, newitr = 0;
2380
	/* First turn on MSIX multi-vector mode (needed for RSS queues) */
2382	if (adapter->hw.mac.type != e1000_82575)
2383		E1000_WRITE_REG(hw, E1000_GPIE,
2384		    E1000_GPIE_MSIX_MODE | E1000_GPIE_EIAME |
2385		    E1000_GPIE_PBA | E1000_GPIE_NSICR);
2386
2387	/* Turn on MSIX */
2388	switch (adapter->hw.mac.type) {
2389	case e1000_82580:
2390	case e1000_i350:
2391	case e1000_vfadapt:
2392	case e1000_vfadapt_i350:
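		/*
		 * Each 32-bit IVAR register holds four 8-bit entries:
		 * RX(2i), TX(2i), RX(2i+1), TX(2i+1).  Program the RX
		 * halves first, then the TX halves.
		 */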
2393		/* RX entries */
2394		for (int i = 0; i < adapter->num_queues; i++) {
2395			u32 index = i >> 1;
2396			ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2397			que = &adapter->queues[i];
2398			if (i & 1) {
2399				ivar &= 0xFF00FFFF;
2400				ivar |= (que->msix | E1000_IVAR_VALID) << 16;
2401			} else {
2402				ivar &= 0xFFFFFF00;
2403				ivar |= que->msix | E1000_IVAR_VALID;
2404			}
2405			E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2406		}
2407		/* TX entries */
2408		for (int i = 0; i < adapter->num_queues; i++) {
2409			u32 index = i >> 1;
2410			ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2411			que = &adapter->queues[i];
2412			if (i & 1) {
2413				ivar &= 0x00FFFFFF;
2414				ivar |= (que->msix | E1000_IVAR_VALID) << 24;
2415			} else {
2416				ivar &= 0xFFFF00FF;
2417				ivar |= (que->msix | E1000_IVAR_VALID) << 8;
2418			}
2419			E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2420			adapter->que_mask |= que->eims;
2421		}
2422
2423		/* And for the link interrupt */
2424		ivar = (adapter->linkvec | E1000_IVAR_VALID) << 8;
2425		adapter->link_mask = 1 << adapter->linkvec;
2426		E1000_WRITE_REG(hw, E1000_IVAR_MISC, ivar);
2427		break;
2428	case e1000_82576:
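		/*
		 * On the 82576, queues 0-7 map to the low 16 bits and
		 * queues 8-15 to the high 16 bits of IVAR(i & 0x7),
		 * hence the register index below.
		 */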
2429		/* RX entries */
2430		for (int i = 0; i < adapter->num_queues; i++) {
2431			u32 index = i & 0x7; /* Each IVAR has two entries */
2432			ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2433			que = &adapter->queues[i];
2434			if (i < 8) {
2435				ivar &= 0xFFFFFF00;
2436				ivar |= que->msix | E1000_IVAR_VALID;
2437			} else {
2438				ivar &= 0xFF00FFFF;
2439				ivar |= (que->msix | E1000_IVAR_VALID) << 16;
2440			}
2441			E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2442			adapter->que_mask |= que->eims;
2443		}
2444		/* TX entries */
2445		for (int i = 0; i < adapter->num_queues; i++) {
2446			u32 index = i & 0x7; /* Each IVAR has two entries */
2447			ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2448			que = &adapter->queues[i];
2449			if (i < 8) {
2450				ivar &= 0xFFFF00FF;
2451				ivar |= (que->msix | E1000_IVAR_VALID) << 8;
2452			} else {
2453				ivar &= 0x00FFFFFF;
2454				ivar |= (que->msix | E1000_IVAR_VALID) << 24;
2455			}
2456			E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2457			adapter->que_mask |= que->eims;
2458		}
2459
2460		/* And for the link interrupt */
2461		ivar = (adapter->linkvec | E1000_IVAR_VALID) << 8;
2462		adapter->link_mask = 1 << adapter->linkvec;
2463		E1000_WRITE_REG(hw, E1000_IVAR_MISC, ivar);
2464		break;
2465
	case e1000_82575:
		/* Enable MSI-X support */
		tmp = E1000_READ_REG(hw, E1000_CTRL_EXT);
		tmp |= E1000_CTRL_EXT_PBA_CLR;
		/* Auto-Mask interrupts upon ICR read. */
		tmp |= E1000_CTRL_EXT_EIAME;
		tmp |= E1000_CTRL_EXT_IRCA;
		E1000_WRITE_REG(hw, E1000_CTRL_EXT, tmp);
2474
2475		/* Queues */
2476		for (int i = 0; i < adapter->num_queues; i++) {
2477			que = &adapter->queues[i];
2478			tmp = E1000_EICR_RX_QUEUE0 << i;
2479			tmp |= E1000_EICR_TX_QUEUE0 << i;
2480			que->eims = tmp;
2481			E1000_WRITE_REG_ARRAY(hw, E1000_MSIXBM(0),
2482			    i, que->eims);
2483			adapter->que_mask |= que->eims;
2484		}
2485
2486		/* Link */
2487		E1000_WRITE_REG(hw, E1000_MSIXBM(adapter->linkvec),
2488		    E1000_EIMS_OTHER);
		adapter->link_mask |= E1000_EIMS_OTHER;
		break;
	default:
2491		break;
2492	}
2493
2494	/* Set the starting interrupt rate */
2495	if (igb_max_interrupt_rate > 0)
2496		newitr = (4000000 / igb_max_interrupt_rate) & 0x7FFC;
2497
	if (hw->mac.type == e1000_82575)
		newitr |= newitr << 16;
	else
		newitr |= E1000_EITR_CNT_IGNR;
2502
2503	for (int i = 0; i < adapter->num_queues; i++) {
2504		que = &adapter->queues[i];
2505		E1000_WRITE_REG(hw, E1000_EITR(que->msix), newitr);
2506	}
2507
2508	return;
2509}
2510
2511
2512static void
2513igb_free_pci_resources(struct adapter *adapter)
2514{
2515	struct		igb_queue *que = adapter->queues;
2516	device_t	dev = adapter->dev;
2517	int		rid;
2518
2519	/*
2520	** There is a slight possibility of a failure mode
2521	** in attach that will result in entering this function
2522	** before interrupt resources have been initialized, and
	** in that case we do not want to execute the loops below.
2524	** We can detect this reliably by the state of the adapter
2525	** res pointer.
2526	*/
2527	if (adapter->res == NULL)
2528		goto mem;
2529
2530	/*
2531	 * First release all the interrupt resources:
2532	 */
2533	for (int i = 0; i < adapter->num_queues; i++, que++) {
2534		rid = que->msix + 1;
2535		if (que->tag != NULL) {
2536			bus_teardown_intr(dev, que->res, que->tag);
2537			que->tag = NULL;
2538		}
2539		if (que->res != NULL)
2540			bus_release_resource(dev,
2541			    SYS_RES_IRQ, rid, que->res);
2542	}
2543
2544	/* Clean the Legacy or Link interrupt last */
2545	if (adapter->linkvec) /* we are doing MSIX */
2546		rid = adapter->linkvec + 1;
2547	else
		rid = (adapter->msix != 0) ? 1 : 0;
2549
2550	que = adapter->queues;
2551	if (adapter->tag != NULL) {
2552		taskqueue_drain(que->tq, &adapter->link_task);
2553		bus_teardown_intr(dev, adapter->res, adapter->tag);
2554		adapter->tag = NULL;
2555	}
2556	if (adapter->res != NULL)
2557		bus_release_resource(dev, SYS_RES_IRQ, rid, adapter->res);
2558
2559	for (int i = 0; i < adapter->num_queues; i++, que++) {
2560		if (que->tq != NULL) {
2561#if __FreeBSD_version >= 800000
2562			taskqueue_drain(que->tq, &que->txr->txq_task);
2563#endif
2564			taskqueue_drain(que->tq, &que->que_task);
2565			taskqueue_free(que->tq);
2566		}
2567	}
2568mem:
2569	if (adapter->msix)
2570		pci_release_msi(dev);
2571
2572	if (adapter->msix_mem != NULL)
2573		bus_release_resource(dev, SYS_RES_MEMORY,
2574		    PCIR_BAR(IGB_MSIX_BAR), adapter->msix_mem);
2575
2576	if (adapter->pci_mem != NULL)
2577		bus_release_resource(dev, SYS_RES_MEMORY,
2578		    PCIR_BAR(0), adapter->pci_mem);
2579
2580}
2581
2582/*
2583 * Setup Either MSI/X or MSI
2584 */
2585static int
2586igb_setup_msix(struct adapter *adapter)
2587{
2588	device_t dev = adapter->dev;
2589	int rid, want, queues, msgs;
2590
2591	/* tuneable override */
2592	if (igb_enable_msix == 0)
2593		goto msi;
2594
2595	/* First try MSI/X */
2596	rid = PCIR_BAR(IGB_MSIX_BAR);
2597	adapter->msix_mem = bus_alloc_resource_any(dev,
2598	    SYS_RES_MEMORY, &rid, RF_ACTIVE);
	if (adapter->msix_mem == NULL) {
		/* May not be enabled */
		device_printf(adapter->dev,
		    "Unable to map MSIX table\n");
2603		goto msi;
2604	}
2605
2606	msgs = pci_msix_count(dev);
2607	if (msgs == 0) { /* system has msix disabled */
2608		bus_release_resource(dev, SYS_RES_MEMORY,
2609		    PCIR_BAR(IGB_MSIX_BAR), adapter->msix_mem);
2610		adapter->msix_mem = NULL;
2611		goto msi;
2612	}
2613
2614	/* Figure out a reasonable auto config value */
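	/*
	 * Default to one queue per CPU, capped by the available
	 * MSIX messages (one of which is reserved for link).
	 */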
2615	queues = (mp_ncpus > (msgs-1)) ? (msgs-1) : mp_ncpus;
2616
2617	/* Manual override */
2618	if (igb_num_queues != 0)
2619		queues = igb_num_queues;
2620	if (queues > 8)  /* max queues */
2621		queues = 8;
2622
2623	/* Can have max of 4 queues on 82575 */
2624	if ((adapter->hw.mac.type == e1000_82575) && (queues > 4))
2625		queues = 4;
2626
2627	/* Limit the VF devices to one queue */
2628	if (adapter->vf_ifp)
2629		queues = 1;
2630
2631	/*
2632	** One vector (RX/TX pair) per queue
2633	** plus an additional for Link interrupt
2634	*/
2635	want = queues + 1;
2636	if (msgs >= want)
2637		msgs = want;
	else {
		device_printf(adapter->dev,
		    "MSIX Configuration Problem, "
		    "%d vectors configured, but %d queues wanted!\n",
		    msgs, want);
		return (ENXIO);
	}
	if (msgs && pci_alloc_msix(dev, &msgs) == 0) {
		device_printf(adapter->dev,
		    "Using MSIX interrupts with %d vectors\n", msgs);
2648		adapter->num_queues = queues;
2649		return (msgs);
2650	}
2651msi:
	msgs = pci_msi_count(dev);
	if (msgs == 1 && pci_alloc_msi(dev, &msgs) == 0)
		device_printf(adapter->dev, "Using MSI interrupt\n");
2655	return (msgs);
2656}
2657
2658/*********************************************************************
2659 *
 *  Set up a fresh starting state
2661 *
2662 **********************************************************************/
2663static void
2664igb_reset(struct adapter *adapter)
2665{
2666	device_t	dev = adapter->dev;
2667	struct e1000_hw *hw = &adapter->hw;
2668	struct e1000_fc_info *fc = &hw->fc;
2669	struct ifnet	*ifp = adapter->ifp;
2670	u32		pba = 0;
2671	u16		hwm;
2672
2673	INIT_DEBUGOUT("igb_reset: begin");
2674
2675	/* Let the firmware know the OS is in control */
2676	igb_get_hw_control(adapter);
2677
2678	/*
2679	 * Packet Buffer Allocation (PBA)
	 * Writing PBA sets the receive portion of the buffer;
	 * the remainder is used for the transmit buffer.
2682	 */
2683	switch (hw->mac.type) {
2684	case e1000_82575:
2685		pba = E1000_PBA_32K;
2686		break;
2687	case e1000_82576:
2688	case e1000_vfadapt:
2689		pba = E1000_READ_REG(hw, E1000_RXPBS);
2690		pba &= E1000_RXPBS_SIZE_MASK_82576;
2691		break;
2692	case e1000_82580:
2693	case e1000_i350:
2694	case e1000_vfadapt_i350:
2695		pba = E1000_READ_REG(hw, E1000_RXPBS);
2696		pba = e1000_rxpbs_adjust_82580(pba);
		break;
2699	default:
2700		break;
2701	}
2702
2703	/* Special needs in case of Jumbo frames */
	if ((hw->mac.type == e1000_82575) && (ifp->if_mtu > ETHERMTU)) {
		u32 tx_space, min_tx, min_rx;
		pba = E1000_READ_REG(hw, E1000_PBA);
		tx_space = pba >> 16;
		pba &= 0xffff;
		min_tx = (adapter->max_frame_size +
		    sizeof(struct e1000_tx_desc) - ETHERNET_FCS_SIZE) * 2;
		min_tx = roundup2(min_tx, 1024);
		min_tx >>= 10;
		min_rx = adapter->max_frame_size;
		min_rx = roundup2(min_rx, 1024);
		min_rx >>= 10;
		if (tx_space < min_tx &&
		    ((min_tx - tx_space) < pba)) {
			pba = pba - (min_tx - tx_space);
			/*
			 * If short on rx space, rx wins
			 * and must trump the tx adjustment.
			 */
			if (pba < min_rx)
				pba = min_rx;
		}
		E1000_WRITE_REG(hw, E1000_PBA, pba);
	}
2728
	INIT_DEBUGOUT1("igb_reset: pba=%dK", pba);
2730
2731	/*
2732	 * These parameters control the automatic generation (Tx) and
2733	 * response (Rx) to Ethernet PAUSE frames.
2734	 * - High water mark should allow for at least two frames to be
2735	 *   received after sending an XOFF.
2736	 * - Low water mark works best when it is very near the high water mark.
2737	 *   This allows the receiver to restart by sending XON when it has
2738	 *   drained a bit.
2739	 */
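	/*
	 * hwm is the lesser of 90% of the packet buffer and the
	 * buffer size less two max-sized frames (pba is in KB,
	 * hence the << 10).
	 */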
2740	hwm = min(((pba << 10) * 9 / 10),
2741	    ((pba << 10) - 2 * adapter->max_frame_size));
2742
2743	if (hw->mac.type < e1000_82576) {
2744		fc->high_water = hwm & 0xFFF8;  /* 8-byte granularity */
2745		fc->low_water = fc->high_water - 8;
2746	} else {
2747		fc->high_water = hwm & 0xFFF0;  /* 16-byte granularity */
2748		fc->low_water = fc->high_water - 16;
2749	}
2750
2751	fc->pause_time = IGB_FC_PAUSE_TIME;
2752	fc->send_xon = TRUE;
2753	if (fc->requested_mode)
2754		fc->current_mode = fc->requested_mode;
2755	else
2756		fc->current_mode = e1000_fc_full;
2757
2758	adapter->fc = fc->current_mode;
2759
2760	/* Issue a global reset */
2761	e1000_reset_hw(hw);
2762	E1000_WRITE_REG(hw, E1000_WUC, 0);
2763
2764	if (e1000_init_hw(hw) < 0)
2765		device_printf(dev, "Hardware Initialization Failed\n");
2766
2767	/* Setup DMA Coalescing */
2768	if (hw->mac.type == e1000_i350) {
2769		u32 reg = ~E1000_DMACR_DMAC_EN;
2770
2771		if (adapter->dmac == 0) { /* Disabling it */
2772			E1000_WRITE_REG(hw, E1000_DMACR, reg);
2773			goto reset_out;
2774		}
2775
2776		hwm = (pba - 4) << 10;
2777		reg = (((pba-6) << E1000_DMACR_DMACTHR_SHIFT)
2778		    & E1000_DMACR_DMACTHR_MASK);
2779
2780		/* transition to L0x or L1 if available..*/
2781		reg |= (E1000_DMACR_DMAC_EN | E1000_DMACR_DMAC_LX_MASK);
2782
2783		/* timer = value in adapter->dmac in 32usec intervals */
2784		reg |= (adapter->dmac >> 5);
2785		E1000_WRITE_REG(hw, E1000_DMACR, reg);
2786
2787		/* No lower threshold */
2788		E1000_WRITE_REG(hw, E1000_DMCRTRH, 0);
2789
		/* Set the high water mark computed above (PBA less 4KB) */
2791		E1000_WRITE_REG(hw, E1000_FCRTC, hwm);
2792
2793		/* Set the interval before transition */
2794		reg = E1000_READ_REG(hw, E1000_DMCTLX);
2795		reg |= 0x800000FF; /* 255 usec */
2796		E1000_WRITE_REG(hw, E1000_DMCTLX, reg);
2797
2798		/* free space in tx packet buffer to wake from DMA coal */
2799		E1000_WRITE_REG(hw, E1000_DMCTXTH,
2800		    (20480 - (2 * adapter->max_frame_size)) >> 6);
2801
2802		/* make low power state decision controlled by DMA coal */
2803		reg = E1000_READ_REG(hw, E1000_PCIEMISC);
2804		E1000_WRITE_REG(hw, E1000_PCIEMISC,
2805		    reg | E1000_PCIEMISC_LX_DECISION);
2806		device_printf(dev, "DMA Coalescing enabled\n");
2807	}
2808
2809reset_out:
2810	E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);
2811	e1000_get_phy_info(hw);
2812	e1000_check_for_link(hw);
2813	return;
2814}
2815
2816/*********************************************************************
2817 *
2818 *  Setup networking device structure and register an interface.
2819 *
2820 **********************************************************************/
2821static int
2822igb_setup_interface(device_t dev, struct adapter *adapter)
2823{
2824	struct ifnet   *ifp;
2825
2826	INIT_DEBUGOUT("igb_setup_interface: begin");
2827
2828	ifp = adapter->ifp = if_alloc(IFT_ETHER);
2829	if (ifp == NULL) {
2830		device_printf(dev, "can not allocate ifnet structure\n");
2831		return (-1);
2832	}
2833	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
2834	ifp->if_mtu = ETHERMTU;
2835	ifp->if_init =  igb_init;
2836	ifp->if_softc = adapter;
2837	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
2838	ifp->if_ioctl = igb_ioctl;
2839#if __FreeBSD_version >= 800000
2840	ifp->if_transmit = igb_mq_start;
2841	ifp->if_qflush = igb_qflush;
2842#else
2843	ifp->if_start = igb_start;
2844#endif
2845	IFQ_SET_MAXLEN(&ifp->if_snd, adapter->num_tx_desc - 1);
2846	ifp->if_snd.ifq_drv_maxlen = adapter->num_tx_desc - 1;
2847	IFQ_SET_READY(&ifp->if_snd);
2848
2849	ether_ifattach(ifp, adapter->hw.mac.addr);
2850
2851	ifp->if_capabilities = ifp->if_capenable = 0;
2852
2853	ifp->if_capabilities = IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM;
2854	ifp->if_capabilities |= IFCAP_TSO4;
2855	ifp->if_capabilities |= IFCAP_JUMBO_MTU;
2856	ifp->if_capenable = ifp->if_capabilities;
2857
	/* Advertise LRO capability, but leave it disabled by default */
2859	ifp->if_capabilities |= IFCAP_LRO;
2860
2861#ifdef DEVICE_POLLING
2862	ifp->if_capabilities |= IFCAP_POLLING;
2863#endif
2864
2865	/*
2866	 * Tell the upper layer(s) we
2867	 * support full VLAN capability.
2868	 */
2869	ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);
2870	ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING
2871			     |  IFCAP_VLAN_HWTSO
2872			     |  IFCAP_VLAN_MTU;
2873	ifp->if_capenable |= IFCAP_VLAN_HWTAGGING
2874			  |  IFCAP_VLAN_HWTSO
2875			  |  IFCAP_VLAN_MTU;
2876
2877	/*
	** Don't turn this on by default: if vlans are
	** created on another pseudo device (e.g. lagg),
	** then vlan events are not passed thru, breaking
	** operation, but with HW FILTER off it works.  If
	** using vlans directly on the igb driver you can
	** enable this and get full hardware tag filtering.
2884	*/
2885	ifp->if_capabilities |= IFCAP_VLAN_HWFILTER;
2886
2887	/*
2888	 * Specify the media types supported by this adapter and register
2889	 * callbacks to update media and link information
2890	 */
2891	ifmedia_init(&adapter->media, IFM_IMASK,
2892	    igb_media_change, igb_media_status);
2893	if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
2894	    (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
2895		ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_SX | IFM_FDX,
2896			    0, NULL);
2897		ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_SX, 0, NULL);
2898	} else {
2899		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T, 0, NULL);
2900		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T | IFM_FDX,
2901			    0, NULL);
2902		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX,
2903			    0, NULL);
2904		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
2905			    0, NULL);
2906		if (adapter->hw.phy.type != e1000_phy_ife) {
2907			ifmedia_add(&adapter->media,
2908				IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
2909			ifmedia_add(&adapter->media,
2910				IFM_ETHER | IFM_1000_T, 0, NULL);
2911		}
2912	}
2913	ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
2914	ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
2915	return (0);
2916}
2917
2918
2919/*
2920 * Manage DMA'able memory.
2921 */
2922static void
2923igb_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
2924{
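	/* bus_dmamap_load() callback: record the single segment's bus address. */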
2925	if (error)
2926		return;
2927	*(bus_addr_t *) arg = segs[0].ds_addr;
2928}
2929
2930static int
2931igb_dma_malloc(struct adapter *adapter, bus_size_t size,
2932        struct igb_dma_alloc *dma, int mapflags)
2933{
2934	int error;
2935
2936	error = bus_dma_tag_create(bus_get_dma_tag(adapter->dev), /* parent */
2937				IGB_DBA_ALIGN, 0,	/* alignment, bounds */
2938				BUS_SPACE_MAXADDR,	/* lowaddr */
2939				BUS_SPACE_MAXADDR,	/* highaddr */
2940				NULL, NULL,		/* filter, filterarg */
2941				size,			/* maxsize */
2942				1,			/* nsegments */
2943				size,			/* maxsegsize */
2944				0,			/* flags */
2945				NULL,			/* lockfunc */
2946				NULL,			/* lockarg */
2947				&dma->dma_tag);
2948	if (error) {
2949		device_printf(adapter->dev,
2950		    "%s: bus_dma_tag_create failed: %d\n",
2951		    __func__, error);
2952		goto fail_0;
2953	}
2954
2955	error = bus_dmamem_alloc(dma->dma_tag, (void**) &dma->dma_vaddr,
2956	    BUS_DMA_NOWAIT | BUS_DMA_COHERENT, &dma->dma_map);
2957	if (error) {
2958		device_printf(adapter->dev,
2959		    "%s: bus_dmamem_alloc(%ju) failed: %d\n",
2960		    __func__, (uintmax_t)size, error);
2961		goto fail_2;
2962	}
2963
2964	dma->dma_paddr = 0;
2965	error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
2966	    size, igb_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT);
2967	if (error || dma->dma_paddr == 0) {
2968		device_printf(adapter->dev,
2969		    "%s: bus_dmamap_load failed: %d\n",
2970		    __func__, error);
2971		goto fail_3;
2972	}
2973
2974	return (0);
2975
2976fail_3:
2977	bus_dmamap_unload(dma->dma_tag, dma->dma_map);
2978fail_2:
2979	bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
2980	bus_dma_tag_destroy(dma->dma_tag);
2981fail_0:
2982	dma->dma_map = NULL;
2983	dma->dma_tag = NULL;
2984
2985	return (error);
2986}
2987
2988static void
2989igb_dma_free(struct adapter *adapter, struct igb_dma_alloc *dma)
2990{
2991	if (dma->dma_tag == NULL)
2992		return;
2993	if (dma->dma_map != NULL) {
2994		bus_dmamap_sync(dma->dma_tag, dma->dma_map,
2995		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
2996		bus_dmamap_unload(dma->dma_tag, dma->dma_map);
2997		bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
2998		dma->dma_map = NULL;
2999	}
3000	bus_dma_tag_destroy(dma->dma_tag);
3001	dma->dma_tag = NULL;
3002}
3003
3004
3005/*********************************************************************
3006 *
3007 *  Allocate memory for the transmit and receive rings, and then
3008 *  the descriptors associated with each, called only once at attach.
3009 *
3010 **********************************************************************/
3011static int
3012igb_allocate_queues(struct adapter *adapter)
3013{
3014	device_t dev = adapter->dev;
3015	struct igb_queue	*que = NULL;
3016	struct tx_ring		*txr = NULL;
3017	struct rx_ring		*rxr = NULL;
3018	int rsize, tsize, error = E1000_SUCCESS;
3019	int txconf = 0, rxconf = 0;
3020
3021	/* First allocate the top level queue structs */
3022	if (!(adapter->queues =
3023	    (struct igb_queue *) malloc(sizeof(struct igb_queue) *
3024	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3025		device_printf(dev, "Unable to allocate queue memory\n");
3026		error = ENOMEM;
3027		goto fail;
3028	}
3029
3030	/* Next allocate the TX ring struct memory */
3031	if (!(adapter->tx_rings =
3032	    (struct tx_ring *) malloc(sizeof(struct tx_ring) *
3033	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3034		device_printf(dev, "Unable to allocate TX ring memory\n");
3035		error = ENOMEM;
3036		goto tx_fail;
3037	}
3038
3039	/* Now allocate the RX */
3040	if (!(adapter->rx_rings =
3041	    (struct rx_ring *) malloc(sizeof(struct rx_ring) *
3042	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3043		device_printf(dev, "Unable to allocate RX ring memory\n");
3044		error = ENOMEM;
3045		goto rx_fail;
3046	}
3047
3048	tsize = roundup2(adapter->num_tx_desc *
3049	    sizeof(union e1000_adv_tx_desc), IGB_DBA_ALIGN);
3050	/*
3051	 * Now set up the TX queues, txconf is needed to handle the
3052	 * possibility that things fail midcourse and we need to
3053	 * undo memory gracefully
3054	 */
3055	for (int i = 0; i < adapter->num_queues; i++, txconf++) {
3056		/* Set up some basics */
3057		txr = &adapter->tx_rings[i];
3058		txr->adapter = adapter;
3059		txr->me = i;
3060
3061		/* Initialize the TX lock */
3062		snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
3063		    device_get_nameunit(dev), txr->me);
3064		mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF);
3065
3066		if (igb_dma_malloc(adapter, tsize,
3067			&txr->txdma, BUS_DMA_NOWAIT)) {
3068			device_printf(dev,
3069			    "Unable to allocate TX Descriptor memory\n");
3070			error = ENOMEM;
3071			goto err_tx_desc;
3072		}
3073		txr->tx_base = (struct e1000_tx_desc *)txr->txdma.dma_vaddr;
3074		bzero((void *)txr->tx_base, tsize);
3075
3076        	/* Now allocate transmit buffers for the ring */
3077        	if (igb_allocate_transmit_buffers(txr)) {
3078			device_printf(dev,
3079			    "Critical Failure setting up transmit buffers\n");
3080			error = ENOMEM;
3081			goto err_tx_desc;
3082        	}
3083#if __FreeBSD_version >= 800000
3084		/* Allocate a buf ring */
3085		txr->br = buf_ring_alloc(IGB_BR_SIZE, M_DEVBUF,
3086		    M_WAITOK, &txr->tx_mtx);
3087#endif
3088	}
3089
3090	/*
3091	 * Next the RX queues...
3092	 */
3093	rsize = roundup2(adapter->num_rx_desc *
3094	    sizeof(union e1000_adv_rx_desc), IGB_DBA_ALIGN);
3095	for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
3096		rxr = &adapter->rx_rings[i];
3097		rxr->adapter = adapter;
3098		rxr->me = i;
3099
3100		/* Initialize the RX lock */
		snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
		    device_get_nameunit(dev), rxr->me);
3103		mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF);
3104
3105		if (igb_dma_malloc(adapter, rsize,
3106			&rxr->rxdma, BUS_DMA_NOWAIT)) {
3107			device_printf(dev,
3108			    "Unable to allocate RxDescriptor memory\n");
3109			error = ENOMEM;
3110			goto err_rx_desc;
3111		}
3112		rxr->rx_base = (union e1000_adv_rx_desc *)rxr->rxdma.dma_vaddr;
3113		bzero((void *)rxr->rx_base, rsize);
3114
3115        	/* Allocate receive buffers for the ring*/
3116		if (igb_allocate_receive_buffers(rxr)) {
3117			device_printf(dev,
3118			    "Critical Failure setting up receive buffers\n");
3119			error = ENOMEM;
3120			goto err_rx_desc;
3121		}
3122	}
3123
3124	/*
3125	** Finally set up the queue holding structs
3126	*/
3127	for (int i = 0; i < adapter->num_queues; i++) {
3128		que = &adapter->queues[i];
3129		que->adapter = adapter;
3130		que->txr = &adapter->tx_rings[i];
3131		que->rxr = &adapter->rx_rings[i];
3132	}
3133
3134	return (0);
3135
3136err_rx_desc:
3137	for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
3138		igb_dma_free(adapter, &rxr->rxdma);
3139err_tx_desc:
3140	for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
3141		igb_dma_free(adapter, &txr->txdma);
3142	free(adapter->rx_rings, M_DEVBUF);
3143rx_fail:
3144#if __FreeBSD_version >= 800000
3145	buf_ring_free(txr->br, M_DEVBUF);
3146#endif
3147	free(adapter->tx_rings, M_DEVBUF);
3148tx_fail:
3149	free(adapter->queues, M_DEVBUF);
3150fail:
3151	return (error);
3152}
3153
3154/*********************************************************************
3155 *
3156 *  Allocate memory for tx_buffer structures. The tx_buffer stores all
 *  the information needed to transmit a packet on the wire.  This is
 *  called only once at attach; setup is done on every reset.
3159 *
3160 **********************************************************************/
3161static int
3162igb_allocate_transmit_buffers(struct tx_ring *txr)
3163{
3164	struct adapter *adapter = txr->adapter;
3165	device_t dev = adapter->dev;
3166	struct igb_tx_buffer *txbuf;
3167	int error, i;
3168
3169	/*
3170	 * Setup DMA descriptor areas.
3171	 */
3172	if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
3173			       1, 0,			/* alignment, bounds */
3174			       BUS_SPACE_MAXADDR,	/* lowaddr */
3175			       BUS_SPACE_MAXADDR,	/* highaddr */
3176			       NULL, NULL,		/* filter, filterarg */
3177			       IGB_TSO_SIZE,		/* maxsize */
3178			       IGB_MAX_SCATTER,		/* nsegments */
3179			       PAGE_SIZE,		/* maxsegsize */
3180			       0,			/* flags */
3181			       NULL,			/* lockfunc */
3182			       NULL,			/* lockfuncarg */
3183			       &txr->txtag))) {
3184		device_printf(dev,"Unable to allocate TX DMA tag\n");
3185		goto fail;
3186	}
3187
3188	if (!(txr->tx_buffers =
3189	    (struct igb_tx_buffer *) malloc(sizeof(struct igb_tx_buffer) *
3190	    adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3191		device_printf(dev, "Unable to allocate tx_buffer memory\n");
3192		error = ENOMEM;
3193		goto fail;
3194	}
3195
3196        /* Create the descriptor buffer dma maps */
3197	txbuf = txr->tx_buffers;
3198	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3199		error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
3200		if (error != 0) {
3201			device_printf(dev, "Unable to create TX DMA map\n");
3202			goto fail;
3203		}
3204	}
3205
3206	return 0;
3207fail:
	/* Free everything; this handles the case where we failed midway */
3209	igb_free_transmit_structures(adapter);
3210	return (error);
3211}
3212
3213/*********************************************************************
3214 *
3215 *  Initialize a transmit ring.
3216 *
3217 **********************************************************************/
3218static void
3219igb_setup_transmit_ring(struct tx_ring *txr)
3220{
3221	struct adapter *adapter = txr->adapter;
3222	struct igb_tx_buffer *txbuf;
3223	int i;
3224#ifdef DEV_NETMAP
3225	struct netmap_adapter *na = NA(adapter->ifp);
3226	struct netmap_slot *slot;
3227#endif /* DEV_NETMAP */
3228
3229	/* Clear the old descriptor contents */
3230	IGB_TX_LOCK(txr);
3231#ifdef DEV_NETMAP
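	/*
	 * netmap_reset() returns the ring's slot array when the
	 * interface is in netmap mode, or NULL otherwise.
	 */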
3232	slot = netmap_reset(na, NR_TX, txr->me, 0);
3233#endif /* DEV_NETMAP */
3234	bzero((void *)txr->tx_base,
3235	      (sizeof(union e1000_adv_tx_desc)) * adapter->num_tx_desc);
3236	/* Reset indices */
3237	txr->next_avail_desc = 0;
3238	txr->next_to_clean = 0;
3239
3240	/* Free any existing tx buffers. */
	txbuf = txr->tx_buffers;
3242	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3243		if (txbuf->m_head != NULL) {
3244			bus_dmamap_sync(txr->txtag, txbuf->map,
3245			    BUS_DMASYNC_POSTWRITE);
3246			bus_dmamap_unload(txr->txtag, txbuf->map);
3247			m_freem(txbuf->m_head);
3248			txbuf->m_head = NULL;
3249		}
3250#ifdef DEV_NETMAP
3251		if (slot) {
3252			/* slot si is mapped to the i-th NIC-ring entry */
3253			int si = i + na->tx_rings[txr->me].nkr_hwofs;
3254
3255			if (si < 0)
3256				si += na->num_tx_desc;
3257			netmap_load_map(txr->txtag, txbuf->map,
3258				NMB(slot + si), na->buff_size);
3259		}
3260#endif /* DEV_NETMAP */
3261		/* clear the watch index */
3262		txbuf->next_eop = -1;
	}
3264
3265	/* Set number of descriptors available */
3266	txr->tx_avail = adapter->num_tx_desc;
3267
3268	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3269	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3270	IGB_TX_UNLOCK(txr);
3271}
3272
3273/*********************************************************************
3274 *
3275 *  Initialize all transmit rings.
3276 *
3277 **********************************************************************/
3278static void
3279igb_setup_transmit_structures(struct adapter *adapter)
3280{
3281	struct tx_ring *txr = adapter->tx_rings;
3282
3283	for (int i = 0; i < adapter->num_queues; i++, txr++)
3284		igb_setup_transmit_ring(txr);
3285
3286	return;
3287}
3288
3289/*********************************************************************
3290 *
3291 *  Enable transmit unit.
3292 *
3293 **********************************************************************/
3294static void
3295igb_initialize_transmit_units(struct adapter *adapter)
3296{
3297	struct tx_ring	*txr = adapter->tx_rings;
3298	struct e1000_hw *hw = &adapter->hw;
3299	u32		tctl, txdctl;
3300
3301	INIT_DEBUGOUT("igb_initialize_transmit_units: begin");
3302	tctl = txdctl = 0;
3303
3304	/* Setup the Tx Descriptor Rings */
3305	for (int i = 0; i < adapter->num_queues; i++, txr++) {
3306		u64 bus_addr = txr->txdma.dma_paddr;
3307
3308		E1000_WRITE_REG(hw, E1000_TDLEN(i),
3309		    adapter->num_tx_desc * sizeof(struct e1000_tx_desc));
3310		E1000_WRITE_REG(hw, E1000_TDBAH(i),
3311		    (uint32_t)(bus_addr >> 32));
3312		E1000_WRITE_REG(hw, E1000_TDBAL(i),
3313		    (uint32_t)bus_addr);
3314
3315		/* Setup the HW Tx Head and Tail descriptor pointers */
3316		E1000_WRITE_REG(hw, E1000_TDT(i), 0);
3317		E1000_WRITE_REG(hw, E1000_TDH(i), 0);
3318
3319		HW_DEBUGOUT2("Base = %x, Length = %x\n",
3320		    E1000_READ_REG(hw, E1000_TDBAL(i)),
3321		    E1000_READ_REG(hw, E1000_TDLEN(i)));
3322
3323		txr->queue_status = IGB_QUEUE_IDLE;
3324
3325		txdctl |= IGB_TX_PTHRESH;
3326		txdctl |= IGB_TX_HTHRESH << 8;
3327		txdctl |= IGB_TX_WTHRESH << 16;
3328		txdctl |= E1000_TXDCTL_QUEUE_ENABLE;
3329		E1000_WRITE_REG(hw, E1000_TXDCTL(i), txdctl);
3330	}
3331
3332	if (adapter->vf_ifp)
3333		return;
3334
3335	e1000_config_collision_dist(hw);
3336
3337	/* Program the Transmit Control Register */
3338	tctl = E1000_READ_REG(hw, E1000_TCTL);
3339	tctl &= ~E1000_TCTL_CT;
3340	tctl |= (E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN |
3341		   (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT));
3342
3343	/* This write will effectively turn on the transmit unit. */
3344	E1000_WRITE_REG(hw, E1000_TCTL, tctl);
3345}
3346
3347/*********************************************************************
3348 *
3349 *  Free all transmit rings.
3350 *
3351 **********************************************************************/
3352static void
3353igb_free_transmit_structures(struct adapter *adapter)
3354{
3355	struct tx_ring *txr = adapter->tx_rings;
3356
3357	for (int i = 0; i < adapter->num_queues; i++, txr++) {
3358		IGB_TX_LOCK(txr);
3359		igb_free_transmit_buffers(txr);
3360		igb_dma_free(adapter, &txr->txdma);
3361		IGB_TX_UNLOCK(txr);
3362		IGB_TX_LOCK_DESTROY(txr);
3363	}
3364	free(adapter->tx_rings, M_DEVBUF);
3365}
3366
3367/*********************************************************************
3368 *
3369 *  Free transmit ring related data structures.
3370 *
3371 **********************************************************************/
3372static void
3373igb_free_transmit_buffers(struct tx_ring *txr)
3374{
3375	struct adapter *adapter = txr->adapter;
3376	struct igb_tx_buffer *tx_buffer;
3377	int             i;
3378
3379	INIT_DEBUGOUT("free_transmit_ring: begin");
3380
3381	if (txr->tx_buffers == NULL)
3382		return;
3383
3384	tx_buffer = txr->tx_buffers;
3385	for (i = 0; i < adapter->num_tx_desc; i++, tx_buffer++) {
3386		if (tx_buffer->m_head != NULL) {
3387			bus_dmamap_sync(txr->txtag, tx_buffer->map,
3388			    BUS_DMASYNC_POSTWRITE);
3389			bus_dmamap_unload(txr->txtag,
3390			    tx_buffer->map);
3391			m_freem(tx_buffer->m_head);
3392			tx_buffer->m_head = NULL;
3393			if (tx_buffer->map != NULL) {
3394				bus_dmamap_destroy(txr->txtag,
3395				    tx_buffer->map);
3396				tx_buffer->map = NULL;
3397			}
3398		} else if (tx_buffer->map != NULL) {
3399			bus_dmamap_unload(txr->txtag,
3400			    tx_buffer->map);
3401			bus_dmamap_destroy(txr->txtag,
3402			    tx_buffer->map);
3403			tx_buffer->map = NULL;
3404		}
3405	}
3406#if __FreeBSD_version >= 800000
3407	if (txr->br != NULL)
3408		buf_ring_free(txr->br, M_DEVBUF);
3409#endif
3410	if (txr->tx_buffers != NULL) {
3411		free(txr->tx_buffers, M_DEVBUF);
3412		txr->tx_buffers = NULL;
3413	}
3414	if (txr->txtag != NULL) {
3415		bus_dma_tag_destroy(txr->txtag);
3416		txr->txtag = NULL;
3417	}
3418	return;
3419}
3420
3421/**********************************************************************
3422 *
3423 *  Setup work for hardware segmentation offload (TSO)
3424 *
3425 **********************************************************************/
3426static boolean_t
3427igb_tso_setup(struct tx_ring *txr, struct mbuf *mp, u32 *hdrlen)
3428{
3429	struct adapter *adapter = txr->adapter;
3430	struct e1000_adv_tx_context_desc *TXD;
3431	struct igb_tx_buffer        *tx_buffer;
3432	u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0;
3433	u32 mss_l4len_idx = 0;
3434	u16 vtag = 0;
3435	int ctxd, ehdrlen, ip_hlen, tcp_hlen;
3436	struct ether_vlan_header *eh;
3437	struct ip *ip;
3438	struct tcphdr *th;
3439
3440
3441	/*
3442	 * Determine where frame payload starts.
3443	 * Jump over vlan headers if already present
3444	 */
3445	eh = mtod(mp, struct ether_vlan_header *);
3446	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN))
3447		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3448	else
3449		ehdrlen = ETHER_HDR_LEN;
3450
3451	/* Ensure we have at least the IP+TCP header in the first mbuf. */
3452	if (mp->m_len < ehdrlen + sizeof(struct ip) + sizeof(struct tcphdr))
3453		return FALSE;
3454
	/* Only IPv4 TSO is supported for now */
	ctxd = txr->next_avail_desc;
	tx_buffer = &txr->tx_buffers[ctxd];
	TXD = (struct e1000_adv_tx_context_desc *) &txr->tx_base[ctxd];

	ip = (struct ip *)(mp->m_data + ehdrlen);
	if (ip->ip_p != IPPROTO_TCP)
		return FALSE;
3463	ip->ip_sum = 0;
3464	ip_hlen = ip->ip_hl << 2;
3465	th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
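	/*
	 * For TSO the hardware expects th_sum to be seeded with the
	 * pseudo-header checksum computed without the length field.
	 */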
3466	th->th_sum = in_pseudo(ip->ip_src.s_addr,
3467	    ip->ip_dst.s_addr, htons(IPPROTO_TCP));
3468	tcp_hlen = th->th_off << 2;
3469	/*
3470	 * Calculate header length, this is used
3471	 * in the transmit desc in igb_xmit
3472	 */
3473	*hdrlen = ehdrlen + ip_hlen + tcp_hlen;
3474
3475	/* VLAN MACLEN IPLEN */
3476	if (mp->m_flags & M_VLANTAG) {
3477		vtag = htole16(mp->m_pkthdr.ether_vtag);
3478		vlan_macip_lens |= (vtag << E1000_ADVTXD_VLAN_SHIFT);
3479	}
3480
3481	vlan_macip_lens |= (ehdrlen << E1000_ADVTXD_MACLEN_SHIFT);
3482	vlan_macip_lens |= ip_hlen;
3483	TXD->vlan_macip_lens |= htole32(vlan_macip_lens);
3484
3485	/* ADV DTYPE TUCMD */
3486	type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
3487	type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP;
3488	type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4;
3489	TXD->type_tucmd_mlhl |= htole32(type_tucmd_mlhl);
3490
3491	/* MSS L4LEN IDX */
3492	mss_l4len_idx |= (mp->m_pkthdr.tso_segsz << E1000_ADVTXD_MSS_SHIFT);
3493	mss_l4len_idx |= (tcp_hlen << E1000_ADVTXD_L4LEN_SHIFT);
3494	/* 82575 needs the queue index added */
3495	if (adapter->hw.mac.type == e1000_82575)
3496		mss_l4len_idx |= txr->me << 4;
3497	TXD->mss_l4len_idx = htole32(mss_l4len_idx);
3498
3499	TXD->seqnum_seed = htole32(0);
3500	tx_buffer->m_head = NULL;
3501	tx_buffer->next_eop = -1;
3502
3503	if (++ctxd == adapter->num_tx_desc)
3504		ctxd = 0;
3505
3506	txr->tx_avail--;
3507	txr->next_avail_desc = ctxd;
3508	return TRUE;
3509}
3510
3511
3512/*********************************************************************
3513 *
3514 *  Context Descriptor setup for VLAN or CSUM
3515 *
3516 **********************************************************************/
3517
3518static bool
3519igb_tx_ctx_setup(struct tx_ring *txr, struct mbuf *mp)
3520{
3521	struct adapter *adapter = txr->adapter;
3522	struct e1000_adv_tx_context_desc *TXD;
3523	struct igb_tx_buffer        *tx_buffer;
3524	u32 vlan_macip_lens, type_tucmd_mlhl, mss_l4len_idx;
3525	struct ether_vlan_header *eh;
3526	struct ip *ip = NULL;
3527	struct ip6_hdr *ip6;
3528	int  ehdrlen, ctxd, ip_hlen = 0;
3529	u16	etype, vtag = 0;
3530	u8	ipproto = 0;
3531	bool	offload = TRUE;
3532
3533	if ((mp->m_pkthdr.csum_flags & CSUM_OFFLOAD) == 0)
3534		offload = FALSE;
3535
3536	vlan_macip_lens = type_tucmd_mlhl = mss_l4len_idx = 0;
3537	ctxd = txr->next_avail_desc;
3538	tx_buffer = &txr->tx_buffers[ctxd];
3539	TXD = (struct e1000_adv_tx_context_desc *) &txr->tx_base[ctxd];
3540
3541	/*
3542	** In advanced descriptors the vlan tag must
3543	** be placed into the context descriptor, thus
3544	** we need to be here just for that setup.
3545	*/
3546	if (mp->m_flags & M_VLANTAG) {
3547		vtag = htole16(mp->m_pkthdr.ether_vtag);
3548		vlan_macip_lens |= (vtag << E1000_ADVTXD_VLAN_SHIFT);
3549	} else if (offload == FALSE)
3550		return FALSE;
3551
3552	/*
3553	 * Determine where frame payload starts.
3554	 * Jump over vlan headers if already present,
3555	 * helpful for QinQ too.
3556	 */
3557	eh = mtod(mp, struct ether_vlan_header *);
3558	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
3559		etype = ntohs(eh->evl_proto);
3560		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3561	} else {
3562		etype = ntohs(eh->evl_encap_proto);
3563		ehdrlen = ETHER_HDR_LEN;
3564	}
3565
3566	/* Set the ether header length */
3567	vlan_macip_lens |= ehdrlen << E1000_ADVTXD_MACLEN_SHIFT;
3568
3569	switch (etype) {
3570		case ETHERTYPE_IP:
3571			ip = (struct ip *)(mp->m_data + ehdrlen);
3572			ip_hlen = ip->ip_hl << 2;
3573			if (mp->m_len < ehdrlen + ip_hlen) {
3574				offload = FALSE;
3575				break;
3576			}
3577			ipproto = ip->ip_p;
3578			type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4;
3579			break;
3580		case ETHERTYPE_IPV6:
3581			ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
3582			ip_hlen = sizeof(struct ip6_hdr);
3583			ipproto = ip6->ip6_nxt;
3584			type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV6;
3585			break;
3586		default:
3587			offload = FALSE;
3588			break;
3589	}
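	/*
	 * Note: for IPv6 only the fixed header is examined; a packet
	 * carrying extension headers presents its first next-header
	 * value here and so falls through the protocol switch below
	 * with no L4 offload requested.
	 */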
3590
3591	vlan_macip_lens |= ip_hlen;
3592	type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
3593
3594	switch (ipproto) {
3595		case IPPROTO_TCP:
3596			if (mp->m_pkthdr.csum_flags & CSUM_TCP)
3597				type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP;
3598			break;
3599		case IPPROTO_UDP:
3600			if (mp->m_pkthdr.csum_flags & CSUM_UDP)
3601				type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_UDP;
3602			break;
3603#if __FreeBSD_version >= 800000
3604		case IPPROTO_SCTP:
3605			if (mp->m_pkthdr.csum_flags & CSUM_SCTP)
3606				type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_SCTP;
3607			break;
3608#endif
3609		default:
3610			offload = FALSE;
3611			break;
3612	}
3613
3614	/* 82575 needs the queue index added */
3615	if (adapter->hw.mac.type == e1000_82575)
3616		mss_l4len_idx = txr->me << 4;
3617
3618	/* Now copy bits into descriptor */
3619	TXD->vlan_macip_lens |= htole32(vlan_macip_lens);
3620	TXD->type_tucmd_mlhl |= htole32(type_tucmd_mlhl);
3621	TXD->seqnum_seed = htole32(0);
3622	TXD->mss_l4len_idx = htole32(mss_l4len_idx);
3623
3624	tx_buffer->m_head = NULL;
3625	tx_buffer->next_eop = -1;
3626
3627	/* We've consumed the first desc, adjust counters */
3628	if (++ctxd == adapter->num_tx_desc)
3629		ctxd = 0;
3630	txr->next_avail_desc = ctxd;
3631	--txr->tx_avail;
3632
	return (offload);
3634}
3635
3636
3637/**********************************************************************
3638 *
3639 *  Examine each tx_buffer in the used queue. If the hardware is done
3640 *  processing the packet then free associated resources. The
3641 *  tx_buffer is put back on the free queue.
3642 *
 *  A TRUE return means there's work in the ring to clean; FALSE means it's empty.
3644 **********************************************************************/
3645static bool
3646igb_txeof(struct tx_ring *txr)
3647{
3648	struct adapter	*adapter = txr->adapter;
	int first, last, done, processed;
	struct igb_tx_buffer *tx_buffer;
	struct e1000_tx_desc *tx_desc, *eop_desc;
3652	struct ifnet   *ifp = adapter->ifp;
3653
3654	IGB_TX_LOCK_ASSERT(txr);
3655
3656#ifdef DEV_NETMAP
3657	if (ifp->if_capenable & IFCAP_NETMAP) {
3658		struct netmap_adapter *na = NA(ifp);
3659
3660		selwakeuppri(&na->tx_rings[txr->me].si, PI_NET);
3661		IGB_TX_UNLOCK(txr);
3662		IGB_CORE_LOCK(adapter);
3663		selwakeuppri(&na->tx_rings[na->num_queues + 1].si, PI_NET);
3664		IGB_CORE_UNLOCK(adapter);
3665		IGB_TX_LOCK(txr);
3666		return FALSE;
3667	}
3668#endif /* DEV_NETMAP */
	if (txr->tx_avail == adapter->num_tx_desc) {
		txr->queue_status = IGB_QUEUE_IDLE;
		return FALSE;
	}

	processed = 0;
	first = txr->next_to_clean;
	tx_desc = &txr->tx_base[first];
	tx_buffer = &txr->tx_buffers[first];
	last = tx_buffer->next_eop;
	eop_desc = &txr->tx_base[last];
3680
3681	/*
3682	 * What this does is get the index of the
3683	 * first descriptor AFTER the EOP of the
3684	 * first packet, that way we can do the
3685	 * simple comparison on the inner while loop.
3686	 */
3687	if (++last == adapter->num_tx_desc)
3688 		last = 0;
3689	done = last;
3690
	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
	    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);

	while (eop_desc->upper.fields.status & E1000_TXD_STAT_DD) {
		/* We clean the range of the packet */
		while (first != done) {
			tx_desc->upper.data = 0;
			tx_desc->lower.data = 0;
			tx_desc->buffer_addr = 0;
			++txr->tx_avail;
			++processed;
3702
3703			if (tx_buffer->m_head) {
3704				txr->bytes +=
3705				    tx_buffer->m_head->m_pkthdr.len;
3706				bus_dmamap_sync(txr->txtag,
3707				    tx_buffer->map,
3708				    BUS_DMASYNC_POSTWRITE);
3709				bus_dmamap_unload(txr->txtag,
3710				    tx_buffer->map);
3711
				m_freem(tx_buffer->m_head);
				tx_buffer->m_head = NULL;
			}
3715			tx_buffer->next_eop = -1;
3716			txr->watchdog_time = ticks;
3717
			if (++first == adapter->num_tx_desc)
				first = 0;

			tx_buffer = &txr->tx_buffers[first];
			tx_desc = &txr->tx_base[first];
3723		}
3724		++txr->packets;
3725		++ifp->if_opackets;
3726		/* See if we can continue to the next packet */
3727		last = tx_buffer->next_eop;
3728		if (last != -1) {
3729        		eop_desc = &txr->tx_base[last];
3730			/* Get new done point */
			if (++last == adapter->num_tx_desc)
				last = 0;
3732			done = last;
3733		} else
3734			break;
	}
	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);

	txr->next_to_clean = first;
3740
	/*
	** Watchdog calculation: we know there's
	** work outstanding or the first return
	** would have been taken, so if nothing has
	** been processed for too long it indicates
	** a hang.
	*/
3747	if ((!processed) && ((ticks - txr->watchdog_time) > IGB_WATCHDOG))
3748		txr->queue_status = IGB_QUEUE_HUNG;
3749
	/*
	 * If we have a minimum free, clear IFF_DRV_OACTIVE
	 * to tell the stack that it is OK to send packets.
	 */
	if (txr->tx_avail > IGB_TX_CLEANUP_THRESHOLD) {
		ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
		/* All clean, turn off the watchdog */
		if (txr->tx_avail == adapter->num_tx_desc) {
			txr->queue_status = IGB_QUEUE_IDLE;
			return (FALSE);
		}
	}
3762	return (TRUE);
3763}
3764
3765/*********************************************************************
3766 *
3767 *  Refresh mbuf buffers for RX descriptor rings
3768 *   - now keeps its own state so discards due to resource
3769 *     exhaustion are unnecessary, if an mbuf cannot be obtained
3770 *     it just returns, keeping its placeholder, thus it can simply
3771 *     be recalled to try again.
3772 *
3773 **********************************************************************/
3774static void
3775igb_refresh_mbufs(struct rx_ring *rxr, int limit)
3776{
3777	struct adapter		*adapter = rxr->adapter;
3778	bus_dma_segment_t	hseg[1];
3779	bus_dma_segment_t	pseg[1];
3780	struct igb_rx_buf	*rxbuf;
3781	struct mbuf		*mh, *mp;
3782	int			i, j, nsegs, error;
3783	bool			refreshed = FALSE;
3784
3785	i = j = rxr->next_to_refresh;
	/*
	** Get one descriptor beyond
	** our work mark to control
	** the loop.
	*/
3791	if (++j == adapter->num_rx_desc)
3792		j = 0;
3793
3794	while (j != limit) {
3795		rxbuf = &rxr->rx_buffers[i];
3796		/* No hdr mbuf used with header split off */
3797		if (rxr->hdr_split == FALSE)
3798			goto no_split;
3799		if (rxbuf->m_head == NULL) {
3800			mh = m_gethdr(M_DONTWAIT, MT_DATA);
3801			if (mh == NULL)
3802				goto update;
3803		} else
3804			mh = rxbuf->m_head;
3805
		mh->m_pkthdr.len = mh->m_len = MHLEN;
3808		mh->m_flags |= M_PKTHDR;
3809		/* Get the memory mapping */
3810		error = bus_dmamap_load_mbuf_sg(rxr->htag,
3811		    rxbuf->hmap, mh, hseg, &nsegs, BUS_DMA_NOWAIT);
3812		if (error != 0) {
3813			printf("Refresh mbufs: hdr dmamap load"
3814			    " failure - %d\n", error);
3815			m_free(mh);
3816			rxbuf->m_head = NULL;
3817			goto update;
3818		}
3819		rxbuf->m_head = mh;
3820		bus_dmamap_sync(rxr->htag, rxbuf->hmap,
3821		    BUS_DMASYNC_PREREAD);
3822		rxr->rx_base[i].read.hdr_addr =
3823		    htole64(hseg[0].ds_addr);
3824no_split:
3825		if (rxbuf->m_pack == NULL) {
3826			mp = m_getjcl(M_DONTWAIT, MT_DATA,
3827			    M_PKTHDR, adapter->rx_mbuf_sz);
3828			if (mp == NULL)
3829				goto update;
3830		} else
3831			mp = rxbuf->m_pack;
3832
3833		mp->m_pkthdr.len = mp->m_len = adapter->rx_mbuf_sz;
3834		/* Get the memory mapping */
3835		error = bus_dmamap_load_mbuf_sg(rxr->ptag,
3836		    rxbuf->pmap, mp, pseg, &nsegs, BUS_DMA_NOWAIT);
3837		if (error != 0) {
3838			printf("Refresh mbufs: payload dmamap load"
3839			    " failure - %d\n", error);
3840			m_free(mp);
3841			rxbuf->m_pack = NULL;
3842			goto update;
3843		}
3844		rxbuf->m_pack = mp;
3845		bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
3846		    BUS_DMASYNC_PREREAD);
3847		rxr->rx_base[i].read.pkt_addr =
3848		    htole64(pseg[0].ds_addr);
3849		refreshed = TRUE; /* I feel wefreshed :) */
3850
3851		i = j; /* our next is precalculated */
3852		rxr->next_to_refresh = i;
3853		if (++j == adapter->num_rx_desc)
3854			j = 0;
3855	}
3856update:
3857	if (refreshed) /* update tail */
3858		E1000_WRITE_REG(&adapter->hw,
3859		    E1000_RDT(rxr->me), rxr->next_to_refresh);
3860	return;
3861}
3862
3863
3864/*********************************************************************
3865 *
3866 *  Allocate memory for rx_buffer structures. Since we use one
3867 *  rx_buffer per received packet, the maximum number of rx_buffer's
3868 *  that we'll need is equal to the number of receive descriptors
3869 *  that we've allocated.
3870 *
3871 **********************************************************************/
3872static int
3873igb_allocate_receive_buffers(struct rx_ring *rxr)
3874{
3875	struct	adapter 	*adapter = rxr->adapter;
3876	device_t 		dev = adapter->dev;
3877	struct igb_rx_buf	*rxbuf;
3878	int             	i, bsize, error;
3879
3880	bsize = sizeof(struct igb_rx_buf) * adapter->num_rx_desc;
3881	if (!(rxr->rx_buffers =
3882	    (struct igb_rx_buf *) malloc(bsize,
3883	    M_DEVBUF, M_NOWAIT | M_ZERO))) {
3884		device_printf(dev, "Unable to allocate rx_buffer memory\n");
3885		error = ENOMEM;
3886		goto fail;
3887	}
3888
3889	if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
3890				   1, 0,		/* alignment, bounds */
3891				   BUS_SPACE_MAXADDR,	/* lowaddr */
3892				   BUS_SPACE_MAXADDR,	/* highaddr */
3893				   NULL, NULL,		/* filter, filterarg */
3894				   MSIZE,		/* maxsize */
3895				   1,			/* nsegments */
3896				   MSIZE,		/* maxsegsize */
3897				   0,			/* flags */
3898				   NULL,		/* lockfunc */
3899				   NULL,		/* lockfuncarg */
3900				   &rxr->htag))) {
3901		device_printf(dev, "Unable to create RX DMA tag\n");
3902		goto fail;
3903	}
3904
3905	if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
3906				   1, 0,		/* alignment, bounds */
3907				   BUS_SPACE_MAXADDR,	/* lowaddr */
3908				   BUS_SPACE_MAXADDR,	/* highaddr */
3909				   NULL, NULL,		/* filter, filterarg */
3910				   MJUM9BYTES,		/* maxsize */
3911				   1,			/* nsegments */
3912				   MJUM9BYTES,		/* maxsegsize */
3913				   0,			/* flags */
3914				   NULL,		/* lockfunc */
3915				   NULL,		/* lockfuncarg */
3916				   &rxr->ptag))) {
3917		device_printf(dev, "Unable to create RX payload DMA tag\n");
3918		goto fail;
3919	}
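	/*
	** Note: the header tag is sized for a standard mbuf (MSIZE),
	** while the payload tag allows up to a 9K jumbo cluster;
	** together they back the optional header-split receive path.
	*/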
3920
3921	for (i = 0; i < adapter->num_rx_desc; i++) {
3922		rxbuf = &rxr->rx_buffers[i];
3923		error = bus_dmamap_create(rxr->htag,
3924		    BUS_DMA_NOWAIT, &rxbuf->hmap);
3925		if (error) {
3926			device_printf(dev,
3927			    "Unable to create RX head DMA maps\n");
3928			goto fail;
3929		}
3930		error = bus_dmamap_create(rxr->ptag,
3931		    BUS_DMA_NOWAIT, &rxbuf->pmap);
3932		if (error) {
3933			device_printf(dev,
3934			    "Unable to create RX packet DMA maps\n");
3935			goto fail;
3936		}
3937	}
3938
3939	return (0);
3940
3941fail:
3942	/* Frees all, but can handle partial completion */
3943	igb_free_receive_structures(adapter);
3944	return (error);
3945}
3946
3947
3948static void
3949igb_free_receive_ring(struct rx_ring *rxr)
3950{
3951	struct	adapter		*adapter = rxr->adapter;
3952	struct igb_rx_buf	*rxbuf;
3953
3954
3955	for (int i = 0; i < adapter->num_rx_desc; i++) {
3956		rxbuf = &rxr->rx_buffers[i];
3957		if (rxbuf->m_head != NULL) {
3958			bus_dmamap_sync(rxr->htag, rxbuf->hmap,
3959			    BUS_DMASYNC_POSTREAD);
3960			bus_dmamap_unload(rxr->htag, rxbuf->hmap);
3961			rxbuf->m_head->m_flags |= M_PKTHDR;
3962			m_freem(rxbuf->m_head);
3963		}
3964		if (rxbuf->m_pack != NULL) {
3965			bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
3966			    BUS_DMASYNC_POSTREAD);
3967			bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
3968			rxbuf->m_pack->m_flags |= M_PKTHDR;
3969			m_freem(rxbuf->m_pack);
3970		}
3971		rxbuf->m_head = NULL;
3972		rxbuf->m_pack = NULL;
3973	}
3974}
3975
3976
3977/*********************************************************************
3978 *
3979 *  Initialize a receive ring and its buffers.
3980 *
3981 **********************************************************************/
3982static int
3983igb_setup_receive_ring(struct rx_ring *rxr)
3984{
3985	struct	adapter		*adapter;
3986	struct  ifnet		*ifp;
3987	device_t		dev;
3988	struct igb_rx_buf	*rxbuf;
3989	bus_dma_segment_t	pseg[1], hseg[1];
3990	struct lro_ctrl		*lro = &rxr->lro;
3991	int			rsize, nsegs, error = 0;
3992#ifdef DEV_NETMAP
3993	struct netmap_adapter *na = NA(rxr->adapter->ifp);
3994	struct netmap_slot *slot;
3995#endif /* DEV_NETMAP */
3996
3997	adapter = rxr->adapter;
3998	dev = adapter->dev;
3999	ifp = adapter->ifp;
4000
4001	/* Clear the ring contents */
4002	IGB_RX_LOCK(rxr);
4003#ifdef DEV_NETMAP
4004	slot = netmap_reset(na, NR_RX, rxr->me, 0);
4005#endif /* DEV_NETMAP */
4006	rsize = roundup2(adapter->num_rx_desc *
4007	    sizeof(union e1000_adv_rx_desc), IGB_DBA_ALIGN);
4008	bzero((void *)rxr->rx_base, rsize);
4009
4010	/*
4011	** Free current RX buffer structures and their mbufs
4012	*/
4013	igb_free_receive_ring(rxr);
4014
4015	/* Configure for header split? */
4016	if (igb_header_split)
4017		rxr->hdr_split = TRUE;
4018
4019        /* Now replenish the ring mbufs */
	/* Now replenish the ring mbufs */
4021		struct mbuf	*mh, *mp;
4022
4023		rxbuf = &rxr->rx_buffers[j];
4024#ifdef DEV_NETMAP
4025		if (slot) {
			/* slot sj is mapped to the j-th NIC-ring entry */
4027			int sj = j + na->rx_rings[rxr->me].nkr_hwofs;
4028			void *addr;
4029
4030			if (sj < 0)
4031				sj += na->num_rx_desc;
4032			addr = NMB(slot + sj);
4033			netmap_load_map(rxr->ptag,
4034			    rxbuf->pmap, addr, na->buff_size);
4035			/* Update descriptor */
4036			rxr->rx_base[j].read.pkt_addr = htole64(vtophys(addr));
4037			continue;
4038		}
4039#endif /* DEV_NETMAP */
4040		if (rxr->hdr_split == FALSE)
4041			goto skip_head;
4042
4043		/* First the header */
4044		rxbuf->m_head = m_gethdr(M_DONTWAIT, MT_DATA);
4045		if (rxbuf->m_head == NULL) {
4046			error = ENOBUFS;
4047                        goto fail;
4048		}
4049		m_adj(rxbuf->m_head, ETHER_ALIGN);
4050		mh = rxbuf->m_head;
4051		mh->m_len = mh->m_pkthdr.len = MHLEN;
4052		mh->m_flags |= M_PKTHDR;
4053		/* Get the memory mapping */
4054		error = bus_dmamap_load_mbuf_sg(rxr->htag,
4055		    rxbuf->hmap, rxbuf->m_head, hseg,
4056		    &nsegs, BUS_DMA_NOWAIT);
4057		if (error != 0) /* Nothing elegant to do here */
4058                        goto fail;
4059		bus_dmamap_sync(rxr->htag,
4060		    rxbuf->hmap, BUS_DMASYNC_PREREAD);
4061		/* Update descriptor */
4062		rxr->rx_base[j].read.hdr_addr = htole64(hseg[0].ds_addr);
4063
4064skip_head:
4065		/* Now the payload cluster */
4066		rxbuf->m_pack = m_getjcl(M_DONTWAIT, MT_DATA,
4067		    M_PKTHDR, adapter->rx_mbuf_sz);
4068		if (rxbuf->m_pack == NULL) {
4069			error = ENOBUFS;
4070                        goto fail;
4071		}
4072		mp = rxbuf->m_pack;
4073		mp->m_pkthdr.len = mp->m_len = adapter->rx_mbuf_sz;
4074		/* Get the memory mapping */
4075		error = bus_dmamap_load_mbuf_sg(rxr->ptag,
4076		    rxbuf->pmap, mp, pseg,
4077		    &nsegs, BUS_DMA_NOWAIT);
4078		if (error != 0)
4079                        goto fail;
4080		bus_dmamap_sync(rxr->ptag,
4081		    rxbuf->pmap, BUS_DMASYNC_PREREAD);
4082		/* Update descriptor */
4083		rxr->rx_base[j].read.pkt_addr = htole64(pseg[0].ds_addr);
4084        }
4085
4086	/* Setup our descriptor indices */
4087	rxr->next_to_check = 0;
4088	rxr->next_to_refresh = adapter->num_rx_desc - 1;
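	/*
	** Note: next_to_refresh is left one behind the first slot
	** because the refresh loop works one descriptor beyond this
	** mark (see igb_refresh_mbufs).
	*/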
4089	rxr->lro_enabled = FALSE;
4090	rxr->rx_split_packets = 0;
4091	rxr->rx_bytes = 0;
4092
4093	rxr->fmp = NULL;
4094	rxr->lmp = NULL;
4095	rxr->discard = FALSE;
4096
4097	bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4098	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4099
	/*
	** Now set up the LRO interface; we
	** also only do header split when LRO
	** is enabled, since so often they
	** are undesirable in similar setups.
	*/
4106	if (ifp->if_capenable & IFCAP_LRO) {
4107		error = tcp_lro_init(lro);
4108		if (error) {
4109			device_printf(dev, "LRO Initialization failed!\n");
4110			goto fail;
4111		}
4112		INIT_DEBUGOUT("RX LRO Initialized\n");
4113		rxr->lro_enabled = TRUE;
4114		lro->ifp = adapter->ifp;
4115	}
4116
4117	IGB_RX_UNLOCK(rxr);
4118	return (0);
4119
4120fail:
4121	igb_free_receive_ring(rxr);
4122	IGB_RX_UNLOCK(rxr);
4123	return (error);
4124}
4125
4126
4127/*********************************************************************
4128 *
4129 *  Initialize all receive rings.
4130 *
4131 **********************************************************************/
4132static int
4133igb_setup_receive_structures(struct adapter *adapter)
4134{
4135	struct rx_ring *rxr = adapter->rx_rings;
4136	int i;
4137
4138	for (i = 0; i < adapter->num_queues; i++, rxr++)
4139		if (igb_setup_receive_ring(rxr))
4140			goto fail;
4141
4142	return (0);
4143fail:
4144	/*
4145	 * Free RX buffers allocated so far, we will only handle
4146	 * the rings that completed, the failing case will have
4147	 * cleaned up for itself. 'i' is the endpoint.
4148	 */
	for (int j = 0; j < i; ++j) {
		rxr = &adapter->rx_rings[j];
4151		IGB_RX_LOCK(rxr);
4152		igb_free_receive_ring(rxr);
4153		IGB_RX_UNLOCK(rxr);
4154	}
4155
4156	return (ENOBUFS);
4157}
4158
4159/*********************************************************************
4160 *
4161 *  Enable receive unit.
4162 *
4163 **********************************************************************/
4164static void
4165igb_initialize_receive_units(struct adapter *adapter)
4166{
4167	struct rx_ring	*rxr = adapter->rx_rings;
4168	struct ifnet	*ifp = adapter->ifp;
4169	struct e1000_hw *hw = &adapter->hw;
4170	u32		rctl, rxcsum, psize, srrctl = 0;
4171
	INIT_DEBUGOUT("igb_initialize_receive_units: begin");
4173
4174	/*
4175	 * Make sure receives are disabled while setting
4176	 * up the descriptor ring
4177	 */
4178	rctl = E1000_READ_REG(hw, E1000_RCTL);
4179	E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
4180
4181	/*
4182	** Set up for header split
4183	*/
4184	if (igb_header_split) {
4185		/* Use a standard mbuf for the header */
4186		srrctl |= IGB_HDR_BUF << E1000_SRRCTL_BSIZEHDRSIZE_SHIFT;
4187		srrctl |= E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
4188	} else
4189		srrctl |= E1000_SRRCTL_DESCTYPE_ADV_ONEBUF;
4190
4191	/*
4192	** Set up for jumbo frames
4193	*/
4194	if (ifp->if_mtu > ETHERMTU) {
4195		rctl |= E1000_RCTL_LPE;
4196		if (adapter->rx_mbuf_sz == MJUMPAGESIZE) {
4197			srrctl |= 4096 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
4198			rctl |= E1000_RCTL_SZ_4096 | E1000_RCTL_BSEX;
4199		} else if (adapter->rx_mbuf_sz > MJUMPAGESIZE) {
4200			srrctl |= 8192 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
4201			rctl |= E1000_RCTL_SZ_8192 | E1000_RCTL_BSEX;
4202		}
4203		/* Set maximum packet len */
4204		psize = adapter->max_frame_size;
4205		/* are we on a vlan? */
4206		if (adapter->ifp->if_vlantrunk != NULL)
4207			psize += VLAN_TAG_SIZE;
4208		E1000_WRITE_REG(&adapter->hw, E1000_RLPML, psize);
4209	} else {
4210		rctl &= ~E1000_RCTL_LPE;
4211		srrctl |= 2048 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
4212		rctl |= E1000_RCTL_SZ_2048;
4213	}
4214
4215	/* Setup the Base and Length of the Rx Descriptor Rings */
4216	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4217		u64 bus_addr = rxr->rxdma.dma_paddr;
4218		u32 rxdctl;
4219
4220		E1000_WRITE_REG(hw, E1000_RDLEN(i),
4221		    adapter->num_rx_desc * sizeof(struct e1000_rx_desc));
4222		E1000_WRITE_REG(hw, E1000_RDBAH(i),
4223		    (uint32_t)(bus_addr >> 32));
4224		E1000_WRITE_REG(hw, E1000_RDBAL(i),
4225		    (uint32_t)bus_addr);
4226		E1000_WRITE_REG(hw, E1000_SRRCTL(i), srrctl);
4227		/* Enable this Queue */
4228		rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(i));
4229		rxdctl |= E1000_RXDCTL_QUEUE_ENABLE;
4230		rxdctl &= 0xFFF00000;
4231		rxdctl |= IGB_RX_PTHRESH;
4232		rxdctl |= IGB_RX_HTHRESH << 8;
4233		rxdctl |= IGB_RX_WTHRESH << 16;
4234		E1000_WRITE_REG(hw, E1000_RXDCTL(i), rxdctl);
4235	}
4236
4237	/*
4238	** Setup for RX MultiQueue
4239	*/
4240	rxcsum = E1000_READ_REG(hw, E1000_RXCSUM);
	if (adapter->num_queues > 1) {
4242		u32 random[10], mrqc, shift = 0;
4243		union igb_reta {
4244			u32 dword;
4245			u8  bytes[4];
4246		} reta;
4247
4248		arc4rand(&random, sizeof(random), 0);
4249		if (adapter->hw.mac.type == e1000_82575)
4250			shift = 6;
4251		/* Warning FM follows */
4252		for (int i = 0; i < 128; i++) {
4253			reta.bytes[i & 3] =
4254			    (i % adapter->num_queues) << shift;
4255			if ((i & 3) == 3)
4256				E1000_WRITE_REG(hw,
4257				    E1000_RETA(i >> 2), reta.dword);
4258		}
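		/*
		 * Illustration: with two queues the RETA bytes cycle
		 * 0,1,0,1,... and every fourth entry flushes a full
		 * dword to hardware; on the 82575 the queue index is
		 * carried in bits 7:6 of each byte, hence shift = 6.
		 */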
4259		/* Now fill in hash table */
4260		mrqc = E1000_MRQC_ENABLE_RSS_4Q;
4261		for (int i = 0; i < 10; i++)
4262			E1000_WRITE_REG_ARRAY(hw,
4263			    E1000_RSSRK(0), i, random[i]);
4264
4265		mrqc |= (E1000_MRQC_RSS_FIELD_IPV4 |
4266		    E1000_MRQC_RSS_FIELD_IPV4_TCP);
4267		mrqc |= (E1000_MRQC_RSS_FIELD_IPV6 |
4268		    E1000_MRQC_RSS_FIELD_IPV6_TCP);
		mrqc |= (E1000_MRQC_RSS_FIELD_IPV4_UDP |
		    E1000_MRQC_RSS_FIELD_IPV6_UDP);
		mrqc |= (E1000_MRQC_RSS_FIELD_IPV6_UDP_EX |
		    E1000_MRQC_RSS_FIELD_IPV6_TCP_EX);
4273
4274		E1000_WRITE_REG(hw, E1000_MRQC, mrqc);
4275
4276		/*
4277		** NOTE: Receive Full-Packet Checksum Offload
4278		** is mutually exclusive with Multiqueue. However
4279		** this is not the same as TCP/IP checksums which
4280		** still work.
4281		*/
4282		rxcsum |= E1000_RXCSUM_PCSD;
4283#if __FreeBSD_version >= 800000
4284		/* For SCTP Offload */
4285		if ((hw->mac.type == e1000_82576)
4286		    && (ifp->if_capenable & IFCAP_RXCSUM))
4287			rxcsum |= E1000_RXCSUM_CRCOFL;
4288#endif
4289	} else {
4290		/* Non RSS setup */
4291		if (ifp->if_capenable & IFCAP_RXCSUM) {
4292			rxcsum |= E1000_RXCSUM_IPPCSE;
4293#if __FreeBSD_version >= 800000
4294			if (adapter->hw.mac.type == e1000_82576)
4295				rxcsum |= E1000_RXCSUM_CRCOFL;
4296#endif
4297		} else
4298			rxcsum &= ~E1000_RXCSUM_TUOFL;
4299	}
4300	E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum);
4301
4302	/* Setup the Receive Control Register */
4303	rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
4304	rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_LBM_NO |
4305		   E1000_RCTL_RDMTS_HALF |
4306		   (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
4307	/* Strip CRC bytes. */
4308	rctl |= E1000_RCTL_SECRC;
4309	/* Make sure VLAN Filters are off */
4310	rctl &= ~E1000_RCTL_VFE;
4311	/* Don't store bad packets */
4312	rctl &= ~E1000_RCTL_SBP;
4313
4314	/* Enable Receives */
4315	E1000_WRITE_REG(hw, E1000_RCTL, rctl);
4316
4317	/*
4318	 * Setup the HW Rx Head and Tail Descriptor Pointers
4319	 *   - needs to be after enable
4320	 */
4321	for (int i = 0; i < adapter->num_queues; i++) {
4322		rxr = &adapter->rx_rings[i];
4323		E1000_WRITE_REG(hw, E1000_RDH(i), rxr->next_to_check);
4324#ifdef DEV_NETMAP
		/*
		 * An init() while a netmap client is active must
		 * preserve the rx buffers passed to userspace.
		 * In this driver it means we adjust RDT to
		 * something different from next_to_refresh
		 * (which is not used in netmap mode).
		 */
4332		if (ifp->if_capenable & IFCAP_NETMAP) {
4333			struct netmap_adapter *na = NA(adapter->ifp);
4334			struct netmap_kring *kring = &na->rx_rings[i];
4335			int t = rxr->next_to_refresh - kring->nr_hwavail;
4336
4337			if (t >= adapter->num_rx_desc)
4338				t -= adapter->num_rx_desc;
4339			else if (t < 0)
4340				t += adapter->num_rx_desc;
4341			E1000_WRITE_REG(hw, E1000_RDT(i), t);
4342		} else
4343#endif /* DEV_NETMAP */
4344		E1000_WRITE_REG(hw, E1000_RDT(i), rxr->next_to_refresh);
4345	}
4346	return;
4347}
4348
4349/*********************************************************************
4350 *
4351 *  Free receive rings.
4352 *
4353 **********************************************************************/
4354static void
4355igb_free_receive_structures(struct adapter *adapter)
4356{
4357	struct rx_ring *rxr = adapter->rx_rings;
4358
4359	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4360		struct lro_ctrl	*lro = &rxr->lro;
4361		igb_free_receive_buffers(rxr);
4362		tcp_lro_free(lro);
4363		igb_dma_free(adapter, &rxr->rxdma);
4364	}
4365
4366	free(adapter->rx_rings, M_DEVBUF);
4367}
4368
4369/*********************************************************************
4370 *
4371 *  Free receive ring data structures.
4372 *
4373 **********************************************************************/
4374static void
4375igb_free_receive_buffers(struct rx_ring *rxr)
4376{
4377	struct adapter		*adapter = rxr->adapter;
4378	struct igb_rx_buf	*rxbuf;
4379	int i;
4380
4381	INIT_DEBUGOUT("free_receive_structures: begin");
4382
4383	/* Cleanup any existing buffers */
4384	if (rxr->rx_buffers != NULL) {
4385		for (i = 0; i < adapter->num_rx_desc; i++) {
4386			rxbuf = &rxr->rx_buffers[i];
4387			if (rxbuf->m_head != NULL) {
4388				bus_dmamap_sync(rxr->htag, rxbuf->hmap,
4389				    BUS_DMASYNC_POSTREAD);
4390				bus_dmamap_unload(rxr->htag, rxbuf->hmap);
4391				rxbuf->m_head->m_flags |= M_PKTHDR;
4392				m_freem(rxbuf->m_head);
4393			}
4394			if (rxbuf->m_pack != NULL) {
4395				bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
4396				    BUS_DMASYNC_POSTREAD);
4397				bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
4398				rxbuf->m_pack->m_flags |= M_PKTHDR;
4399				m_freem(rxbuf->m_pack);
4400			}
4401			rxbuf->m_head = NULL;
4402			rxbuf->m_pack = NULL;
4403			if (rxbuf->hmap != NULL) {
4404				bus_dmamap_destroy(rxr->htag, rxbuf->hmap);
4405				rxbuf->hmap = NULL;
4406			}
4407			if (rxbuf->pmap != NULL) {
4408				bus_dmamap_destroy(rxr->ptag, rxbuf->pmap);
4409				rxbuf->pmap = NULL;
4410			}
4411		}
4412		if (rxr->rx_buffers != NULL) {
4413			free(rxr->rx_buffers, M_DEVBUF);
4414			rxr->rx_buffers = NULL;
4415		}
4416	}
4417
4418	if (rxr->htag != NULL) {
4419		bus_dma_tag_destroy(rxr->htag);
4420		rxr->htag = NULL;
4421	}
4422	if (rxr->ptag != NULL) {
4423		bus_dma_tag_destroy(rxr->ptag);
4424		rxr->ptag = NULL;
4425	}
4426}
4427
4428static __inline void
4429igb_rx_discard(struct rx_ring *rxr, int i)
4430{
4431	struct igb_rx_buf	*rbuf;
4432
4433	rbuf = &rxr->rx_buffers[i];
4434
4435	/* Partially received? Free the chain */
4436	if (rxr->fmp != NULL) {
4437		rxr->fmp->m_flags |= M_PKTHDR;
4438		m_freem(rxr->fmp);
4439		rxr->fmp = NULL;
4440		rxr->lmp = NULL;
4441	}
4442
	/*
	** With advanced descriptors the writeback
	** clobbers the buffer addrs, so it's easier
	** to just free the existing mbufs and take
	** the normal refresh path to get new buffers
	** and mapping.
	*/
4450	if (rbuf->m_head) {
4451		m_free(rbuf->m_head);
4452		rbuf->m_head = NULL;
4453	}
4454
4455	if (rbuf->m_pack) {
4456		m_free(rbuf->m_pack);
4457		rbuf->m_pack = NULL;
4458	}
4459
4460	return;
4461}
4462
4463static __inline void
4464igb_rx_input(struct rx_ring *rxr, struct ifnet *ifp, struct mbuf *m, u32 ptype)
4465{
4466
	/*
	 * At the moment LRO is only for IPv4/TCP packets whose TCP
	 * checksum has been verified by hardware, and which carry
	 * no VLAN tag in the ethernet header.
	 */
4472	if (rxr->lro_enabled &&
4473	    (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 &&
4474	    (ptype & E1000_RXDADV_PKTTYPE_ETQF) == 0 &&
4475	    (ptype & (E1000_RXDADV_PKTTYPE_IPV4 | E1000_RXDADV_PKTTYPE_TCP)) ==
4476	    (E1000_RXDADV_PKTTYPE_IPV4 | E1000_RXDADV_PKTTYPE_TCP) &&
4477	    (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) ==
4478	    (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) {
		/*
		 * Send to the stack if:
		 *  - LRO not enabled, or
		 *  - no LRO resources, or
		 *  - lro enqueue fails
		 */
4485		if (rxr->lro.lro_cnt != 0)
4486			if (tcp_lro_rx(&rxr->lro, m, 0) == 0)
4487				return;
4488	}
4489	IGB_RX_UNLOCK(rxr);
4490	(*ifp->if_input)(ifp, m);
4491	IGB_RX_LOCK(rxr);
4492}
4493
4494/*********************************************************************
4495 *
4496 *  This routine executes in interrupt context. It replenishes
4497 *  the mbufs in the descriptor and sends data which has been
4498 *  dma'ed into host memory to upper layer.
4499 *
4500 *  We loop at most count times if count is > 0, or until done if
4501 *  count < 0.
4502 *
4503 *  Return TRUE if more to clean, FALSE otherwise
4504 *********************************************************************/
4505static bool
4506igb_rxeof(struct igb_queue *que, int count, int *done)
4507{
4508	struct adapter		*adapter = que->adapter;
4509	struct rx_ring		*rxr = que->rxr;
4510	struct ifnet		*ifp = adapter->ifp;
4511	struct lro_ctrl		*lro = &rxr->lro;
4512	struct lro_entry	*queued;
4513	int			i, processed = 0, rxdone = 0;
4514	u32			ptype, staterr = 0;
4515	union e1000_adv_rx_desc	*cur;
4516
4517	IGB_RX_LOCK(rxr);
4518	/* Sync the ring. */
4519	bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4520	    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
4521
4522#ifdef DEV_NETMAP
4523	if (ifp->if_capenable & IFCAP_NETMAP) {
4524		struct netmap_adapter *na = NA(ifp);
4525
4526		selwakeuppri(&na->rx_rings[rxr->me].si, PI_NET);
4527		IGB_RX_UNLOCK(rxr);
4528		IGB_CORE_LOCK(adapter);
4529		selwakeuppri(&na->rx_rings[na->num_queues + 1].si, PI_NET);
4530		IGB_CORE_UNLOCK(adapter);
		return (FALSE);
4532	}
4533#endif /* DEV_NETMAP */
4534
4535	/* Main clean loop */
4536	for (i = rxr->next_to_check; count != 0;) {
4537		struct mbuf		*sendmp, *mh, *mp;
4538		struct igb_rx_buf	*rxbuf;
4539		u16			hlen, plen, hdr, vtag;
4540		bool			eop = FALSE;
4541
4542		cur = &rxr->rx_base[i];
4543		staterr = le32toh(cur->wb.upper.status_error);
4544		if ((staterr & E1000_RXD_STAT_DD) == 0)
4545			break;
4546		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
4547			break;
4548		count--;
4549		sendmp = mh = mp = NULL;
4550		cur->wb.upper.status_error = 0;
4551		rxbuf = &rxr->rx_buffers[i];
4552		plen = le16toh(cur->wb.upper.length);
4553		ptype = le32toh(cur->wb.lower.lo_dword.data) & IGB_PKTTYPE_MASK;
4554		if ((adapter->hw.mac.type == e1000_i350) &&
4555		    (staterr & E1000_RXDEXT_STATERR_LB))
4556			vtag = be16toh(cur->wb.upper.vlan);
4557		else
4558			vtag = le16toh(cur->wb.upper.vlan);
4559		hdr = le16toh(cur->wb.lower.lo_dword.hs_rss.hdr_info);
4560		eop = ((staterr & E1000_RXD_STAT_EOP) == E1000_RXD_STAT_EOP);
4561
4562		/* Make sure all segments of a bad packet are discarded */
4563		if (((staterr & E1000_RXDEXT_ERR_FRAME_ERR_MASK) != 0) ||
4564		    (rxr->discard)) {
4565			ifp->if_ierrors++;
4566			++rxr->rx_discarded;
4567			if (!eop) /* Catch subsequent segs */
4568				rxr->discard = TRUE;
4569			else
4570				rxr->discard = FALSE;
4571			igb_rx_discard(rxr, i);
4572			goto next_desc;
4573		}
4574
4575		/*
4576		** The way the hardware is configured to
4577		** split, it will ONLY use the header buffer
4578		** when header split is enabled, otherwise we
4579		** get normal behavior, ie, both header and
4580		** payload are DMA'd into the payload buffer.
4581		**
4582		** The fmp test is to catch the case where a
4583		** packet spans multiple descriptors, in that
4584		** case only the first header is valid.
4585		*/
4586		if (rxr->hdr_split && rxr->fmp == NULL) {
4587			hlen = (hdr & E1000_RXDADV_HDRBUFLEN_MASK) >>
4588			    E1000_RXDADV_HDRBUFLEN_SHIFT;
4589			if (hlen > IGB_HDR_BUF)
4590				hlen = IGB_HDR_BUF;
4591			mh = rxr->rx_buffers[i].m_head;
4592			mh->m_len = hlen;
4593			/* clear buf pointer for refresh */
4594			rxbuf->m_head = NULL;
4595			/*
4596			** Get the payload length, this
4597			** could be zero if its a small
4598			** packet.
4599			*/
4600			if (plen > 0) {
4601				mp = rxr->rx_buffers[i].m_pack;
4602				mp->m_len = plen;
4603				mh->m_next = mp;
4604				/* clear buf pointer */
4605				rxbuf->m_pack = NULL;
4606				rxr->rx_split_packets++;
4607			}
4608		} else {
4609			/*
4610			** Either no header split, or a
4611			** secondary piece of a fragmented
4612			** split packet.
4613			*/
4614			mh = rxr->rx_buffers[i].m_pack;
4615			mh->m_len = plen;
4616			/* clear buf info for refresh */
4617			rxbuf->m_pack = NULL;
4618		}
4619
4620		++processed; /* So we know when to refresh */
4621
4622		/* Initial frame - setup */
4623		if (rxr->fmp == NULL) {
4624			mh->m_pkthdr.len = mh->m_len;
4625			/* Save the head of the chain */
4626			rxr->fmp = mh;
4627			rxr->lmp = mh;
4628			if (mp != NULL) {
4629				/* Add payload if split */
4630				mh->m_pkthdr.len += mp->m_len;
4631				rxr->lmp = mh->m_next;
4632			}
4633		} else {
4634			/* Chain mbuf's together */
4635			rxr->lmp->m_next = mh;
4636			rxr->lmp = rxr->lmp->m_next;
4637			rxr->fmp->m_pkthdr.len += mh->m_len;
4638		}
4639
4640		if (eop) {
4641			rxr->fmp->m_pkthdr.rcvif = ifp;
4642			ifp->if_ipackets++;
4643			rxr->rx_packets++;
4644			/* capture data for AIM */
4645			rxr->packets++;
4646			rxr->bytes += rxr->fmp->m_pkthdr.len;
4647			rxr->rx_bytes += rxr->fmp->m_pkthdr.len;
4648
4649			if ((ifp->if_capenable & IFCAP_RXCSUM) != 0)
4650				igb_rx_checksum(staterr, rxr->fmp, ptype);
4651
4652			if ((ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 &&
4653			    (staterr & E1000_RXD_STAT_VP) != 0) {
4654				rxr->fmp->m_pkthdr.ether_vtag = vtag;
4655				rxr->fmp->m_flags |= M_VLANTAG;
4656			}
4657#if __FreeBSD_version >= 800000
4658			rxr->fmp->m_pkthdr.flowid = que->msix;
4659			rxr->fmp->m_flags |= M_FLOWID;
4660#endif
4661			sendmp = rxr->fmp;
4662			/* Make sure to set M_PKTHDR. */
4663			sendmp->m_flags |= M_PKTHDR;
4664			rxr->fmp = NULL;
4665			rxr->lmp = NULL;
4666		}
4667
4668next_desc:
4669		bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4670		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4671
4672		/* Advance our pointers to the next descriptor. */
4673		if (++i == adapter->num_rx_desc)
4674			i = 0;
4675		/*
4676		** Send to the stack or LRO
4677		*/
4678		if (sendmp != NULL) {
4679			rxr->next_to_check = i;
4680			igb_rx_input(rxr, ifp, sendmp, ptype);
4681			i = rxr->next_to_check;
4682			rxdone++;
4683		}
4684
4685		/* Every 8 descriptors we go to refresh mbufs */
		if (processed == 8) {
			igb_refresh_mbufs(rxr, i);
			processed = 0;
		}
4690	}
4691
4692	/* Catch any remainders */
4693	if (igb_rx_unrefreshed(rxr))
4694		igb_refresh_mbufs(rxr, i);
4695
4696	rxr->next_to_check = i;
4697
4698	/*
4699	 * Flush any outstanding LRO work
4700	 */
4701	while ((queued = SLIST_FIRST(&lro->lro_active)) != NULL) {
4702		SLIST_REMOVE_HEAD(&lro->lro_active, next);
4703		tcp_lro_flush(lro, queued);
4704	}
4705
4706	if (done != NULL)
4707		*done = rxdone;
4708
4709	IGB_RX_UNLOCK(rxr);
4710	return ((staterr & E1000_RXD_STAT_DD) ? TRUE : FALSE);
4711}
4712
4713/*********************************************************************
4714 *
4715 *  Verify that the hardware indicated that the checksum is valid.
4716 *  Inform the stack about the status of checksum so that stack
4717 *  doesn't spend time verifying the checksum.
4718 *
4719 *********************************************************************/
4720static void
4721igb_rx_checksum(u32 staterr, struct mbuf *mp, u32 ptype)
4722{
4723	u16 status = (u16)staterr;
4724	u8  errors = (u8) (staterr >> 24);
4725	int sctp;
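	/*
	** Note: the writeback status/error dword is split the same
	** way as the legacy descriptor layout: status bits in the
	** low word, error bits in the top byte.
	*/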
4726
4727	/* Ignore Checksum bit is set */
4728	if (status & E1000_RXD_STAT_IXSM) {
4729		mp->m_pkthdr.csum_flags = 0;
4730		return;
4731	}
4732
4733	if ((ptype & E1000_RXDADV_PKTTYPE_ETQF) == 0 &&
4734	    (ptype & E1000_RXDADV_PKTTYPE_SCTP) != 0)
4735		sctp = 1;
4736	else
4737		sctp = 0;
4738	if (status & E1000_RXD_STAT_IPCS) {
4739		/* Did it pass? */
4740		if (!(errors & E1000_RXD_ERR_IPE)) {
4741			/* IP Checksum Good */
4742			mp->m_pkthdr.csum_flags = CSUM_IP_CHECKED;
4743			mp->m_pkthdr.csum_flags |= CSUM_IP_VALID;
4744		} else
4745			mp->m_pkthdr.csum_flags = 0;
4746	}
4747
4748	if (status & (E1000_RXD_STAT_TCPCS | E1000_RXD_STAT_UDPCS)) {
4749		u16 type = (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
4750#if __FreeBSD_version >= 800000
4751		if (sctp) /* reassign */
4752			type = CSUM_SCTP_VALID;
4753#endif
4754		/* Did it pass? */
4755		if (!(errors & E1000_RXD_ERR_TCPE)) {
4756			mp->m_pkthdr.csum_flags |= type;
4757			if (sctp == 0)
4758				mp->m_pkthdr.csum_data = htons(0xffff);
4759		}
4760	}
4761	return;
4762}
4763
/*
 * This routine is run via a vlan
 * config EVENT
 */
4768static void
4769igb_register_vlan(void *arg, struct ifnet *ifp, u16 vtag)
4770{
4771	struct adapter	*adapter = ifp->if_softc;
4772	u32		index, bit;
4773
4774	if (ifp->if_softc !=  arg)   /* Not our event */
4775		return;
4776
4777	if ((vtag == 0) || (vtag > 4095))       /* Invalid */
4778                return;
4779
4780	IGB_CORE_LOCK(adapter);
4781	index = (vtag >> 5) & 0x7F;
4782	bit = vtag & 0x1F;
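	/*
	 * Example: vtag 100 selects dword 3 (100 >> 5), bit 4
	 * (100 & 0x1F) of the 128-dword, 4096-bit shadow VFTA.
	 */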
4783	adapter->shadow_vfta[index] |= (1 << bit);
4784	++adapter->num_vlans;
4785	/* Change hw filter setting */
4786	if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
4787		igb_setup_vlan_hw_support(adapter);
4788	IGB_CORE_UNLOCK(adapter);
4789}
4790
/*
 * This routine is run via a vlan
 * unconfig EVENT
 */
4795static void
4796igb_unregister_vlan(void *arg, struct ifnet *ifp, u16 vtag)
4797{
4798	struct adapter	*adapter = ifp->if_softc;
4799	u32		index, bit;
4800
4801	if (ifp->if_softc !=  arg)
4802		return;
4803
4804	if ((vtag == 0) || (vtag > 4095))       /* Invalid */
4805                return;
4806
4807	IGB_CORE_LOCK(adapter);
4808	index = (vtag >> 5) & 0x7F;
4809	bit = vtag & 0x1F;
4810	adapter->shadow_vfta[index] &= ~(1 << bit);
4811	--adapter->num_vlans;
4812	/* Change hw filter setting */
4813	if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
4814		igb_setup_vlan_hw_support(adapter);
4815	IGB_CORE_UNLOCK(adapter);
4816}
4817
4818static void
4819igb_setup_vlan_hw_support(struct adapter *adapter)
4820{
4821	struct e1000_hw *hw = &adapter->hw;
4822	struct ifnet	*ifp = adapter->ifp;
4823	u32             reg;
4824
4825	if (adapter->vf_ifp) {
4826		e1000_rlpml_set_vf(hw,
4827		    adapter->max_frame_size + VLAN_TAG_SIZE);
4828		return;
4829	}
4830
4831	reg = E1000_READ_REG(hw, E1000_CTRL);
4832	reg |= E1000_CTRL_VME;
4833	E1000_WRITE_REG(hw, E1000_CTRL, reg);
4834
4835	/* Enable the Filter Table */
4836	if (ifp->if_capenable & IFCAP_VLAN_HWFILTER) {
4837		reg = E1000_READ_REG(hw, E1000_RCTL);
4838		reg &= ~E1000_RCTL_CFIEN;
4839		reg |= E1000_RCTL_VFE;
4840		E1000_WRITE_REG(hw, E1000_RCTL, reg);
4841	}
4842
4843	/* Update the frame size */
4844	E1000_WRITE_REG(&adapter->hw, E1000_RLPML,
4845	    adapter->max_frame_size + VLAN_TAG_SIZE);
4846
4847	/* Don't bother with table if no vlans */
4848	if ((adapter->num_vlans == 0) ||
4849	    ((ifp->if_capenable & IFCAP_VLAN_HWFILTER) == 0))
4850                return;
	/*
	** A soft reset zeroes out the VFTA, so
	** we need to repopulate it now.
	*/
	for (int i = 0; i < IGB_VFTA_SIZE; i++)
		if (adapter->shadow_vfta[i] != 0) {
			if (adapter->vf_ifp)
				e1000_vfta_set_vf(hw,
				    adapter->shadow_vfta[i], TRUE);
			else
				E1000_WRITE_REG_ARRAY(hw, E1000_VFTA,
				    i, adapter->shadow_vfta[i]);
		}
4864}
4865
4866static void
4867igb_enable_intr(struct adapter *adapter)
4868{
4869	/* With RSS set up what to auto clear */
4870	if (adapter->msix_mem) {
4871		u32 mask = (adapter->que_mask | adapter->link_mask);
4872		E1000_WRITE_REG(&adapter->hw, E1000_EIAC, mask);
4873		E1000_WRITE_REG(&adapter->hw, E1000_EIAM, mask);
4874		E1000_WRITE_REG(&adapter->hw, E1000_EIMS, mask);
4875		E1000_WRITE_REG(&adapter->hw, E1000_IMS,
4876		    E1000_IMS_LSC);
4877	} else {
4878		E1000_WRITE_REG(&adapter->hw, E1000_IMS,
4879		    IMS_ENABLE_MASK);
4880	}
4881	E1000_WRITE_FLUSH(&adapter->hw);
4882
4883	return;
4884}
4885
4886static void
4887igb_disable_intr(struct adapter *adapter)
4888{
4889	if (adapter->msix_mem) {
4890		E1000_WRITE_REG(&adapter->hw, E1000_EIMC, ~0);
4891		E1000_WRITE_REG(&adapter->hw, E1000_EIAC, 0);
4892	}
4893	E1000_WRITE_REG(&adapter->hw, E1000_IMC, ~0);
4894	E1000_WRITE_FLUSH(&adapter->hw);
4895	return;
4896}
4897
4898/*
4899 * Bit of a misnomer, what this really means is
4900 * to enable OS management of the system... aka
4901 * to disable special hardware management features
4902 */
4903static void
4904igb_init_manageability(struct adapter *adapter)
4905{
4906	if (adapter->has_manage) {
4907		int manc2h = E1000_READ_REG(&adapter->hw, E1000_MANC2H);
4908		int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
4909
4910		/* disable hardware interception of ARP */
4911		manc &= ~(E1000_MANC_ARP_EN);
4912
4913                /* enable receiving management packets to the host */
4914		manc |= E1000_MANC_EN_MNG2HOST;
4915		manc2h |= 1 << 5;  /* Mng Port 623 */
4916		manc2h |= 1 << 6;  /* Mng Port 664 */
4917		E1000_WRITE_REG(&adapter->hw, E1000_MANC2H, manc2h);
4918		E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
4919	}
4920}
4921
4922/*
4923 * Give control back to hardware management
4924 * controller if there is one.
4925 */
4926static void
4927igb_release_manageability(struct adapter *adapter)
4928{
4929	if (adapter->has_manage) {
4930		int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
4931
4932		/* re-enable hardware interception of ARP */
4933		manc |= E1000_MANC_ARP_EN;
4934		manc &= ~E1000_MANC_EN_MNG2HOST;
4935
4936		E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
4937	}
4938}
4939
4940/*
4941 * igb_get_hw_control sets CTRL_EXT:DRV_LOAD bit.
4942 * For ASF and Pass Through versions of f/w this means that
4943 * the driver is loaded.
4944 *
4945 */
4946static void
4947igb_get_hw_control(struct adapter *adapter)
4948{
4949	u32 ctrl_ext;
4950
4951	if (adapter->vf_ifp)
4952		return;
4953
4954	/* Let firmware know the driver has taken over */
4955	ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4956	E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
4957	    ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
4958}
4959
4960/*
4961 * igb_release_hw_control resets CTRL_EXT:DRV_LOAD bit.
4962 * For ASF and Pass Through versions of f/w this means that the
4963 * driver is no longer loaded.
4964 *
4965 */
4966static void
4967igb_release_hw_control(struct adapter *adapter)
4968{
4969	u32 ctrl_ext;
4970
4971	if (adapter->vf_ifp)
4972		return;
4973
	/* Let firmware take over control of h/w */
4975	ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4976	E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
4977	    ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
4978}
4979
4980static int
4981igb_is_valid_ether_addr(uint8_t *addr)
4982{
4983	char zero_addr[6] = { 0, 0, 0, 0, 0, 0 };
4984
4985	if ((addr[0] & 1) || (!bcmp(addr, zero_addr, ETHER_ADDR_LEN))) {
4986		return (FALSE);
4987	}
4988
4989	return (TRUE);
4990}
4991
4992
4993/*
4994 * Enable PCI Wake On Lan capability
4995 */
4996static void
4997igb_enable_wakeup(device_t dev)
4998{
4999	u16     cap, status;
5000	u8      id;
5001
	/* First find the capabilities pointer */
5003	cap = pci_read_config(dev, PCIR_CAP_PTR, 2);
5004	/* Read the PM Capabilities */
5005	id = pci_read_config(dev, cap, 1);
5006	if (id != PCIY_PMG)     /* Something wrong */
5007		return;
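	/*
	 * Note: this assumes power management is the first entry in
	 * the PCI capability list; the chain is not walked further.
	 */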
5008	/* OK, we have the power capabilities, so
5009	   now get the status register */
5010	cap += PCIR_POWER_STATUS;
5011	status = pci_read_config(dev, cap, 2);
5012	status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE;
5013	pci_write_config(dev, cap, status, 2);
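	/*
	 * Writing PME_Status (write-one-to-clear) discards any stale
	 * wake event, while PME_En arms the device to assert PME#.
	 */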
5014	return;
5015}
5016
5017static void
5018igb_led_func(void *arg, int onoff)
5019{
5020	struct adapter	*adapter = arg;
5021
5022	IGB_CORE_LOCK(adapter);
5023	if (onoff) {
5024		e1000_setup_led(&adapter->hw);
5025		e1000_led_on(&adapter->hw);
5026	} else {
5027		e1000_led_off(&adapter->hw);
5028		e1000_cleanup_led(&adapter->hw);
5029	}
5030	IGB_CORE_UNLOCK(adapter);
5031}
5032
5033/**********************************************************************
5034 *
5035 *  Update the board statistics counters.
5036 *
5037 **********************************************************************/
5038static void
5039igb_update_stats_counters(struct adapter *adapter)
5040{
5041	struct ifnet		*ifp;
5042        struct e1000_hw		*hw = &adapter->hw;
5043	struct e1000_hw_stats	*stats;
5044
5045	/*
5046	** The virtual function adapter has only a
5047	** small controlled set of stats, do only
5048	** those and return.
5049	*/
5050	if (adapter->vf_ifp) {
5051		igb_update_vf_stats_counters(adapter);
5052		return;
5053	}
5054
5055	stats = (struct e1000_hw_stats	*)adapter->stats;
5056
	if (adapter->hw.phy.media_type == e1000_media_type_copper ||
	    (E1000_READ_REG(hw, E1000_STATUS) & E1000_STATUS_LU)) {
		stats->symerrs += E1000_READ_REG(hw, E1000_SYMERRS);
5061		stats->sec += E1000_READ_REG(hw, E1000_SEC);
5062	}
5063
5064	stats->crcerrs += E1000_READ_REG(hw, E1000_CRCERRS);
5065	stats->mpc += E1000_READ_REG(hw, E1000_MPC);
5066	stats->scc += E1000_READ_REG(hw, E1000_SCC);
5067	stats->ecol += E1000_READ_REG(hw, E1000_ECOL);
5068
5069	stats->mcc += E1000_READ_REG(hw, E1000_MCC);
5070	stats->latecol += E1000_READ_REG(hw, E1000_LATECOL);
5071	stats->colc += E1000_READ_REG(hw, E1000_COLC);
5072	stats->dc += E1000_READ_REG(hw, E1000_DC);
5073	stats->rlec += E1000_READ_REG(hw, E1000_RLEC);
5074	stats->xonrxc += E1000_READ_REG(hw, E1000_XONRXC);
5075	stats->xontxc += E1000_READ_REG(hw, E1000_XONTXC);
5076	/*
5077	** For watchdog management we need to know if we have been
5078	** paused during the last interval, so capture that here.
5079	*/
	adapter->pause_frames = E1000_READ_REG(hw, E1000_XOFFRXC);
	stats->xoffrxc += adapter->pause_frames;
5082	stats->xofftxc += E1000_READ_REG(hw, E1000_XOFFTXC);
5083	stats->fcruc += E1000_READ_REG(hw, E1000_FCRUC);
5084	stats->prc64 += E1000_READ_REG(hw, E1000_PRC64);
5085	stats->prc127 += E1000_READ_REG(hw, E1000_PRC127);
5086	stats->prc255 += E1000_READ_REG(hw, E1000_PRC255);
5087	stats->prc511 += E1000_READ_REG(hw, E1000_PRC511);
5088	stats->prc1023 += E1000_READ_REG(hw, E1000_PRC1023);
5089	stats->prc1522 += E1000_READ_REG(hw, E1000_PRC1522);
5090	stats->gprc += E1000_READ_REG(hw, E1000_GPRC);
5091	stats->bprc += E1000_READ_REG(hw, E1000_BPRC);
5092	stats->mprc += E1000_READ_REG(hw, E1000_MPRC);
5093	stats->gptc += E1000_READ_REG(hw, E1000_GPTC);
5094
5095	/* For the 64-bit byte counters the low dword must be read first. */
5096	/* Both registers clear on the read of the high dword */
5097
5098	stats->gorc += E1000_READ_REG(hw, E1000_GORCL) +
5099	    ((u64)E1000_READ_REG(hw, E1000_GORCH) << 32);
5100	stats->gotc += E1000_READ_REG(hw, E1000_GOTCL) +
5101	    ((u64)E1000_READ_REG(hw, E1000_GOTCH) << 32);
5102
5103	stats->rnbc += E1000_READ_REG(hw, E1000_RNBC);
5104	stats->ruc += E1000_READ_REG(hw, E1000_RUC);
5105	stats->rfc += E1000_READ_REG(hw, E1000_RFC);
5106	stats->roc += E1000_READ_REG(hw, E1000_ROC);
5107	stats->rjc += E1000_READ_REG(hw, E1000_RJC);
5108
5109	stats->tor += E1000_READ_REG(hw, E1000_TORH);
5110	stats->tot += E1000_READ_REG(hw, E1000_TOTH);
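	/* XXX unlike GORC/GOTC above, only the high dwords of the
	   total-octets counters are read here */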
5111
5112	stats->tpr += E1000_READ_REG(hw, E1000_TPR);
5113	stats->tpt += E1000_READ_REG(hw, E1000_TPT);
5114	stats->ptc64 += E1000_READ_REG(hw, E1000_PTC64);
5115	stats->ptc127 += E1000_READ_REG(hw, E1000_PTC127);
5116	stats->ptc255 += E1000_READ_REG(hw, E1000_PTC255);
5117	stats->ptc511 += E1000_READ_REG(hw, E1000_PTC511);
5118	stats->ptc1023 += E1000_READ_REG(hw, E1000_PTC1023);
5119	stats->ptc1522 += E1000_READ_REG(hw, E1000_PTC1522);
5120	stats->mptc += E1000_READ_REG(hw, E1000_MPTC);
5121	stats->bptc += E1000_READ_REG(hw, E1000_BPTC);
5122
5123	/* Interrupt Counts */
5124
5125	stats->iac += E1000_READ_REG(hw, E1000_IAC);
5126	stats->icrxptc += E1000_READ_REG(hw, E1000_ICRXPTC);
5127	stats->icrxatc += E1000_READ_REG(hw, E1000_ICRXATC);
5128	stats->ictxptc += E1000_READ_REG(hw, E1000_ICTXPTC);
5129	stats->ictxatc += E1000_READ_REG(hw, E1000_ICTXATC);
5130	stats->ictxqec += E1000_READ_REG(hw, E1000_ICTXQEC);
5131	stats->ictxqmtc += E1000_READ_REG(hw, E1000_ICTXQMTC);
5132	stats->icrxdmtc += E1000_READ_REG(hw, E1000_ICRXDMTC);
5133	stats->icrxoc += E1000_READ_REG(hw, E1000_ICRXOC);
5134
5135	/* Host to Card Statistics */
5136
5137	stats->cbtmpc += E1000_READ_REG(hw, E1000_CBTMPC);
5138	stats->htdpmc += E1000_READ_REG(hw, E1000_HTDPMC);
5139	stats->cbrdpc += E1000_READ_REG(hw, E1000_CBRDPC);
5140	stats->cbrmpc += E1000_READ_REG(hw, E1000_CBRMPC);
5141	stats->rpthc += E1000_READ_REG(hw, E1000_RPTHC);
5142	stats->hgptc += E1000_READ_REG(hw, E1000_HGPTC);
5143	stats->htcbdpc += E1000_READ_REG(hw, E1000_HTCBDPC);
5144	stats->hgorc += (E1000_READ_REG(hw, E1000_HGORCL) +
5145	    ((u64)E1000_READ_REG(hw, E1000_HGORCH) << 32));
5146	stats->hgotc += (E1000_READ_REG(hw, E1000_HGOTCL) +
5147	    ((u64)E1000_READ_REG(hw, E1000_HGOTCH) << 32));
5148	stats->lenerrs += E1000_READ_REG(hw, E1000_LENERRS);
5149	stats->scvpc += E1000_READ_REG(hw, E1000_SCVPC);
5150	stats->hrmpc += E1000_READ_REG(hw, E1000_HRMPC);
5151
5152	stats->algnerrc += E1000_READ_REG(hw, E1000_ALGNERRC);
5153	stats->rxerrc += E1000_READ_REG(hw, E1000_RXERRC);
5154	stats->tncrs += E1000_READ_REG(hw, E1000_TNCRS);
5155	stats->cexterr += E1000_READ_REG(hw, E1000_CEXTERR);
5156	stats->tsctc += E1000_READ_REG(hw, E1000_TSCTC);
5157	stats->tsctfc += E1000_READ_REG(hw, E1000_TSCTFC);
5158
5159	ifp = adapter->ifp;
5160	ifp->if_collisions = stats->colc;
5161
5162	/* Rx Errors */
5163	ifp->if_ierrors = adapter->dropped_pkts + stats->rxerrc +
5164	    stats->crcerrs + stats->algnerrc +
5165	    stats->ruc + stats->roc + stats->mpc + stats->cexterr;
5166
5167	/* Tx Errors */
5168	ifp->if_oerrors = stats->ecol +
5169	    stats->latecol + adapter->watchdog_events;
5170
5171	/* Driver specific counters */
5172	adapter->device_control = E1000_READ_REG(hw, E1000_CTRL);
5173	adapter->rx_control = E1000_READ_REG(hw, E1000_RCTL);
5174	adapter->int_mask = E1000_READ_REG(hw, E1000_IMS);
5175	adapter->eint_mask = E1000_READ_REG(hw, E1000_EIMS);
5176	adapter->packet_buf_alloc_tx =
5177	    ((E1000_READ_REG(hw, E1000_PBA) & 0xffff0000) >> 16);
5178	adapter->packet_buf_alloc_rx =
5179	    (E1000_READ_REG(hw, E1000_PBA) & 0xffff);
5180}
5181
5182
5183/**********************************************************************
5184 *
5185 *  Initialize the VF board statistics counters.
5186 *
5187 **********************************************************************/
5188static void
5189igb_vf_init_stats(struct adapter *adapter)
5190{
	struct e1000_hw *hw = &adapter->hw;
5192	struct e1000_vf_stats	*stats;
5193
5194	stats = (struct e1000_vf_stats	*)adapter->stats;
5195	if (stats == NULL)
5196		return;
	stats->last_gprc = E1000_READ_REG(hw, E1000_VFGPRC);
	stats->last_gorc = E1000_READ_REG(hw, E1000_VFGORC);
	stats->last_gptc = E1000_READ_REG(hw, E1000_VFGPTC);
	stats->last_gotc = E1000_READ_REG(hw, E1000_VFGOTC);
	stats->last_mprc = E1000_READ_REG(hw, E1000_VFMPRC);
5202}
5203
5204/**********************************************************************
5205 *
5206 *  Update the VF board statistics counters.
5207 *
5208 **********************************************************************/
5209static void
5210igb_update_vf_stats_counters(struct adapter *adapter)
5211{
5212	struct e1000_hw *hw = &adapter->hw;
5213	struct e1000_vf_stats	*stats;
5214
5215	if (adapter->link_speed == 0)
5216		return;
5217
5218	stats = (struct e1000_vf_stats	*)adapter->stats;
5219
5220	UPDATE_VF_REG(E1000_VFGPRC,
5221	    stats->last_gprc, stats->gprc);
5222	UPDATE_VF_REG(E1000_VFGORC,
5223	    stats->last_gorc, stats->gorc);
5224	UPDATE_VF_REG(E1000_VFGPTC,
5225	    stats->last_gptc, stats->gptc);
5226	UPDATE_VF_REG(E1000_VFGOTC,
5227	    stats->last_gotc, stats->gotc);
5228	UPDATE_VF_REG(E1000_VFMPRC,
5229	    stats->last_mprc, stats->mprc);
5230}
5231
5232/* Export a single 32-bit register via a read-only sysctl. */
5233static int
5234igb_sysctl_reg_handler(SYSCTL_HANDLER_ARGS)
5235{
5236	struct adapter *adapter;
5237	u_int val;
5238
5239	adapter = oidp->oid_arg1;
5240	val = E1000_READ_REG(&adapter->hw, oidp->oid_arg2);
5241	return (sysctl_handle_int(oidp, &val, 0, req));
5242}
5243
5244/*
5245**  Tuneable interrupt rate handler
5246*/
5247static int
5248igb_sysctl_interrupt_rate_handler(SYSCTL_HANDLER_ARGS)
5249{
5250	struct igb_queue	*que = ((struct igb_queue *)oidp->oid_arg1);
5251	int			error;
5252	u32			reg, usec, rate;
5253
5254	reg = E1000_READ_REG(&que->adapter->hw, E1000_EITR(que->msix));
5255	usec = ((reg & 0x7FFC) >> 2);
5256	if (usec > 0)
5257		rate = 1000000 / usec;
5258	else
5259		rate = 0;
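	/*
	** Illustration: EITR bits 14:2 hold the interval in usec as
	** decoded above, so a programmed interval of 125us reports
	** 1000000/125 = 8000 interrupts/sec.  Note a value written
	** via this sysctl is accepted but not pushed back to EITR.
	*/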
5260	error = sysctl_handle_int(oidp, &rate, 0, req);
5261	if (error || !req->newptr)
		return (error);
	return (0);
5264}

/*
 * Add sysctl variables, one per statistic, to the system.
 */
static void
igb_add_hw_stats(struct adapter *adapter)
{
	device_t dev = adapter->dev;

	struct tx_ring *txr = adapter->tx_rings;
	struct rx_ring *rxr = adapter->rx_rings;

	struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev);
	struct sysctl_oid *tree = device_get_sysctl_tree(dev);
	struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree);
	struct e1000_hw_stats *stats = adapter->stats;

	struct sysctl_oid *stat_node, *queue_node, *int_node, *host_node;
	struct sysctl_oid_list *stat_list, *queue_list, *int_list, *host_list;

#define QUEUE_NAME_LEN 32
	char namebuf[QUEUE_NAME_LEN];

	/* Driver Statistics */
	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "link_irq",
			CTLFLAG_RD, &adapter->link_irq, 0,
			"Link MSIX IRQ Handled");
	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "dropped",
			CTLFLAG_RD, &adapter->dropped_pkts,
			"Driver dropped packets");
	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_dma_fail",
			CTLFLAG_RD, &adapter->no_tx_dma_setup,
			"Driver TX DMA failure in xmit");
	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_overruns",
			CTLFLAG_RD, &adapter->rx_overruns,
			"RX overruns");
	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "watchdog_timeouts",
			CTLFLAG_RD, &adapter->watchdog_events,
			"Watchdog timeouts");

	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "device_control",
			CTLFLAG_RD, &adapter->device_control,
			"Device Control Register");
	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_control",
			CTLFLAG_RD, &adapter->rx_control,
			"Receiver Control Register");
	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "interrupt_mask",
			CTLFLAG_RD, &adapter->int_mask,
			"Interrupt Mask");
	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "extended_int_mask",
			CTLFLAG_RD, &adapter->eint_mask,
			"Extended Interrupt Mask");
	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_buf_alloc",
			CTLFLAG_RD, &adapter->packet_buf_alloc_tx,
			"Transmit Buffer Packet Allocation");
	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_buf_alloc",
			CTLFLAG_RD, &adapter->packet_buf_alloc_rx,
			"Receive Buffer Packet Allocation");
	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_high_water",
			CTLFLAG_RD, &adapter->hw.fc.high_water, 0,
			"Flow Control High Watermark");
	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_low_water",
			CTLFLAG_RD, &adapter->hw.fc.low_water, 0,
			"Flow Control Low Watermark");

	for (int i = 0; i < adapter->num_queues; i++, rxr++, txr++) {
		struct lro_ctrl *lro = &rxr->lro;

		snprintf(namebuf, QUEUE_NAME_LEN, "queue%d", i);
		queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
					    CTLFLAG_RD, NULL, "Queue Name");
		queue_list = SYSCTL_CHILDREN(queue_node);

		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "interrupt_rate",
				CTLFLAG_RD, &adapter->queues[i],
				sizeof(&adapter->queues[i]),
				igb_sysctl_interrupt_rate_handler,
				"IU", "Interrupt Rate");

		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_head",
				CTLFLAG_RD, adapter, E1000_TDH(txr->me),
				igb_sysctl_reg_handler, "IU",
				"Transmit Descriptor Head");
		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_tail",
				CTLFLAG_RD, adapter, E1000_TDT(txr->me),
				igb_sysctl_reg_handler, "IU",
				"Transmit Descriptor Tail");
		SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "no_desc_avail",
				CTLFLAG_RD, &txr->no_desc_avail,
				"Queue No Descriptor Available");
		SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "tx_packets",
				CTLFLAG_RD, &txr->tx_packets,
				"Queue Packets Transmitted");

		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_head",
				CTLFLAG_RD, adapter, E1000_RDH(rxr->me),
				igb_sysctl_reg_handler, "IU",
				"Receive Descriptor Head");
		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_tail",
				CTLFLAG_RD, adapter, E1000_RDT(rxr->me),
				igb_sysctl_reg_handler, "IU",
				"Receive Descriptor Tail");
		SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "rx_packets",
				CTLFLAG_RD, &rxr->rx_packets,
				"Queue Packets Received");
		SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "rx_bytes",
				CTLFLAG_RD, &rxr->rx_bytes,
				"Queue Bytes Received");
		SYSCTL_ADD_UINT(ctx, queue_list, OID_AUTO, "lro_queued",
				CTLFLAG_RD, &lro->lro_queued, 0,
				"LRO Queued");
		SYSCTL_ADD_UINT(ctx, queue_list, OID_AUTO, "lro_flushed",
				CTLFLAG_RD, &lro->lro_flushed, 0,
				"LRO Flushed");
	}
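
	/*
	 * The loop above yields one subtree per queue; for a hypothetical
	 * first device the resulting namespace looks like:
	 *
	 *	dev.igb.0.queue0.interrupt_rate
	 *	dev.igb.0.queue0.txd_head / txd_tail
	 *	dev.igb.0.queue0.rxd_head / rxd_tail
	 *	dev.igb.0.queue0.rx_packets, rx_bytes, lro_queued, ...
	 */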

	/* MAC stats get their own sub node */

	stat_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "mac_stats",
				    CTLFLAG_RD, NULL, "MAC Statistics");
	stat_list = SYSCTL_CHILDREN(stat_node);

	/*
	** The VF adapter has a very limited set of stats
	** since it's not managing the metal, so to speak.
	*/
	if (adapter->vf_ifp) {
		SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd",
				CTLFLAG_RD, &stats->gprc,
				"Good Packets Received");
		SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
				CTLFLAG_RD, &stats->gptc,
				"Good Packets Transmitted");
		SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd",
				CTLFLAG_RD, &stats->gorc,
				"Good Octets Received");
		SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_txd",
				CTLFLAG_RD, &stats->gotc,
				"Good Octets Transmitted");
		SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd",
				CTLFLAG_RD, &stats->mprc,
				"Multicast Packets Received");
		return;
	}

	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "excess_coll",
			CTLFLAG_RD, &stats->ecol,
			"Excessive collisions");
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "single_coll",
			CTLFLAG_RD, &stats->scc,
			"Single collisions");
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "multiple_coll",
			CTLFLAG_RD, &stats->mcc,
			"Multiple collisions");
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "late_coll",
			CTLFLAG_RD, &stats->latecol,
			"Late collisions");
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "collision_count",
			CTLFLAG_RD, &stats->colc,
			"Collision Count");
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "symbol_errors",
			CTLFLAG_RD, &stats->symerrs,
			"Symbol Errors");
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "sequence_errors",
			CTLFLAG_RD, &stats->sec,
			"Sequence Errors");
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "defer_count",
			CTLFLAG_RD, &stats->dc,
			"Defer Count");
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "missed_packets",
			CTLFLAG_RD, &stats->mpc,
			"Missed Packets");
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_no_buff",
			CTLFLAG_RD, &stats->rnbc,
			"Receive No Buffers");
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_undersize",
			CTLFLAG_RD, &stats->ruc,
			"Receive Undersize");
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_fragmented",
			CTLFLAG_RD, &stats->rfc,
			"Fragmented Packets Received");
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_oversize",
			CTLFLAG_RD, &stats->roc,
			"Oversized Packets Received");
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_jabber",
			CTLFLAG_RD, &stats->rjc,
			"Received Jabber");
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_errs",
			CTLFLAG_RD, &stats->rxerrc,
			"Receive Errors");
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "crc_errs",
			CTLFLAG_RD, &stats->crcerrs,
			"CRC errors");
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "alignment_errs",
			CTLFLAG_RD, &stats->algnerrc,
			"Alignment Errors");
	/* On 82575 these are collision counts */
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "coll_ext_errs",
			CTLFLAG_RD, &stats->cexterr,
			"Collision/Carrier extension errors");
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xon_recvd",
			CTLFLAG_RD, &stats->xonrxc,
			"XON Received");
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xon_txd",
			CTLFLAG_RD, &stats->xontxc,
			"XON Transmitted");
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xoff_recvd",
			CTLFLAG_RD, &stats->xoffrxc,
			"XOFF Received");
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xoff_txd",
			CTLFLAG_RD, &stats->xofftxc,
			"XOFF Transmitted");
	/* Packet Reception Stats */
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "total_pkts_recvd",
			CTLFLAG_RD, &stats->tpr,
			"Total Packets Received");
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd",
			CTLFLAG_RD, &stats->gprc,
			"Good Packets Received");
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_recvd",
			CTLFLAG_RD, &stats->bprc,
			"Broadcast Packets Received");
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd",
			CTLFLAG_RD, &stats->mprc,
			"Multicast Packets Received");
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_64",
			CTLFLAG_RD, &stats->prc64,
			"64 byte frames received");
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_65_127",
			CTLFLAG_RD, &stats->prc127,
			"65-127 byte frames received");
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_128_255",
			CTLFLAG_RD, &stats->prc255,
			"128-255 byte frames received");
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_256_511",
			CTLFLAG_RD, &stats->prc511,
			"256-511 byte frames received");
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_512_1023",
			CTLFLAG_RD, &stats->prc1023,
			"512-1023 byte frames received");
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_1024_1522",
			CTLFLAG_RD, &stats->prc1522,
			"1024-1522 byte frames received");
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd",
			CTLFLAG_RD, &stats->gorc,
			"Good Octets Received");

	/* Packet Transmission Stats */
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_txd",
			CTLFLAG_RD, &stats->gotc,
			"Good Octets Transmitted");
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "total_pkts_txd",
			CTLFLAG_RD, &stats->tpt,
			"Total Packets Transmitted");
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
			CTLFLAG_RD, &stats->gptc,
			"Good Packets Transmitted");
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_txd",
			CTLFLAG_RD, &stats->bptc,
			"Broadcast Packets Transmitted");
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_txd",
			CTLFLAG_RD, &stats->mptc,
			"Multicast Packets Transmitted");
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_64",
			CTLFLAG_RD, &stats->ptc64,
			"64 byte frames transmitted");
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_65_127",
			CTLFLAG_RD, &stats->ptc127,
			"65-127 byte frames transmitted");
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_128_255",
			CTLFLAG_RD, &stats->ptc255,
			"128-255 byte frames transmitted");
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_256_511",
			CTLFLAG_RD, &stats->ptc511,
			"256-511 byte frames transmitted");
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_512_1023",
			CTLFLAG_RD, &stats->ptc1023,
			"512-1023 byte frames transmitted");
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_1024_1522",
			CTLFLAG_RD, &stats->ptc1522,
			"1024-1522 byte frames transmitted");
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tso_txd",
			CTLFLAG_RD, &stats->tsctc,
			"TSO Contexts Transmitted");
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tso_ctx_fail",
			CTLFLAG_RD, &stats->tsctfc,
			"TSO Contexts Failed");

	/* Interrupt Stats */

	int_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "interrupts",
				    CTLFLAG_RD, NULL, "Interrupt Statistics");
	int_list = SYSCTL_CHILDREN(int_node);

	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "asserts",
			CTLFLAG_RD, &stats->iac,
			"Interrupt Assertion Count");

	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_pkt_timer",
			CTLFLAG_RD, &stats->icrxptc,
			"Interrupt Cause Rx Pkt Timer Expire Count");

	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_abs_timer",
			CTLFLAG_RD, &stats->icrxatc,
			"Interrupt Cause Rx Abs Timer Expire Count");

	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_pkt_timer",
			CTLFLAG_RD, &stats->ictxptc,
			"Interrupt Cause Tx Pkt Timer Expire Count");

	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_abs_timer",
			CTLFLAG_RD, &stats->ictxatc,
			"Interrupt Cause Tx Abs Timer Expire Count");

	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_queue_empty",
			CTLFLAG_RD, &stats->ictxqec,
			"Interrupt Cause Tx Queue Empty Count");

	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_queue_min_thresh",
			CTLFLAG_RD, &stats->ictxqmtc,
			"Interrupt Cause Tx Queue Min Thresh Count");

	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_desc_min_thresh",
			CTLFLAG_RD, &stats->icrxdmtc,
			"Interrupt Cause Rx Desc Min Thresh Count");

	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_overrun",
			CTLFLAG_RD, &stats->icrxoc,
			"Interrupt Cause Receiver Overrun Count");

	/* Host to Card Stats */

	host_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "host",
				    CTLFLAG_RD, NULL,
				    "Host to Card Statistics");

	host_list = SYSCTL_CHILDREN(host_node);

	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_tx_pkt",
			CTLFLAG_RD, &stats->cbtmpc,
			"Circuit Breaker Tx Packet Count");

	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "host_tx_pkt_discard",
			CTLFLAG_RD, &stats->htdpmc,
			"Host Transmit Discarded Packets");

	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "rx_pkt",
			CTLFLAG_RD, &stats->rpthc,
			"Rx Packets To Host");

	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_rx_pkts",
			CTLFLAG_RD, &stats->cbrmpc,
			"Circuit Breaker Rx Packet Count");

	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_rx_pkt_drop",
			CTLFLAG_RD, &stats->cbrdpc,
			"Circuit Breaker Rx Dropped Count");

	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "tx_good_pkt",
			CTLFLAG_RD, &stats->hgptc,
			"Host Good Packets Tx Count");

	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_tx_pkt_drop",
			CTLFLAG_RD, &stats->htcbdpc,
			"Host Tx Circuit Breaker Dropped Count");

	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "rx_good_bytes",
			CTLFLAG_RD, &stats->hgorc,
			"Host Good Octets Received Count");

	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "tx_good_bytes",
			CTLFLAG_RD, &stats->hgotc,
			"Host Good Octets Transmit Count");

	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "length_errors",
			CTLFLAG_RD, &stats->lenerrs,
			"Length Errors");

	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "serdes_violation_pkt",
			CTLFLAG_RD, &stats->scvpc,
			"SerDes/SGMII Code Violation Pkt Count");

	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "header_redir_missed",
			CTLFLAG_RD, &stats->hrmpc,
			"Header Redirection Missed Packet Count");
}
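
/*
 * All of the statistics registered above can be browsed from userland,
 * e.g. (unit number hypothetical):
 *
 *	# sysctl dev.igb.0.mac_stats
 *	# sysctl dev.igb.0.interrupts
 *	# sysctl dev.igb.0.host
 */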


/**********************************************************************
 *
 *  This routine provides a way to dump out the adapter eeprom,
 *  often a useful debug/service tool. This only dumps the first
 *  32 words; the data that matters lives in that range.
 *
 **********************************************************************/
static int
igb_sysctl_nvm_info(SYSCTL_HANDLER_ARGS)
{
	struct adapter *adapter;
	int error;
	int result;

	result = -1;
	error = sysctl_handle_int(oidp, &result, 0, req);

	if (error || !req->newptr)
		return (error);

	/*
	 * This value will cause a hex dump of the
	 * first 32 16-bit words of the EEPROM to
	 * the screen.
	 */
	if (result == 1) {
		adapter = (struct adapter *)arg1;
		igb_print_nvm_info(adapter);
	}

	return (error);
}
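
/*
 * Example (the OID this handler is attached to is registered elsewhere
 * in the driver; the name and unit shown are illustrative): writing 1
 * triggers the dump to the console.
 *
 *	# sysctl dev.igb.0.nvm=1
 */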

static void
igb_print_nvm_info(struct adapter *adapter)
{
	u16	eeprom_data;
	int	i, j, row = 0;

	/* It's a bit crude, but it gets the job done */
	printf("\nInterface EEPROM Dump:\n");
	printf("Offset\n0x0000  ");
	for (i = 0, j = 0; i < 32; i++, j++) {
		if (j == 8) { /* Make the offset block */
			j = 0; ++row;
			printf("\n0x00%x0  ", row);
		}
		e1000_read_nvm(&adapter->hw, i, 1, &eeprom_data);
		printf("%04x ", eeprom_data);
	}
	printf("\n");
}
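
/*
 * The console output has eight words per row, with each row labelled by
 * its byte offset (word values illustrative):
 *
 *	Interface EEPROM Dump:
 *	Offset
 *	0x0000  1234 5678 9abc def0 1234 5678 9abc def0
 *	0x0010  1234 5678 9abc def0 1234 5678 9abc def0
 *	...
 */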

static void
igb_set_sysctl_value(struct adapter *adapter, const char *name,
	const char *description, int *limit, int value)
{
	*limit = value;
	SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
	    OID_AUTO, name, CTLFLAG_RW, limit, value, description);
}
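
/*
 * A call might look like the following (the tunable name, description
 * and field are illustrative of the pattern, not a specific user):
 *
 *	igb_set_sysctl_value(adapter, "rx_processing_limit",
 *	    "max number of rx packets to process",
 *	    &adapter->rx_process_limit, 100);
 */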

/*
** Set flow control using sysctl:
** Flow control values:
**	0 - off
**	1 - rx pause
**	2 - tx pause
**	3 - full
*/
static int
igb_set_flowcntl(SYSCTL_HANDLER_ARGS)
{
	int error;
	struct adapter *adapter = (struct adapter *) arg1;

	error = sysctl_handle_int(oidp, &adapter->fc, 0, req);

	if ((error) || (req->newptr == NULL))
		return (error);

	switch (adapter->fc) {
		case e1000_fc_rx_pause:
		case e1000_fc_tx_pause:
		case e1000_fc_full:
			adapter->hw.fc.requested_mode = adapter->fc;
			break;
		case e1000_fc_none:
		default:
			adapter->hw.fc.requested_mode = e1000_fc_none;
	}

	adapter->hw.fc.current_mode = adapter->hw.fc.requested_mode;
	e1000_force_mac_fc(&adapter->hw);
	return (error);
}
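
/*
 * Example (the OID is registered elsewhere in the driver; name and unit
 * shown are illustrative): enable full flow control on the first port.
 *
 *	# sysctl dev.igb.0.fc=3
 */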

/*
** Manage DMA Coalesce:
** Control values:
**	0/1 - off/on
**	Legal timer values are:
**	250, 500, 1000-10000 in thousands
*/
static int
igb_sysctl_dmac(SYSCTL_HANDLER_ARGS)
{
	struct adapter *adapter = (struct adapter *) arg1;
	int		error;

	error = sysctl_handle_int(oidp, &adapter->dmac, 0, req);

	if ((error) || (req->newptr == NULL))
		return (error);

	switch (adapter->dmac) {
		case 0:
			/* Disabling */
			break;
		case 1: /* Just enable and use default */
			adapter->dmac = 1000;
			break;
		case 250:
		case 500:
		case 1000:
		case 2000:
		case 3000:
		case 4000:
		case 5000:
		case 6000:
		case 7000:
		case 8000:
		case 9000:
		case 10000:
			/* Legal values - allow */
			break;
		default:
			/* Do nothing, illegal value */
			adapter->dmac = 0;
			return (error);
	}
	/* Reinit the interface */
	igb_init(adapter);
	return (error);
}

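/*
 * Example (the OID is registered elsewhere in the driver; name and unit
 * shown are illustrative): enable DMA coalescing with a 5000 timer value.
 *
 *	# sysctl dev.igb.0.dmac=5000
 */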