if_igb.c revision 233383
1/******************************************************************************
2
3  Copyright (c) 2001-2011, Intel Corporation
4  All rights reserved.
5
6  Redistribution and use in source and binary forms, with or without
7  modification, are permitted provided that the following conditions are met:
8
9   1. Redistributions of source code must retain the above copyright notice,
10      this list of conditions and the following disclaimer.
11
12   2. Redistributions in binary form must reproduce the above copyright
13      notice, this list of conditions and the following disclaimer in the
14      documentation and/or other materials provided with the distribution.
15
16   3. Neither the name of the Intel Corporation nor the names of its
17      contributors may be used to endorse or promote products derived from
18      this software without specific prior written permission.
19
20  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
24  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30  POSSIBILITY OF SUCH DAMAGE.
31
32******************************************************************************/
33/*$FreeBSD: stable/9/sys/dev/e1000/if_igb.c 233383 2012-03-23 18:22:20Z jhb $*/
34
35
36#ifdef HAVE_KERNEL_OPTION_HEADERS
37#include "opt_device_polling.h"
38#include "opt_inet.h"
39#include "opt_inet6.h"
40#include "opt_altq.h"
41#endif
42
43#include <sys/param.h>
44#include <sys/systm.h>
45#if __FreeBSD_version >= 800000
46#include <sys/buf_ring.h>
47#endif
48#include <sys/bus.h>
49#include <sys/endian.h>
50#include <sys/kernel.h>
51#include <sys/kthread.h>
52#include <sys/malloc.h>
53#include <sys/mbuf.h>
54#include <sys/module.h>
55#include <sys/rman.h>
56#include <sys/socket.h>
57#include <sys/sockio.h>
58#include <sys/sysctl.h>
59#include <sys/taskqueue.h>
60#include <sys/eventhandler.h>
61#include <sys/pcpu.h>
62#include <sys/smp.h>
63#include <machine/smp.h>
64#include <machine/bus.h>
65#include <machine/resource.h>
66
67#include <net/bpf.h>
68#include <net/ethernet.h>
69#include <net/if.h>
70#include <net/if_arp.h>
71#include <net/if_dl.h>
72#include <net/if_media.h>
73
74#include <net/if_types.h>
75#include <net/if_vlan_var.h>
76
77#include <netinet/in_systm.h>
78#include <netinet/in.h>
79#include <netinet/if_ether.h>
80#include <netinet/ip.h>
81#include <netinet/ip6.h>
82#include <netinet/tcp.h>
83#include <netinet/tcp_lro.h>
84#include <netinet/udp.h>
85
86#include <machine/in_cksum.h>
87#include <dev/led/led.h>
88#include <dev/pci/pcivar.h>
89#include <dev/pci/pcireg.h>
90
91#include "e1000_api.h"
92#include "e1000_82575.h"
93#include "if_igb.h"
94
95/*********************************************************************
96 *  Set this to one to display debug statistics
97 *********************************************************************/
98int	igb_display_debug_stats = 0;
99
100/*********************************************************************
101 *  Driver version:
102 *********************************************************************/
103char igb_driver_version[] = "version - 2.2.5";
104
105
106/*********************************************************************
107 *  PCI Device ID Table
108 *
109 *  Used by probe to select devices to load on
110 *  Last field stores an index into e1000_strings
111 *  Last entry must be all 0s
112 *
113 *  { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
114 *********************************************************************/
115
116static igb_vendor_info_t igb_vendor_info_array[] =
117{
118	{ 0x8086, E1000_DEV_ID_82575EB_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
119	{ 0x8086, E1000_DEV_ID_82575EB_FIBER_SERDES,
120						PCI_ANY_ID, PCI_ANY_ID, 0},
121	{ 0x8086, E1000_DEV_ID_82575GB_QUAD_COPPER,
122						PCI_ANY_ID, PCI_ANY_ID, 0},
123	{ 0x8086, E1000_DEV_ID_82576,		PCI_ANY_ID, PCI_ANY_ID, 0},
124	{ 0x8086, E1000_DEV_ID_82576_NS,	PCI_ANY_ID, PCI_ANY_ID, 0},
125	{ 0x8086, E1000_DEV_ID_82576_NS_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
126	{ 0x8086, E1000_DEV_ID_82576_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
127	{ 0x8086, E1000_DEV_ID_82576_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
128	{ 0x8086, E1000_DEV_ID_82576_SERDES_QUAD,
129						PCI_ANY_ID, PCI_ANY_ID, 0},
130	{ 0x8086, E1000_DEV_ID_82576_QUAD_COPPER,
131						PCI_ANY_ID, PCI_ANY_ID, 0},
132	{ 0x8086, E1000_DEV_ID_82576_QUAD_COPPER_ET2,
133						PCI_ANY_ID, PCI_ANY_ID, 0},
134	{ 0x8086, E1000_DEV_ID_82576_VF,	PCI_ANY_ID, PCI_ANY_ID, 0},
135	{ 0x8086, E1000_DEV_ID_82580_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
136	{ 0x8086, E1000_DEV_ID_82580_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
137	{ 0x8086, E1000_DEV_ID_82580_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
138	{ 0x8086, E1000_DEV_ID_82580_SGMII,	PCI_ANY_ID, PCI_ANY_ID, 0},
139	{ 0x8086, E1000_DEV_ID_82580_COPPER_DUAL,
140						PCI_ANY_ID, PCI_ANY_ID, 0},
141	{ 0x8086, E1000_DEV_ID_82580_QUAD_FIBER,
142						PCI_ANY_ID, PCI_ANY_ID, 0},
143	{ 0x8086, E1000_DEV_ID_DH89XXCC_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
144	{ 0x8086, E1000_DEV_ID_DH89XXCC_SGMII,	PCI_ANY_ID, PCI_ANY_ID, 0},
145	{ 0x8086, E1000_DEV_ID_DH89XXCC_SFP,	PCI_ANY_ID, PCI_ANY_ID, 0},
146	{ 0x8086, E1000_DEV_ID_DH89XXCC_BACKPLANE,
147						PCI_ANY_ID, PCI_ANY_ID, 0},
148	{ 0x8086, E1000_DEV_ID_I350_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
149	{ 0x8086, E1000_DEV_ID_I350_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
150	{ 0x8086, E1000_DEV_ID_I350_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
151	{ 0x8086, E1000_DEV_ID_I350_SGMII,	PCI_ANY_ID, PCI_ANY_ID, 0},
152	{ 0x8086, E1000_DEV_ID_I350_VF,		PCI_ANY_ID, PCI_ANY_ID, 0},
153	/* required last entry */
154	{ 0, 0, 0, 0, 0}
155};
156
157/*********************************************************************
158 *  Table of branding strings for all supported NICs.
159 *********************************************************************/
160
161static char *igb_strings[] = {
162	"Intel(R) PRO/1000 Network Connection"
163};
164
165/*********************************************************************
166 *  Function prototypes
167 *********************************************************************/
168static int	igb_probe(device_t);
169static int	igb_attach(device_t);
170static int	igb_detach(device_t);
171static int	igb_shutdown(device_t);
172static int	igb_suspend(device_t);
173static int	igb_resume(device_t);
174#if __FreeBSD_version >= 800000
175static int	igb_mq_start(struct ifnet *, struct mbuf *);
176static int	igb_mq_start_locked(struct ifnet *,
177		    struct tx_ring *, struct mbuf *);
178static void	igb_qflush(struct ifnet *);
179static void	igb_deferred_mq_start(void *, int);
180#else
181static void	igb_start(struct ifnet *);
182static void	igb_start_locked(struct tx_ring *, struct ifnet *ifp);
183#endif
184static int	igb_ioctl(struct ifnet *, u_long, caddr_t);
185static void	igb_init(void *);
186static void	igb_init_locked(struct adapter *);
187static void	igb_stop(void *);
188static void	igb_media_status(struct ifnet *, struct ifmediareq *);
189static int	igb_media_change(struct ifnet *);
190static void	igb_identify_hardware(struct adapter *);
191static int	igb_allocate_pci_resources(struct adapter *);
192static int	igb_allocate_msix(struct adapter *);
193static int	igb_allocate_legacy(struct adapter *);
194static int	igb_setup_msix(struct adapter *);
195static void	igb_free_pci_resources(struct adapter *);
196static void	igb_local_timer(void *);
197static void	igb_reset(struct adapter *);
198static int	igb_setup_interface(device_t, struct adapter *);
199static int	igb_allocate_queues(struct adapter *);
200static void	igb_configure_queues(struct adapter *);
201
202static int	igb_allocate_transmit_buffers(struct tx_ring *);
203static void	igb_setup_transmit_structures(struct adapter *);
204static void	igb_setup_transmit_ring(struct tx_ring *);
205static void	igb_initialize_transmit_units(struct adapter *);
206static void	igb_free_transmit_structures(struct adapter *);
207static void	igb_free_transmit_buffers(struct tx_ring *);
208
209static int	igb_allocate_receive_buffers(struct rx_ring *);
210static int	igb_setup_receive_structures(struct adapter *);
211static int	igb_setup_receive_ring(struct rx_ring *);
212static void	igb_initialize_receive_units(struct adapter *);
213static void	igb_free_receive_structures(struct adapter *);
214static void	igb_free_receive_buffers(struct rx_ring *);
215static void	igb_free_receive_ring(struct rx_ring *);
216
217static void	igb_enable_intr(struct adapter *);
218static void	igb_disable_intr(struct adapter *);
219static void	igb_update_stats_counters(struct adapter *);
220static bool	igb_txeof(struct tx_ring *);
221
222static __inline	void igb_rx_discard(struct rx_ring *, int);
223static __inline void igb_rx_input(struct rx_ring *,
224		    struct ifnet *, struct mbuf *, u32);
225
226static bool	igb_rxeof(struct igb_queue *, int, int *);
227static void	igb_rx_checksum(u32, struct mbuf *, u32);
228static bool	igb_tx_ctx_setup(struct tx_ring *, struct mbuf *);
229static bool	igb_tso_setup(struct tx_ring *, struct mbuf *, u32 *);
230static void	igb_set_promisc(struct adapter *);
231static void	igb_disable_promisc(struct adapter *);
232static void	igb_set_multi(struct adapter *);
233static void	igb_update_link_status(struct adapter *);
234static void	igb_refresh_mbufs(struct rx_ring *, int);
235
236static void	igb_register_vlan(void *, struct ifnet *, u16);
237static void	igb_unregister_vlan(void *, struct ifnet *, u16);
238static void	igb_setup_vlan_hw_support(struct adapter *);
239
240static int	igb_xmit(struct tx_ring *, struct mbuf **);
241static int	igb_dma_malloc(struct adapter *, bus_size_t,
242		    struct igb_dma_alloc *, int);
243static void	igb_dma_free(struct adapter *, struct igb_dma_alloc *);
244static int	igb_sysctl_nvm_info(SYSCTL_HANDLER_ARGS);
245static void	igb_print_nvm_info(struct adapter *);
246static int 	igb_is_valid_ether_addr(u8 *);
247static void     igb_add_hw_stats(struct adapter *);
248
249static void	igb_vf_init_stats(struct adapter *);
250static void	igb_update_vf_stats_counters(struct adapter *);
251
252/* Management and WOL Support */
253static void	igb_init_manageability(struct adapter *);
254static void	igb_release_manageability(struct adapter *);
255static void     igb_get_hw_control(struct adapter *);
256static void     igb_release_hw_control(struct adapter *);
257static void     igb_enable_wakeup(device_t);
258static void     igb_led_func(void *, int);
259
260static int	igb_irq_fast(void *);
261static void	igb_msix_que(void *);
262static void	igb_msix_link(void *);
263static void	igb_handle_que(void *context, int pending);
264static void	igb_handle_link(void *context, int pending);
265
266static void	igb_set_sysctl_value(struct adapter *, const char *,
267		    const char *, int *, int);
268static int	igb_set_flowcntl(SYSCTL_HANDLER_ARGS);
269static int	igb_sysctl_dmac(SYSCTL_HANDLER_ARGS);
270
271#ifdef DEVICE_POLLING
272static poll_handler_t igb_poll;
273#endif /* DEVICE_POLLING */
274
275/*********************************************************************
276 *  FreeBSD Device Interface Entry Points
277 *********************************************************************/
278
279static device_method_t igb_methods[] = {
280	/* Device interface */
281	DEVMETHOD(device_probe, igb_probe),
282	DEVMETHOD(device_attach, igb_attach),
283	DEVMETHOD(device_detach, igb_detach),
284	DEVMETHOD(device_shutdown, igb_shutdown),
285	DEVMETHOD(device_suspend, igb_suspend),
286	DEVMETHOD(device_resume, igb_resume),
287	{0, 0}
288};
289
290static driver_t igb_driver = {
291	"igb", igb_methods, sizeof(struct adapter),
292};
293
294static devclass_t igb_devclass;
295DRIVER_MODULE(igb, pci, igb_driver, igb_devclass, 0, 0);
296MODULE_DEPEND(igb, pci, 1, 1, 1);
297MODULE_DEPEND(igb, ether, 1, 1, 1);
298
299/*********************************************************************
300 *  Tunable default values.
301 *********************************************************************/
302
303SYSCTL_NODE(_hw, OID_AUTO, igb, CTLFLAG_RD, 0, "IGB driver parameters");
304
305/* Descriptor defaults */
306static int igb_rxd = IGB_DEFAULT_RXD;
307static int igb_txd = IGB_DEFAULT_TXD;
308TUNABLE_INT("hw.igb.rxd", &igb_rxd);
309TUNABLE_INT("hw.igb.txd", &igb_txd);
310SYSCTL_INT(_hw_igb, OID_AUTO, rxd, CTLFLAG_RDTUN, &igb_rxd, 0,
311    "Number of receive descriptors per queue");
312SYSCTL_INT(_hw_igb, OID_AUTO, txd, CTLFLAG_RDTUN, &igb_txd, 0,
313    "Number of transmit descriptors per queue");
314
315/*
316** AIM: Adaptive Interrupt Moderation,
317** which means that the interrupt rate
318** is varied over time based on the
319** traffic for that interrupt vector.
320*/
321static int igb_enable_aim = TRUE;
322TUNABLE_INT("hw.igb.enable_aim", &igb_enable_aim);
323SYSCTL_INT(_hw_igb, OID_AUTO, enable_aim, CTLFLAG_RW, &igb_enable_aim, 0,
324    "Enable adaptive interrupt moderation");
325
326/*
327 * MSIX should be the default for best performance,
328 * but this allows it to be forced off for testing.
329 */
330static int igb_enable_msix = 1;
331TUNABLE_INT("hw.igb.enable_msix", &igb_enable_msix);
332SYSCTL_INT(_hw_igb, OID_AUTO, enable_msix, CTLFLAG_RDTUN, &igb_enable_msix, 0,
333    "Enable MSI-X interrupts");
334
335/*
336** Tunable interrupt rate
337*/
338static int igb_max_interrupt_rate = 8000;
339TUNABLE_INT("hw.igb.max_interrupt_rate", &igb_max_interrupt_rate);
340SYSCTL_INT(_hw_igb, OID_AUTO, max_interrupt_rate, CTLFLAG_RDTUN,
341    &igb_max_interrupt_rate, 0, "Maximum interrupts per second");
342
343/*
344** Header split causes the packet header to
345** be DMA'd to a separate mbuf from the payload.
346** This can have memory alignment benefits, and
347** another plus is that small packets often fit
348** into the header and thus use no cluster. It is
349** a very workload-dependent feature.
350*/
351static int igb_header_split = FALSE;
352TUNABLE_INT("hw.igb.hdr_split", &igb_header_split);
353SYSCTL_INT(_hw_igb, OID_AUTO, header_split, CTLFLAG_RDTUN, &igb_header_split, 0,
354    "Enable receive mbuf header split");
355
356/*
357** This will autoconfigure based on
358** the number of CPUs if left at 0.
359*/
360static int igb_num_queues = 0;
361TUNABLE_INT("hw.igb.num_queues", &igb_num_queues);
362SYSCTL_INT(_hw_igb, OID_AUTO, num_queues, CTLFLAG_RDTUN, &igb_num_queues, 0,
363    "Number of queues to configure, 0 indicates autoconfigure");
364
365/* How many packets rxeof tries to clean at a time */
366static int igb_rx_process_limit = 100;
367TUNABLE_INT("hw.igb.rx_process_limit", &igb_rx_process_limit);
368SYSCTL_INT(_hw_igb, OID_AUTO, rx_process_limit, CTLFLAG_RDTUN,
369    &igb_rx_process_limit, 0,
370    "Maximum number of received packets to process at a time, -1 means unlimited");
371
372/*********************************************************************
373 *  Device identification routine
374 *
375 *  igb_probe determines if the driver should be loaded on the
376 *  adapter, based on the PCI vendor/device ID of the adapter.
377 *
378 *  return BUS_PROBE_DEFAULT on success, positive on failure
379 *********************************************************************/
380
381static int
382igb_probe(device_t dev)
383{
384	char		adapter_name[60];
385	uint16_t	pci_vendor_id = 0;
386	uint16_t	pci_device_id = 0;
387	uint16_t	pci_subvendor_id = 0;
388	uint16_t	pci_subdevice_id = 0;
389	igb_vendor_info_t *ent;
390
391	INIT_DEBUGOUT("igb_probe: begin");
392
393	pci_vendor_id = pci_get_vendor(dev);
394	if (pci_vendor_id != IGB_VENDOR_ID)
395		return (ENXIO);
396
397	pci_device_id = pci_get_device(dev);
398	pci_subvendor_id = pci_get_subvendor(dev);
399	pci_subdevice_id = pci_get_subdevice(dev);
400
401	ent = igb_vendor_info_array;
402	while (ent->vendor_id != 0) {
403		if ((pci_vendor_id == ent->vendor_id) &&
404		    (pci_device_id == ent->device_id) &&
405
406		    ((pci_subvendor_id == ent->subvendor_id) ||
407		    (ent->subvendor_id == PCI_ANY_ID)) &&
408
409		    ((pci_subdevice_id == ent->subdevice_id) ||
410		    (ent->subdevice_id == PCI_ANY_ID))) {
411			sprintf(adapter_name, "%s %s",
412				igb_strings[ent->index],
413				igb_driver_version);
414			device_set_desc_copy(dev, adapter_name);
415			return (BUS_PROBE_DEFAULT);
416		}
417		ent++;
418	}
419
420	return (ENXIO);
421}
422
423/*********************************************************************
424 *  Device initialization routine
425 *
426 *  The attach entry point is called when the driver is being loaded.
427 *  This routine identifies the type of hardware, allocates all resources
428 *  and initializes the hardware.
429 *
430 *  return 0 on success, positive on failure
431 *********************************************************************/
432
433static int
434igb_attach(device_t dev)
435{
436	struct adapter	*adapter;
437	int		error = 0;
438	u16		eeprom_data;
439
440	INIT_DEBUGOUT("igb_attach: begin");
441
442	if (resource_disabled("igb", device_get_unit(dev))) {
443		device_printf(dev, "Disabled by device hint\n");
444		return (ENXIO);
445	}
446
447	adapter = device_get_softc(dev);
448	adapter->dev = adapter->osdep.dev = dev;
449	IGB_CORE_LOCK_INIT(adapter, device_get_nameunit(dev));
450
451	/* SYSCTL stuff */
452	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
453	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
454	    OID_AUTO, "nvm", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
455	    igb_sysctl_nvm_info, "I", "NVM Information");
456
457	igb_set_sysctl_value(adapter, "enable_aim",
458	    "Interrupt Moderation", &adapter->enable_aim,
459	    igb_enable_aim);
460
461	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
462	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
463	    OID_AUTO, "fc", CTLTYPE_INT|CTLFLAG_RW,
464	    adapter, 0, igb_set_flowcntl, "I", "Flow Control");
465
466	callout_init_mtx(&adapter->timer, &adapter->core_mtx, 0);
467
468	/* Determine hardware and mac info */
469	igb_identify_hardware(adapter);
470
471	/* Setup PCI resources */
472	if (igb_allocate_pci_resources(adapter)) {
473		device_printf(dev, "Allocation of PCI resources failed\n");
474		error = ENXIO;
475		goto err_pci;
476	}
477
478	/* Do Shared Code initialization */
479	if (e1000_setup_init_funcs(&adapter->hw, TRUE)) {
480		device_printf(dev, "Setup of Shared code failed\n");
481		error = ENXIO;
482		goto err_pci;
483	}
484
485	e1000_get_bus_info(&adapter->hw);
486
487	/* Sysctl for limiting the amount of work done in the taskqueue */
488	igb_set_sysctl_value(adapter, "rx_processing_limit",
489	    "max number of rx packets to process",
490	    &adapter->rx_process_limit, igb_rx_process_limit);
491
492	/*
493	 * Validate the number of transmit and receive descriptors. It
494	 * must not exceed the hardware maximum and must be a multiple
495	 * of IGB_DBA_ALIGN.
496	 */
497	if (((igb_txd * sizeof(struct e1000_tx_desc)) % IGB_DBA_ALIGN) != 0 ||
498	    (igb_txd > IGB_MAX_TXD) || (igb_txd < IGB_MIN_TXD)) {
499		device_printf(dev, "Using %d TX descriptors instead of %d!\n",
500		    IGB_DEFAULT_TXD, igb_txd);
501		adapter->num_tx_desc = IGB_DEFAULT_TXD;
502	} else
503		adapter->num_tx_desc = igb_txd;
504	if (((igb_rxd * sizeof(struct e1000_rx_desc)) % IGB_DBA_ALIGN) != 0 ||
505	    (igb_rxd > IGB_MAX_RXD) || (igb_rxd < IGB_MIN_RXD)) {
506		device_printf(dev, "Using %d RX descriptors instead of %d!\n",
507		    IGB_DEFAULT_RXD, igb_rxd);
508		adapter->num_rx_desc = IGB_DEFAULT_RXD;
509	} else
510		adapter->num_rx_desc = igb_rxd;
511
512	adapter->hw.mac.autoneg = DO_AUTO_NEG;
513	adapter->hw.phy.autoneg_wait_to_complete = FALSE;
514	adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
515
516	/* Copper options */
517	if (adapter->hw.phy.media_type == e1000_media_type_copper) {
518		adapter->hw.phy.mdix = AUTO_ALL_MODES;
519		adapter->hw.phy.disable_polarity_correction = FALSE;
520		adapter->hw.phy.ms_type = IGB_MASTER_SLAVE;
521	}
522
523	/*
524	 * Set the frame limits assuming
525	 * standard ethernet sized frames.
526	 */
527	adapter->max_frame_size = ETHERMTU + ETHER_HDR_LEN + ETHERNET_FCS_SIZE;
528	adapter->min_frame_size = ETH_ZLEN + ETHERNET_FCS_SIZE;
529
530	/*
531	** Allocate and Setup Queues
532	*/
533	if (igb_allocate_queues(adapter)) {
534		error = ENOMEM;
535		goto err_pci;
536	}
537
538	/* Allocate the appropriate stats memory */
539	if (adapter->vf_ifp) {
540		adapter->stats =
541		    (struct e1000_vf_stats *)malloc(sizeof \
542		    (struct e1000_vf_stats), M_DEVBUF, M_NOWAIT | M_ZERO);
543		igb_vf_init_stats(adapter);
544	} else
545		adapter->stats =
546		    (struct e1000_hw_stats *)malloc(sizeof \
547		    (struct e1000_hw_stats), M_DEVBUF, M_NOWAIT | M_ZERO);
548	if (adapter->stats == NULL) {
549		device_printf(dev, "Can not allocate stats memory\n");
550		error = ENOMEM;
551		goto err_late;
552	}
553
554	/* Allocate multicast array memory. */
555	adapter->mta = malloc(sizeof(u8) * ETH_ADDR_LEN *
556	    MAX_NUM_MULTICAST_ADDRESSES, M_DEVBUF, M_NOWAIT);
557	if (adapter->mta == NULL) {
558		device_printf(dev, "Can not allocate multicast setup array\n");
559		error = ENOMEM;
560		goto err_late;
561	}
562
563	/* Some adapter-specific advanced features */
564	if (adapter->hw.mac.type >= e1000_i350) {
565		SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
566		    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
567		    OID_AUTO, "dmac", CTLTYPE_INT|CTLFLAG_RW,
568		    adapter, 0, igb_sysctl_dmac, "I", "DMA Coalesce");
569		igb_set_sysctl_value(adapter, "eee_disabled",
570		    "enable Energy Efficient Ethernet",
571		    &adapter->hw.dev_spec._82575.eee_disable,
572		    TRUE);
573		e1000_set_eee_i350(&adapter->hw);
574	}
575
576	/*
577	** Start from a known state; this is
578	** important for reading the NVM and
579	** MAC address from it.
580	*/
581	e1000_reset_hw(&adapter->hw);
582
583	/* Make sure we have a good EEPROM before we read from it */
584	if (e1000_validate_nvm_checksum(&adapter->hw) < 0) {
585		/*
586		** Some PCI-E parts fail the first check due to
587		** the link being in sleep state, call it again,
588		** if it fails a second time its a real issue.
589		*/
590		if (e1000_validate_nvm_checksum(&adapter->hw) < 0) {
591			device_printf(dev,
592			    "The EEPROM Checksum Is Not Valid\n");
593			error = EIO;
594			goto err_late;
595		}
596	}
597
598	/*
599	** Copy the permanent MAC address out of the EEPROM
600	*/
601	if (e1000_read_mac_addr(&adapter->hw) < 0) {
602		device_printf(dev, "EEPROM read error while reading MAC"
603		    " address\n");
604		error = EIO;
605		goto err_late;
606	}
607	/* Check its sanity */
608	if (!igb_is_valid_ether_addr(adapter->hw.mac.addr)) {
609		device_printf(dev, "Invalid MAC address\n");
610		error = EIO;
611		goto err_late;
612	}
613
614	/*
615	** Configure Interrupts
616	*/
617	if ((adapter->msix > 1) && (igb_enable_msix))
618		error = igb_allocate_msix(adapter);
619	else /* MSI or Legacy */
620		error = igb_allocate_legacy(adapter);
621	if (error)
622		goto err_late;
623
624	/* Setup OS specific network interface */
625	if (igb_setup_interface(dev, adapter) != 0)
626		goto err_late;
627
628	/* Now get a good starting state */
629	igb_reset(adapter);
630
631	/* Initialize statistics */
632	igb_update_stats_counters(adapter);
633
634	adapter->hw.mac.get_link_status = 1;
635	igb_update_link_status(adapter);
636
637	/* Indicate SOL/IDER usage */
638	if (e1000_check_reset_block(&adapter->hw))
639		device_printf(dev,
640		    "PHY reset is blocked due to SOL/IDER session.\n");
641
642	/* Determine if we have to control management hardware */
643	adapter->has_manage = e1000_enable_mng_pass_thru(&adapter->hw);
644
645	/*
646	 * Setup Wake-on-Lan
647	 */
648	/* APME bit in EEPROM is mapped to WUC.APME */
649	eeprom_data = E1000_READ_REG(&adapter->hw, E1000_WUC) & E1000_WUC_APME;
650	if (eeprom_data)
651		adapter->wol = E1000_WUFC_MAG;
652
653	/* Register for VLAN events */
654	adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
655	     igb_register_vlan, adapter, EVENTHANDLER_PRI_FIRST);
656	adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
657	     igb_unregister_vlan, adapter, EVENTHANDLER_PRI_FIRST);
658
659	igb_add_hw_stats(adapter);
660
661	/* Tell the stack that the interface is not active */
662	adapter->ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
663
664	adapter->led_dev = led_create(igb_led_func, adapter,
665	    device_get_nameunit(dev));
666
667	INIT_DEBUGOUT("igb_attach: end");
668
669	return (0);
670
671err_late:
672	igb_detach(dev);
673	igb_free_transmit_structures(adapter);
674	igb_free_receive_structures(adapter);
675	igb_release_hw_control(adapter);
676	if (adapter->ifp != NULL)
677		if_free(adapter->ifp);
678err_pci:
679	igb_free_pci_resources(adapter);
680	free(adapter->mta, M_DEVBUF);
681	IGB_CORE_LOCK_DESTROY(adapter);
682
683	return (error);
684}
685
686/*********************************************************************
687 *  Device removal routine
688 *
689 *  The detach entry point is called when the driver is being removed.
690 *  This routine stops the adapter and deallocates all the resources
691 *  that were allocated for driver operation.
692 *
693 *  return 0 on success, positive on failure
694 *********************************************************************/
695
696static int
697igb_detach(device_t dev)
698{
699	struct adapter	*adapter = device_get_softc(dev);
700	struct ifnet	*ifp = adapter->ifp;
701
702	INIT_DEBUGOUT("igb_detach: begin");
703
704	/* Make sure VLANs are not using the driver */
705	if (adapter->ifp->if_vlantrunk != NULL) {
706		device_printf(dev,"Vlan in use, detach first\n");
707		return (EBUSY);
708	}
709
710	ether_ifdetach(adapter->ifp);
711
712	if (adapter->led_dev != NULL)
713		led_destroy(adapter->led_dev);
714
715#ifdef DEVICE_POLLING
716	if (ifp->if_capenable & IFCAP_POLLING)
717		ether_poll_deregister(ifp);
718#endif
719
720	IGB_CORE_LOCK(adapter);
721	adapter->in_detach = 1;
722	igb_stop(adapter);
723	IGB_CORE_UNLOCK(adapter);
724
725	e1000_phy_hw_reset(&adapter->hw);
726
727	/* Give control back to firmware */
728	igb_release_manageability(adapter);
729	igb_release_hw_control(adapter);
730
731	if (adapter->wol) {
732		E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
733		E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
734		igb_enable_wakeup(dev);
735	}
736
737	/* Unregister VLAN events */
738	if (adapter->vlan_attach != NULL)
739		EVENTHANDLER_DEREGISTER(vlan_config, adapter->vlan_attach);
740	if (adapter->vlan_detach != NULL)
741		EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach);
742
743	callout_drain(&adapter->timer);
744
745	igb_free_pci_resources(adapter);
746	bus_generic_detach(dev);
747	if_free(ifp);
748
749	igb_free_transmit_structures(adapter);
750	igb_free_receive_structures(adapter);
751	if (adapter->mta != NULL)
752		free(adapter->mta, M_DEVBUF);
753
754	IGB_CORE_LOCK_DESTROY(adapter);
755
756	return (0);
757}
758
759/*********************************************************************
760 *
761 *  Shutdown entry point
762 *
763 **********************************************************************/
764
765static int
766igb_shutdown(device_t dev)
767{
768	return igb_suspend(dev);
769}
770
771/*
772 * Suspend/resume device methods.
773 */
774static int
775igb_suspend(device_t dev)
776{
777	struct adapter *adapter = device_get_softc(dev);
778
779	IGB_CORE_LOCK(adapter);
780
781	igb_stop(adapter);
782
783        igb_release_manageability(adapter);
784	igb_release_hw_control(adapter);
785
786        if (adapter->wol) {
787                E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
788                E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
789                igb_enable_wakeup(dev);
790        }
791
792	IGB_CORE_UNLOCK(adapter);
793
794	return bus_generic_suspend(dev);
795}
796
797static int
798igb_resume(device_t dev)
799{
800	struct adapter *adapter = device_get_softc(dev);
801	struct ifnet *ifp = adapter->ifp;
802#if __FreeBSD_version >= 800000
803	struct tx_ring *txr = adapter->tx_rings;
804#endif
805
806	IGB_CORE_LOCK(adapter);
807	igb_init_locked(adapter);
808	igb_init_manageability(adapter);
809
810	if ((ifp->if_flags & IFF_UP) &&
811	    (ifp->if_drv_flags & IFF_DRV_RUNNING)) {
812#if __FreeBSD_version < 800000
813		igb_start(ifp);
814#else
815		for (int i = 0; i < adapter->num_queues; i++, txr++) {
816			IGB_TX_LOCK(txr);
817			if (!drbr_empty(ifp, txr->br))
818				igb_mq_start_locked(ifp, txr, NULL);
819			IGB_TX_UNLOCK(txr);
820		}
821#endif
822	}
823
824	IGB_CORE_UNLOCK(adapter);
825
826	return bus_generic_resume(dev);
827}
828
829
830#if __FreeBSD_version < 800000
831/*********************************************************************
832 *  Transmit entry point
833 *
834 *  igb_start is called by the stack to initiate a transmit.
835 *  The driver will remain in this routine as long as there are
836 *  packets to transmit and transmit resources are available.
837 *  If resources are not available, the stack is notified and
838 *  the packet is requeued.
839 **********************************************************************/
840
841static void
842igb_start_locked(struct tx_ring *txr, struct ifnet *ifp)
843{
844	struct adapter	*adapter = ifp->if_softc;
845	struct mbuf	*m_head;
846
847	IGB_TX_LOCK_ASSERT(txr);
848
849	if ((ifp->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
850	    IFF_DRV_RUNNING)
851		return;
852	if (!adapter->link_active)
853		return;
854
855	/* Call cleanup if number of TX descriptors low */
856	if (txr->tx_avail <= IGB_TX_CLEANUP_THRESHOLD)
857		igb_txeof(txr);
858
859	while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
860		if (txr->tx_avail <= IGB_MAX_SCATTER) {
861			ifp->if_drv_flags |= IFF_DRV_OACTIVE;
862			break;
863		}
864		IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
865		if (m_head == NULL)
866			break;
867		/*
868		 *  Encapsulation can modify our pointer, and/or make it
869		 *  NULL on failure.  In that event, we can't requeue.
870		 */
871		if (igb_xmit(txr, &m_head)) {
872			if (m_head == NULL)
873				break;
874			ifp->if_drv_flags |= IFF_DRV_OACTIVE;
875			IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
876			break;
877		}
878
879		/* Send a copy of the frame to the BPF listener */
880		ETHER_BPF_MTAP(ifp, m_head);
881
882		/* Set watchdog on */
883		txr->watchdog_time = ticks;
884		txr->queue_status = IGB_QUEUE_WORKING;
885	}
886}
887
888/*
889 * Legacy TX driver routine, called from the
890 * stack, always uses tx[0], and spins for it.
891 * Should not be used with multiqueue tx
892 */
893static void
894igb_start(struct ifnet *ifp)
895{
896	struct adapter	*adapter = ifp->if_softc;
897	struct tx_ring	*txr = adapter->tx_rings;
898
899	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
900		IGB_TX_LOCK(txr);
901		igb_start_locked(txr, ifp);
902		IGB_TX_UNLOCK(txr);
903	}
904	return;
905}
906
907#else /* __FreeBSD_version >= 800000 */
908/*
909** Multiqueue Transmit driver
910**
911*/
912static int
913igb_mq_start(struct ifnet *ifp, struct mbuf *m)
914{
915	struct adapter		*adapter = ifp->if_softc;
916	struct igb_queue	*que;
917	struct tx_ring		*txr;
918	int 			i = 0, err = 0;
919
920	/* Which queue to use */
921	if ((m->m_flags & M_FLOWID) != 0)
922		i = m->m_pkthdr.flowid % adapter->num_queues;
923
924	txr = &adapter->tx_rings[i];
925	que = &adapter->queues[i];
926
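	/*
	** If the TX ring lock is free, transmit directly; otherwise
	** enqueue the mbuf and let the queue's taskqueue drain it later.
	*/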
927	if (IGB_TX_TRYLOCK(txr)) {
928		err = igb_mq_start_locked(ifp, txr, m);
929		IGB_TX_UNLOCK(txr);
930	} else {
931		err = drbr_enqueue(ifp, txr->br, m);
932		taskqueue_enqueue(que->tq, &txr->txq_task);
933	}
934
935	return (err);
936}
937
938static int
939igb_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr, struct mbuf *m)
940{
941	struct adapter  *adapter = txr->adapter;
942        struct mbuf     *next;
943        int             err = 0, enq;
944
945	IGB_TX_LOCK_ASSERT(txr);
946
947	if ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) !=
948	    IFF_DRV_RUNNING || adapter->link_active == 0) {
949		if (m != NULL)
950			err = drbr_enqueue(ifp, txr->br, m);
951		return (err);
952	}
953
954	enq = 0;
955	if (m == NULL) {
956		next = drbr_dequeue(ifp, txr->br);
957	} else if (drbr_needs_enqueue(ifp, txr->br)) {
958		if ((err = drbr_enqueue(ifp, txr->br, m)) != 0)
959			return (err);
960		next = drbr_dequeue(ifp, txr->br);
961	} else
962		next = m;
963
964	/* Process the queue */
965	while (next != NULL) {
966		if ((err = igb_xmit(txr, &next)) != 0) {
967			if (next != NULL)
968				err = drbr_enqueue(ifp, txr->br, next);
969			break;
970		}
971		enq++;
972		drbr_stats_update(ifp, next->m_pkthdr.len, next->m_flags);
973		ETHER_BPF_MTAP(ifp, next);
974		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
975			break;
976		if (txr->tx_avail <= IGB_TX_CLEANUP_THRESHOLD)
977			igb_txeof(txr);
978		if (txr->tx_avail <= IGB_MAX_SCATTER) {
979			ifp->if_drv_flags |= IFF_DRV_OACTIVE;
980			break;
981		}
982		next = drbr_dequeue(ifp, txr->br);
983	}
984	if (enq > 0) {
985		/* Set the watchdog */
986		txr->queue_status = IGB_QUEUE_WORKING;
987		txr->watchdog_time = ticks;
988	}
989	return (err);
990}
991
992/*
993 * Called from a taskqueue to drain queued transmit packets.
994 */
995static void
996igb_deferred_mq_start(void *arg, int pending)
997{
998	struct tx_ring *txr = arg;
999	struct adapter *adapter = txr->adapter;
1000	struct ifnet *ifp = adapter->ifp;
1001
1002	IGB_TX_LOCK(txr);
1003	if (!drbr_empty(ifp, txr->br))
1004		igb_mq_start_locked(ifp, txr, NULL);
1005	IGB_TX_UNLOCK(txr);
1006}
1007
1008/*
1009** Flush all ring buffers
1010*/
1011static void
1012igb_qflush(struct ifnet *ifp)
1013{
1014	struct adapter	*adapter = ifp->if_softc;
1015	struct tx_ring	*txr = adapter->tx_rings;
1016	struct mbuf	*m;
1017
1018	for (int i = 0; i < adapter->num_queues; i++, txr++) {
1019		IGB_TX_LOCK(txr);
1020		while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
1021			m_freem(m);
1022		IGB_TX_UNLOCK(txr);
1023	}
1024	if_qflush(ifp);
1025}
1026#endif /* __FreeBSD_version < 800000 */
1027
1028/*********************************************************************
1029 *  Ioctl entry point
1030 *
1031 *  igb_ioctl is called when the user wants to configure the
1032 *  interface.
1033 *
1034 *  return 0 on success, positive on failure
1035 **********************************************************************/
1036
1037static int
1038igb_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
1039{
1040	struct adapter	*adapter = ifp->if_softc;
1041	struct ifreq	*ifr = (struct ifreq *)data;
1042#if defined(INET) || defined(INET6)
1043	struct ifaddr	*ifa = (struct ifaddr *)data;
1044#endif
1045	bool		avoid_reset = FALSE;
1046	int		error = 0;
1047
1048	if (adapter->in_detach)
1049		return (error);
1050
1051	switch (command) {
1052	case SIOCSIFADDR:
1053#ifdef INET
1054		if (ifa->ifa_addr->sa_family == AF_INET)
1055			avoid_reset = TRUE;
1056#endif
1057#ifdef INET6
1058		if (ifa->ifa_addr->sa_family == AF_INET6)
1059			avoid_reset = TRUE;
1060#endif
1061		/*
1062		** Calling init results in link renegotiation,
1063		** so we avoid doing it when possible.
1064		*/
1065		if (avoid_reset) {
1066			ifp->if_flags |= IFF_UP;
1067			if (!(ifp->if_drv_flags & IFF_DRV_RUNNING))
1068				igb_init(adapter);
1069#ifdef INET
1070			if (!(ifp->if_flags & IFF_NOARP))
1071				arp_ifinit(ifp, ifa);
1072#endif
1073		} else
1074			error = ether_ioctl(ifp, command, data);
1075		break;
1076	case SIOCSIFMTU:
1077	    {
1078		int max_frame_size;
1079
1080		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)");
1081
1082		IGB_CORE_LOCK(adapter);
1083		max_frame_size = 9234;
1084		if (ifr->ifr_mtu > max_frame_size - ETHER_HDR_LEN -
1085		    ETHER_CRC_LEN) {
1086			IGB_CORE_UNLOCK(adapter);
1087			error = EINVAL;
1088			break;
1089		}
1090
1091		ifp->if_mtu = ifr->ifr_mtu;
1092		adapter->max_frame_size =
1093		    ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
1094		igb_init_locked(adapter);
1095		IGB_CORE_UNLOCK(adapter);
1096		break;
1097	    }
1098	case SIOCSIFFLAGS:
1099		IOCTL_DEBUGOUT("ioctl rcv'd:\
1100		    SIOCSIFFLAGS (Set Interface Flags)");
1101		IGB_CORE_LOCK(adapter);
1102		if (ifp->if_flags & IFF_UP) {
1103			if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) {
1104				if ((ifp->if_flags ^ adapter->if_flags) &
1105				    (IFF_PROMISC | IFF_ALLMULTI)) {
1106					igb_disable_promisc(adapter);
1107					igb_set_promisc(adapter);
1108				}
1109			} else
1110				igb_init_locked(adapter);
1111		} else
1112			if (ifp->if_drv_flags & IFF_DRV_RUNNING)
1113				igb_stop(adapter);
1114		adapter->if_flags = ifp->if_flags;
1115		IGB_CORE_UNLOCK(adapter);
1116		break;
1117	case SIOCADDMULTI:
1118	case SIOCDELMULTI:
1119		IOCTL_DEBUGOUT("ioctl rcv'd: SIOC(ADD|DEL)MULTI");
1120		if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1121			IGB_CORE_LOCK(adapter);
1122			igb_disable_intr(adapter);
1123			igb_set_multi(adapter);
1124#ifdef DEVICE_POLLING
1125			if (!(ifp->if_capenable & IFCAP_POLLING))
1126#endif
1127				igb_enable_intr(adapter);
1128			IGB_CORE_UNLOCK(adapter);
1129		}
1130		break;
1131	case SIOCSIFMEDIA:
1132		/*
1133		** As the speed/duplex settings are being
1134		** changed, we need to reset the PHY.
1135		*/
1136		adapter->hw.phy.reset_disable = FALSE;
1137		/* Check SOL/IDER usage */
1138		IGB_CORE_LOCK(adapter);
1139		if (e1000_check_reset_block(&adapter->hw)) {
1140			IGB_CORE_UNLOCK(adapter);
1141			device_printf(adapter->dev, "Media change is"
1142			    " blocked due to SOL/IDER session.\n");
1143			break;
1144		}
1145		IGB_CORE_UNLOCK(adapter);
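		/* FALLTHROUGH: ifmedia_ioctl() below handles both get and set */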
1146	case SIOCGIFMEDIA:
1147		IOCTL_DEBUGOUT("ioctl rcv'd: \
1148		    SIOCxIFMEDIA (Get/Set Interface Media)");
1149		error = ifmedia_ioctl(ifp, ifr, &adapter->media, command);
1150		break;
1151	case SIOCSIFCAP:
1152	    {
1153		int mask, reinit;
1154
1155		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFCAP (Set Capabilities)");
1156		reinit = 0;
1157		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
1158#ifdef DEVICE_POLLING
1159		if (mask & IFCAP_POLLING) {
1160			if (ifr->ifr_reqcap & IFCAP_POLLING) {
1161				error = ether_poll_register(igb_poll, ifp);
1162				if (error)
1163					return (error);
1164				IGB_CORE_LOCK(adapter);
1165				igb_disable_intr(adapter);
1166				ifp->if_capenable |= IFCAP_POLLING;
1167				IGB_CORE_UNLOCK(adapter);
1168			} else {
1169				error = ether_poll_deregister(ifp);
1170				/* Enable interrupt even in error case */
1171				IGB_CORE_LOCK(adapter);
1172				igb_enable_intr(adapter);
1173				ifp->if_capenable &= ~IFCAP_POLLING;
1174				IGB_CORE_UNLOCK(adapter);
1175			}
1176		}
1177#endif
1178		if (mask & IFCAP_HWCSUM) {
1179			ifp->if_capenable ^= IFCAP_HWCSUM;
1180			reinit = 1;
1181		}
1182		if (mask & IFCAP_TSO4) {
1183			ifp->if_capenable ^= IFCAP_TSO4;
1184			reinit = 1;
1185		}
1186		if (mask & IFCAP_VLAN_HWTAGGING) {
1187			ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
1188			reinit = 1;
1189		}
1190		if (mask & IFCAP_VLAN_HWFILTER) {
1191			ifp->if_capenable ^= IFCAP_VLAN_HWFILTER;
1192			reinit = 1;
1193		}
1194		if (mask & IFCAP_VLAN_HWTSO) {
1195			ifp->if_capenable ^= IFCAP_VLAN_HWTSO;
1196			reinit = 1;
1197		}
1198		if (mask & IFCAP_LRO) {
1199			ifp->if_capenable ^= IFCAP_LRO;
1200			reinit = 1;
1201		}
1202		if (reinit && (ifp->if_drv_flags & IFF_DRV_RUNNING))
1203			igb_init(adapter);
1204		VLAN_CAPABILITIES(ifp);
1205		break;
1206	    }
1207
1208	default:
1209		error = ether_ioctl(ifp, command, data);
1210		break;
1211	}
1212
1213	return (error);
1214}
1215
1216
1217/*********************************************************************
1218 *  Init entry point
1219 *
1220 *  This routine is used in two ways. It is used by the stack as
1221 *  the init entry point in the network interface structure. It is also used
1222 *  by the driver as a hw/sw initialization routine to get to a
1223 *  consistent state.
1224 *
1225 *
1226 **********************************************************************/
1227
1228static void
1229igb_init_locked(struct adapter *adapter)
1230{
1231	struct ifnet	*ifp = adapter->ifp;
1232	device_t	dev = adapter->dev;
1233
1234	INIT_DEBUGOUT("igb_init: begin");
1235
1236	IGB_CORE_LOCK_ASSERT(adapter);
1237
1238	igb_disable_intr(adapter);
1239	callout_stop(&adapter->timer);
1240
1241	/* Get the latest mac address, User can use a LAA */
1242        bcopy(IF_LLADDR(adapter->ifp), adapter->hw.mac.addr,
1243              ETHER_ADDR_LEN);
1244
1245	/* Put the address into the Receive Address Array */
1246	e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);
1247
1248	igb_reset(adapter);
1249	igb_update_link_status(adapter);
1250
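	/* Program the VLAN Ether Type register used for 802.1Q tag recognition */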
1251	E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);
1252
1253	/* Set hardware offload abilities */
1254	ifp->if_hwassist = 0;
1255	if (ifp->if_capenable & IFCAP_TXCSUM) {
1256		ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
1257#if __FreeBSD_version >= 800000
1258		if (adapter->hw.mac.type == e1000_82576)
1259			ifp->if_hwassist |= CSUM_SCTP;
1260#endif
1261	}
1262
1263	if (ifp->if_capenable & IFCAP_TSO4)
1264		ifp->if_hwassist |= CSUM_TSO;
1265
1266	/* Configure for OS presence */
1267	igb_init_manageability(adapter);
1268
1269	/* Prepare transmit descriptors and buffers */
1270	igb_setup_transmit_structures(adapter);
1271	igb_initialize_transmit_units(adapter);
1272
1273	/* Setup Multicast table */
1274	igb_set_multi(adapter);
1275
1276	/*
1277	** Figure out the desired mbuf pool
1278	** for doing jumbo/packetsplit
1279	*/
1280	if (adapter->max_frame_size <= 2048)
1281		adapter->rx_mbuf_sz = MCLBYTES;
1282	else if (adapter->max_frame_size <= 4096)
1283		adapter->rx_mbuf_sz = MJUMPAGESIZE;
1284	else
1285		adapter->rx_mbuf_sz = MJUM9BYTES;
1286
1287	/* Prepare receive descriptors and buffers */
1288	if (igb_setup_receive_structures(adapter)) {
1289		device_printf(dev, "Could not setup receive structures\n");
1290		return;
1291	}
1292	igb_initialize_receive_units(adapter);
1293
1294        /* Enable VLAN support */
1295	if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING)
1296		igb_setup_vlan_hw_support(adapter);
1297
1298	/* Don't lose promiscuous settings */
1299	igb_set_promisc(adapter);
1300
1301	ifp->if_drv_flags |= IFF_DRV_RUNNING;
1302	ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
1303
1304	callout_reset(&adapter->timer, hz, igb_local_timer, adapter);
1305	e1000_clear_hw_cntrs_base_generic(&adapter->hw);
1306
1307	if (adapter->msix > 1) /* Set up queue routing */
1308		igb_configure_queues(adapter);
1309
1310	/* this clears any pending interrupts */
1311	E1000_READ_REG(&adapter->hw, E1000_ICR);
1312#ifdef DEVICE_POLLING
1313	/*
1314	 * Only enable interrupts if we are not polling, make sure
1315	 * they are off otherwise.
1316	 */
1317	if (ifp->if_capenable & IFCAP_POLLING)
1318		igb_disable_intr(adapter);
1319	else
1320#endif /* DEVICE_POLLING */
1321	{
1322		igb_enable_intr(adapter);
1323		E1000_WRITE_REG(&adapter->hw, E1000_ICS, E1000_ICS_LSC);
1324	}
1325
1326	/* Set Energy Efficient Ethernet */
1327	e1000_set_eee_i350(&adapter->hw);
1328
1329	/* Don't reset the phy next time init gets called */
1330	adapter->hw.phy.reset_disable = TRUE;
1331}
1332
1333static void
1334igb_init(void *arg)
1335{
1336	struct adapter *adapter = arg;
1337
1338	IGB_CORE_LOCK(adapter);
1339	igb_init_locked(adapter);
1340	IGB_CORE_UNLOCK(adapter);
1341}
1342
1343
1344static void
1345igb_handle_que(void *context, int pending)
1346{
1347	struct igb_queue *que = context;
1348	struct adapter *adapter = que->adapter;
1349	struct tx_ring *txr = que->txr;
1350	struct ifnet	*ifp = adapter->ifp;
1351
1352	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1353		bool	more;
1354
1355		more = igb_rxeof(que, -1, NULL);
1356
1357		IGB_TX_LOCK(txr);
1358		if (igb_txeof(txr))
1359			more = TRUE;
1360#if __FreeBSD_version >= 800000
1361		if (!drbr_empty(ifp, txr->br))
1362			igb_mq_start_locked(ifp, txr, NULL);
1363#else
1364		igb_start_locked(txr, ifp);
1365#endif
1366		IGB_TX_UNLOCK(txr);
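		/* More work pending: reschedule the task rather than re-enable the interrupt */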
1367		if (more || (ifp->if_drv_flags & IFF_DRV_OACTIVE)) {
1368			taskqueue_enqueue(que->tq, &que->que_task);
1369			return;
1370		}
1371	}
1372
1373#ifdef DEVICE_POLLING
1374	if (ifp->if_capenable & IFCAP_POLLING)
1375		return;
1376#endif
1377	/* Reenable this interrupt */
1378	if (que->eims)
1379		E1000_WRITE_REG(&adapter->hw, E1000_EIMS, que->eims);
1380	else
1381		igb_enable_intr(adapter);
1382}
1383
1384/* Deal with link in a sleepable context */
1385static void
1386igb_handle_link(void *context, int pending)
1387{
1388	struct adapter *adapter = context;
1389
1390	adapter->hw.mac.get_link_status = 1;
1391	igb_update_link_status(adapter);
1392}
1393
1394/*********************************************************************
1395 *
1396 *  MSI/Legacy Deferred
1397 *  Interrupt Service routine
1398 *
1399 *********************************************************************/
1400static int
1401igb_irq_fast(void *arg)
1402{
1403	struct adapter		*adapter = arg;
1404	struct igb_queue	*que = adapter->queues;
1405	u32			reg_icr;
1406
1407
1408	reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1409
1410	/* Hot eject?  */
1411	if (reg_icr == 0xffffffff)
1412		return FILTER_STRAY;
1413
1414	/* Definitely not our interrupt.  */
1415	if (reg_icr == 0x0)
1416		return FILTER_STRAY;
1417
1418	if ((reg_icr & E1000_ICR_INT_ASSERTED) == 0)
1419		return FILTER_STRAY;
1420
1421	/*
1422	 * Mask interrupts until the taskqueue is finished running.  This is
1423	 * cheap, just assume that it is needed.  This also works around the
1424	 * MSI message reordering errata on certain systems.
1425	 */
1426	igb_disable_intr(adapter);
1427	taskqueue_enqueue(que->tq, &que->que_task);
1428
1429	/* Link status change */
1430	if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC))
1431		taskqueue_enqueue(que->tq, &adapter->link_task);
1432
1433	if (reg_icr & E1000_ICR_RXO)
1434		adapter->rx_overruns++;
1435	return FILTER_HANDLED;
1436}
1437
1438#ifdef DEVICE_POLLING
1439/*********************************************************************
1440 *
1441 *  Legacy polling routine : if using this code you MUST be sure that
1442 *  multiqueue is not defined, ie, set igb_num_queues to 1.
1443 *
1444 *********************************************************************/
1445#if __FreeBSD_version >= 800000
1446#define POLL_RETURN_COUNT(a) (a)
1447static int
1448#else
1449#define POLL_RETURN_COUNT(a)
1450static void
1451#endif
1452igb_poll(struct ifnet *ifp, enum poll_cmd cmd, int count)
1453{
1454	struct adapter		*adapter = ifp->if_softc;
1455	struct igb_queue	*que = adapter->queues;
1456	struct tx_ring		*txr = adapter->tx_rings;
1457	u32			reg_icr, rx_done = 0;
1458	u32			loop = IGB_MAX_LOOP;
1459	bool			more;
1460
1461	IGB_CORE_LOCK(adapter);
1462	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
1463		IGB_CORE_UNLOCK(adapter);
1464		return POLL_RETURN_COUNT(rx_done);
1465	}
1466
1467	if (cmd == POLL_AND_CHECK_STATUS) {
1468		reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1469		/* Link status change */
1470		if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC))
1471			igb_handle_link(adapter, 0);
1472
1473		if (reg_icr & E1000_ICR_RXO)
1474			adapter->rx_overruns++;
1475	}
1476	IGB_CORE_UNLOCK(adapter);
1477
1478	igb_rxeof(que, count, &rx_done);
1479
1480	IGB_TX_LOCK(txr);
1481	do {
1482		more = igb_txeof(txr);
1483	} while (loop-- && more);
1484#if __FreeBSD_version >= 800000
1485	if (!drbr_empty(ifp, txr->br))
1486		igb_mq_start_locked(ifp, txr, NULL);
1487#else
1488	igb_start_locked(txr, ifp);
1489#endif
1490	IGB_TX_UNLOCK(txr);
1491	return POLL_RETURN_COUNT(rx_done);
1492}
1493#endif /* DEVICE_POLLING */
1494
1495/*********************************************************************
1496 *
1497 *  MSIX TX Interrupt Service routine
1498 *
1499 **********************************************************************/
1500static void
1501igb_msix_que(void *arg)
1502{
1503	struct igb_queue *que = arg;
1504	struct adapter *adapter = que->adapter;
1505	struct tx_ring *txr = que->txr;
1506	struct rx_ring *rxr = que->rxr;
1507	u32		newitr = 0;
1508	bool		more_tx, more_rx;
1509
1510	E1000_WRITE_REG(&adapter->hw, E1000_EIMC, que->eims);
1511	++que->irqs;
1512
1513	IGB_TX_LOCK(txr);
1514	more_tx = igb_txeof(txr);
1515	IGB_TX_UNLOCK(txr);
1516
1517	more_rx = igb_rxeof(que, adapter->rx_process_limit, NULL);
1518
1519	if (adapter->enable_aim == FALSE)
1520		goto no_calc;
1521	/*
1522	** Do Adaptive Interrupt Moderation:
1523        **  - Write out last calculated setting
1524	**  - Calculate based on average size over
1525	**    the last interval.
1526	*/
1527        if (que->eitr_setting)
1528                E1000_WRITE_REG(&adapter->hw,
1529                    E1000_EITR(que->msix), que->eitr_setting);
1530
1531        que->eitr_setting = 0;
1532
1533        /* Idle, do nothing */
1534        if ((txr->bytes == 0) && (rxr->bytes == 0))
1535                goto no_calc;
1536
1537        /* Used half Default if sub-gig */
1538        if (adapter->link_speed != 1000)
1539                newitr = IGB_DEFAULT_ITR / 2;
1540        else {
1541		if ((txr->bytes) && (txr->packets))
1542                	newitr = txr->bytes/txr->packets;
1543		if ((rxr->bytes) && (rxr->packets))
1544			newitr = max(newitr,
1545			    (rxr->bytes / rxr->packets));
1546                newitr += 24; /* account for hardware frame, crc */
1547		/* set an upper boundary */
1548		newitr = min(newitr, 3000);
1549		/* Be nice to the mid range */
1550                if ((newitr > 300) && (newitr < 1200))
1551                        newitr = (newitr / 3);
1552                else
1553                        newitr = (newitr / 2);
1554        }
1555        newitr &= 0x7FFC;  /* Mask invalid bits */
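        /*
        ** The 82575 wants the interval written to both halves of EITR;
        ** later MACs take it with the counter-ignore bit set instead.
        */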
1556        if (adapter->hw.mac.type == e1000_82575)
1557                newitr |= newitr << 16;
1558        else
1559                newitr |= E1000_EITR_CNT_IGNR;
1560
1561        /* save for next interrupt */
1562        que->eitr_setting = newitr;
1563
1564        /* Reset state */
1565        txr->bytes = 0;
1566        txr->packets = 0;
1567        rxr->bytes = 0;
1568        rxr->packets = 0;
1569
1570no_calc:
1571	/* Schedule a clean task if needed */
1572	if (more_tx || more_rx ||
1573	    (adapter->ifp->if_drv_flags & IFF_DRV_OACTIVE))
1574		taskqueue_enqueue(que->tq, &que->que_task);
1575	else
1576		/* Reenable this interrupt */
1577		E1000_WRITE_REG(&adapter->hw, E1000_EIMS, que->eims);
1578	return;
1579}
1580
1581
1582/*********************************************************************
1583 *
1584 *  MSIX Link Interrupt Service routine
1585 *
1586 **********************************************************************/
1587
1588static void
1589igb_msix_link(void *arg)
1590{
1591	struct adapter	*adapter = arg;
1592	u32       	icr;
1593
1594	++adapter->link_irq;
1595	icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1596	if (!(icr & E1000_ICR_LSC))
1597		goto spurious;
1598	igb_handle_link(adapter, 0);
1599
1600spurious:
1601	/* Rearm */
1602	E1000_WRITE_REG(&adapter->hw, E1000_IMS, E1000_IMS_LSC);
1603	E1000_WRITE_REG(&adapter->hw, E1000_EIMS, adapter->link_mask);
1604	return;
1605}
1606
1607
1608/*********************************************************************
1609 *
1610 *  Media Ioctl callback
1611 *
1612 *  This routine is called whenever the user queries the status of
1613 *  the interface using ifconfig.
1614 *
1615 **********************************************************************/
1616static void
1617igb_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
1618{
1619	struct adapter *adapter = ifp->if_softc;
1620	u_char fiber_type = IFM_1000_SX;
1621
1622	INIT_DEBUGOUT("igb_media_status: begin");
1623
1624	IGB_CORE_LOCK(adapter);
1625	igb_update_link_status(adapter);
1626
1627	ifmr->ifm_status = IFM_AVALID;
1628	ifmr->ifm_active = IFM_ETHER;
1629
1630	if (!adapter->link_active) {
1631		IGB_CORE_UNLOCK(adapter);
1632		return;
1633	}
1634
1635	ifmr->ifm_status |= IFM_ACTIVE;
1636
1637	if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
1638	    (adapter->hw.phy.media_type == e1000_media_type_internal_serdes))
1639		ifmr->ifm_active |= fiber_type | IFM_FDX;
1640	else {
1641		switch (adapter->link_speed) {
1642		case 10:
1643			ifmr->ifm_active |= IFM_10_T;
1644			break;
1645		case 100:
1646			ifmr->ifm_active |= IFM_100_TX;
1647			break;
1648		case 1000:
1649			ifmr->ifm_active |= IFM_1000_T;
1650			break;
1651		}
1652		if (adapter->link_duplex == FULL_DUPLEX)
1653			ifmr->ifm_active |= IFM_FDX;
1654		else
1655			ifmr->ifm_active |= IFM_HDX;
1656	}
1657	IGB_CORE_UNLOCK(adapter);
1658}
1659
1660/*********************************************************************
1661 *
1662 *  Media Ioctl callback
1663 *
1664 *  This routine is called when the user changes speed/duplex using
1665 *  media/mediaopt option with ifconfig.
1666 *
1667 **********************************************************************/
1668static int
1669igb_media_change(struct ifnet *ifp)
1670{
1671	struct adapter *adapter = ifp->if_softc;
1672	struct ifmedia  *ifm = &adapter->media;
1673
1674	INIT_DEBUGOUT("igb_media_change: begin");
1675
1676	if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
1677		return (EINVAL);
1678
1679	IGB_CORE_LOCK(adapter);
1680	switch (IFM_SUBTYPE(ifm->ifm_media)) {
1681	case IFM_AUTO:
1682		adapter->hw.mac.autoneg = DO_AUTO_NEG;
1683		adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
1684		break;
1685	case IFM_1000_LX:
1686	case IFM_1000_SX:
1687	case IFM_1000_T:
1688		adapter->hw.mac.autoneg = DO_AUTO_NEG;
1689		adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
1690		break;
1691	case IFM_100_TX:
1692		adapter->hw.mac.autoneg = FALSE;
1693		adapter->hw.phy.autoneg_advertised = 0;
1694		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1695			adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_FULL;
1696		else
1697			adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_HALF;
1698		break;
1699	case IFM_10_T:
1700		adapter->hw.mac.autoneg = FALSE;
1701		adapter->hw.phy.autoneg_advertised = 0;
1702		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1703			adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_FULL;
1704		else
1705			adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_HALF;
1706		break;
1707	default:
1708		device_printf(adapter->dev, "Unsupported media type\n");
1709	}
1710
1711	igb_init_locked(adapter);
1712	IGB_CORE_UNLOCK(adapter);
1713
1714	return (0);
1715}
1716
1717
1718/*********************************************************************
1719 *
1720 *  This routine maps the mbufs to Advanced TX descriptors,
1721 *  as used by the 82575 adapter.
1722 *
1723 **********************************************************************/
1724
1725static int
1726igb_xmit(struct tx_ring *txr, struct mbuf **m_headp)
1727{
1728	struct adapter		*adapter = txr->adapter;
1729	bus_dma_segment_t	segs[IGB_MAX_SCATTER];
1730	bus_dmamap_t		map;
1731	struct igb_tx_buffer	*tx_buffer, *tx_buffer_mapped;
1732	union e1000_adv_tx_desc	*txd = NULL;
1733	struct mbuf		*m_head;
1734	u32			olinfo_status = 0, cmd_type_len = 0;
1735	int			nsegs, i, j, error, first, last = 0;
1736	u32			hdrlen = 0;
1737
1738	m_head = *m_headp;
1739
1740
1741	/* Set basic descriptor constants */
1742	cmd_type_len |= E1000_ADVTXD_DTYP_DATA;
1743	cmd_type_len |= E1000_ADVTXD_DCMD_IFCS | E1000_ADVTXD_DCMD_DEXT;
1744	if (m_head->m_flags & M_VLANTAG)
1745		cmd_type_len |= E1000_ADVTXD_DCMD_VLE;
1746
1747	/*
1748         * Map the packet for DMA.
1749	 *
1750	 * Capture the first descriptor index;
1751	 * this descriptor will have the index
1752	 * of the EOP, which is the only one that
1753	 * now gets a DONE bit writeback.
1754	 */
1755	first = txr->next_avail_desc;
1756	tx_buffer = &txr->tx_buffers[first];
1757	tx_buffer_mapped = tx_buffer;
1758	map = tx_buffer->map;
1759
1760	error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
1761	    *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
1762
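	/*
	 * EFBIG: the chain has too many segments for the DMA map;
	 * coalesce it with m_defrag() and retry the load once.
	 */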
1763	if (error == EFBIG) {
1764		struct mbuf *m;
1765
1766		m = m_defrag(*m_headp, M_DONTWAIT);
1767		if (m == NULL) {
1768			adapter->mbuf_defrag_failed++;
1769			m_freem(*m_headp);
1770			*m_headp = NULL;
1771			return (ENOBUFS);
1772		}
1773		*m_headp = m;
1774
1775		/* Try it again */
1776		error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
1777		    *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
1778
1779		if (error == ENOMEM) {
1780			adapter->no_tx_dma_setup++;
1781			return (error);
1782		} else if (error != 0) {
1783			adapter->no_tx_dma_setup++;
1784			m_freem(*m_headp);
1785			*m_headp = NULL;
1786			return (error);
1787		}
1788	} else if (error == ENOMEM) {
1789		adapter->no_tx_dma_setup++;
1790		return (error);
1791	} else if (error != 0) {
1792		adapter->no_tx_dma_setup++;
1793		m_freem(*m_headp);
1794		*m_headp = NULL;
1795		return (error);
1796	}
1797
1798	/* Check again to be sure we have enough descriptors */
1799        if (nsegs > (txr->tx_avail - 2)) {
1800                txr->no_desc_avail++;
1801		bus_dmamap_unload(txr->txtag, map);
1802		return (ENOBUFS);
1803        }
1804	m_head = *m_headp;
1805
1806        /*
1807         * Set up the context descriptor:
1808         * used when any hardware offload is done.
1809	 * This includes CSUM, VLAN, and TSO. It
1810	 * will use the first descriptor.
1811         */
1812        if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
1813		if (igb_tso_setup(txr, m_head, &hdrlen)) {
1814			cmd_type_len |= E1000_ADVTXD_DCMD_TSE;
1815			olinfo_status |= E1000_TXD_POPTS_IXSM << 8;
1816			olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
1817		} else
1818			return (ENXIO);
1819	} else if (igb_tx_ctx_setup(txr, m_head))
1820		olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
1821
1822	/* Calculate payload length */
1823	olinfo_status |= ((m_head->m_pkthdr.len - hdrlen)
1824	    << E1000_ADVTXD_PAYLEN_SHIFT);
1825
1826	/* 82575 needs the queue index added */
1827	if (adapter->hw.mac.type == e1000_82575)
1828		olinfo_status |= txr->me << 4;
1829
1830	/* Set up our transmit descriptors */
1831	i = txr->next_avail_desc;
1832	for (j = 0; j < nsegs; j++) {
1833		bus_size_t seg_len;
1834		bus_addr_t seg_addr;
1835
1836		tx_buffer = &txr->tx_buffers[i];
1837		txd = (union e1000_adv_tx_desc *)&txr->tx_base[i];
1838		seg_addr = segs[j].ds_addr;
1839		seg_len  = segs[j].ds_len;
1840
1841		txd->read.buffer_addr = htole64(seg_addr);
1842		txd->read.cmd_type_len = htole32(cmd_type_len | seg_len);
1843		txd->read.olinfo_status = htole32(olinfo_status);
1844		last = i;
1845		if (++i == adapter->num_tx_desc)
1846			i = 0;
1847		tx_buffer->m_head = NULL;
1848		tx_buffer->next_eop = -1;
1849	}
1850
1851	txr->next_avail_desc = i;
1852	txr->tx_avail -= nsegs;
1853
1854        tx_buffer->m_head = m_head;
1855	tx_buffer_mapped->map = tx_buffer->map;
1856	tx_buffer->map = map;
1857        bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);
1858
1859        /*
1860         * Last Descriptor of Packet
1861	 * needs End Of Packet (EOP)
1862	 * and Report Status (RS)
1863         */
1864        txd->read.cmd_type_len |=
1865	    htole32(E1000_ADVTXD_DCMD_EOP | E1000_ADVTXD_DCMD_RS);
1866	/*
1867	 * Keep track in the first buffer which
1868	 * descriptor will be written back
1869	 */
1870	tx_buffer = &txr->tx_buffers[first];
1871	tx_buffer->next_eop = last;
1872	txr->watchdog_time = ticks;
1873
1874	/*
1875	 * Advance the Transmit Descriptor Tail (TDT), this tells the E1000
1876	 * that this frame is available to transmit.
1877	 */
1878	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
1879	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1880	E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), i);
1881	++txr->tx_packets;
1882
1883	return (0);
1884
1885}
1886
1887static void
1888igb_set_promisc(struct adapter *adapter)
1889{
1890	struct ifnet	*ifp = adapter->ifp;
1891	struct e1000_hw *hw = &adapter->hw;
1892	u32		reg;
1893
1894	if (adapter->vf_ifp) {
1895		e1000_promisc_set_vf(hw, e1000_promisc_enabled);
1896		return;
1897	}
1898
1899	reg = E1000_READ_REG(hw, E1000_RCTL);
1900	if (ifp->if_flags & IFF_PROMISC) {
1901		reg |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
1902		E1000_WRITE_REG(hw, E1000_RCTL, reg);
1903	} else if (ifp->if_flags & IFF_ALLMULTI) {
1904		reg |= E1000_RCTL_MPE;
1905		reg &= ~E1000_RCTL_UPE;
1906		E1000_WRITE_REG(hw, E1000_RCTL, reg);
1907	}
1908}
1909
1910static void
1911igb_disable_promisc(struct adapter *adapter)
1912{
1913	struct e1000_hw *hw = &adapter->hw;
1914	u32		reg;
1915
1916	if (adapter->vf_ifp) {
1917		e1000_promisc_set_vf(hw, e1000_promisc_disabled);
1918		return;
1919	}
1920	reg = E1000_READ_REG(hw, E1000_RCTL);
1921	reg &=  (~E1000_RCTL_UPE);
1922	reg &=  (~E1000_RCTL_MPE);
1923	E1000_WRITE_REG(hw, E1000_RCTL, reg);
1924}
1925
1926
1927/*********************************************************************
1928 *  Multicast Update
1929 *
1930 *  This routine is called whenever the multicast address list is updated.
1931 *
1932 **********************************************************************/
1933
1934static void
1935igb_set_multi(struct adapter *adapter)
1936{
1937	struct ifnet	*ifp = adapter->ifp;
1938	struct ifmultiaddr *ifma;
1939	u32 reg_rctl = 0;
1940	u8  *mta;
1941
1942	int mcnt = 0;
1943
1944	IOCTL_DEBUGOUT("igb_set_multi: begin");
1945
1946	mta = adapter->mta;
1947	bzero(mta, sizeof(uint8_t) * ETH_ADDR_LEN *
1948	    MAX_NUM_MULTICAST_ADDRESSES);
1949
1950#if __FreeBSD_version < 800000
1951	IF_ADDR_LOCK(ifp);
1952#else
1953	if_maddr_rlock(ifp);
1954#endif
1955	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
1956		if (ifma->ifma_addr->sa_family != AF_LINK)
1957			continue;
1958
1959		if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
1960			break;
1961
1962		bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
1963		    &mta[mcnt * ETH_ADDR_LEN], ETH_ADDR_LEN);
1964		mcnt++;
1965	}
1966#if __FreeBSD_version < 800000
1967	IF_ADDR_UNLOCK(ifp);
1968#else
1969	if_maddr_runlock(ifp);
1970#endif
1971
1972	if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) {
1973		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
1974		reg_rctl |= E1000_RCTL_MPE;
1975		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
1976	} else
1977		e1000_update_mc_addr_list(&adapter->hw, mta, mcnt);
1978}
1979
1980
1981/*********************************************************************
1982 *  Timer routine:
1983 *  	This routine checks for link status,
1984 *	updates statistics, and does the watchdog.
1985 *
1986 **********************************************************************/
1987
1988static void
1989igb_local_timer(void *arg)
1990{
1991	struct adapter		*adapter = arg;
1992	device_t		dev = adapter->dev;
1993	struct tx_ring		*txr = adapter->tx_rings;
1994
1995
1996	IGB_CORE_LOCK_ASSERT(adapter);
1997
1998	igb_update_link_status(adapter);
1999	igb_update_stats_counters(adapter);
2000
2001	/*
2002	** If flow control has paused us since last checking
2003	** it invalidates the watchdog timing, so don't run it.
2004	*/
2005	if (adapter->pause_frames) {
2006		adapter->pause_frames = 0;
2007		goto out;
2008	}
2009
2010        /*
2011        ** Watchdog: check for time since any descriptor was cleaned
2012        */
2013	for (int i = 0; i < adapter->num_queues; i++, txr++)
2014		if (txr->queue_status == IGB_QUEUE_HUNG)
2015			goto timeout;
2016out:
2017	callout_reset(&adapter->timer, hz, igb_local_timer, adapter);
2018#ifndef DEVICE_POLLING
2019	/* Schedule all queue interrupts - deadlock protection */
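	/*
	 * Writing EICS sets the cause bit for every queue vector and
	 * forces those interrupts to fire, so a queue whose interrupt
	 * was missed still gets serviced on the next pass.
	 */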
2020	E1000_WRITE_REG(&adapter->hw, E1000_EICS, adapter->que_mask);
2021#endif
2022	return;
2023
2024timeout:
2025	device_printf(adapter->dev, "Watchdog timeout -- resetting\n");
2026	device_printf(dev,"Queue(%d) tdh = %d, hw tdt = %d\n", txr->me,
2027            E1000_READ_REG(&adapter->hw, E1000_TDH(txr->me)),
2028            E1000_READ_REG(&adapter->hw, E1000_TDT(txr->me)));
2029	device_printf(dev,"TX(%d) desc avail = %d, "
2030            "Next TX to Clean = %d\n",
2031            txr->me, txr->tx_avail, txr->next_to_clean);
2032	adapter->ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2033	adapter->watchdog_events++;
2034	igb_init_locked(adapter);
2035}
2036
2037static void
2038igb_update_link_status(struct adapter *adapter)
2039{
2040	struct e1000_hw *hw = &adapter->hw;
2041	struct ifnet *ifp = adapter->ifp;
2042	device_t dev = adapter->dev;
2043	struct tx_ring *txr = adapter->tx_rings;
2044	u32 link_check, thstat, ctrl;
2045
2046	link_check = thstat = ctrl = 0;
2047
2048	/* Get the cached link value or read for real */
2049        switch (hw->phy.media_type) {
2050        case e1000_media_type_copper:
2051                if (hw->mac.get_link_status) {
2052			/* Do the work to read phy */
2053                        e1000_check_for_link(hw);
2054                        link_check = !hw->mac.get_link_status;
2055                } else
2056                        link_check = TRUE;
2057                break;
2058        case e1000_media_type_fiber:
2059                e1000_check_for_link(hw);
2060                link_check = (E1000_READ_REG(hw, E1000_STATUS) &
2061                                 E1000_STATUS_LU);
2062                break;
2063        case e1000_media_type_internal_serdes:
2064                e1000_check_for_link(hw);
2065                link_check = adapter->hw.mac.serdes_has_link;
2066                break;
2067	/* VF device is type_unknown */
2068        case e1000_media_type_unknown:
2069                e1000_check_for_link(hw);
2070		link_check = !hw->mac.get_link_status;
2071		/* Fall thru */
2072        default:
2073                break;
2074        }
2075
2076	/* Check for thermal downshift or shutdown */
2077	if (hw->mac.type == e1000_i350) {
2078		thstat = E1000_READ_REG(hw, E1000_THSTAT);
2079		ctrl = E1000_READ_REG(hw, E1000_CTRL_EXT);
2080	}
2081
2082	/* Now we check if a transition has happened */
2083	if (link_check && (adapter->link_active == 0)) {
2084		e1000_get_speed_and_duplex(&adapter->hw,
2085		    &adapter->link_speed, &adapter->link_duplex);
2086		if (bootverbose)
2087			device_printf(dev, "Link is up %d Mbps %s\n",
2088			    adapter->link_speed,
2089			    ((adapter->link_duplex == FULL_DUPLEX) ?
2090			    "Full Duplex" : "Half Duplex"));
2091		adapter->link_active = 1;
2092		ifp->if_baudrate = adapter->link_speed * 1000000;
2093		if ((ctrl & E1000_CTRL_EXT_LINK_MODE_GMII) &&
2094		    (thstat & E1000_THSTAT_LINK_THROTTLE))
2095			device_printf(dev, "Link: thermal downshift\n");
2096		/* This can sleep */
2097		if_link_state_change(ifp, LINK_STATE_UP);
2098	} else if (!link_check && (adapter->link_active == 1)) {
2099		ifp->if_baudrate = adapter->link_speed = 0;
2100		adapter->link_duplex = 0;
2101		if (bootverbose)
2102			device_printf(dev, "Link is Down\n");
2103		if ((ctrl & E1000_CTRL_EXT_LINK_MODE_GMII) &&
2104		    (thstat & E1000_THSTAT_PWR_DOWN))
2105			device_printf(dev, "Link: thermal shutdown\n");
2106		adapter->link_active = 0;
2107		/* This can sleep */
2108		if_link_state_change(ifp, LINK_STATE_DOWN);
2109		/* Turn off watchdogs */
2110		for (int i = 0; i < adapter->num_queues; i++, txr++)
2111			txr->queue_status = IGB_QUEUE_IDLE;
2112	}
2113}
2114
2115/*********************************************************************
2116 *
2117 *  This routine disables all traffic on the adapter by issuing a
2118 *  global reset on the MAC and deallocating TX/RX buffers.
2119 *
2120 **********************************************************************/
2121
2122static void
2123igb_stop(void *arg)
2124{
2125	struct adapter	*adapter = arg;
2126	struct ifnet	*ifp = adapter->ifp;
2127	struct tx_ring *txr = adapter->tx_rings;
2128
2129	IGB_CORE_LOCK_ASSERT(adapter);
2130
2131	INIT_DEBUGOUT("igb_stop: begin");
2132
2133	igb_disable_intr(adapter);
2134
2135	callout_stop(&adapter->timer);
2136
2137	/* Tell the stack that the interface is no longer active */
2138	ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
2139
2140	/* Unarm watchdog timer. */
2141	for (int i = 0; i < adapter->num_queues; i++, txr++) {
2142		IGB_TX_LOCK(txr);
2143		txr->queue_status = IGB_QUEUE_IDLE;
2144		IGB_TX_UNLOCK(txr);
2145	}
2146
2147	e1000_reset_hw(&adapter->hw);
2148	E1000_WRITE_REG(&adapter->hw, E1000_WUC, 0);
2149
2150	e1000_led_off(&adapter->hw);
2151	e1000_cleanup_led(&adapter->hw);
2152}
2153
2154
2155/*********************************************************************
2156 *
2157 *  Determine hardware revision.
2158 *
2159 **********************************************************************/
2160static void
2161igb_identify_hardware(struct adapter *adapter)
2162{
2163	device_t dev = adapter->dev;
2164
2165	/* Make sure our PCI config space has the necessary stuff set */
2166	adapter->hw.bus.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2);
2167	if (!((adapter->hw.bus.pci_cmd_word & PCIM_CMD_BUSMASTEREN) &&
2168	    (adapter->hw.bus.pci_cmd_word & PCIM_CMD_MEMEN))) {
2169		INIT_DEBUGOUT("Memory Access and/or Bus Master "
2170		    "bits were not set!\n");
2171		adapter->hw.bus.pci_cmd_word |=
2172		(PCIM_CMD_BUSMASTEREN | PCIM_CMD_MEMEN);
2173		pci_write_config(dev, PCIR_COMMAND,
2174		    adapter->hw.bus.pci_cmd_word, 2);
2175	}
2176
2177	/* Save off the information about this board */
2178	adapter->hw.vendor_id = pci_get_vendor(dev);
2179	adapter->hw.device_id = pci_get_device(dev);
2180	adapter->hw.revision_id = pci_read_config(dev, PCIR_REVID, 1);
2181	adapter->hw.subsystem_vendor_id =
2182	    pci_read_config(dev, PCIR_SUBVEND_0, 2);
2183	adapter->hw.subsystem_device_id =
2184	    pci_read_config(dev, PCIR_SUBDEV_0, 2);
2185
2186	/* Set MAC type early for PCI setup */
2187	e1000_set_mac_type(&adapter->hw);
2188
2189	/* Are we a VF device? */
2190	if ((adapter->hw.mac.type == e1000_vfadapt) ||
2191	    (adapter->hw.mac.type == e1000_vfadapt_i350))
2192		adapter->vf_ifp = 1;
2193	else
2194		adapter->vf_ifp = 0;
2195}
2196
2197static int
2198igb_allocate_pci_resources(struct adapter *adapter)
2199{
2200	device_t	dev = adapter->dev;
2201	int		rid;
2202
2203	rid = PCIR_BAR(0);
2204	adapter->pci_mem = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
2205	    &rid, RF_ACTIVE);
2206	if (adapter->pci_mem == NULL) {
2207		device_printf(dev, "Unable to allocate bus resource: memory\n");
2208		return (ENXIO);
2209	}
2210	adapter->osdep.mem_bus_space_tag =
2211	    rman_get_bustag(adapter->pci_mem);
2212	adapter->osdep.mem_bus_space_handle =
2213	    rman_get_bushandle(adapter->pci_mem);
2214	adapter->hw.hw_addr = (u8 *)&adapter->osdep.mem_bus_space_handle;
2215
2216	adapter->num_queues = 1; /* Defaults for Legacy or MSI */
2217
2218	/* This will setup either MSI/X or MSI */
2219	adapter->msix = igb_setup_msix(adapter);
2220	adapter->hw.back = &adapter->osdep;
2221
2222	return (0);
2223}
2224
2225/*********************************************************************
2226 *
2227 *  Setup the Legacy or MSI Interrupt handler
2228 *
2229 **********************************************************************/
2230static int
2231igb_allocate_legacy(struct adapter *adapter)
2232{
2233	device_t		dev = adapter->dev;
2234	struct igb_queue	*que = adapter->queues;
2235	struct tx_ring		*txr = adapter->tx_rings;
2236	int			error, rid = 0;
2237
2238	/* Turn off all interrupts */
2239	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2240
2241	/* MSI RID is 1 */
2242	if (adapter->msix == 1)
2243		rid = 1;
2244
2245	/* We allocate a single interrupt resource */
2246	adapter->res = bus_alloc_resource_any(dev,
2247	    SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2248	if (adapter->res == NULL) {
2249		device_printf(dev, "Unable to allocate bus resource: "
2250		    "interrupt\n");
2251		return (ENXIO);
2252	}
2253
2254#if __FreeBSD_version >= 800000
2255	TASK_INIT(&txr->txq_task, 0, igb_deferred_mq_start, txr);
2256#endif
2257
2258	/*
2259	 * Try allocating a fast interrupt and the associated deferred
2260	 * processing contexts.
2261	 */
2262	TASK_INIT(&que->que_task, 0, igb_handle_que, que);
2263	/* Make tasklet for deferred link handling */
2264	TASK_INIT(&adapter->link_task, 0, igb_handle_link, adapter);
2265	que->tq = taskqueue_create_fast("igb_taskq", M_NOWAIT,
2266	    taskqueue_thread_enqueue, &que->tq);
2267	taskqueue_start_threads(&que->tq, 1, PI_NET, "%s taskq",
2268	    device_get_nameunit(adapter->dev));
2269	if ((error = bus_setup_intr(dev, adapter->res,
2270	    INTR_TYPE_NET | INTR_MPSAFE, igb_irq_fast, NULL,
2271	    adapter, &adapter->tag)) != 0) {
2272		device_printf(dev, "Failed to register fast interrupt "
2273			    "handler: %d\n", error);
2274		taskqueue_free(que->tq);
2275		que->tq = NULL;
2276		return (error);
2277	}
2278
2279	return (0);
2280}
2281
2282
2283/*********************************************************************
2284 *
2285 *  Setup the MSIX Queue Interrupt handlers:
2286 *
2287 **********************************************************************/
2288static int
2289igb_allocate_msix(struct adapter *adapter)
2290{
2291	device_t		dev = adapter->dev;
2292	struct igb_queue	*que = adapter->queues;
2293	int			error, rid, vector = 0;
2294
2295
2296		rid = vector + 1;
2297		rid = vector +1;
2298		que->res = bus_alloc_resource_any(dev,
2299		    SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2300		if (que->res == NULL) {
2301			device_printf(dev,
2302			    "Unable to allocate bus resource: "
2303			    "MSIX Queue Interrupt\n");
2304			return (ENXIO);
2305		}
2306		error = bus_setup_intr(dev, que->res,
2307	    	    INTR_TYPE_NET | INTR_MPSAFE, NULL,
2308		    igb_msix_que, que, &que->tag);
2309		if (error) {
2310			que->res = NULL;
2311			device_printf(dev, "Failed to register Queue handler");
2312			return (error);
2313		}
2314#if __FreeBSD_version >= 800504
2315		bus_describe_intr(dev, que->res, que->tag, "que %d", i);
2316#endif
2317		que->msix = vector;
2318		if (adapter->hw.mac.type == e1000_82575)
2319			que->eims = E1000_EICR_TX_QUEUE0 << i;
2320		else
2321			que->eims = 1 << vector;
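		/*
		 * The 82575 exposes fixed per-queue cause bits in
		 * EICR/EIMS, while the later MACs simply map one EIMS
		 * bit to each MSI-X vector.
		 */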
2322		/*
2323		** Bind the msix vector, and thus the
2324		** rings to the corresponding cpu.
2325		*/
2326		if (adapter->num_queues > 1)
2327			bus_bind_intr(dev, que->res, i);
2328#if __FreeBSD_version >= 800000
2329		TASK_INIT(&que->txr->txq_task, 0, igb_deferred_mq_start,
2330		    que->txr);
2331#endif
2332		/* Make tasklet for deferred handling */
2333		TASK_INIT(&que->que_task, 0, igb_handle_que, que);
2334		que->tq = taskqueue_create("igb_que", M_NOWAIT,
2335		    taskqueue_thread_enqueue, &que->tq);
2336		taskqueue_start_threads(&que->tq, 1, PI_NET, "%s que",
2337		    device_get_nameunit(adapter->dev));
2338	}
2339
2340	/* And Link */
2341	rid = vector + 1;
2342	adapter->res = bus_alloc_resource_any(dev,
2343	    SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2344	if (adapter->res == NULL) {
2345		device_printf(dev,
2346		    "Unable to allocate bus resource: "
2347		    "MSIX Link Interrupt\n");
2348		return (ENXIO);
2349	}
2350	if ((error = bus_setup_intr(dev, adapter->res,
2351	    INTR_TYPE_NET | INTR_MPSAFE, NULL,
2352	    igb_msix_link, adapter, &adapter->tag)) != 0) {
2353		device_printf(dev, "Failed to register Link handler");
2354		return (error);
2355	}
2356#if __FreeBSD_version >= 800504
2357	bus_describe_intr(dev, adapter->res, adapter->tag, "link");
2358#endif
2359	adapter->linkvec = vector;
2360
2361	return (0);
2362}
2363
2364
2365static void
2366igb_configure_queues(struct adapter *adapter)
2367{
2368	struct	e1000_hw	*hw = &adapter->hw;
2369	struct	igb_queue	*que;
2370	u32			tmp, ivar = 0, newitr = 0;
2371
2372	/* First turn on RSS capability */
2373	if (adapter->hw.mac.type != e1000_82575)
2374		E1000_WRITE_REG(hw, E1000_GPIE,
2375		    E1000_GPIE_MSIX_MODE | E1000_GPIE_EIAME |
2376		    E1000_GPIE_PBA | E1000_GPIE_NSICR);
2377
2378	/* Turn on MSIX */
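	/*
	 * Each 32-bit IVAR register packs four 8-bit cause-to-vector
	 * entries; the mask/shift pairs below select the byte holding
	 * the RX or TX cause for a given queue and OR in the vector
	 * number together with E1000_IVAR_VALID.
	 */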
2379	switch (adapter->hw.mac.type) {
2380	case e1000_82580:
2381	case e1000_i350:
2382	case e1000_vfadapt:
2383	case e1000_vfadapt_i350:
2384		/* RX entries */
2385		for (int i = 0; i < adapter->num_queues; i++) {
2386			u32 index = i >> 1;
2387			ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2388			que = &adapter->queues[i];
2389			if (i & 1) {
2390				ivar &= 0xFF00FFFF;
2391				ivar |= (que->msix | E1000_IVAR_VALID) << 16;
2392			} else {
2393				ivar &= 0xFFFFFF00;
2394				ivar |= que->msix | E1000_IVAR_VALID;
2395			}
2396			E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2397		}
2398		/* TX entries */
2399		for (int i = 0; i < adapter->num_queues; i++) {
2400			u32 index = i >> 1;
2401			ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2402			que = &adapter->queues[i];
2403			if (i & 1) {
2404				ivar &= 0x00FFFFFF;
2405				ivar |= (que->msix | E1000_IVAR_VALID) << 24;
2406			} else {
2407				ivar &= 0xFFFF00FF;
2408				ivar |= (que->msix | E1000_IVAR_VALID) << 8;
2409			}
2410			E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2411			adapter->que_mask |= que->eims;
2412		}
2413
2414		/* And for the link interrupt */
2415		ivar = (adapter->linkvec | E1000_IVAR_VALID) << 8;
2416		adapter->link_mask = 1 << adapter->linkvec;
2417		E1000_WRITE_REG(hw, E1000_IVAR_MISC, ivar);
2418		break;
2419	case e1000_82576:
2420		/* RX entries */
2421		for (int i = 0; i < adapter->num_queues; i++) {
2422			u32 index = i & 0x7; /* Each IVAR has two entries */
2423			ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2424			que = &adapter->queues[i];
2425			if (i < 8) {
2426				ivar &= 0xFFFFFF00;
2427				ivar |= que->msix | E1000_IVAR_VALID;
2428			} else {
2429				ivar &= 0xFF00FFFF;
2430				ivar |= (que->msix | E1000_IVAR_VALID) << 16;
2431			}
2432			E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2433			adapter->que_mask |= que->eims;
2434		}
2435		/* TX entries */
2436		for (int i = 0; i < adapter->num_queues; i++) {
2437			u32 index = i & 0x7; /* Each IVAR has two entries */
2438			ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2439			que = &adapter->queues[i];
2440			if (i < 8) {
2441				ivar &= 0xFFFF00FF;
2442				ivar |= (que->msix | E1000_IVAR_VALID) << 8;
2443			} else {
2444				ivar &= 0x00FFFFFF;
2445				ivar |= (que->msix | E1000_IVAR_VALID) << 24;
2446			}
2447			E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2448			adapter->que_mask |= que->eims;
2449		}
2450
2451		/* And for the link interrupt */
2452		ivar = (adapter->linkvec | E1000_IVAR_VALID) << 8;
2453		adapter->link_mask = 1 << adapter->linkvec;
2454		E1000_WRITE_REG(hw, E1000_IVAR_MISC, ivar);
2455		break;
2456
2457	case e1000_82575:
2458                /* Enable MSI-X support */
2459		tmp = E1000_READ_REG(hw, E1000_CTRL_EXT);
2460                tmp |= E1000_CTRL_EXT_PBA_CLR;
2461                /* Auto-Mask interrupts upon ICR read. */
2462                tmp |= E1000_CTRL_EXT_EIAME;
2463                tmp |= E1000_CTRL_EXT_IRCA;
2464                E1000_WRITE_REG(hw, E1000_CTRL_EXT, tmp);
2465
2466		/* Queues */
2467		for (int i = 0; i < adapter->num_queues; i++) {
2468			que = &adapter->queues[i];
2469			tmp = E1000_EICR_RX_QUEUE0 << i;
2470			tmp |= E1000_EICR_TX_QUEUE0 << i;
2471			que->eims = tmp;
2472			E1000_WRITE_REG_ARRAY(hw, E1000_MSIXBM(0),
2473			    i, que->eims);
2474			adapter->que_mask |= que->eims;
2475		}
2476
2477		/* Link */
2478		E1000_WRITE_REG(hw, E1000_MSIXBM(adapter->linkvec),
2479		    E1000_EIMS_OTHER);
2480		adapter->link_mask |= E1000_EIMS_OTHER;
2481	default:
2482		break;
2483	}
2484
2485	/* Set the starting interrupt rate */
2486	if (igb_max_interrupt_rate > 0)
2487		newitr = (4000000 / igb_max_interrupt_rate) & 0x7FFC;
2488
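	/*
	 * igb_max_interrupt_rate is in interrupts/second; 4000000 / rate
	 * converts it to an EITR interval value, masked to the field's
	 * alignment.  The 82575 wants the value replicated in the upper
	 * half of the register as well; the later MACs set the CNT_IGNR
	 * bit instead.
	 */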
2489        if (hw->mac.type == e1000_82575)
2490                newitr |= newitr << 16;
2491        else
2492                newitr |= E1000_EITR_CNT_IGNR;
2493
2494	for (int i = 0; i < adapter->num_queues; i++) {
2495		que = &adapter->queues[i];
2496		E1000_WRITE_REG(hw, E1000_EITR(que->msix), newitr);
2497	}
2498
2499	return;
2500}
2501
2502
2503static void
2504igb_free_pci_resources(struct adapter *adapter)
2505{
2506	struct		igb_queue *que = adapter->queues;
2507	device_t	dev = adapter->dev;
2508	int		rid;
2509
2510	/*
2511	** There is a slight possibility of a failure mode
2512	** in attach that will result in entering this function
2513	** before interrupt resources have been initialized, and
2514	** in that case we do not want to execute the loops below.
2515	** We can detect this reliably by the state of the adapter
2516	** res pointer.
2517	*/
2518	if (adapter->res == NULL)
2519		goto mem;
2520
2521	/*
2522	 * First release all the interrupt resources:
2523	 */
2524	for (int i = 0; i < adapter->num_queues; i++, que++) {
2525		rid = que->msix + 1;
2526		if (que->tag != NULL) {
2527			bus_teardown_intr(dev, que->res, que->tag);
2528			que->tag = NULL;
2529		}
2530		if (que->res != NULL)
2531			bus_release_resource(dev,
2532			    SYS_RES_IRQ, rid, que->res);
2533	}
2534
2535	/* Clean the Legacy or Link interrupt last */
2536	if (adapter->linkvec) /* we are doing MSIX */
2537		rid = adapter->linkvec + 1;
2538	else
2539		rid = (adapter->msix != 0) ? 1 : 0;
2540
2541	que = adapter->queues;
2542	if (adapter->tag != NULL) {
2543		taskqueue_drain(que->tq, &adapter->link_task);
2544		bus_teardown_intr(dev, adapter->res, adapter->tag);
2545		adapter->tag = NULL;
2546	}
2547	if (adapter->res != NULL)
2548		bus_release_resource(dev, SYS_RES_IRQ, rid, adapter->res);
2549
2550	for (int i = 0; i < adapter->num_queues; i++, que++) {
2551		if (que->tq != NULL) {
2552#if __FreeBSD_version >= 800000
2553			taskqueue_drain(que->tq, &que->txr->txq_task);
2554#endif
2555			taskqueue_drain(que->tq, &que->que_task);
2556			taskqueue_free(que->tq);
2557		}
2558	}
2559mem:
2560	if (adapter->msix)
2561		pci_release_msi(dev);
2562
2563	if (adapter->msix_mem != NULL)
2564		bus_release_resource(dev, SYS_RES_MEMORY,
2565		    PCIR_BAR(IGB_MSIX_BAR), adapter->msix_mem);
2566
2567	if (adapter->pci_mem != NULL)
2568		bus_release_resource(dev, SYS_RES_MEMORY,
2569		    PCIR_BAR(0), adapter->pci_mem);
2570
2571}
2572
2573/*
2574 * Setup either MSI/X or MSI
2575 */
2576static int
2577igb_setup_msix(struct adapter *adapter)
2578{
2579	device_t dev = adapter->dev;
2580	int rid, want, queues, msgs;
2581
2582	/* tuneable override */
2583	if (igb_enable_msix == 0)
2584		goto msi;
2585
2586	/* First try MSI/X */
2587	rid = PCIR_BAR(IGB_MSIX_BAR);
2588	adapter->msix_mem = bus_alloc_resource_any(dev,
2589	    SYS_RES_MEMORY, &rid, RF_ACTIVE);
2590       	if (!adapter->msix_mem) {
2591		/* May not be enabled */
2592		device_printf(adapter->dev,
2593		    "Unable to map MSIX table\n");
2594		goto msi;
2595	}
2596
2597	msgs = pci_msix_count(dev);
2598	if (msgs == 0) { /* system has msix disabled */
2599		bus_release_resource(dev, SYS_RES_MEMORY,
2600		    PCIR_BAR(IGB_MSIX_BAR), adapter->msix_mem);
2601		adapter->msix_mem = NULL;
2602		goto msi;
2603	}
2604
2605	/* Figure out a reasonable auto config value */
2606	queues = (mp_ncpus > (msgs-1)) ? (msgs-1) : mp_ncpus;
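	/*
	** The auto value is one queue (and so one vector) per CPU,
	** bounded by the MSI-X messages available less the one that
	** is reserved below for the link interrupt.
	*/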
2607
2608	/* Manual override */
2609	if (igb_num_queues != 0)
2610		queues = igb_num_queues;
2611	if (queues > 8)  /* max queues */
2612		queues = 8;
2613
2614	/* Can have max of 4 queues on 82575 */
2615	if ((adapter->hw.mac.type == e1000_82575) && (queues > 4))
2616		queues = 4;
2617
2618	/* Limit the VF devices to one queue */
2619	if (adapter->vf_ifp)
2620		queues = 1;
2621
2622	/*
2623	** One vector (RX/TX pair) per queue
2624	** plus an additional for Link interrupt
2625	*/
2626	want = queues + 1;
2627	if (msgs >= want)
2628		msgs = want;
2629	else {
2630               	device_printf(adapter->dev,
2631		    "MSIX Configuration Problem, "
2632		    "%d vectors available, but %d wanted!\n",
2633		    msgs, want);
2634		return (0);
2635	}
2636	if ((msgs) && pci_alloc_msix(dev, &msgs) == 0) {
2637               	device_printf(adapter->dev,
2638		    "Using MSIX interrupts with %d vectors\n", msgs);
2639		adapter->num_queues = queues;
2640		return (msgs);
2641	}
2642msi:
2643       	msgs = pci_msi_count(dev);
2644	if (msgs == 1 && pci_alloc_msi(dev, &msgs) == 0) {
2645		device_printf(adapter->dev, "Using MSI interrupt\n");
2646		return (msgs);
2647	}
2648	return (0);
2649}
2650
2651/*********************************************************************
2652 *
2653 *  Set up a fresh starting state
2654 *
2655 **********************************************************************/
2656static void
2657igb_reset(struct adapter *adapter)
2658{
2659	device_t	dev = adapter->dev;
2660	struct e1000_hw *hw = &adapter->hw;
2661	struct e1000_fc_info *fc = &hw->fc;
2662	struct ifnet	*ifp = adapter->ifp;
2663	u32		pba = 0;
2664	u16		hwm;
2665
2666	INIT_DEBUGOUT("igb_reset: begin");
2667
2668	/* Let the firmware know the OS is in control */
2669	igb_get_hw_control(adapter);
2670
2671	/*
2672	 * Packet Buffer Allocation (PBA)
2673	 * Writing PBA sets the receive portion of the buffer;
2674	 * the remainder is used for the transmit buffer.
2675	 */
2676	switch (hw->mac.type) {
2677	case e1000_82575:
2678		pba = E1000_PBA_32K;
2679		break;
2680	case e1000_82576:
2681	case e1000_vfadapt:
2682		pba = E1000_READ_REG(hw, E1000_RXPBS);
2683		pba &= E1000_RXPBS_SIZE_MASK_82576;
2684		break;
2685	case e1000_82580:
2686	case e1000_i350:
2687	case e1000_vfadapt_i350:
2688		pba = E1000_READ_REG(hw, E1000_RXPBS);
2689		pba = e1000_rxpbs_adjust_82580(pba);
2690		break;
2692	default:
2693		break;
2694	}
2695
2696	/* Special needs in case of Jumbo frames */
2697	if ((hw->mac.type == e1000_82575) && (ifp->if_mtu > ETHERMTU)) {
2698		u32 tx_space, min_tx, min_rx;
2699		pba = E1000_READ_REG(hw, E1000_PBA);
2700		tx_space = pba >> 16;
2701		pba &= 0xffff;
2702		min_tx = (adapter->max_frame_size +
2703		    sizeof(struct e1000_tx_desc) - ETHERNET_FCS_SIZE) * 2;
2704		min_tx = roundup2(min_tx, 1024);
2705		min_tx >>= 10;
2706                min_rx = adapter->max_frame_size;
2707                min_rx = roundup2(min_rx, 1024);
2708                min_rx >>= 10;
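		/*
		 * The PBA register carries the TX and RX allocations in
		 * 1KB units, so both minimums are rounded up to 1KB and
		 * converted to the same units before the comparison below.
		 */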
2709		if (tx_space < min_tx &&
2710		    ((min_tx - tx_space) < pba)) {
2711			pba = pba - (min_tx - tx_space);
2712			/*
2713                         * if short on rx space, rx wins
2714                         * and must trump tx adjustment
2715			 */
2716                        if (pba < min_rx)
2717                                pba = min_rx;
2718		}
2719		E1000_WRITE_REG(hw, E1000_PBA, pba);
2720	}
2721
2722	INIT_DEBUGOUT1("igb_init: pba=%dK",pba);
2723
2724	/*
2725	 * These parameters control the automatic generation (Tx) and
2726	 * response (Rx) to Ethernet PAUSE frames.
2727	 * - High water mark should allow for at least two frames to be
2728	 *   received after sending an XOFF.
2729	 * - Low water mark works best when it is very near the high water mark.
2730	 *   This allows the receiver to restart by sending XON when it has
2731	 *   drained a bit.
2732	 */
2733	hwm = min(((pba << 10) * 9 / 10),
2734	    ((pba << 10) - 2 * adapter->max_frame_size));
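	/*
	 * pba is in KB, so (pba << 10) is the RX packet buffer size in
	 * bytes: the high water mark is the lesser of 90% of the buffer
	 * and the buffer less room for two maximum-sized frames, then
	 * trimmed below to the granularity the MAC supports.
	 */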
2735
2736	if (hw->mac.type < e1000_82576) {
2737		fc->high_water = hwm & 0xFFF8;  /* 8-byte granularity */
2738		fc->low_water = fc->high_water - 8;
2739	} else {
2740		fc->high_water = hwm & 0xFFF0;  /* 16-byte granularity */
2741		fc->low_water = fc->high_water - 16;
2742	}
2743
2744	fc->pause_time = IGB_FC_PAUSE_TIME;
2745	fc->send_xon = TRUE;
2746	if (fc->requested_mode)
2747		fc->current_mode = fc->requested_mode;
2748	else
2749		fc->current_mode = e1000_fc_full;
2750
2751	adapter->fc = fc->current_mode;
2752
2753	/* Issue a global reset */
2754	e1000_reset_hw(hw);
2755	E1000_WRITE_REG(hw, E1000_WUC, 0);
2756
2757	if (e1000_init_hw(hw) < 0)
2758		device_printf(dev, "Hardware Initialization Failed\n");
2759
2760	/* Setup DMA Coalescing */
2761	if (hw->mac.type == e1000_i350) {
2762		u32 reg = ~E1000_DMACR_DMAC_EN;
2763
2764		if (adapter->dmac == 0) { /* Disabling it */
2765			E1000_WRITE_REG(hw, E1000_DMACR, reg);
2766			goto reset_out;
2767		}
2768
2769		hwm = (pba - 4) << 10;
2770		reg = (((pba-6) << E1000_DMACR_DMACTHR_SHIFT)
2771		    & E1000_DMACR_DMACTHR_MASK);
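		/*
		 * Both values are derived from the RX packet buffer size:
		 * hwm is (pba - 4) KB expressed in bytes and the DMACTHR
		 * field is loaded with (pba - 6) KB.
		 */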
2772
2773		/* Transition to L0s or L1 if available. */
2774		reg |= (E1000_DMACR_DMAC_EN | E1000_DMACR_DMAC_LX_MASK);
2775
2776		/* timer = value in adapter->dmac in 32usec intervals */
2777		reg |= (adapter->dmac >> 5);
2778		E1000_WRITE_REG(hw, E1000_DMACR, reg);
2779
2780		/* No lower threshold */
2781		E1000_WRITE_REG(hw, E1000_DMCRTRH, 0);
2782
2783		/* set hwm to PBA -  2 * max frame size */
2784		E1000_WRITE_REG(hw, E1000_FCRTC, hwm);
2785
2786		/* Set the interval before transition */
2787		reg = E1000_READ_REG(hw, E1000_DMCTLX);
2788		reg |= 0x800000FF; /* 255 usec */
2789		E1000_WRITE_REG(hw, E1000_DMCTLX, reg);
2790
2791		/* free space in tx packet buffer to wake from DMA coal */
2792		E1000_WRITE_REG(hw, E1000_DMCTXTH,
2793		    (20480 - (2 * adapter->max_frame_size)) >> 6);
2794
2795		/* make low power state decision controlled by DMA coal */
2796		reg = E1000_READ_REG(hw, E1000_PCIEMISC);
2797		E1000_WRITE_REG(hw, E1000_PCIEMISC,
2798		    reg | E1000_PCIEMISC_LX_DECISION);
2799		device_printf(dev, "DMA Coalescing enabled\n");
2800	}
2801
2802reset_out:
2803	E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);
2804	e1000_get_phy_info(hw);
2805	e1000_check_for_link(hw);
2806	return;
2807}
2808
2809/*********************************************************************
2810 *
2811 *  Setup networking device structure and register an interface.
2812 *
2813 **********************************************************************/
2814static int
2815igb_setup_interface(device_t dev, struct adapter *adapter)
2816{
2817	struct ifnet   *ifp;
2818
2819	INIT_DEBUGOUT("igb_setup_interface: begin");
2820
2821	ifp = adapter->ifp = if_alloc(IFT_ETHER);
2822	if (ifp == NULL) {
2823		device_printf(dev, "can not allocate ifnet structure\n");
2824		return (-1);
2825	}
2826	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
2827	ifp->if_mtu = ETHERMTU;
2828	ifp->if_init =  igb_init;
2829	ifp->if_softc = adapter;
2830	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
2831	ifp->if_ioctl = igb_ioctl;
2832#if __FreeBSD_version >= 800000
2833	ifp->if_transmit = igb_mq_start;
2834	ifp->if_qflush = igb_qflush;
2835#else
2836	ifp->if_start = igb_start;
2837#endif
2838	IFQ_SET_MAXLEN(&ifp->if_snd, adapter->num_tx_desc - 1);
2839	ifp->if_snd.ifq_drv_maxlen = adapter->num_tx_desc - 1;
2840	IFQ_SET_READY(&ifp->if_snd);
2841
2842	ether_ifattach(ifp, adapter->hw.mac.addr);
2843
2844	ifp->if_capabilities = ifp->if_capenable = 0;
2845
2846	ifp->if_capabilities = IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM;
2847	ifp->if_capabilities |= IFCAP_TSO4;
2848	ifp->if_capabilities |= IFCAP_JUMBO_MTU;
2849	ifp->if_capenable = ifp->if_capabilities;
2850
2851	/* Advertise the LRO capability, but leave it disabled by default */
2852	ifp->if_capabilities |= IFCAP_LRO;
2853
2854#ifdef DEVICE_POLLING
2855	ifp->if_capabilities |= IFCAP_POLLING;
2856#endif
2857
2858	/*
2859	 * Tell the upper layer(s) we
2860	 * support full VLAN capability.
2861	 */
2862	ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);
2863	ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING
2864			     |  IFCAP_VLAN_HWTSO
2865			     |  IFCAP_VLAN_MTU;
2866	ifp->if_capenable |= IFCAP_VLAN_HWTAGGING
2867			  |  IFCAP_VLAN_HWTSO
2868			  |  IFCAP_VLAN_MTU;
2869
2870	/*
2871	** Don't turn this on by default: if vlans are
2872	** created on another pseudo device (e.g. lagg),
2873	** then vlan events are not passed through, which
2874	** breaks operation, but with HW FILTER off it works.
2875	** If using vlans directly on the igb driver you can
2876	** enable this and get full hardware tag filtering.
2877	*/
2878	ifp->if_capabilities |= IFCAP_VLAN_HWFILTER;
2879
2880	/*
2881	 * Specify the media types supported by this adapter and register
2882	 * callbacks to update media and link information
2883	 */
2884	ifmedia_init(&adapter->media, IFM_IMASK,
2885	    igb_media_change, igb_media_status);
2886	if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
2887	    (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
2888		ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_SX | IFM_FDX,
2889			    0, NULL);
2890		ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_SX, 0, NULL);
2891	} else {
2892		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T, 0, NULL);
2893		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T | IFM_FDX,
2894			    0, NULL);
2895		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX,
2896			    0, NULL);
2897		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
2898			    0, NULL);
2899		if (adapter->hw.phy.type != e1000_phy_ife) {
2900			ifmedia_add(&adapter->media,
2901				IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
2902			ifmedia_add(&adapter->media,
2903				IFM_ETHER | IFM_1000_T, 0, NULL);
2904		}
2905	}
2906	ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
2907	ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
2908	return (0);
2909}
2910
2911
2912/*
2913 * Manage DMA'able memory.
2914 */
2915static void
2916igb_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
2917{
2918	if (error)
2919		return;
2920	*(bus_addr_t *) arg = segs[0].ds_addr;
2921}
2922
2923static int
2924igb_dma_malloc(struct adapter *adapter, bus_size_t size,
2925        struct igb_dma_alloc *dma, int mapflags)
2926{
2927	int error;
2928
2929	error = bus_dma_tag_create(bus_get_dma_tag(adapter->dev), /* parent */
2930				IGB_DBA_ALIGN, 0,	/* alignment, bounds */
2931				BUS_SPACE_MAXADDR,	/* lowaddr */
2932				BUS_SPACE_MAXADDR,	/* highaddr */
2933				NULL, NULL,		/* filter, filterarg */
2934				size,			/* maxsize */
2935				1,			/* nsegments */
2936				size,			/* maxsegsize */
2937				0,			/* flags */
2938				NULL,			/* lockfunc */
2939				NULL,			/* lockarg */
2940				&dma->dma_tag);
2941	if (error) {
2942		device_printf(adapter->dev,
2943		    "%s: bus_dma_tag_create failed: %d\n",
2944		    __func__, error);
2945		goto fail_0;
2946	}
2947
2948	error = bus_dmamem_alloc(dma->dma_tag, (void**) &dma->dma_vaddr,
2949	    BUS_DMA_NOWAIT | BUS_DMA_COHERENT, &dma->dma_map);
2950	if (error) {
2951		device_printf(adapter->dev,
2952		    "%s: bus_dmamem_alloc(%ju) failed: %d\n",
2953		    __func__, (uintmax_t)size, error);
2954		goto fail_2;
2955	}
2956
2957	dma->dma_paddr = 0;
2958	error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
2959	    size, igb_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT);
2960	if (error || dma->dma_paddr == 0) {
2961		device_printf(adapter->dev,
2962		    "%s: bus_dmamap_load failed: %d\n",
2963		    __func__, error);
2964		goto fail_3;
2965	}
2966
2967	return (0);
2968
2969fail_3:
2970	bus_dmamap_unload(dma->dma_tag, dma->dma_map);
2971fail_2:
2972	bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
2973	bus_dma_tag_destroy(dma->dma_tag);
2974fail_0:
2975	dma->dma_map = NULL;
2976	dma->dma_tag = NULL;
2977
2978	return (error);
2979}
2980
2981static void
2982igb_dma_free(struct adapter *adapter, struct igb_dma_alloc *dma)
2983{
2984	if (dma->dma_tag == NULL)
2985		return;
2986	if (dma->dma_map != NULL) {
2987		bus_dmamap_sync(dma->dma_tag, dma->dma_map,
2988		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
2989		bus_dmamap_unload(dma->dma_tag, dma->dma_map);
2990		bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
2991		dma->dma_map = NULL;
2992	}
2993	bus_dma_tag_destroy(dma->dma_tag);
2994	dma->dma_tag = NULL;
2995}
2996
2997
2998/*********************************************************************
2999 *
3000 *  Allocate memory for the transmit and receive rings, and then
3001 *  the descriptors associated with each; called only once at attach.
3002 *
3003 **********************************************************************/
3004static int
3005igb_allocate_queues(struct adapter *adapter)
3006{
3007	device_t dev = adapter->dev;
3008	struct igb_queue	*que = NULL;
3009	struct tx_ring		*txr = NULL;
3010	struct rx_ring		*rxr = NULL;
3011	int rsize, tsize, error = E1000_SUCCESS;
3012	int txconf = 0, rxconf = 0;
3013
3014	/* First allocate the top level queue structs */
3015	if (!(adapter->queues =
3016	    (struct igb_queue *) malloc(sizeof(struct igb_queue) *
3017	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3018		device_printf(dev, "Unable to allocate queue memory\n");
3019		error = ENOMEM;
3020		goto fail;
3021	}
3022
3023	/* Next allocate the TX ring struct memory */
3024	if (!(adapter->tx_rings =
3025	    (struct tx_ring *) malloc(sizeof(struct tx_ring) *
3026	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3027		device_printf(dev, "Unable to allocate TX ring memory\n");
3028		error = ENOMEM;
3029		goto tx_fail;
3030	}
3031
3032	/* Now allocate the RX */
3033	if (!(adapter->rx_rings =
3034	    (struct rx_ring *) malloc(sizeof(struct rx_ring) *
3035	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3036		device_printf(dev, "Unable to allocate RX ring memory\n");
3037		error = ENOMEM;
3038		goto rx_fail;
3039	}
3040
3041	tsize = roundup2(adapter->num_tx_desc *
3042	    sizeof(union e1000_adv_tx_desc), IGB_DBA_ALIGN);
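	/*
	 * Descriptor ring sizes are padded to a multiple of IGB_DBA_ALIGN;
	 * igb_dma_malloc() aligns the base address to the same boundary.
	 */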
3043	/*
3044	 * Now set up the TX queues; txconf is needed to handle the
3045	 * possibility that things fail midcourse and we need to
3046	 * undo the memory allocations gracefully.
3047	 */
3048	for (int i = 0; i < adapter->num_queues; i++, txconf++) {
3049		/* Set up some basics */
3050		txr = &adapter->tx_rings[i];
3051		txr->adapter = adapter;
3052		txr->me = i;
3053
3054		/* Initialize the TX lock */
3055		snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
3056		    device_get_nameunit(dev), txr->me);
3057		mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF);
3058
3059		if (igb_dma_malloc(adapter, tsize,
3060			&txr->txdma, BUS_DMA_NOWAIT)) {
3061			device_printf(dev,
3062			    "Unable to allocate TX Descriptor memory\n");
3063			error = ENOMEM;
3064			goto err_tx_desc;
3065		}
3066		txr->tx_base = (struct e1000_tx_desc *)txr->txdma.dma_vaddr;
3067		bzero((void *)txr->tx_base, tsize);
3068
3069        	/* Now allocate transmit buffers for the ring */
3070        	if (igb_allocate_transmit_buffers(txr)) {
3071			device_printf(dev,
3072			    "Critical Failure setting up transmit buffers\n");
3073			error = ENOMEM;
3074			goto err_tx_desc;
3075        	}
3076#if __FreeBSD_version >= 800000
3077		/* Allocate a buf ring */
3078		txr->br = buf_ring_alloc(IGB_BR_SIZE, M_DEVBUF,
3079		    M_WAITOK, &txr->tx_mtx);
3080#endif
3081	}
3082
3083	/*
3084	 * Next the RX queues...
3085	 */
3086	rsize = roundup2(adapter->num_rx_desc *
3087	    sizeof(union e1000_adv_rx_desc), IGB_DBA_ALIGN);
3088	for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
3089		rxr = &adapter->rx_rings[i];
3090		rxr->adapter = adapter;
3091		rxr->me = i;
3092
3093		/* Initialize the RX lock */
3094		snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
3095		    device_get_nameunit(dev), rxr->me);
3096		mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF);
3097
3098		if (igb_dma_malloc(adapter, rsize,
3099			&rxr->rxdma, BUS_DMA_NOWAIT)) {
3100			device_printf(dev,
3101			    "Unable to allocate RX Descriptor memory\n");
3102			error = ENOMEM;
3103			goto err_rx_desc;
3104		}
3105		rxr->rx_base = (union e1000_adv_rx_desc *)rxr->rxdma.dma_vaddr;
3106		bzero((void *)rxr->rx_base, rsize);
3107
3108        	/* Allocate receive buffers for the ring */
3109		if (igb_allocate_receive_buffers(rxr)) {
3110			device_printf(dev,
3111			    "Critical Failure setting up receive buffers\n");
3112			error = ENOMEM;
3113			goto err_rx_desc;
3114		}
3115	}
3116
3117	/*
3118	** Finally set up the queue holding structs
3119	*/
3120	for (int i = 0; i < adapter->num_queues; i++) {
3121		que = &adapter->queues[i];
3122		que->adapter = adapter;
3123		que->txr = &adapter->tx_rings[i];
3124		que->rxr = &adapter->rx_rings[i];
3125	}
3126
3127	return (0);
3128
3129err_rx_desc:
3130	for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
3131		igb_dma_free(adapter, &rxr->rxdma);
3132err_tx_desc:
3133	for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
3134		igb_dma_free(adapter, &txr->txdma);
3135	free(adapter->rx_rings, M_DEVBUF);
3136rx_fail:
3137#if __FreeBSD_version >= 800000
3138	buf_ring_free(txr->br, M_DEVBUF);
3139#endif
3140	free(adapter->tx_rings, M_DEVBUF);
3141tx_fail:
3142	free(adapter->queues, M_DEVBUF);
3143fail:
3144	return (error);
3145}
3146
3147/*********************************************************************
3148 *
3149 *  Allocate memory for tx_buffer structures. The tx_buffer stores all
3150 *  the information needed to transmit a packet on the wire. This is
3151 *  called only once at attach, setup is done every reset.
3152 *
3153 **********************************************************************/
3154static int
3155igb_allocate_transmit_buffers(struct tx_ring *txr)
3156{
3157	struct adapter *adapter = txr->adapter;
3158	device_t dev = adapter->dev;
3159	struct igb_tx_buffer *txbuf;
3160	int error, i;
3161
3162	/*
3163	 * Setup DMA descriptor areas.
3164	 */
3165	if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
3166			       1, 0,			/* alignment, bounds */
3167			       BUS_SPACE_MAXADDR,	/* lowaddr */
3168			       BUS_SPACE_MAXADDR,	/* highaddr */
3169			       NULL, NULL,		/* filter, filterarg */
3170			       IGB_TSO_SIZE,		/* maxsize */
3171			       IGB_MAX_SCATTER,		/* nsegments */
3172			       PAGE_SIZE,		/* maxsegsize */
3173			       0,			/* flags */
3174			       NULL,			/* lockfunc */
3175			       NULL,			/* lockfuncarg */
3176			       &txr->txtag))) {
3177		device_printf(dev,"Unable to allocate TX DMA tag\n");
3178		goto fail;
3179	}
3180
3181	if (!(txr->tx_buffers =
3182	    (struct igb_tx_buffer *) malloc(sizeof(struct igb_tx_buffer) *
3183	    adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3184		device_printf(dev, "Unable to allocate tx_buffer memory\n");
3185		error = ENOMEM;
3186		goto fail;
3187	}
3188
3189        /* Create the descriptor buffer dma maps */
3190	txbuf = txr->tx_buffers;
3191	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3192		error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
3193		if (error != 0) {
3194			device_printf(dev, "Unable to create TX DMA map\n");
3195			goto fail;
3196		}
3197	}
3198
3199	return 0;
3200fail:
3201	/* We free everything; this handles the case where we fail partway through */
3202	igb_free_transmit_structures(adapter);
3203	return (error);
3204}
3205
3206/*********************************************************************
3207 *
3208 *  Initialize a transmit ring.
3209 *
3210 **********************************************************************/
3211static void
3212igb_setup_transmit_ring(struct tx_ring *txr)
3213{
3214	struct adapter *adapter = txr->adapter;
3215	struct igb_tx_buffer *txbuf;
3216	int i;
3217
3218	/* Clear the old descriptor contents */
3219	IGB_TX_LOCK(txr);
3220	bzero((void *)txr->tx_base,
3221	      (sizeof(union e1000_adv_tx_desc)) * adapter->num_tx_desc);
3222	/* Reset indices */
3223	txr->next_avail_desc = 0;
3224	txr->next_to_clean = 0;
3225
3226	/* Free any existing tx buffers. */
3227        txbuf = txr->tx_buffers;
3228	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3229		if (txbuf->m_head != NULL) {
3230			bus_dmamap_sync(txr->txtag, txbuf->map,
3231			    BUS_DMASYNC_POSTWRITE);
3232			bus_dmamap_unload(txr->txtag, txbuf->map);
3233			m_freem(txbuf->m_head);
3234			txbuf->m_head = NULL;
3235		}
3236		/* clear the watch index */
3237		txbuf->next_eop = -1;
3238        }
3239
3240	/* Set number of descriptors available */
3241	txr->tx_avail = adapter->num_tx_desc;
3242
3243	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3244	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3245	IGB_TX_UNLOCK(txr);
3246}
3247
3248/*********************************************************************
3249 *
3250 *  Initialize all transmit rings.
3251 *
3252 **********************************************************************/
3253static void
3254igb_setup_transmit_structures(struct adapter *adapter)
3255{
3256	struct tx_ring *txr = adapter->tx_rings;
3257
3258	for (int i = 0; i < adapter->num_queues; i++, txr++)
3259		igb_setup_transmit_ring(txr);
3260
3261	return;
3262}
3263
3264/*********************************************************************
3265 *
3266 *  Enable transmit unit.
3267 *
3268 **********************************************************************/
3269static void
3270igb_initialize_transmit_units(struct adapter *adapter)
3271{
3272	struct tx_ring	*txr = adapter->tx_rings;
3273	struct e1000_hw *hw = &adapter->hw;
3274	u32		tctl, txdctl;
3275
3276	INIT_DEBUGOUT("igb_initialize_transmit_units: begin");
3277	tctl = txdctl = 0;
3278
3279	/* Setup the Tx Descriptor Rings */
3280	for (int i = 0; i < adapter->num_queues; i++, txr++) {
3281		u64 bus_addr = txr->txdma.dma_paddr;
3282
3283		E1000_WRITE_REG(hw, E1000_TDLEN(i),
3284		    adapter->num_tx_desc * sizeof(struct e1000_tx_desc));
3285		E1000_WRITE_REG(hw, E1000_TDBAH(i),
3286		    (uint32_t)(bus_addr >> 32));
3287		E1000_WRITE_REG(hw, E1000_TDBAL(i),
3288		    (uint32_t)bus_addr);
3289
3290		/* Setup the HW Tx Head and Tail descriptor pointers */
3291		E1000_WRITE_REG(hw, E1000_TDT(i), 0);
3292		E1000_WRITE_REG(hw, E1000_TDH(i), 0);
3293
3294		HW_DEBUGOUT2("Base = %x, Length = %x\n",
3295		    E1000_READ_REG(hw, E1000_TDBAL(i)),
3296		    E1000_READ_REG(hw, E1000_TDLEN(i)));
3297
3298		txr->queue_status = IGB_QUEUE_IDLE;
3299
3300		txdctl |= IGB_TX_PTHRESH;
3301		txdctl |= IGB_TX_HTHRESH << 8;
3302		txdctl |= IGB_TX_WTHRESH << 16;
3303		txdctl |= E1000_TXDCTL_QUEUE_ENABLE;
3304		E1000_WRITE_REG(hw, E1000_TXDCTL(i), txdctl);
3305	}
3306
3307	if (adapter->vf_ifp)
3308		return;
3309
3310	e1000_config_collision_dist(hw);
3311
3312	/* Program the Transmit Control Register */
3313	tctl = E1000_READ_REG(hw, E1000_TCTL);
3314	tctl &= ~E1000_TCTL_CT;
3315	tctl |= (E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN |
3316		   (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT));
3317
3318	/* This write will effectively turn on the transmit unit. */
3319	E1000_WRITE_REG(hw, E1000_TCTL, tctl);
3320}
3321
3322/*********************************************************************
3323 *
3324 *  Free all transmit rings.
3325 *
3326 **********************************************************************/
3327static void
3328igb_free_transmit_structures(struct adapter *adapter)
3329{
3330	struct tx_ring *txr = adapter->tx_rings;
3331
3332	for (int i = 0; i < adapter->num_queues; i++, txr++) {
3333		IGB_TX_LOCK(txr);
3334		igb_free_transmit_buffers(txr);
3335		igb_dma_free(adapter, &txr->txdma);
3336		IGB_TX_UNLOCK(txr);
3337		IGB_TX_LOCK_DESTROY(txr);
3338	}
3339	free(adapter->tx_rings, M_DEVBUF);
3340}
3341
3342/*********************************************************************
3343 *
3344 *  Free transmit ring related data structures.
3345 *
3346 **********************************************************************/
3347static void
3348igb_free_transmit_buffers(struct tx_ring *txr)
3349{
3350	struct adapter *adapter = txr->adapter;
3351	struct igb_tx_buffer *tx_buffer;
3352	int             i;
3353
3354	INIT_DEBUGOUT("free_transmit_ring: begin");
3355
3356	if (txr->tx_buffers == NULL)
3357		return;
3358
3359	tx_buffer = txr->tx_buffers;
3360	for (i = 0; i < adapter->num_tx_desc; i++, tx_buffer++) {
3361		if (tx_buffer->m_head != NULL) {
3362			bus_dmamap_sync(txr->txtag, tx_buffer->map,
3363			    BUS_DMASYNC_POSTWRITE);
3364			bus_dmamap_unload(txr->txtag,
3365			    tx_buffer->map);
3366			m_freem(tx_buffer->m_head);
3367			tx_buffer->m_head = NULL;
3368			if (tx_buffer->map != NULL) {
3369				bus_dmamap_destroy(txr->txtag,
3370				    tx_buffer->map);
3371				tx_buffer->map = NULL;
3372			}
3373		} else if (tx_buffer->map != NULL) {
3374			bus_dmamap_unload(txr->txtag,
3375			    tx_buffer->map);
3376			bus_dmamap_destroy(txr->txtag,
3377			    tx_buffer->map);
3378			tx_buffer->map = NULL;
3379		}
3380	}
3381#if __FreeBSD_version >= 800000
3382	if (txr->br != NULL)
3383		buf_ring_free(txr->br, M_DEVBUF);
3384#endif
3385	if (txr->tx_buffers != NULL) {
3386		free(txr->tx_buffers, M_DEVBUF);
3387		txr->tx_buffers = NULL;
3388	}
3389	if (txr->txtag != NULL) {
3390		bus_dma_tag_destroy(txr->txtag);
3391		txr->txtag = NULL;
3392	}
3393	return;
3394}
3395
3396/**********************************************************************
3397 *
3398 *  Setup work for hardware segmentation offload (TSO)
3399 *
3400 **********************************************************************/
3401static bool
3402igb_tso_setup(struct tx_ring *txr, struct mbuf *mp, u32 *hdrlen)
3403{
3404	struct adapter *adapter = txr->adapter;
3405	struct e1000_adv_tx_context_desc *TXD;
3406	struct igb_tx_buffer        *tx_buffer;
3407	u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0;
3408	u32 mss_l4len_idx = 0;
3409	u16 vtag = 0;
3410	int ctxd, ehdrlen, ip_hlen, tcp_hlen;
3411	struct ether_vlan_header *eh;
3412	struct ip *ip;
3413	struct tcphdr *th;
3414
3415
3416	/*
3417	 * Determine where frame payload starts.
3418	 * Jump over vlan headers if already present
3419	 */
3420	eh = mtod(mp, struct ether_vlan_header *);
3421	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN))
3422		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3423	else
3424		ehdrlen = ETHER_HDR_LEN;
3425
3426	/* Ensure we have at least the IP+TCP header in the first mbuf. */
3427	if (mp->m_len < ehdrlen + sizeof(struct ip) + sizeof(struct tcphdr))
3428		return FALSE;
3429
3430	/* Only supports IPV4 for now */
3431	ctxd = txr->next_avail_desc;
3432	tx_buffer = &txr->tx_buffers[ctxd];
3433	TXD = (struct e1000_adv_tx_context_desc *) &txr->tx_base[ctxd];
3434
3435	ip = (struct ip *)(mp->m_data + ehdrlen);
3436	if (ip->ip_p != IPPROTO_TCP)
3437                return FALSE;   /* 0 */
3438	ip->ip_sum = 0;
3439	ip_hlen = ip->ip_hl << 2;
3440	th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
3441	th->th_sum = in_pseudo(ip->ip_src.s_addr,
3442	    ip->ip_dst.s_addr, htons(IPPROTO_TCP));
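	/*
	 * Seed the TCP checksum with the pseudo-header sum (addresses
	 * and protocol, no length) so the hardware can complete the
	 * checksum for each segment it generates.
	 */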
3443	tcp_hlen = th->th_off << 2;
3444	/*
3445	 * Calculate the header length; it is used
3446	 * in the transmit desc in igb_xmit.
3447	 */
3448	*hdrlen = ehdrlen + ip_hlen + tcp_hlen;
3449
3450	/* VLAN MACLEN IPLEN */
3451	if (mp->m_flags & M_VLANTAG) {
3452		vtag = htole16(mp->m_pkthdr.ether_vtag);
3453		vlan_macip_lens |= (vtag << E1000_ADVTXD_VLAN_SHIFT);
3454	}
3455
3456	vlan_macip_lens |= (ehdrlen << E1000_ADVTXD_MACLEN_SHIFT);
3457	vlan_macip_lens |= ip_hlen;
3458	TXD->vlan_macip_lens |= htole32(vlan_macip_lens);
3459
3460	/* ADV DTYPE TUCMD */
3461	type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
3462	type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP;
3463	type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4;
3464	TXD->type_tucmd_mlhl |= htole32(type_tucmd_mlhl);
3465
3466	/* MSS L4LEN IDX */
3467	mss_l4len_idx |= (mp->m_pkthdr.tso_segsz << E1000_ADVTXD_MSS_SHIFT);
3468	mss_l4len_idx |= (tcp_hlen << E1000_ADVTXD_L4LEN_SHIFT);
3469	/* 82575 needs the queue index added */
3470	if (adapter->hw.mac.type == e1000_82575)
3471		mss_l4len_idx |= txr->me << 4;
3472	TXD->mss_l4len_idx = htole32(mss_l4len_idx);
3473
3474	TXD->seqnum_seed = htole32(0);
3475	tx_buffer->m_head = NULL;
3476	tx_buffer->next_eop = -1;
3477
3478	if (++ctxd == adapter->num_tx_desc)
3479		ctxd = 0;
3480
3481	txr->tx_avail--;
3482	txr->next_avail_desc = ctxd;
3483	return TRUE;
3484}
3485
3486
3487/*********************************************************************
3488 *
3489 *  Context Descriptor setup for VLAN or CSUM
3490 *
3491 **********************************************************************/
3492
3493static bool
3494igb_tx_ctx_setup(struct tx_ring *txr, struct mbuf *mp)
3495{
3496	struct adapter *adapter = txr->adapter;
3497	struct e1000_adv_tx_context_desc *TXD;
3498	struct igb_tx_buffer        *tx_buffer;
3499	u32 vlan_macip_lens, type_tucmd_mlhl, mss_l4len_idx;
3500	struct ether_vlan_header *eh;
3501	struct ip *ip = NULL;
3502	struct ip6_hdr *ip6;
3503	int  ehdrlen, ctxd, ip_hlen = 0;
3504	u16	etype, vtag = 0;
3505	u8	ipproto = 0;
3506	bool	offload = TRUE;
3507
3508	if ((mp->m_pkthdr.csum_flags & CSUM_OFFLOAD) == 0)
3509		offload = FALSE;
3510
3511	vlan_macip_lens = type_tucmd_mlhl = mss_l4len_idx = 0;
3512	ctxd = txr->next_avail_desc;
3513	tx_buffer = &txr->tx_buffers[ctxd];
3514	TXD = (struct e1000_adv_tx_context_desc *) &txr->tx_base[ctxd];
3515
3516	/*
3517	** In advanced descriptors the vlan tag must
3518	** be placed into the context descriptor; thus
3519	** we need to be here just for that setup.
3520	*/
3521	if (mp->m_flags & M_VLANTAG) {
3522		vtag = htole16(mp->m_pkthdr.ether_vtag);
3523		vlan_macip_lens |= (vtag << E1000_ADVTXD_VLAN_SHIFT);
3524	} else if (offload == FALSE)
3525		return FALSE;
3526
3527	/*
3528	 * Determine where frame payload starts.
3529	 * Jump over vlan headers if already present,
3530	 * helpful for QinQ too.
3531	 */
3532	eh = mtod(mp, struct ether_vlan_header *);
3533	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
3534		etype = ntohs(eh->evl_proto);
3535		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3536	} else {
3537		etype = ntohs(eh->evl_encap_proto);
3538		ehdrlen = ETHER_HDR_LEN;
3539	}
3540
3541	/* Set the ether header length */
3542	vlan_macip_lens |= ehdrlen << E1000_ADVTXD_MACLEN_SHIFT;
3543
3544	switch (etype) {
3545		case ETHERTYPE_IP:
3546			ip = (struct ip *)(mp->m_data + ehdrlen);
3547			ip_hlen = ip->ip_hl << 2;
3548			if (mp->m_len < ehdrlen + ip_hlen) {
3549				offload = FALSE;
3550				break;
3551			}
3552			ipproto = ip->ip_p;
3553			type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4;
3554			break;
3555		case ETHERTYPE_IPV6:
3556			ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
3557			ip_hlen = sizeof(struct ip6_hdr);
3558			ipproto = ip6->ip6_nxt;
3559			type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV6;
3560			break;
3561		default:
3562			offload = FALSE;
3563			break;
3564	}
3565
3566	vlan_macip_lens |= ip_hlen;
3567	type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
3568
3569	switch (ipproto) {
3570		case IPPROTO_TCP:
3571			if (mp->m_pkthdr.csum_flags & CSUM_TCP)
3572				type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP;
3573			break;
3574		case IPPROTO_UDP:
3575			if (mp->m_pkthdr.csum_flags & CSUM_UDP)
3576				type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_UDP;
3577			break;
3578#if __FreeBSD_version >= 800000
3579		case IPPROTO_SCTP:
3580			if (mp->m_pkthdr.csum_flags & CSUM_SCTP)
3581				type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_SCTP;
3582			break;
3583#endif
3584		default:
3585			offload = FALSE;
3586			break;
3587	}
3588
3589	/* 82575 needs the queue index added */
3590	if (adapter->hw.mac.type == e1000_82575)
3591		mss_l4len_idx = txr->me << 4;
3592
3593	/* Now copy bits into descriptor */
3594	TXD->vlan_macip_lens |= htole32(vlan_macip_lens);
3595	TXD->type_tucmd_mlhl |= htole32(type_tucmd_mlhl);
3596	TXD->seqnum_seed = htole32(0);
3597	TXD->mss_l4len_idx = htole32(mss_l4len_idx);
3598
3599	tx_buffer->m_head = NULL;
3600	tx_buffer->next_eop = -1;
3601
3602	/* We've consumed the first desc, adjust counters */
3603	if (++ctxd == adapter->num_tx_desc)
3604		ctxd = 0;
3605	txr->next_avail_desc = ctxd;
3606	--txr->tx_avail;
3607
3608        return (offload);
3609}
3610
3611
3612/**********************************************************************
3613 *
3614 *  Examine each tx_buffer in the used queue. If the hardware is done
3615 *  processing the packet then free associated resources. The
3616 *  tx_buffer is put back on the free queue.
3617 *
3618 *  A TRUE return means there's work in the ring to clean, FALSE means it's empty.
3619 **********************************************************************/
3620static bool
3621igb_txeof(struct tx_ring *txr)
3622{
3623	struct adapter	*adapter = txr->adapter;
3624        int first, last, done, processed;
3625        struct igb_tx_buffer *tx_buffer;
3626        struct e1000_tx_desc   *tx_desc, *eop_desc;
3627	struct ifnet   *ifp = adapter->ifp;
3628
3629	IGB_TX_LOCK_ASSERT(txr);
3630
3631        if (txr->tx_avail == adapter->num_tx_desc) {
3632		txr->queue_status = IGB_QUEUE_IDLE;
3633                return FALSE;
3634	}
3635
3636	processed = 0;
3637        first = txr->next_to_clean;
3638        tx_desc = &txr->tx_base[first];
3639        tx_buffer = &txr->tx_buffers[first];
3640	last = tx_buffer->next_eop;
3641        eop_desc = &txr->tx_base[last];
3642
3643	/*
3644	 * What this does is get the index of the
3645	 * first descriptor AFTER the EOP of the
3646	 * first packet, that way we can do the
3647	 * simple comparison on the inner while loop.
3648	 */
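	/*
	 * Illustration with made-up numbers: if num_tx_desc is 1024,
	 * first is 1020 and the EOP sits at 1023, then 'done' becomes 0
	 * after the wrap, so the inner loop below cleans 1020..1023 and
	 * then stops.
	 */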
3649	if (++last == adapter->num_tx_desc)
3650 		last = 0;
3651	done = last;
3652
3653        bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3654            BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
3655
3656        while (eop_desc->upper.fields.status & E1000_TXD_STAT_DD) {
3657		/* We clean the range of the packet */
3658		while (first != done) {
3659                	tx_desc->upper.data = 0;
3660                	tx_desc->lower.data = 0;
3661                	tx_desc->buffer_addr = 0;
3662                	++txr->tx_avail;
3663			++processed;
3664
3665			if (tx_buffer->m_head) {
3666				txr->bytes +=
3667				    tx_buffer->m_head->m_pkthdr.len;
3668				bus_dmamap_sync(txr->txtag,
3669				    tx_buffer->map,
3670				    BUS_DMASYNC_POSTWRITE);
3671				bus_dmamap_unload(txr->txtag,
3672				    tx_buffer->map);
3673
3674                        	m_freem(tx_buffer->m_head);
3675                        	tx_buffer->m_head = NULL;
3676                	}
3677			tx_buffer->next_eop = -1;
3678			txr->watchdog_time = ticks;
3679
3680	                if (++first == adapter->num_tx_desc)
3681				first = 0;
3682
3683	                tx_buffer = &txr->tx_buffers[first];
3684			tx_desc = &txr->tx_base[first];
3685		}
3686		++txr->packets;
3687		++ifp->if_opackets;
3688		/* See if we can continue to the next packet */
3689		last = tx_buffer->next_eop;
3690		if (last != -1) {
3691        		eop_desc = &txr->tx_base[last];
3692			/* Get new done point */
3693			if (++last == adapter->num_tx_desc) last = 0;
3694			done = last;
3695		} else
3696			break;
3697        }
3698        bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3699            BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3700
3701        txr->next_to_clean = first;
3702
3703	/*
3704	** Watchdog calculation, we know there's
3705	** work outstanding or the first return
3706	** would have been taken, so none processed
3707	** for too long indicates a hang.
3708	*/
3709	if ((!processed) && ((ticks - txr->watchdog_time) > IGB_WATCHDOG))
3710		txr->queue_status = IGB_QUEUE_HUNG;
3711
3712        /*
3713         * If we have a minimum free, clear IFF_DRV_OACTIVE
3714         * to tell the stack that it is OK to send packets.
3715         */
3716        if (txr->tx_avail > IGB_TX_CLEANUP_THRESHOLD) {
3717                ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
3718		/* All clean, turn off the watchdog */
3719                if (txr->tx_avail == adapter->num_tx_desc) {
3720			txr->queue_status = IGB_QUEUE_IDLE;
3721			return (FALSE);
3722		}
3723        }
3724	return (TRUE);
3725}
3726
3727/*********************************************************************
3728 *
3729 *  Refresh mbuf buffers for RX descriptor rings
3730 *   - now keeps its own state so discards due to resource
3731 *     exhaustion are unnecessary; if an mbuf cannot be obtained
3732 *     it just returns, keeping its placeholder, so it can simply
3733 *     be called again to retry.
3734 *
3735 **********************************************************************/
3736static void
3737igb_refresh_mbufs(struct rx_ring *rxr, int limit)
3738{
3739	struct adapter		*adapter = rxr->adapter;
3740	bus_dma_segment_t	hseg[1];
3741	bus_dma_segment_t	pseg[1];
3742	struct igb_rx_buf	*rxbuf;
3743	struct mbuf		*mh, *mp;
3744	int			i, j, nsegs, error;
3745	bool			refreshed = FALSE;
3746
3747	i = j = rxr->next_to_refresh;
3748	/*
3749	** Get one descriptor beyond
3750	** our work mark to control
3751	** the loop.
3752        */
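	/*
	** Illustration (indices made up): with next_to_refresh = 9 and
	** limit = 14, buffers 9 through 12 get fresh mbufs, the work
	** mark ends at 13, and that value is what gets written to the
	** RDT tail register below.
	*/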
3753	if (++j == adapter->num_rx_desc)
3754		j = 0;
3755
3756	while (j != limit) {
3757		rxbuf = &rxr->rx_buffers[i];
3758		/* No hdr mbuf used with header split off */
3759		if (rxr->hdr_split == FALSE)
3760			goto no_split;
3761		if (rxbuf->m_head == NULL) {
3762			mh = m_gethdr(M_DONTWAIT, MT_DATA);
3763			if (mh == NULL)
3764				goto update;
3765		} else
3766			mh = rxbuf->m_head;
3767
3768		mh->m_pkthdr.len = mh->m_len = MHLEN;
3770		mh->m_flags |= M_PKTHDR;
3771		/* Get the memory mapping */
3772		error = bus_dmamap_load_mbuf_sg(rxr->htag,
3773		    rxbuf->hmap, mh, hseg, &nsegs, BUS_DMA_NOWAIT);
3774		if (error != 0) {
3775			printf("Refresh mbufs: hdr dmamap load"
3776			    " failure - %d\n", error);
3777			m_free(mh);
3778			rxbuf->m_head = NULL;
3779			goto update;
3780		}
3781		rxbuf->m_head = mh;
3782		bus_dmamap_sync(rxr->htag, rxbuf->hmap,
3783		    BUS_DMASYNC_PREREAD);
3784		rxr->rx_base[i].read.hdr_addr =
3785		    htole64(hseg[0].ds_addr);
3786no_split:
3787		if (rxbuf->m_pack == NULL) {
3788			mp = m_getjcl(M_DONTWAIT, MT_DATA,
3789			    M_PKTHDR, adapter->rx_mbuf_sz);
3790			if (mp == NULL)
3791				goto update;
3792		} else
3793			mp = rxbuf->m_pack;
3794
3795		mp->m_pkthdr.len = mp->m_len = adapter->rx_mbuf_sz;
3796		/* Get the memory mapping */
3797		error = bus_dmamap_load_mbuf_sg(rxr->ptag,
3798		    rxbuf->pmap, mp, pseg, &nsegs, BUS_DMA_NOWAIT);
3799		if (error != 0) {
3800			printf("Refresh mbufs: payload dmamap load"
3801			    " failure - %d\n", error);
3802			m_free(mp);
3803			rxbuf->m_pack = NULL;
3804			goto update;
3805		}
3806		rxbuf->m_pack = mp;
3807		bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
3808		    BUS_DMASYNC_PREREAD);
3809		rxr->rx_base[i].read.pkt_addr =
3810		    htole64(pseg[0].ds_addr);
3811		refreshed = TRUE; /* I feel wefreshed :) */
3812
3813		i = j; /* our next is precalculated */
3814		rxr->next_to_refresh = i;
3815		if (++j == adapter->num_rx_desc)
3816			j = 0;
3817	}
3818update:
3819	if (refreshed) /* update tail */
3820		E1000_WRITE_REG(&adapter->hw,
3821		    E1000_RDT(rxr->me), rxr->next_to_refresh);
3822	return;
3823}
3824
3825
3826/*********************************************************************
3827 *
3828 *  Allocate memory for rx_buffer structures. Since we use one
3829 *  rx_buffer per received packet, the maximum number of rx_buffer's
3830 *  that we'll need is equal to the number of receive descriptors
3831 *  that we've allocated.
3832 *
3833 **********************************************************************/
3834static int
3835igb_allocate_receive_buffers(struct rx_ring *rxr)
3836{
3837	struct	adapter 	*adapter = rxr->adapter;
3838	device_t 		dev = adapter->dev;
3839	struct igb_rx_buf	*rxbuf;
3840	int             	i, bsize, error;
3841
3842	bsize = sizeof(struct igb_rx_buf) * adapter->num_rx_desc;
3843	if (!(rxr->rx_buffers =
3844	    (struct igb_rx_buf *) malloc(bsize,
3845	    M_DEVBUF, M_NOWAIT | M_ZERO))) {
3846		device_printf(dev, "Unable to allocate rx_buffer memory\n");
3847		error = ENOMEM;
3848		goto fail;
3849	}
3850
3851	if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
3852				   1, 0,		/* alignment, bounds */
3853				   BUS_SPACE_MAXADDR,	/* lowaddr */
3854				   BUS_SPACE_MAXADDR,	/* highaddr */
3855				   NULL, NULL,		/* filter, filterarg */
3856				   MSIZE,		/* maxsize */
3857				   1,			/* nsegments */
3858				   MSIZE,		/* maxsegsize */
3859				   0,			/* flags */
3860				   NULL,		/* lockfunc */
3861				   NULL,		/* lockfuncarg */
3862				   &rxr->htag))) {
3863		device_printf(dev, "Unable to create RX DMA tag\n");
3864		goto fail;
3865	}
3866
3867	if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
3868				   1, 0,		/* alignment, bounds */
3869				   BUS_SPACE_MAXADDR,	/* lowaddr */
3870				   BUS_SPACE_MAXADDR,	/* highaddr */
3871				   NULL, NULL,		/* filter, filterarg */
3872				   MJUM9BYTES,		/* maxsize */
3873				   1,			/* nsegments */
3874				   MJUM9BYTES,		/* maxsegsize */
3875				   0,			/* flags */
3876				   NULL,		/* lockfunc */
3877				   NULL,		/* lockfuncarg */
3878				   &rxr->ptag))) {
3879		device_printf(dev, "Unable to create RX payload DMA tag\n");
3880		goto fail;
3881	}
3882
3883	for (i = 0; i < adapter->num_rx_desc; i++) {
3884		rxbuf = &rxr->rx_buffers[i];
3885		error = bus_dmamap_create(rxr->htag,
3886		    BUS_DMA_NOWAIT, &rxbuf->hmap);
3887		if (error) {
3888			device_printf(dev,
3889			    "Unable to create RX head DMA maps\n");
3890			goto fail;
3891		}
3892		error = bus_dmamap_create(rxr->ptag,
3893		    BUS_DMA_NOWAIT, &rxbuf->pmap);
3894		if (error) {
3895			device_printf(dev,
3896			    "Unable to create RX packet DMA maps\n");
3897			goto fail;
3898		}
3899	}
3900
3901	return (0);
3902
3903fail:
3904	/* Frees all, but can handle partial completion */
3905	igb_free_receive_structures(adapter);
3906	return (error);
3907}
3908
3909
3910static void
3911igb_free_receive_ring(struct rx_ring *rxr)
3912{
3913	struct	adapter		*adapter = rxr->adapter;
3914	struct igb_rx_buf	*rxbuf;
3915
3916
3917	for (int i = 0; i < adapter->num_rx_desc; i++) {
3918		rxbuf = &rxr->rx_buffers[i];
3919		if (rxbuf->m_head != NULL) {
3920			bus_dmamap_sync(rxr->htag, rxbuf->hmap,
3921			    BUS_DMASYNC_POSTREAD);
3922			bus_dmamap_unload(rxr->htag, rxbuf->hmap);
3923			rxbuf->m_head->m_flags |= M_PKTHDR;
3924			m_freem(rxbuf->m_head);
3925		}
3926		if (rxbuf->m_pack != NULL) {
3927			bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
3928			    BUS_DMASYNC_POSTREAD);
3929			bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
3930			rxbuf->m_pack->m_flags |= M_PKTHDR;
3931			m_freem(rxbuf->m_pack);
3932		}
3933		rxbuf->m_head = NULL;
3934		rxbuf->m_pack = NULL;
3935	}
3936}
3937
3938
3939/*********************************************************************
3940 *
3941 *  Initialize a receive ring and its buffers.
3942 *
3943 **********************************************************************/
3944static int
3945igb_setup_receive_ring(struct rx_ring *rxr)
3946{
3947	struct	adapter		*adapter;
3948	struct  ifnet		*ifp;
3949	device_t		dev;
3950	struct igb_rx_buf	*rxbuf;
3951	bus_dma_segment_t	pseg[1], hseg[1];
3952	struct lro_ctrl		*lro = &rxr->lro;
3953	int			rsize, nsegs, error = 0;
3954
3955	adapter = rxr->adapter;
3956	dev = adapter->dev;
3957	ifp = adapter->ifp;
3958
3959	/* Clear the ring contents */
3960	IGB_RX_LOCK(rxr);
3961	rsize = roundup2(adapter->num_rx_desc *
3962	    sizeof(union e1000_adv_rx_desc), IGB_DBA_ALIGN);
3963	bzero((void *)rxr->rx_base, rsize);
3964
3965	/*
3966	** Free current RX buffer structures and their mbufs
3967	*/
3968	igb_free_receive_ring(rxr);
3969
3970	/* Configure for header split? */
3971	if (igb_header_split)
3972		rxr->hdr_split = TRUE;
3973
3974        /* Now replenish the ring mbufs */
3975	for (int j = 0; j < adapter->num_rx_desc; ++j) {
3976		struct mbuf	*mh, *mp;
3977
3978		rxbuf = &rxr->rx_buffers[j];
3979		if (rxr->hdr_split == FALSE)
3980			goto skip_head;
3981
3982		/* First the header */
3983		rxbuf->m_head = m_gethdr(M_DONTWAIT, MT_DATA);
3984		if (rxbuf->m_head == NULL) {
3985			error = ENOBUFS;
3986                        goto fail;
3987		}
3988		m_adj(rxbuf->m_head, ETHER_ALIGN);
3989		mh = rxbuf->m_head;
3990		mh->m_len = mh->m_pkthdr.len = MHLEN;
3991		mh->m_flags |= M_PKTHDR;
3992		/* Get the memory mapping */
3993		error = bus_dmamap_load_mbuf_sg(rxr->htag,
3994		    rxbuf->hmap, rxbuf->m_head, hseg,
3995		    &nsegs, BUS_DMA_NOWAIT);
3996		if (error != 0) /* Nothing elegant to do here */
3997                        goto fail;
3998		bus_dmamap_sync(rxr->htag,
3999		    rxbuf->hmap, BUS_DMASYNC_PREREAD);
4000		/* Update descriptor */
4001		rxr->rx_base[j].read.hdr_addr = htole64(hseg[0].ds_addr);
4002
4003skip_head:
4004		/* Now the payload cluster */
4005		rxbuf->m_pack = m_getjcl(M_DONTWAIT, MT_DATA,
4006		    M_PKTHDR, adapter->rx_mbuf_sz);
4007		if (rxbuf->m_pack == NULL) {
4008			error = ENOBUFS;
4009                        goto fail;
4010		}
4011		mp = rxbuf->m_pack;
4012		mp->m_pkthdr.len = mp->m_len = adapter->rx_mbuf_sz;
4013		/* Get the memory mapping */
4014		error = bus_dmamap_load_mbuf_sg(rxr->ptag,
4015		    rxbuf->pmap, mp, pseg,
4016		    &nsegs, BUS_DMA_NOWAIT);
4017		if (error != 0)
4018                        goto fail;
4019		bus_dmamap_sync(rxr->ptag,
4020		    rxbuf->pmap, BUS_DMASYNC_PREREAD);
4021		/* Update descriptor */
4022		rxr->rx_base[j].read.pkt_addr = htole64(pseg[0].ds_addr);
4023        }
4024
4025	/* Setup our descriptor indices */
4026	rxr->next_to_check = 0;
4027	rxr->next_to_refresh = adapter->num_rx_desc - 1;
4028	rxr->lro_enabled = FALSE;
4029	rxr->rx_split_packets = 0;
4030	rxr->rx_bytes = 0;
4031
4032	rxr->fmp = NULL;
4033	rxr->lmp = NULL;
4034	rxr->discard = FALSE;
4035
4036	bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4037	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4038
4039	/*
4040	** Now set up the LRO interface; we
4041	** also only do header split when LRO
4042	** is enabled, since they are often
4043	** undesirable in other setups.
4044	*/
4045	if (ifp->if_capenable & IFCAP_LRO) {
4046		error = tcp_lro_init(lro);
4047		if (error) {
4048			device_printf(dev, "LRO Initialization failed!\n");
4049			goto fail;
4050		}
4051		INIT_DEBUGOUT("RX LRO Initialized\n");
4052		rxr->lro_enabled = TRUE;
4053		lro->ifp = adapter->ifp;
4054	}
4055
4056	IGB_RX_UNLOCK(rxr);
4057	return (0);
4058
4059fail:
4060	igb_free_receive_ring(rxr);
4061	IGB_RX_UNLOCK(rxr);
4062	return (error);
4063}
4064
4065
4066/*********************************************************************
4067 *
4068 *  Initialize all receive rings.
4069 *
4070 **********************************************************************/
4071static int
4072igb_setup_receive_structures(struct adapter *adapter)
4073{
4074	struct rx_ring *rxr = adapter->rx_rings;
4075	int i;
4076
4077	for (i = 0; i < adapter->num_queues; i++, rxr++)
4078		if (igb_setup_receive_ring(rxr))
4079			goto fail;
4080
4081	return (0);
4082fail:
4083	/*
4084	 * Free RX buffers allocated so far, we will only handle
4085	 * the rings that completed, the failing case will have
4086	 * cleaned up for itself. 'i' is the endpoint.
4087	 */
4088	for (int j = 0; j < i; ++j) {
4089		rxr = &adapter->rx_rings[j];
4090		IGB_RX_LOCK(rxr);
4091		igb_free_receive_ring(rxr);
4092		IGB_RX_UNLOCK(rxr);
4093	}
4094
4095	return (ENOBUFS);
4096}
4097
4098/*********************************************************************
4099 *
4100 *  Enable receive unit.
4101 *
4102 **********************************************************************/
4103static void
4104igb_initialize_receive_units(struct adapter *adapter)
4105{
4106	struct rx_ring	*rxr = adapter->rx_rings;
4107	struct ifnet	*ifp = adapter->ifp;
4108	struct e1000_hw *hw = &adapter->hw;
4109	u32		rctl, rxcsum, psize, srrctl = 0;
4110
4111	INIT_DEBUGOUT("igb_initialize_receive_unit: begin");
4112
4113	/*
4114	 * Make sure receives are disabled while setting
4115	 * up the descriptor ring
4116	 */
4117	rctl = E1000_READ_REG(hw, E1000_RCTL);
4118	E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
4119
4120	/*
4121	** Set up for header split
4122	*/
4123	if (igb_header_split) {
4124		/* Use a standard mbuf for the header */
4125		srrctl |= IGB_HDR_BUF << E1000_SRRCTL_BSIZEHDRSIZE_SHIFT;
4126		srrctl |= E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
4127	} else
4128		srrctl |= E1000_SRRCTL_DESCTYPE_ADV_ONEBUF;
4129
4130	/*
4131	** Set up for jumbo frames
4132	*/
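	/*
	** Illustration (not a hardware requirement): with a 9000-byte
	** MTU the cluster size chosen elsewhere in the driver typically
	** exceeds MJUMPAGESIZE, in which case the 8192-byte SRRCTL/RCTL
	** sizing below applies, and RLPML is programmed with the full
	** frame size plus room for a VLAN tag when a vlan trunk is
	** attached.
	*/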
4133	if (ifp->if_mtu > ETHERMTU) {
4134		rctl |= E1000_RCTL_LPE;
4135		if (adapter->rx_mbuf_sz == MJUMPAGESIZE) {
4136			srrctl |= 4096 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
4137			rctl |= E1000_RCTL_SZ_4096 | E1000_RCTL_BSEX;
4138		} else if (adapter->rx_mbuf_sz > MJUMPAGESIZE) {
4139			srrctl |= 8192 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
4140			rctl |= E1000_RCTL_SZ_8192 | E1000_RCTL_BSEX;
4141		}
4142		/* Set maximum packet len */
4143		psize = adapter->max_frame_size;
4144		/* are we on a vlan? */
4145		if (adapter->ifp->if_vlantrunk != NULL)
4146			psize += VLAN_TAG_SIZE;
4147		E1000_WRITE_REG(&adapter->hw, E1000_RLPML, psize);
4148	} else {
4149		rctl &= ~E1000_RCTL_LPE;
4150		srrctl |= 2048 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
4151		rctl |= E1000_RCTL_SZ_2048;
4152	}
4153
4154	/* Setup the Base and Length of the Rx Descriptor Rings */
4155	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4156		u64 bus_addr = rxr->rxdma.dma_paddr;
4157		u32 rxdctl;
4158
4159		E1000_WRITE_REG(hw, E1000_RDLEN(i),
4160		    adapter->num_rx_desc * sizeof(struct e1000_rx_desc));
4161		E1000_WRITE_REG(hw, E1000_RDBAH(i),
4162		    (uint32_t)(bus_addr >> 32));
4163		E1000_WRITE_REG(hw, E1000_RDBAL(i),
4164		    (uint32_t)bus_addr);
4165		E1000_WRITE_REG(hw, E1000_SRRCTL(i), srrctl);
4166		/* Enable this Queue */
4167		rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(i));
4168		rxdctl |= E1000_RXDCTL_QUEUE_ENABLE;
4169		rxdctl &= 0xFFF00000;
4170		rxdctl |= IGB_RX_PTHRESH;
4171		rxdctl |= IGB_RX_HTHRESH << 8;
4172		rxdctl |= IGB_RX_WTHRESH << 16;
4173		E1000_WRITE_REG(hw, E1000_RXDCTL(i), rxdctl);
4174	}
4175
4176	/*
4177	** Setup for RX MultiQueue
4178	*/
4179	rxcsum = E1000_READ_REG(hw, E1000_RXCSUM);
4180	if (adapter->num_queues > 1) {
4181		u32 random[10], mrqc, shift = 0;
4182		union igb_reta {
4183			u32 dword;
4184			u8  bytes[4];
4185		} reta;
4186
4187		arc4rand(&random, sizeof(random), 0);
4188		if (adapter->hw.mac.type == e1000_82575)
4189			shift = 6;
4190		/* Warning FM follows */
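		/*
		** Example of the packing below (values illustrative):
		** with num_queues = 4 and shift = 0 the table entries
		** run 0,1,2,3,0,1,2,3,... and each group of four 8-bit
		** entries is written as one 32-bit RETA register, so
		** register 0 covers entries 0-3, register 1 covers 4-7,
		** and so on through all 128 entries.
		*/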
4191		for (int i = 0; i < 128; i++) {
4192			reta.bytes[i & 3] =
4193			    (i % adapter->num_queues) << shift;
4194			if ((i & 3) == 3)
4195				E1000_WRITE_REG(hw,
4196				    E1000_RETA(i >> 2), reta.dword);
4197		}
4198		/* Now fill in hash table */
4199		mrqc = E1000_MRQC_ENABLE_RSS_4Q;
4200		for (int i = 0; i < 10; i++)
4201			E1000_WRITE_REG_ARRAY(hw,
4202			    E1000_RSSRK(0), i, random[i]);
4203
4204		mrqc |= (E1000_MRQC_RSS_FIELD_IPV4 |
4205		    E1000_MRQC_RSS_FIELD_IPV4_TCP);
4206		mrqc |= (E1000_MRQC_RSS_FIELD_IPV6 |
4207		    E1000_MRQC_RSS_FIELD_IPV6_TCP);
4208		mrqc |= (E1000_MRQC_RSS_FIELD_IPV4_UDP |
4209		    E1000_MRQC_RSS_FIELD_IPV6_UDP);
4210		mrqc |= (E1000_MRQC_RSS_FIELD_IPV6_UDP_EX |
4211		    E1000_MRQC_RSS_FIELD_IPV6_TCP_EX);
4212
4213		E1000_WRITE_REG(hw, E1000_MRQC, mrqc);
4214
4215		/*
4216		** NOTE: Receive Full-Packet Checksum Offload
4217		** is mutually exclusive with Multiqueue. However,
4218		** this is not the same as the TCP/IP checksums,
4219		** which still work.
4220		*/
4221		rxcsum |= E1000_RXCSUM_PCSD;
4222#if __FreeBSD_version >= 800000
4223		/* For SCTP Offload */
4224		if ((hw->mac.type == e1000_82576)
4225		    && (ifp->if_capenable & IFCAP_RXCSUM))
4226			rxcsum |= E1000_RXCSUM_CRCOFL;
4227#endif
4228	} else {
4229		/* Non RSS setup */
4230		if (ifp->if_capenable & IFCAP_RXCSUM) {
4231			rxcsum |= E1000_RXCSUM_IPPCSE;
4232#if __FreeBSD_version >= 800000
4233			if (adapter->hw.mac.type == e1000_82576)
4234				rxcsum |= E1000_RXCSUM_CRCOFL;
4235#endif
4236		} else
4237			rxcsum &= ~E1000_RXCSUM_TUOFL;
4238	}
4239	E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum);
4240
4241	/* Setup the Receive Control Register */
4242	rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
4243	rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_LBM_NO |
4244		   E1000_RCTL_RDMTS_HALF |
4245		   (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
4246	/* Strip CRC bytes. */
4247	rctl |= E1000_RCTL_SECRC;
4248	/* Make sure VLAN Filters are off */
4249	rctl &= ~E1000_RCTL_VFE;
4250	/* Don't store bad packets */
4251	rctl &= ~E1000_RCTL_SBP;
4252
4253	/* Enable Receives */
4254	E1000_WRITE_REG(hw, E1000_RCTL, rctl);
4255
4256	/*
4257	 * Setup the HW Rx Head and Tail Descriptor Pointers
4258	 *   - needs to be after enable
4259	 */
4260	for (int i = 0; i < adapter->num_queues; i++) {
4261		rxr = &adapter->rx_rings[i];
4262		E1000_WRITE_REG(hw, E1000_RDH(i), rxr->next_to_check);
4263		E1000_WRITE_REG(hw, E1000_RDT(i), rxr->next_to_refresh);
4264	}
4265	return;
4266}
4267
4268/*********************************************************************
4269 *
4270 *  Free receive rings.
4271 *
4272 **********************************************************************/
4273static void
4274igb_free_receive_structures(struct adapter *adapter)
4275{
4276	struct rx_ring *rxr = adapter->rx_rings;
4277
4278	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4279		struct lro_ctrl	*lro = &rxr->lro;
4280		igb_free_receive_buffers(rxr);
4281		tcp_lro_free(lro);
4282		igb_dma_free(adapter, &rxr->rxdma);
4283	}
4284
4285	free(adapter->rx_rings, M_DEVBUF);
4286}
4287
4288/*********************************************************************
4289 *
4290 *  Free receive ring data structures.
4291 *
4292 **********************************************************************/
4293static void
4294igb_free_receive_buffers(struct rx_ring *rxr)
4295{
4296	struct adapter		*adapter = rxr->adapter;
4297	struct igb_rx_buf	*rxbuf;
4298	int i;
4299
4300	INIT_DEBUGOUT("free_receive_structures: begin");
4301
4302	/* Cleanup any existing buffers */
4303	if (rxr->rx_buffers != NULL) {
4304		for (i = 0; i < adapter->num_rx_desc; i++) {
4305			rxbuf = &rxr->rx_buffers[i];
4306			if (rxbuf->m_head != NULL) {
4307				bus_dmamap_sync(rxr->htag, rxbuf->hmap,
4308				    BUS_DMASYNC_POSTREAD);
4309				bus_dmamap_unload(rxr->htag, rxbuf->hmap);
4310				rxbuf->m_head->m_flags |= M_PKTHDR;
4311				m_freem(rxbuf->m_head);
4312			}
4313			if (rxbuf->m_pack != NULL) {
4314				bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
4315				    BUS_DMASYNC_POSTREAD);
4316				bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
4317				rxbuf->m_pack->m_flags |= M_PKTHDR;
4318				m_freem(rxbuf->m_pack);
4319			}
4320			rxbuf->m_head = NULL;
4321			rxbuf->m_pack = NULL;
4322			if (rxbuf->hmap != NULL) {
4323				bus_dmamap_destroy(rxr->htag, rxbuf->hmap);
4324				rxbuf->hmap = NULL;
4325			}
4326			if (rxbuf->pmap != NULL) {
4327				bus_dmamap_destroy(rxr->ptag, rxbuf->pmap);
4328				rxbuf->pmap = NULL;
4329			}
4330		}
4331		if (rxr->rx_buffers != NULL) {
4332			free(rxr->rx_buffers, M_DEVBUF);
4333			rxr->rx_buffers = NULL;
4334		}
4335	}
4336
4337	if (rxr->htag != NULL) {
4338		bus_dma_tag_destroy(rxr->htag);
4339		rxr->htag = NULL;
4340	}
4341	if (rxr->ptag != NULL) {
4342		bus_dma_tag_destroy(rxr->ptag);
4343		rxr->ptag = NULL;
4344	}
4345}
4346
4347static __inline void
4348igb_rx_discard(struct rx_ring *rxr, int i)
4349{
4350	struct igb_rx_buf	*rbuf;
4351
4352	rbuf = &rxr->rx_buffers[i];
4353
4354	/* Partially received? Free the chain */
4355	if (rxr->fmp != NULL) {
4356		rxr->fmp->m_flags |= M_PKTHDR;
4357		m_freem(rxr->fmp);
4358		rxr->fmp = NULL;
4359		rxr->lmp = NULL;
4360	}
4361
4362	/*
4363	** With advanced descriptors the writeback
4364	** clobbers the buffer addrs, so it's easier
4365	** to just free the existing mbufs and take
4366	** the normal refresh path to get new buffers
4367	** and mapping.
4368	*/
4369	if (rbuf->m_head) {
4370		m_free(rbuf->m_head);
4371		rbuf->m_head = NULL;
4372	}
4373
4374	if (rbuf->m_pack) {
4375		m_free(rbuf->m_pack);
4376		rbuf->m_pack = NULL;
4377	}
4378
4379	return;
4380}
4381
4382static __inline void
4383igb_rx_input(struct rx_ring *rxr, struct ifnet *ifp, struct mbuf *m, u32 ptype)
4384{
4385
4386	/*
4387	 * ATM LRO is only for IPv4/TCP packets and TCP checksum of the packet
4388	 * should be computed by hardware. Also it should not have VLAN tag in
4389	 * ethernet header.
4390	 */
4391	if (rxr->lro_enabled &&
4392	    (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 &&
4393	    (ptype & E1000_RXDADV_PKTTYPE_ETQF) == 0 &&
4394	    (ptype & (E1000_RXDADV_PKTTYPE_IPV4 | E1000_RXDADV_PKTTYPE_TCP)) ==
4395	    (E1000_RXDADV_PKTTYPE_IPV4 | E1000_RXDADV_PKTTYPE_TCP) &&
4396	    (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) ==
4397	    (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) {
4398		/*
4399		 * Send to the stack if:
4400		 **  - LRO not enabled, or
4401		 **  - no LRO resources, or
4402		 **  - lro enqueue fails
4403		 */
4404		if (rxr->lro.lro_cnt != 0)
4405			if (tcp_lro_rx(&rxr->lro, m, 0) == 0)
4406				return;
4407	}
4408	IGB_RX_UNLOCK(rxr);
4409	(*ifp->if_input)(ifp, m);
4410	IGB_RX_LOCK(rxr);
4411}
4412
4413/*********************************************************************
4414 *
4415 *  This routine executes in interrupt context. It replenishes
4416 *  the mbufs in the descriptor ring and sends data which has been
4417 *  dma'ed into host memory to the upper layer.
4418 *
4419 *  We loop at most count times if count is > 0, or until done if
4420 *  count < 0.
4421 *
4422 *  Return TRUE if more to clean, FALSE otherwise
4423 *********************************************************************/
4424static bool
4425igb_rxeof(struct igb_queue *que, int count, int *done)
4426{
4427	struct adapter		*adapter = que->adapter;
4428	struct rx_ring		*rxr = que->rxr;
4429	struct ifnet		*ifp = adapter->ifp;
4430	struct lro_ctrl		*lro = &rxr->lro;
4431	struct lro_entry	*queued;
4432	int			i, processed = 0, rxdone = 0;
4433	u32			ptype, staterr = 0;
4434	union e1000_adv_rx_desc	*cur;
4435
4436	IGB_RX_LOCK(rxr);
4437	/* Sync the ring. */
4438	bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4439	    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
4440
4441	/* Main clean loop */
4442	for (i = rxr->next_to_check; count != 0;) {
4443		struct mbuf		*sendmp, *mh, *mp;
4444		struct igb_rx_buf	*rxbuf;
4445		u16			hlen, plen, hdr, vtag;
4446		bool			eop = FALSE;
4447
4448		cur = &rxr->rx_base[i];
4449		staterr = le32toh(cur->wb.upper.status_error);
4450		if ((staterr & E1000_RXD_STAT_DD) == 0)
4451			break;
4452		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
4453			break;
4454		count--;
4455		sendmp = mh = mp = NULL;
4456		cur->wb.upper.status_error = 0;
4457		rxbuf = &rxr->rx_buffers[i];
4458		plen = le16toh(cur->wb.upper.length);
4459		ptype = le32toh(cur->wb.lower.lo_dword.data) & IGB_PKTTYPE_MASK;
4460		if ((adapter->hw.mac.type == e1000_i350) &&
4461		    (staterr & E1000_RXDEXT_STATERR_LB))
4462			vtag = be16toh(cur->wb.upper.vlan);
4463		else
4464			vtag = le16toh(cur->wb.upper.vlan);
4465		hdr = le16toh(cur->wb.lower.lo_dword.hs_rss.hdr_info);
4466		eop = ((staterr & E1000_RXD_STAT_EOP) == E1000_RXD_STAT_EOP);
4467
4468		/* Make sure all segments of a bad packet are discarded */
4469		if (((staterr & E1000_RXDEXT_ERR_FRAME_ERR_MASK) != 0) ||
4470		    (rxr->discard)) {
4471			ifp->if_ierrors++;
4472			++rxr->rx_discarded;
4473			if (!eop) /* Catch subsequent segs */
4474				rxr->discard = TRUE;
4475			else
4476				rxr->discard = FALSE;
4477			igb_rx_discard(rxr, i);
4478			goto next_desc;
4479		}
4480
4481		/*
4482		** The way the hardware is configured to
4483		** split, it will ONLY use the header buffer
4484		** when header split is enabled, otherwise we
4485		** get normal behavior, ie, both header and
4486		** payload are DMA'd into the payload buffer.
4487		**
4488		** The fmp test is to catch the case where a
4489		** packet spans multiple descriptors, in that
4490		** case only the first header is valid.
4491		*/
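		/*
		** Rough example (sizes made up): with split enabled, a
		** 1514-byte frame with 66 bytes of headers arrives as a
		** small header mbuf chained to a cluster holding the
		** payload; with split disabled the whole frame lands in
		** the payload cluster and only the else path runs.
		*/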
4492		if (rxr->hdr_split && rxr->fmp == NULL) {
4493			hlen = (hdr & E1000_RXDADV_HDRBUFLEN_MASK) >>
4494			    E1000_RXDADV_HDRBUFLEN_SHIFT;
4495			if (hlen > IGB_HDR_BUF)
4496				hlen = IGB_HDR_BUF;
4497			mh = rxr->rx_buffers[i].m_head;
4498			mh->m_len = hlen;
4499			/* clear buf pointer for refresh */
4500			rxbuf->m_head = NULL;
4501			/*
4502			** Get the payload length, this
4503			** could be zero if it's a small
4504			** packet.
4505			*/
4506			if (plen > 0) {
4507				mp = rxr->rx_buffers[i].m_pack;
4508				mp->m_len = plen;
4509				mh->m_next = mp;
4510				/* clear buf pointer */
4511				rxbuf->m_pack = NULL;
4512				rxr->rx_split_packets++;
4513			}
4514		} else {
4515			/*
4516			** Either no header split, or a
4517			** secondary piece of a fragmented
4518			** split packet.
4519			*/
4520			mh = rxr->rx_buffers[i].m_pack;
4521			mh->m_len = plen;
4522			/* clear buf info for refresh */
4523			rxbuf->m_pack = NULL;
4524		}
4525
4526		++processed; /* So we know when to refresh */
4527
4528		/* Initial frame - setup */
4529		if (rxr->fmp == NULL) {
4530			mh->m_pkthdr.len = mh->m_len;
4531			/* Save the head of the chain */
4532			rxr->fmp = mh;
4533			rxr->lmp = mh;
4534			if (mp != NULL) {
4535				/* Add payload if split */
4536				mh->m_pkthdr.len += mp->m_len;
4537				rxr->lmp = mh->m_next;
4538			}
4539		} else {
4540			/* Chain mbuf's together */
4541			rxr->lmp->m_next = mh;
4542			rxr->lmp = rxr->lmp->m_next;
4543			rxr->fmp->m_pkthdr.len += mh->m_len;
4544		}
4545
4546		if (eop) {
4547			rxr->fmp->m_pkthdr.rcvif = ifp;
4548			ifp->if_ipackets++;
4549			rxr->rx_packets++;
4550			/* capture data for AIM */
4551			rxr->packets++;
4552			rxr->bytes += rxr->fmp->m_pkthdr.len;
4553			rxr->rx_bytes += rxr->fmp->m_pkthdr.len;
4554
4555			if ((ifp->if_capenable & IFCAP_RXCSUM) != 0)
4556				igb_rx_checksum(staterr, rxr->fmp, ptype);
4557
4558			if ((ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 &&
4559			    (staterr & E1000_RXD_STAT_VP) != 0) {
4560				rxr->fmp->m_pkthdr.ether_vtag = vtag;
4561				rxr->fmp->m_flags |= M_VLANTAG;
4562			}
4563#if __FreeBSD_version >= 800000
4564			rxr->fmp->m_pkthdr.flowid = que->msix;
4565			rxr->fmp->m_flags |= M_FLOWID;
4566#endif
4567			sendmp = rxr->fmp;
4568			/* Make sure to set M_PKTHDR. */
4569			sendmp->m_flags |= M_PKTHDR;
4570			rxr->fmp = NULL;
4571			rxr->lmp = NULL;
4572		}
4573
4574next_desc:
4575		bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4576		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4577
4578		/* Advance our pointers to the next descriptor. */
4579		if (++i == adapter->num_rx_desc)
4580			i = 0;
4581		/*
4582		** Send to the stack or LRO
4583		*/
4584		if (sendmp != NULL) {
4585			rxr->next_to_check = i;
4586			igb_rx_input(rxr, ifp, sendmp, ptype);
4587			i = rxr->next_to_check;
4588			rxdone++;
4589		}
4590
4591		/* Every 8 descriptors we go to refresh mbufs */
4592		if (processed == 8) {
4593                        igb_refresh_mbufs(rxr, i);
4594                        processed = 0;
4595		}
4596	}
4597
4598	/* Catch any remainders */
4599	if (igb_rx_unrefreshed(rxr))
4600		igb_refresh_mbufs(rxr, i);
4601
4602	rxr->next_to_check = i;
4603
4604	/*
4605	 * Flush any outstanding LRO work
4606	 */
4607	while ((queued = SLIST_FIRST(&lro->lro_active)) != NULL) {
4608		SLIST_REMOVE_HEAD(&lro->lro_active, next);
4609		tcp_lro_flush(lro, queued);
4610	}
4611
4612	if (done != NULL)
4613		*done = rxdone;
4614
4615	IGB_RX_UNLOCK(rxr);
4616	return ((staterr & E1000_RXD_STAT_DD) ? TRUE : FALSE);
4617}
4618
4619/*********************************************************************
4620 *
4621 *  Verify that the hardware indicated that the checksum is valid.
4622 *  Inform the stack about the status of the checksum so that the
4623 *  stack doesn't spend time verifying it.
4624 *
4625 *********************************************************************/
4626static void
4627igb_rx_checksum(u32 staterr, struct mbuf *mp, u32 ptype)
4628{
4629	u16 status = (u16)staterr;
4630	u8  errors = (u8) (staterr >> 24);
4631	int sctp;
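	/*
	** The split above mirrors the descriptor layout this code relies
	** on: the low 16 bits of the writeback word hold the status bits
	** (e.g. E1000_RXD_STAT_IPCS) and bits 24-31 hold the error bits
	** (e.g. E1000_RXD_ERR_IPE).
	*/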
4632
4633	/* Ignore Checksum bit is set */
4634	if (status & E1000_RXD_STAT_IXSM) {
4635		mp->m_pkthdr.csum_flags = 0;
4636		return;
4637	}
4638
4639	if ((ptype & E1000_RXDADV_PKTTYPE_ETQF) == 0 &&
4640	    (ptype & E1000_RXDADV_PKTTYPE_SCTP) != 0)
4641		sctp = 1;
4642	else
4643		sctp = 0;
4644	if (status & E1000_RXD_STAT_IPCS) {
4645		/* Did it pass? */
4646		if (!(errors & E1000_RXD_ERR_IPE)) {
4647			/* IP Checksum Good */
4648			mp->m_pkthdr.csum_flags = CSUM_IP_CHECKED;
4649			mp->m_pkthdr.csum_flags |= CSUM_IP_VALID;
4650		} else
4651			mp->m_pkthdr.csum_flags = 0;
4652	}
4653
4654	if (status & (E1000_RXD_STAT_TCPCS | E1000_RXD_STAT_UDPCS)) {
4655		u16 type = (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
4656#if __FreeBSD_version >= 800000
4657		if (sctp) /* reassign */
4658			type = CSUM_SCTP_VALID;
4659#endif
4660		/* Did it pass? */
4661		if (!(errors & E1000_RXD_ERR_TCPE)) {
4662			mp->m_pkthdr.csum_flags |= type;
4663			if (sctp == 0)
4664				mp->m_pkthdr.csum_data = htons(0xffff);
4665		}
4666	}
4667	return;
4668}
4669
4670/*
4671 * This routine is run via a vlan
4672 * config EVENT
4673 */
4674static void
4675igb_register_vlan(void *arg, struct ifnet *ifp, u16 vtag)
4676{
4677	struct adapter	*adapter = ifp->if_softc;
4678	u32		index, bit;
4679
4680	if (ifp->if_softc !=  arg)   /* Not our event */
4681		return;
4682
4683	if ((vtag == 0) || (vtag > 4095))       /* Invalid */
4684                return;
4685
4686	IGB_CORE_LOCK(adapter);
4687	index = (vtag >> 5) & 0x7F;
4688	bit = vtag & 0x1F;
4689	adapter->shadow_vfta[index] |= (1 << bit);
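	/*
	** Worked example (vtag chosen arbitrarily): vtag 100 gives
	** index 3 and bit 4, i.e. bit 4 of shadow_vfta[3] is set, and
	** the same word is later written to the hardware VFTA.
	*/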
4690	++adapter->num_vlans;
4691	/* Change hw filter setting */
4692	if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
4693		igb_setup_vlan_hw_support(adapter);
4694	IGB_CORE_UNLOCK(adapter);
4695}
4696
4697/*
4698 * This routine is run via a vlan
4699 * unconfig EVENT
4700 */
4701static void
4702igb_unregister_vlan(void *arg, struct ifnet *ifp, u16 vtag)
4703{
4704	struct adapter	*adapter = ifp->if_softc;
4705	u32		index, bit;
4706
4707	if (ifp->if_softc !=  arg)
4708		return;
4709
4710	if ((vtag == 0) || (vtag > 4095))       /* Invalid */
4711                return;
4712
4713	IGB_CORE_LOCK(adapter);
4714	index = (vtag >> 5) & 0x7F;
4715	bit = vtag & 0x1F;
4716	adapter->shadow_vfta[index] &= ~(1 << bit);
4717	--adapter->num_vlans;
4718	/* Change hw filter setting */
4719	if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
4720		igb_setup_vlan_hw_support(adapter);
4721	IGB_CORE_UNLOCK(adapter);
4722}
4723
4724static void
4725igb_setup_vlan_hw_support(struct adapter *adapter)
4726{
4727	struct e1000_hw *hw = &adapter->hw;
4728	struct ifnet	*ifp = adapter->ifp;
4729	u32             reg;
4730
4731	if (adapter->vf_ifp) {
4732		e1000_rlpml_set_vf(hw,
4733		    adapter->max_frame_size + VLAN_TAG_SIZE);
4734		return;
4735	}
4736
4737	reg = E1000_READ_REG(hw, E1000_CTRL);
4738	reg |= E1000_CTRL_VME;
4739	E1000_WRITE_REG(hw, E1000_CTRL, reg);
4740
4741	/* Enable the Filter Table */
4742	if (ifp->if_capenable & IFCAP_VLAN_HWFILTER) {
4743		reg = E1000_READ_REG(hw, E1000_RCTL);
4744		reg &= ~E1000_RCTL_CFIEN;
4745		reg |= E1000_RCTL_VFE;
4746		E1000_WRITE_REG(hw, E1000_RCTL, reg);
4747	}
4748
4749	/* Update the frame size */
4750	E1000_WRITE_REG(&adapter->hw, E1000_RLPML,
4751	    adapter->max_frame_size + VLAN_TAG_SIZE);
4752
4753	/* Don't bother with table if no vlans */
4754	if ((adapter->num_vlans == 0) ||
4755	    ((ifp->if_capenable & IFCAP_VLAN_HWFILTER) == 0))
4756                return;
4757	/*
4758	** A soft reset zeroes out the VFTA, so
4759	** we need to repopulate it now.
4760	*/
4761	for (int i = 0; i < IGB_VFTA_SIZE; i++)
4762                if (adapter->shadow_vfta[i] != 0) {
4763			if (adapter->vf_ifp)
4764				e1000_vfta_set_vf(hw,
4765				    adapter->shadow_vfta[i], TRUE);
4766			else
4767				E1000_WRITE_REG_ARRAY(hw, E1000_VFTA,
4768                           	 i, adapter->shadow_vfta[i]);
4769		}
4770}
4771
4772static void
4773igb_enable_intr(struct adapter *adapter)
4774{
4775	/* With RSS set up what to auto clear */
4776	if (adapter->msix_mem) {
4777		u32 mask = (adapter->que_mask | adapter->link_mask);
4778		E1000_WRITE_REG(&adapter->hw, E1000_EIAC, mask);
4779		E1000_WRITE_REG(&adapter->hw, E1000_EIAM, mask);
4780		E1000_WRITE_REG(&adapter->hw, E1000_EIMS, mask);
4781		E1000_WRITE_REG(&adapter->hw, E1000_IMS,
4782		    E1000_IMS_LSC);
4783	} else {
4784		E1000_WRITE_REG(&adapter->hw, E1000_IMS,
4785		    IMS_ENABLE_MASK);
4786	}
4787	E1000_WRITE_FLUSH(&adapter->hw);
4788
4789	return;
4790}
4791
4792static void
4793igb_disable_intr(struct adapter *adapter)
4794{
4795	if (adapter->msix_mem) {
4796		E1000_WRITE_REG(&adapter->hw, E1000_EIMC, ~0);
4797		E1000_WRITE_REG(&adapter->hw, E1000_EIAC, 0);
4798	}
4799	E1000_WRITE_REG(&adapter->hw, E1000_IMC, ~0);
4800	E1000_WRITE_FLUSH(&adapter->hw);
4801	return;
4802}
4803
4804/*
4805 * Bit of a misnomer, what this really means is
4806 * to enable OS management of the system... aka
4807 * to disable special hardware management features
4808 */
4809static void
4810igb_init_manageability(struct adapter *adapter)
4811{
4812	if (adapter->has_manage) {
4813		int manc2h = E1000_READ_REG(&adapter->hw, E1000_MANC2H);
4814		int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
4815
4816		/* disable hardware interception of ARP */
4817		manc &= ~(E1000_MANC_ARP_EN);
4818
4819                /* enable receiving management packets to the host */
4820		manc |= E1000_MANC_EN_MNG2HOST;
4821		manc2h |= 1 << 5;  /* Mng Port 623 */
4822		manc2h |= 1 << 6;  /* Mng Port 664 */
4823		E1000_WRITE_REG(&adapter->hw, E1000_MANC2H, manc2h);
4824		E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
4825	}
4826}
4827
4828/*
4829 * Give control back to hardware management
4830 * controller if there is one.
4831 */
4832static void
4833igb_release_manageability(struct adapter *adapter)
4834{
4835	if (adapter->has_manage) {
4836		int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
4837
4838		/* re-enable hardware interception of ARP */
4839		manc |= E1000_MANC_ARP_EN;
4840		manc &= ~E1000_MANC_EN_MNG2HOST;
4841
4842		E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
4843	}
4844}
4845
4846/*
4847 * igb_get_hw_control sets CTRL_EXT:DRV_LOAD bit.
4848 * For ASF and Pass Through versions of f/w this means that
4849 * the driver is loaded.
4850 *
4851 */
4852static void
4853igb_get_hw_control(struct adapter *adapter)
4854{
4855	u32 ctrl_ext;
4856
4857	if (adapter->vf_ifp)
4858		return;
4859
4860	/* Let firmware know the driver has taken over */
4861	ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4862	E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
4863	    ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
4864}
4865
4866/*
4867 * igb_release_hw_control resets CTRL_EXT:DRV_LOAD bit.
4868 * For ASF and Pass Through versions of f/w this means that the
4869 * driver is no longer loaded.
4870 *
4871 */
4872static void
4873igb_release_hw_control(struct adapter *adapter)
4874{
4875	u32 ctrl_ext;
4876
4877	if (adapter->vf_ifp)
4878		return;
4879
4880	/* Let firmware take over control of h/w */
4881	ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4882	E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
4883	    ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
4884}
4885
4886static int
4887igb_is_valid_ether_addr(uint8_t *addr)
4888{
4889	char zero_addr[6] = { 0, 0, 0, 0, 0, 0 };
4890
4891	if ((addr[0] & 1) || (!bcmp(addr, zero_addr, ETHER_ADDR_LEN))) {
4892		return (FALSE);
4893	}
4894
4895	return (TRUE);
4896}
4897
4898
4899/*
4900 * Enable PCI Wake On Lan capability
4901 */
4902static void
4903igb_enable_wakeup(device_t dev)
4904{
4905	u16     cap, status;
4906	u8      id;
4907
4908	/* First find the capabilities pointer*/
4909	cap = pci_read_config(dev, PCIR_CAP_PTR, 2);
4910	/* Read the PM Capabilities */
4911	id = pci_read_config(dev, cap, 1);
4912	if (id != PCIY_PMG)     /* Something wrong */
4913		return;
4914	/* OK, we have the power capabilities, so
4915	   now get the status register */
4916	cap += PCIR_POWER_STATUS;
4917	status = pci_read_config(dev, cap, 2);
4918	status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE;
4919	pci_write_config(dev, cap, status, 2);
4920	return;
4921}
4922
4923static void
4924igb_led_func(void *arg, int onoff)
4925{
4926	struct adapter	*adapter = arg;
4927
4928	IGB_CORE_LOCK(adapter);
4929	if (onoff) {
4930		e1000_setup_led(&adapter->hw);
4931		e1000_led_on(&adapter->hw);
4932	} else {
4933		e1000_led_off(&adapter->hw);
4934		e1000_cleanup_led(&adapter->hw);
4935	}
4936	IGB_CORE_UNLOCK(adapter);
4937}
4938
4939/**********************************************************************
4940 *
4941 *  Update the board statistics counters.
4942 *
4943 **********************************************************************/
4944static void
4945igb_update_stats_counters(struct adapter *adapter)
4946{
4947	struct ifnet		*ifp;
4948        struct e1000_hw		*hw = &adapter->hw;
4949	struct e1000_hw_stats	*stats;
4950
4951	/*
4952	** The virtual function adapter has only a
4953	** small, controlled set of stats; do only
4954	** those and return.
4955	*/
4956	if (adapter->vf_ifp) {
4957		igb_update_vf_stats_counters(adapter);
4958		return;
4959	}
4960
4961	stats = (struct e1000_hw_stats	*)adapter->stats;
4962
4963	if(adapter->hw.phy.media_type == e1000_media_type_copper ||
4964	   (E1000_READ_REG(hw, E1000_STATUS) & E1000_STATUS_LU)) {
4965		stats->symerrs +=
4966		    E1000_READ_REG(hw,E1000_SYMERRS);
4967		stats->sec += E1000_READ_REG(hw, E1000_SEC);
4968	}
4969
4970	stats->crcerrs += E1000_READ_REG(hw, E1000_CRCERRS);
4971	stats->mpc += E1000_READ_REG(hw, E1000_MPC);
4972	stats->scc += E1000_READ_REG(hw, E1000_SCC);
4973	stats->ecol += E1000_READ_REG(hw, E1000_ECOL);
4974
4975	stats->mcc += E1000_READ_REG(hw, E1000_MCC);
4976	stats->latecol += E1000_READ_REG(hw, E1000_LATECOL);
4977	stats->colc += E1000_READ_REG(hw, E1000_COLC);
4978	stats->dc += E1000_READ_REG(hw, E1000_DC);
4979	stats->rlec += E1000_READ_REG(hw, E1000_RLEC);
4980	stats->xonrxc += E1000_READ_REG(hw, E1000_XONRXC);
4981	stats->xontxc += E1000_READ_REG(hw, E1000_XONTXC);
4982	/*
4983	** For watchdog management we need to know if we have been
4984	** paused during the last interval, so capture that here.
4985	*/
4986        adapter->pause_frames = E1000_READ_REG(&adapter->hw, E1000_XOFFRXC);
4987        stats->xoffrxc += adapter->pause_frames;
4988	stats->xofftxc += E1000_READ_REG(hw, E1000_XOFFTXC);
4989	stats->fcruc += E1000_READ_REG(hw, E1000_FCRUC);
4990	stats->prc64 += E1000_READ_REG(hw, E1000_PRC64);
4991	stats->prc127 += E1000_READ_REG(hw, E1000_PRC127);
4992	stats->prc255 += E1000_READ_REG(hw, E1000_PRC255);
4993	stats->prc511 += E1000_READ_REG(hw, E1000_PRC511);
4994	stats->prc1023 += E1000_READ_REG(hw, E1000_PRC1023);
4995	stats->prc1522 += E1000_READ_REG(hw, E1000_PRC1522);
4996	stats->gprc += E1000_READ_REG(hw, E1000_GPRC);
4997	stats->bprc += E1000_READ_REG(hw, E1000_BPRC);
4998	stats->mprc += E1000_READ_REG(hw, E1000_MPRC);
4999	stats->gptc += E1000_READ_REG(hw, E1000_GPTC);
5000
5001	/* For the 64-bit byte counters the low dword must be read first. */
5002	/* Both registers clear on the read of the high dword */
5003
5004	stats->gorc += E1000_READ_REG(hw, E1000_GORCL) +
5005	    ((u64)E1000_READ_REG(hw, E1000_GORCH) << 32);
5006	stats->gotc += E1000_READ_REG(hw, E1000_GOTCL) +
5007	    ((u64)E1000_READ_REG(hw, E1000_GOTCH) << 32);
5008
5009	stats->rnbc += E1000_READ_REG(hw, E1000_RNBC);
5010	stats->ruc += E1000_READ_REG(hw, E1000_RUC);
5011	stats->rfc += E1000_READ_REG(hw, E1000_RFC);
5012	stats->roc += E1000_READ_REG(hw, E1000_ROC);
5013	stats->rjc += E1000_READ_REG(hw, E1000_RJC);
5014
5015	stats->tor += E1000_READ_REG(hw, E1000_TORH);
5016	stats->tot += E1000_READ_REG(hw, E1000_TOTH);
5017
5018	stats->tpr += E1000_READ_REG(hw, E1000_TPR);
5019	stats->tpt += E1000_READ_REG(hw, E1000_TPT);
5020	stats->ptc64 += E1000_READ_REG(hw, E1000_PTC64);
5021	stats->ptc127 += E1000_READ_REG(hw, E1000_PTC127);
5022	stats->ptc255 += E1000_READ_REG(hw, E1000_PTC255);
5023	stats->ptc511 += E1000_READ_REG(hw, E1000_PTC511);
5024	stats->ptc1023 += E1000_READ_REG(hw, E1000_PTC1023);
5025	stats->ptc1522 += E1000_READ_REG(hw, E1000_PTC1522);
5026	stats->mptc += E1000_READ_REG(hw, E1000_MPTC);
5027	stats->bptc += E1000_READ_REG(hw, E1000_BPTC);
5028
5029	/* Interrupt Counts */
5030
5031	stats->iac += E1000_READ_REG(hw, E1000_IAC);
5032	stats->icrxptc += E1000_READ_REG(hw, E1000_ICRXPTC);
5033	stats->icrxatc += E1000_READ_REG(hw, E1000_ICRXATC);
5034	stats->ictxptc += E1000_READ_REG(hw, E1000_ICTXPTC);
5035	stats->ictxatc += E1000_READ_REG(hw, E1000_ICTXATC);
5036	stats->ictxqec += E1000_READ_REG(hw, E1000_ICTXQEC);
5037	stats->ictxqmtc += E1000_READ_REG(hw, E1000_ICTXQMTC);
5038	stats->icrxdmtc += E1000_READ_REG(hw, E1000_ICRXDMTC);
5039	stats->icrxoc += E1000_READ_REG(hw, E1000_ICRXOC);
5040
5041	/* Host to Card Statistics */
5042
5043	stats->cbtmpc += E1000_READ_REG(hw, E1000_CBTMPC);
5044	stats->htdpmc += E1000_READ_REG(hw, E1000_HTDPMC);
5045	stats->cbrdpc += E1000_READ_REG(hw, E1000_CBRDPC);
5046	stats->cbrmpc += E1000_READ_REG(hw, E1000_CBRMPC);
5047	stats->rpthc += E1000_READ_REG(hw, E1000_RPTHC);
5048	stats->hgptc += E1000_READ_REG(hw, E1000_HGPTC);
5049	stats->htcbdpc += E1000_READ_REG(hw, E1000_HTCBDPC);
5050	stats->hgorc += (E1000_READ_REG(hw, E1000_HGORCL) +
5051	    ((u64)E1000_READ_REG(hw, E1000_HGORCH) << 32));
5052	stats->hgotc += (E1000_READ_REG(hw, E1000_HGOTCL) +
5053	    ((u64)E1000_READ_REG(hw, E1000_HGOTCH) << 32));
5054	stats->lenerrs += E1000_READ_REG(hw, E1000_LENERRS);
5055	stats->scvpc += E1000_READ_REG(hw, E1000_SCVPC);
5056	stats->hrmpc += E1000_READ_REG(hw, E1000_HRMPC);
5057
5058	stats->algnerrc += E1000_READ_REG(hw, E1000_ALGNERRC);
5059	stats->rxerrc += E1000_READ_REG(hw, E1000_RXERRC);
5060	stats->tncrs += E1000_READ_REG(hw, E1000_TNCRS);
5061	stats->cexterr += E1000_READ_REG(hw, E1000_CEXTERR);
5062	stats->tsctc += E1000_READ_REG(hw, E1000_TSCTC);
5063	stats->tsctfc += E1000_READ_REG(hw, E1000_TSCTFC);
5064
5065	ifp = adapter->ifp;
5066	ifp->if_collisions = stats->colc;
5067
5068	/* Rx Errors */
5069	ifp->if_ierrors = adapter->dropped_pkts + stats->rxerrc +
5070	    stats->crcerrs + stats->algnerrc +
5071	    stats->ruc + stats->roc + stats->mpc + stats->cexterr;
5072
5073	/* Tx Errors */
5074	ifp->if_oerrors = stats->ecol +
5075	    stats->latecol + adapter->watchdog_events;
5076
5077	/* Driver specific counters */
5078	adapter->device_control = E1000_READ_REG(hw, E1000_CTRL);
5079	adapter->rx_control = E1000_READ_REG(hw, E1000_RCTL);
5080	adapter->int_mask = E1000_READ_REG(hw, E1000_IMS);
5081	adapter->eint_mask = E1000_READ_REG(hw, E1000_EIMS);
5082	adapter->packet_buf_alloc_tx =
5083	    ((E1000_READ_REG(hw, E1000_PBA) & 0xffff0000) >> 16);
5084	adapter->packet_buf_alloc_rx =
5085	    (E1000_READ_REG(hw, E1000_PBA) & 0xffff);
5086}
5087
5088
5089/**********************************************************************
5090 *
5091 *  Initialize the VF board statistics counters.
5092 *
5093 **********************************************************************/
5094static void
5095igb_vf_init_stats(struct adapter *adapter)
5096{
5097        struct e1000_hw *hw = &adapter->hw;
5098	struct e1000_vf_stats	*stats;
5099
5100	stats = (struct e1000_vf_stats	*)adapter->stats;
5101	if (stats == NULL)
5102		return;
5103        stats->last_gprc = E1000_READ_REG(hw, E1000_VFGPRC);
5104        stats->last_gorc = E1000_READ_REG(hw, E1000_VFGORC);
5105        stats->last_gptc = E1000_READ_REG(hw, E1000_VFGPTC);
5106        stats->last_gotc = E1000_READ_REG(hw, E1000_VFGOTC);
5107        stats->last_mprc = E1000_READ_REG(hw, E1000_VFMPRC);
5108}
5109
5110/**********************************************************************
5111 *
5112 *  Update the VF board statistics counters.
5113 *
5114 **********************************************************************/
5115static void
5116igb_update_vf_stats_counters(struct adapter *adapter)
5117{
5118	struct e1000_hw *hw = &adapter->hw;
5119	struct e1000_vf_stats	*stats;
5120
5121	if (adapter->link_speed == 0)
5122		return;
5123
5124	stats = (struct e1000_vf_stats	*)adapter->stats;
5125
5126	UPDATE_VF_REG(E1000_VFGPRC,
5127	    stats->last_gprc, stats->gprc);
5128	UPDATE_VF_REG(E1000_VFGORC,
5129	    stats->last_gorc, stats->gorc);
5130	UPDATE_VF_REG(E1000_VFGPTC,
5131	    stats->last_gptc, stats->gptc);
5132	UPDATE_VF_REG(E1000_VFGOTC,
5133	    stats->last_gotc, stats->gotc);
5134	UPDATE_VF_REG(E1000_VFMPRC,
5135	    stats->last_mprc, stats->mprc);
5136}
5137
5138/* Export a single 32-bit register via a read-only sysctl. */
5139static int
5140igb_sysctl_reg_handler(SYSCTL_HANDLER_ARGS)
5141{
5142	struct adapter *adapter;
5143	u_int val;
5144
5145	adapter = oidp->oid_arg1;
5146	val = E1000_READ_REG(&adapter->hw, oidp->oid_arg2);
5147	return (sysctl_handle_int(oidp, &val, 0, req));
5148}
5149
5150/*
5151**  Tuneable interrupt rate handler
5152*/
5153static int
5154igb_sysctl_interrupt_rate_handler(SYSCTL_HANDLER_ARGS)
5155{
5156	struct igb_queue	*que = ((struct igb_queue *)oidp->oid_arg1);
5157	int			error;
5158	u32			reg, usec, rate;
5159
5160	reg = E1000_READ_REG(&que->adapter->hw, E1000_EITR(que->msix));
5161	usec = ((reg & 0x7FFC) >> 2);
5162	if (usec > 0)
5163		rate = 1000000 / usec;
5164	else
5165		rate = 0;
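	/*
	** For example (register value illustrative): an EITR interval
	** field of 125 microseconds reports a rate of 8000 interrupts
	** per second through this sysctl.
	*/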
5166	error = sysctl_handle_int(oidp, &rate, 0, req);
5167	if (error || !req->newptr)
5168		return error;
5169	return 0;
5170}
5171
5172/*
5173 * Add sysctl variables, one per statistic, to the system.
5174 */
5175static void
5176igb_add_hw_stats(struct adapter *adapter)
5177{
5178	device_t dev = adapter->dev;
5179
5180	struct tx_ring *txr = adapter->tx_rings;
5181	struct rx_ring *rxr = adapter->rx_rings;
5182
5183	struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev);
5184	struct sysctl_oid *tree = device_get_sysctl_tree(dev);
5185	struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree);
5186	struct e1000_hw_stats *stats = adapter->stats;
5187
5188	struct sysctl_oid *stat_node, *queue_node, *int_node, *host_node;
5189	struct sysctl_oid_list *stat_list, *queue_list, *int_list, *host_list;
5190
5191#define QUEUE_NAME_LEN 32
5192	char namebuf[QUEUE_NAME_LEN];
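	/*
	** Everything added below hangs off the device's sysctl tree, so
	** (unit number illustrative) the values appear to userland as
	** e.g. dev.igb.0.dropped, dev.igb.0.queue0.rx_packets and
	** dev.igb.0.mac_stats.crc_errs via sysctl(8).
	*/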
5193
5194	/* Driver Statistics */
5195	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "link_irq",
5196			CTLFLAG_RD, &adapter->link_irq, 0,
5197			"Link MSIX IRQ Handled");
5198	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "dropped",
5199			CTLFLAG_RD, &adapter->dropped_pkts,
5200			"Driver dropped packets");
5201	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_dma_fail",
5202			CTLFLAG_RD, &adapter->no_tx_dma_setup,
5203			"Driver tx dma failure in xmit");
5204	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_overruns",
5205			CTLFLAG_RD, &adapter->rx_overruns,
5206			"RX overruns");
5207	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "watchdog_timeouts",
5208			CTLFLAG_RD, &adapter->watchdog_events,
5209			"Watchdog timeouts");
5210
5211	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "device_control",
5212			CTLFLAG_RD, &adapter->device_control,
5213			"Device Control Register");
5214	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_control",
5215			CTLFLAG_RD, &adapter->rx_control,
5216			"Receiver Control Register");
5217	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "interrupt_mask",
5218			CTLFLAG_RD, &adapter->int_mask,
5219			"Interrupt Mask");
5220	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "extended_int_mask",
5221			CTLFLAG_RD, &adapter->eint_mask,
5222			"Extended Interrupt Mask");
5223	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_buf_alloc",
5224			CTLFLAG_RD, &adapter->packet_buf_alloc_tx,
5225			"Transmit Buffer Packet Allocation");
5226	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_buf_alloc",
5227			CTLFLAG_RD, &adapter->packet_buf_alloc_rx,
5228			"Receive Buffer Packet Allocation");
5229	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_high_water",
5230			CTLFLAG_RD, &adapter->hw.fc.high_water, 0,
5231			"Flow Control High Watermark");
5232	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_low_water",
5233			CTLFLAG_RD, &adapter->hw.fc.low_water, 0,
5234			"Flow Control Low Watermark");
5235
5236	for (int i = 0; i < adapter->num_queues; i++, rxr++, txr++) {
5237		struct lro_ctrl *lro = &rxr->lro;
5238
5239		snprintf(namebuf, QUEUE_NAME_LEN, "queue%d", i);
5240		queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
5241					    CTLFLAG_RD, NULL, "Queue Name");
5242		queue_list = SYSCTL_CHILDREN(queue_node);
5243
5244		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "interrupt_rate",
5245				CTLFLAG_RD, &adapter->queues[i],
5246				sizeof(&adapter->queues[i]),
5247				igb_sysctl_interrupt_rate_handler,
5248				"IU", "Interrupt Rate");
5249
5250		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_head",
5251				CTLFLAG_RD, adapter, E1000_TDH(txr->me),
5252				igb_sysctl_reg_handler, "IU",
5253 				"Transmit Descriptor Head");
5254		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_tail",
5255				CTLFLAG_RD, adapter, E1000_TDT(txr->me),
5256				igb_sysctl_reg_handler, "IU",
5257 				"Transmit Descriptor Tail");
5258		SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "no_desc_avail",
5259				CTLFLAG_RD, &txr->no_desc_avail,
5260				"Queue No Descriptor Available");
5261		SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "tx_packets",
5262				CTLFLAG_RD, &txr->tx_packets,
5263				"Queue Packets Transmitted");
5264
5265		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_head",
5266				CTLFLAG_RD, adapter, E1000_RDH(rxr->me),
5267				igb_sysctl_reg_handler, "IU",
5268				"Receive Descriptor Head");
5269		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_tail",
5270				CTLFLAG_RD, adapter, E1000_RDT(rxr->me),
5271				igb_sysctl_reg_handler, "IU",
5272				"Receive Descriptor Tail");
5273		SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "rx_packets",
5274				CTLFLAG_RD, &rxr->rx_packets,
5275				"Queue Packets Received");
5276		SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "rx_bytes",
5277				CTLFLAG_RD, &rxr->rx_bytes,
5278				"Queue Bytes Received");
5279		SYSCTL_ADD_UINT(ctx, queue_list, OID_AUTO, "lro_queued",
5280				CTLFLAG_RD, &lro->lro_queued, 0,
5281				"LRO Queued");
5282		SYSCTL_ADD_UINT(ctx, queue_list, OID_AUTO, "lro_flushed",
5283				CTLFLAG_RD, &lro->lro_flushed, 0,
5284				"LRO Flushed");
5285	}
5286
5287	/* MAC stats get their own sub node */
5288
5289	stat_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "mac_stats",
5290				    CTLFLAG_RD, NULL, "MAC Statistics");
5291	stat_list = SYSCTL_CHILDREN(stat_node);
5292
5293	/*
5294	** VF adapter has a very limited set of stats
5295	** since it's not managing the metal, so to speak.
5296	*/
5297	if (adapter->vf_ifp) {
5298		SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd",
5299				CTLFLAG_RD, &stats->gprc,
5300				"Good Packets Received");
5301		SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
5302				CTLFLAG_RD, &stats->gptc,
5303				"Good Packets Transmitted");
5304		SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd",
5305				CTLFLAG_RD, &stats->gorc,
5306				"Good Octets Received");
5307		SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_txd",
5308				CTLFLAG_RD, &stats->gotc,
5309				"Good Octets Transmitted");
5310		SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd",
5311				CTLFLAG_RD, &stats->mprc,
5312				"Multicast Packets Received");
5313		return;
5314	}
5315
5316	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "excess_coll",
5317			CTLFLAG_RD, &stats->ecol,
5318			"Excessive collisions");
5319	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "single_coll",
5320			CTLFLAG_RD, &stats->scc,
5321			"Single collisions");
5322	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "multiple_coll",
5323			CTLFLAG_RD, &stats->mcc,
5324			"Multiple collisions");
5325	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "late_coll",
5326			CTLFLAG_RD, &stats->latecol,
5327			"Late collisions");
5328	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "collision_count",
5329			CTLFLAG_RD, &stats->colc,
5330			"Collision Count");
5331	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "symbol_errors",
5332			CTLFLAG_RD, &stats->symerrs,
5333			"Symbol Errors");
5334	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "sequence_errors",
5335			CTLFLAG_RD, &stats->sec,
5336			"Sequence Errors");
5337	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "defer_count",
5338			CTLFLAG_RD, &stats->dc,
5339			"Defer Count");
5340	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "missed_packets",
5341			CTLFLAG_RD, &stats->mpc,
5342			"Missed Packets");
5343	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_no_buff",
5344			CTLFLAG_RD, &stats->rnbc,
5345			"Receive No Buffers");
5346	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_undersize",
5347			CTLFLAG_RD, &stats->ruc,
5348			"Receive Undersize");
5349	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_fragmented",
5350			CTLFLAG_RD, &stats->rfc,
5351			"Fragmented Packets Received");
5352	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_oversize",
5353			CTLFLAG_RD, &stats->roc,
5354			"Oversized Packets Received");
5355	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_jabber",
5356			CTLFLAG_RD, &stats->rjc,
5357			"Received Jabber");
5358	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_errs",
5359			CTLFLAG_RD, &stats->rxerrc,
5360			"Receive Errors");
5361	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "crc_errs",
5362			CTLFLAG_RD, &stats->crcerrs,
5363			"CRC errors");
5364	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "alignment_errs",
5365			CTLFLAG_RD, &stats->algnerrc,
5366			"Alignment Errors");
5367	/* On 82575 these are collision counts */
5368	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "coll_ext_errs",
5369			CTLFLAG_RD, &stats->cexterr,
5370			"Collision/Carrier extension errors");
5371	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xon_recvd",
5372			CTLFLAG_RD, &stats->xonrxc,
5373			"XON Received");
5374	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xon_txd",
5375			CTLFLAG_RD, &stats->xontxc,
5376			"XON Transmitted");
5377	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xoff_recvd",
5378			CTLFLAG_RD, &stats->xoffrxc,
5379			"XOFF Received");
5380	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xoff_txd",
5381			CTLFLAG_RD, &stats->xofftxc,
5382			"XOFF Transmitted");
5383	/* Packet Reception Stats */
5384	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "total_pkts_recvd",
5385			CTLFLAG_RD, &stats->tpr,
5386			"Total Packets Received");
5387	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd",
5388			CTLFLAG_RD, &stats->gprc,
5389			"Good Packets Received");
5390	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_recvd",
5391			CTLFLAG_RD, &stats->bprc,
5392			"Broadcast Packets Received");
5393	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd",
5394			CTLFLAG_RD, &stats->mprc,
5395			"Multicast Packets Received");
5396	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_64",
5397			CTLFLAG_RD, &stats->prc64,
5398			"64 byte frames received");
5399	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_65_127",
5400			CTLFLAG_RD, &stats->prc127,
5401			"65-127 byte frames received");
5402	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_128_255",
5403			CTLFLAG_RD, &stats->prc255,
5404			"128-255 byte frames received");
5405	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_256_511",
5406			CTLFLAG_RD, &stats->prc511,
5407			"256-511 byte frames received");
5408	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_512_1023",
5409			CTLFLAG_RD, &stats->prc1023,
5410			"512-1023 byte frames received");
5411	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_1024_1522",
5412			CTLFLAG_RD, &stats->prc1522,
5413			"1024-1522 byte frames received");
5414	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd",
5415			CTLFLAG_RD, &stats->gorc,
5416			"Good Octets Received");
5417
5418	/* Packet Transmission Stats */
5419	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_txd",
5420			CTLFLAG_RD, &stats->gotc,
5421			"Good Octets Transmitted");
5422	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "total_pkts_txd",
5423			CTLFLAG_RD, &stats->tpt,
5424			"Total Packets Transmitted");
5425	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
5426			CTLFLAG_RD, &stats->gptc,
5427			"Good Packets Transmitted");
5428	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_txd",
5429			CTLFLAG_RD, &stats->bptc,
5430			"Broadcast Packets Transmitted");
5431	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_txd",
5432			CTLFLAG_RD, &stats->mptc,
5433			"Multicast Packets Transmitted");
5434	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_64",
5435			CTLFLAG_RD, &stats->ptc64,
5436			"64 byte frames transmitted");
5437	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_65_127",
5438			CTLFLAG_RD, &stats->ptc127,
5439			"65-127 byte frames transmitted");
5440	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_128_255",
5441			CTLFLAG_RD, &stats->ptc255,
5442			"128-255 byte frames transmitted");
5443	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_256_511",
5444			CTLFLAG_RD, &stats->ptc511,
5445			"256-511 byte frames transmitted");
5446	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_512_1023",
5447			CTLFLAG_RD, &stats->ptc1023,
5448			"512-1023 byte frames transmitted");
5449	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_1024_1522",
5450			CTLFLAG_RD, &stats->ptc1522,
5451			"1024-1522 byte frames transmitted");
5452	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tso_txd",
5453			CTLFLAG_RD, &stats->tsctc,
5454			"TSO Contexts Transmitted");
5455	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tso_ctx_fail",
5456			CTLFLAG_RD, &stats->tsctfc,
5457			"TSO Contexts Failed");
5458
5459
5460	/* Interrupt Stats */
5461
5462	int_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "interrupts",
5463				    CTLFLAG_RD, NULL, "Interrupt Statistics");
5464	int_list = SYSCTL_CHILDREN(int_node);
5465
5466	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "asserts",
5467			CTLFLAG_RD, &stats->iac,
5468			"Interrupt Assertion Count");
5469
5470	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_pkt_timer",
5471			CTLFLAG_RD, &stats->icrxptc,
5472			"Interrupt Cause Rx Pkt Timer Expire Count");
5473
5474	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_abs_timer",
5475			CTLFLAG_RD, &stats->icrxatc,
5476			"Interrupt Cause Rx Abs Timer Expire Count");
5477
5478	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_pkt_timer",
5479			CTLFLAG_RD, &stats->ictxptc,
5480			"Interrupt Cause Tx Pkt Timer Expire Count");
5481
5482	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_abs_timer",
5483			CTLFLAG_RD, &stats->ictxatc,
5484			"Interrupt Cause Tx Abs Timer Expire Count");
5485
5486	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_queue_empty",
5487			CTLFLAG_RD, &stats->ictxqec,
5488			"Interrupt Cause Tx Queue Empty Count");
5489
5490	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_queue_min_thresh",
5491			CTLFLAG_RD, &stats->ictxqmtc,
5492			"Interrupt Cause Tx Queue Min Thresh Count");
5493
5494	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_desc_min_thresh",
5495			CTLFLAG_RD, &stats->icrxdmtc,
5496			"Interrupt Cause Rx Desc Min Thresh Count");
5497
5498	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_overrun",
5499			CTLFLAG_RD, &stats->icrxoc,
5500			"Interrupt Cause Receiver Overrun Count");
5501
5502	/* Host to Card Stats */
5503
5504	host_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "host",
5505				    CTLFLAG_RD, NULL,
5506				    "Host to Card Statistics");
5507
5508	host_list = SYSCTL_CHILDREN(host_node);
5509
5510	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_tx_pkt",
5511			CTLFLAG_RD, &stats->cbtmpc,
5512			"Circuit Breaker Tx Packet Count");
5513
5514	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "host_tx_pkt_discard",
5515			CTLFLAG_RD, &stats->htdpmc,
5516			"Host Transmit Discarded Packets");
5517
5518	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "rx_pkt",
5519			CTLFLAG_RD, &stats->rpthc,
5520			"Rx Packets To Host");
5521
5522	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_rx_pkts",
5523			CTLFLAG_RD, &stats->cbrmpc,
5524			"Circuit Breaker Rx Packet Count");
5525
5526	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_rx_pkt_drop",
5527			CTLFLAG_RD, &stats->cbrdpc,
5528			"Circuit Breaker Rx Dropped Count");
5529
5530	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "tx_good_pkt",
5531			CTLFLAG_RD, &stats->hgptc,
5532			"Host Good Packets Tx Count");
5533
5534	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_tx_pkt_drop",
5535			CTLFLAG_RD, &stats->htcbdpc,
5536			"Host Tx Circuit Breaker Dropped Count");
5537
5538	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "rx_good_bytes",
5539			CTLFLAG_RD, &stats->hgorc,
5540			"Host Good Octets Received Count");
5541
5542	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "tx_good_bytes",
5543			CTLFLAG_RD, &stats->hgotc,
5544			"Host Good Octets Transmit Count");
5545
5546	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "length_errors",
5547			CTLFLAG_RD, &stats->lenerrs,
5548			"Length Errors");
5549
5550	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "serdes_violation_pkt",
5551			CTLFLAG_RD, &stats->scvpc,
5552			"SerDes/SGMII Code Violation Pkt Count");
5553
5554	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "header_redir_missed",
5555			CTLFLAG_RD, &stats->hrmpc,
5556			"Header Redirection Missed Packet Count");
5557}
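
/*
** Usage note: every node registered above is a plain read-only sysctl
** hanging off the device's sysctl tree, so the counters can be read from
** userland with sysctl(8) or sysctlbyname(3).  A minimal userland sketch
** follows; the unit number (igb0) and the OID path are assumptions for
** illustration and must match the attached device.
**
**	#include <sys/types.h>
**	#include <sys/sysctl.h>
**	#include <stdint.h>
**	#include <stdio.h>
**
**	int
**	main(void)
**	{
**		uint64_t gprc;
**		size_t len = sizeof(gprc);
**
**		// 64-bit counter registered above as "good_pkts_recvd"
**		if (sysctlbyname("dev.igb.0.mac_stats.good_pkts_recvd",
**		    &gprc, &len, NULL, 0) == -1)
**			return (1);
**		printf("good packets received: %ju\n", (uintmax_t)gprc);
**		return (0);
**	}
*/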
5558
5559
5560/**********************************************************************
5561 *
5562 *  This routine provides a way to dump out the adapter EEPROM,
5563 *  often a useful debug/service tool. Only the first 32 words are
5564 *  dumped; the data that matters lives within that range.
5565 *
5566 **********************************************************************/
5567static int
5568igb_sysctl_nvm_info(SYSCTL_HANDLER_ARGS)
5569{
5570	struct adapter *adapter;
5571	int error;
5572	int result;
5573
5574	result = -1;
5575	error = sysctl_handle_int(oidp, &result, 0, req);
5576
5577	if (error || !req->newptr)
5578		return (error);
5579
5580	/*
5581	 * This value will cause a hex dump of the
5582	 * first 32 16-bit words of the EEPROM to
5583	 * the screen.
5584	 */
5585	if (result == 1) {
5586		adapter = (struct adapter *)arg1;
5587		igb_print_nvm_info(adapter);
5588	}
5589
5590	return (error);
5591}
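
/*
** Usage note: writing 1 to the sysctl backed by this handler triggers the
** EEPROM dump below (printed to the console/dmesg).  A minimal sketch,
** assuming the handler is attached under the device tree with the node
** name "nvm" for unit igb0:
**
**	#include <sys/types.h>
**	#include <sys/sysctl.h>
**
**	static int
**	dump_igb_nvm(void)
**	{
**		int one = 1;	// any value other than 1 is ignored
**
**		return (sysctlbyname("dev.igb.0.nvm", NULL, NULL,
**		    &one, sizeof(one)));
**	}
*/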
5592
5593static void
5594igb_print_nvm_info(struct adapter *adapter)
5595{
5596	u16	eeprom_data;
5597	int	i, j, row = 0;
5598
5599	/* It's a bit crude, but it gets the job done */
5600	printf("\nInterface EEPROM Dump:\n");
5601	printf("Offset\n0x0000  ");
5602	for (i = 0, j = 0; i < 32; i++, j++) {
5603		if (j == 8) { /* Make the offset block */
5604			j = 0; ++row;
5605			printf("\n0x00%x0  ",row);
5606		}
5607		e1000_read_nvm(&adapter->hw, i, 1, &eeprom_data);
5608		printf("%04x ", eeprom_data);
5609	}
5610	printf("\n");
5611}
5612
5613static void
5614igb_set_sysctl_value(struct adapter *adapter, const char *name,
5615	const char *description, int *limit, int value)
5616{
5617	*limit = value;
5618	SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
5619	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
5620	    OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW, limit, value, description);
5621}
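
/*
** This helper just seeds a driver limit and exposes it as a read/write
** integer sysctl under the device tree.  A hypothetical call for
** illustration only (the node name, description and field here are
** assumptions, not necessarily what the attach path uses):
**
**	igb_set_sysctl_value(adapter, "rx_processing_limit",
**	    "max number of rx packets to process",
**	    &adapter->rx_process_limit, 100);
*/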
5622
5623/*
5624** Set flow control using sysctl:
5625** Flow control values:
5626** 	0 - off
5627**	1 - rx pause
5628**	2 - tx pause
5629**	3 - full
5630*/
5631static int
5632igb_set_flowcntl(SYSCTL_HANDLER_ARGS)
5633{
5634	int error;
5635	struct adapter *adapter = (struct adapter *) arg1;
5636
5637	error = sysctl_handle_int(oidp, &adapter->fc, 0, req);
5638
5639	if ((error) || (req->newptr == NULL))
5640		return (error);
5641
5642	switch (adapter->fc) {
5643		case e1000_fc_rx_pause:
5644		case e1000_fc_tx_pause:
5645		case e1000_fc_full:
5646			adapter->hw.fc.requested_mode = adapter->fc;
5647			break;
5648		case e1000_fc_none:
5649		default:
5650			adapter->hw.fc.requested_mode = e1000_fc_none;
5651	}
5652
5653	adapter->hw.fc.current_mode = adapter->hw.fc.requested_mode;
5654	e1000_force_mac_fc(&adapter->hw);
5655	return (error);
5656}
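
/*
** A minimal sketch of driving this handler from userland, assuming the
** sysctl is attached as "fc" under the device tree for unit igb0.  The
** value uses the encoding documented above (0 off, 1 rx pause, 2 tx
** pause, 3 full):
**
**	#include <sys/types.h>
**	#include <sys/sysctl.h>
**
**	static int
**	igb_request_full_fc(void)
**	{
**		int fc = 3;	// e1000_fc_full
**
**		return (sysctlbyname("dev.igb.0.fc", NULL, NULL,
**		    &fc, sizeof(fc)));
**	}
*/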
5657
5658/*
5659** Manage DMA Coalesce:
5660** Control values:
5661** 	0/1 - off/on
5662**	Legal timer values are:
5663**	250, 500, and 1000-10000 in increments of 1000
5664*/
5665static int
5666igb_sysctl_dmac(SYSCTL_HANDLER_ARGS)
5667{
5668	struct adapter *adapter = (struct adapter *) arg1;
5669	int		error;
5670
5671	error = sysctl_handle_int(oidp, &adapter->dmac, 0, req);
5672
5673	if ((error) || (req->newptr == NULL))
5674		return (error);
5675
5676	switch (adapter->dmac) {
5677		case 0:
5678			/* Disabling */
5679			break;
5680		case 1: /* Just enable and use default */
5681			adapter->dmac = 1000;
5682			break;
5683		case 250:
5684		case 500:
5685		case 1000:
5686		case 2000:
5687		case 3000:
5688		case 4000:
5689		case 5000:
5690		case 6000:
5691		case 7000:
5692		case 8000:
5693		case 9000:
5694		case 10000:
5695			/* Legal values - allow */
5696			break;
5697		default:
5698			/* Do nothing, illegal value */
5699			adapter->dmac = 0;
5700			return (error);
5701	}
5702	/* Reinit the interface */
5703	igb_init(adapter);
5704	return (error);
5705}
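
/*
** Likewise for DMA coalescing: writing 1 selects the default timer of
** 1000, one of the legal values is used as-is, and anything else leaves
** the feature off.  A minimal userland sketch, assuming the sysctl is
** attached as "dmac" for unit igb0:
**
**	#include <sys/types.h>
**	#include <sys/sysctl.h>
**
**	static int
**	igb_enable_dmac_default(void)
**	{
**		int dmac = 1;	// handler promotes this to 1000
**
**		return (sysctlbyname("dev.igb.0.dmac", NULL, NULL,
**		    &dmac, sizeof(dmac)));
**	}
*/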
5706