if_igb.c revision 235210
1/******************************************************************************
2
3  Copyright (c) 2001-2011, Intel Corporation
4  All rights reserved.
5
6  Redistribution and use in source and binary forms, with or without
7  modification, are permitted provided that the following conditions are met:
8
9   1. Redistributions of source code must retain the above copyright notice,
10      this list of conditions and the following disclaimer.
11
12   2. Redistributions in binary form must reproduce the above copyright
13      notice, this list of conditions and the following disclaimer in the
14      documentation and/or other materials provided with the distribution.
15
16   3. Neither the name of the Intel Corporation nor the names of its
17      contributors may be used to endorse or promote products derived from
18      this software without specific prior written permission.
19
20  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
24  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30  POSSIBILITY OF SUCH DAMAGE.
31
32******************************************************************************/
33/*$FreeBSD: head/sys/dev/e1000/if_igb.c 235210 2012-05-10 00:00:28Z sbruno $*/
34
35
36#ifdef HAVE_KERNEL_OPTION_HEADERS
37#include "opt_device_polling.h"
38#include "opt_inet.h"
39#include "opt_inet6.h"
40#include "opt_altq.h"
41#endif
42
43#include <sys/param.h>
44#include <sys/systm.h>
45#if __FreeBSD_version >= 800000
46#include <sys/buf_ring.h>
47#endif
48#include <sys/bus.h>
49#include <sys/endian.h>
50#include <sys/kernel.h>
51#include <sys/kthread.h>
52#include <sys/malloc.h>
53#include <sys/mbuf.h>
54#include <sys/module.h>
55#include <sys/rman.h>
56#include <sys/socket.h>
57#include <sys/sockio.h>
58#include <sys/sysctl.h>
59#include <sys/taskqueue.h>
60#include <sys/eventhandler.h>
61#include <sys/pcpu.h>
62#include <sys/smp.h>
63#include <machine/smp.h>
64#include <machine/bus.h>
65#include <machine/resource.h>
66
67#include <net/bpf.h>
68#include <net/ethernet.h>
69#include <net/if.h>
70#include <net/if_arp.h>
71#include <net/if_dl.h>
72#include <net/if_media.h>
73
74#include <net/if_types.h>
75#include <net/if_vlan_var.h>
76
77#include <netinet/in_systm.h>
78#include <netinet/in.h>
79#include <netinet/if_ether.h>
80#include <netinet/ip.h>
81#include <netinet/ip6.h>
82#include <netinet/tcp.h>
83#include <netinet/tcp_lro.h>
84#include <netinet/udp.h>
85
86#include <machine/in_cksum.h>
87#include <dev/led/led.h>
88#include <dev/pci/pcivar.h>
89#include <dev/pci/pcireg.h>
90
91#include "e1000_api.h"
92#include "e1000_82575.h"
93#include "if_igb.h"
94
95/*********************************************************************
96 *  Set this to one to display debug statistics
97 *********************************************************************/
98int	igb_display_debug_stats = 0;
99
100/*********************************************************************
101 *  Driver version:
102 *********************************************************************/
103char igb_driver_version[] = "version - 2.3.1";
104
105
106/*********************************************************************
107 *  PCI Device ID Table
108 *
109 *  Used by probe to select devices to load on
110 *  Last field stores an index into e1000_strings
111 *  Last entry must be all 0s
112 *
113 *  { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
114 *********************************************************************/
115
116static igb_vendor_info_t igb_vendor_info_array[] =
117{
118	{ 0x8086, E1000_DEV_ID_82575EB_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
119	{ 0x8086, E1000_DEV_ID_82575EB_FIBER_SERDES,
120						PCI_ANY_ID, PCI_ANY_ID, 0},
121	{ 0x8086, E1000_DEV_ID_82575GB_QUAD_COPPER,
122						PCI_ANY_ID, PCI_ANY_ID, 0},
123	{ 0x8086, E1000_DEV_ID_82576,		PCI_ANY_ID, PCI_ANY_ID, 0},
124	{ 0x8086, E1000_DEV_ID_82576_NS,	PCI_ANY_ID, PCI_ANY_ID, 0},
125	{ 0x8086, E1000_DEV_ID_82576_NS_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
126	{ 0x8086, E1000_DEV_ID_82576_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
127	{ 0x8086, E1000_DEV_ID_82576_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
128	{ 0x8086, E1000_DEV_ID_82576_SERDES_QUAD,
129						PCI_ANY_ID, PCI_ANY_ID, 0},
130	{ 0x8086, E1000_DEV_ID_82576_QUAD_COPPER,
131						PCI_ANY_ID, PCI_ANY_ID, 0},
132	{ 0x8086, E1000_DEV_ID_82576_QUAD_COPPER_ET2,
133						PCI_ANY_ID, PCI_ANY_ID, 0},
134	{ 0x8086, E1000_DEV_ID_82576_VF,	PCI_ANY_ID, PCI_ANY_ID, 0},
135	{ 0x8086, E1000_DEV_ID_82580_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
136	{ 0x8086, E1000_DEV_ID_82580_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
137	{ 0x8086, E1000_DEV_ID_82580_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
138	{ 0x8086, E1000_DEV_ID_82580_SGMII,	PCI_ANY_ID, PCI_ANY_ID, 0},
139	{ 0x8086, E1000_DEV_ID_82580_COPPER_DUAL,
140						PCI_ANY_ID, PCI_ANY_ID, 0},
141	{ 0x8086, E1000_DEV_ID_82580_QUAD_FIBER,
142						PCI_ANY_ID, PCI_ANY_ID, 0},
143	{ 0x8086, E1000_DEV_ID_DH89XXCC_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
144	{ 0x8086, E1000_DEV_ID_DH89XXCC_SGMII,	PCI_ANY_ID, PCI_ANY_ID, 0},
145	{ 0x8086, E1000_DEV_ID_DH89XXCC_SFP,	PCI_ANY_ID, PCI_ANY_ID, 0},
146	{ 0x8086, E1000_DEV_ID_DH89XXCC_BACKPLANE,
147						PCI_ANY_ID, PCI_ANY_ID, 0},
148	{ 0x8086, E1000_DEV_ID_I350_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
149	{ 0x8086, E1000_DEV_ID_I350_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
150	{ 0x8086, E1000_DEV_ID_I350_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
151	{ 0x8086, E1000_DEV_ID_I350_SGMII,	PCI_ANY_ID, PCI_ANY_ID, 0},
152	{ 0x8086, E1000_DEV_ID_I350_VF,		PCI_ANY_ID, PCI_ANY_ID, 0},
153	/* required last entry */
154	{ 0, 0, 0, 0, 0}
155};
156
157/*********************************************************************
158 *  Table of branding strings for all supported NICs.
159 *********************************************************************/
160
161static char *igb_strings[] = {
162	"Intel(R) PRO/1000 Network Connection"
163};
164
165/*********************************************************************
166 *  Function prototypes
167 *********************************************************************/
168static int	igb_probe(device_t);
169static int	igb_attach(device_t);
170static int	igb_detach(device_t);
171static int	igb_shutdown(device_t);
172static int	igb_suspend(device_t);
173static int	igb_resume(device_t);
174#if __FreeBSD_version >= 800000
175static int	igb_mq_start(struct ifnet *, struct mbuf *);
176static int	igb_mq_start_locked(struct ifnet *,
177		    struct tx_ring *, struct mbuf *);
178static void	igb_qflush(struct ifnet *);
179static void	igb_deferred_mq_start(void *, int);
180#else
181static void	igb_start(struct ifnet *);
182static void	igb_start_locked(struct tx_ring *, struct ifnet *ifp);
183#endif
184static int	igb_ioctl(struct ifnet *, u_long, caddr_t);
185static void	igb_init(void *);
186static void	igb_init_locked(struct adapter *);
187static void	igb_stop(void *);
188static void	igb_media_status(struct ifnet *, struct ifmediareq *);
189static int	igb_media_change(struct ifnet *);
190static void	igb_identify_hardware(struct adapter *);
191static int	igb_allocate_pci_resources(struct adapter *);
192static int	igb_allocate_msix(struct adapter *);
193static int	igb_allocate_legacy(struct adapter *);
194static int	igb_setup_msix(struct adapter *);
195static void	igb_free_pci_resources(struct adapter *);
196static void	igb_local_timer(void *);
197static void	igb_reset(struct adapter *);
198static int	igb_setup_interface(device_t, struct adapter *);
199static int	igb_allocate_queues(struct adapter *);
200static void	igb_configure_queues(struct adapter *);
201
202static int	igb_allocate_transmit_buffers(struct tx_ring *);
203static void	igb_setup_transmit_structures(struct adapter *);
204static void	igb_setup_transmit_ring(struct tx_ring *);
205static void	igb_initialize_transmit_units(struct adapter *);
206static void	igb_free_transmit_structures(struct adapter *);
207static void	igb_free_transmit_buffers(struct tx_ring *);
208
209static int	igb_allocate_receive_buffers(struct rx_ring *);
210static int	igb_setup_receive_structures(struct adapter *);
211static int	igb_setup_receive_ring(struct rx_ring *);
212static void	igb_initialize_receive_units(struct adapter *);
213static void	igb_free_receive_structures(struct adapter *);
214static void	igb_free_receive_buffers(struct rx_ring *);
215static void	igb_free_receive_ring(struct rx_ring *);
216
217static void	igb_enable_intr(struct adapter *);
218static void	igb_disable_intr(struct adapter *);
219static void	igb_update_stats_counters(struct adapter *);
220static bool	igb_txeof(struct tx_ring *);
221
222static __inline	void igb_rx_discard(struct rx_ring *, int);
223static __inline void igb_rx_input(struct rx_ring *,
224		    struct ifnet *, struct mbuf *, u32);
225
226static bool	igb_rxeof(struct igb_queue *, int, int *);
227static void	igb_rx_checksum(u32, struct mbuf *, u32);
228static bool	igb_tx_ctx_setup(struct tx_ring *, struct mbuf *);
229static bool	igb_tso_setup(struct tx_ring *, struct mbuf *, int,
230		    struct ip *, struct tcphdr *);
231static void	igb_set_promisc(struct adapter *);
232static void	igb_disable_promisc(struct adapter *);
233static void	igb_set_multi(struct adapter *);
234static void	igb_update_link_status(struct adapter *);
235static void	igb_refresh_mbufs(struct rx_ring *, int);
236
237static void	igb_register_vlan(void *, struct ifnet *, u16);
238static void	igb_unregister_vlan(void *, struct ifnet *, u16);
239static void	igb_setup_vlan_hw_support(struct adapter *);
240
241static int	igb_xmit(struct tx_ring *, struct mbuf **);
242static int	igb_dma_malloc(struct adapter *, bus_size_t,
243		    struct igb_dma_alloc *, int);
244static void	igb_dma_free(struct adapter *, struct igb_dma_alloc *);
245static int	igb_sysctl_nvm_info(SYSCTL_HANDLER_ARGS);
246static void	igb_print_nvm_info(struct adapter *);
247static int 	igb_is_valid_ether_addr(u8 *);
248static void     igb_add_hw_stats(struct adapter *);
249
250static void	igb_vf_init_stats(struct adapter *);
251static void	igb_update_vf_stats_counters(struct adapter *);
252
253/* Management and WOL Support */
254static void	igb_init_manageability(struct adapter *);
255static void	igb_release_manageability(struct adapter *);
256static void     igb_get_hw_control(struct adapter *);
257static void     igb_release_hw_control(struct adapter *);
258static void     igb_enable_wakeup(device_t);
259static void     igb_led_func(void *, int);
260
261static int	igb_irq_fast(void *);
262static void	igb_msix_que(void *);
263static void	igb_msix_link(void *);
264static void	igb_handle_que(void *context, int pending);
265static void	igb_handle_link(void *context, int pending);
266static void	igb_handle_link_locked(struct adapter *);
267
268static void	igb_set_sysctl_value(struct adapter *, const char *,
269		    const char *, int *, int);
270static int	igb_set_flowcntl(SYSCTL_HANDLER_ARGS);
271static int	igb_sysctl_dmac(SYSCTL_HANDLER_ARGS);
272
273#ifdef DEVICE_POLLING
274static poll_handler_t igb_poll;
275#endif /* POLLING */
276
277/*********************************************************************
278 *  FreeBSD Device Interface Entry Points
279 *********************************************************************/
280
281static device_method_t igb_methods[] = {
282	/* Device interface */
283	DEVMETHOD(device_probe, igb_probe),
284	DEVMETHOD(device_attach, igb_attach),
285	DEVMETHOD(device_detach, igb_detach),
286	DEVMETHOD(device_shutdown, igb_shutdown),
287	DEVMETHOD(device_suspend, igb_suspend),
288	DEVMETHOD(device_resume, igb_resume),
289	{0, 0}
290};
291
292static driver_t igb_driver = {
293	"igb", igb_methods, sizeof(struct adapter),
294};
295
296static devclass_t igb_devclass;
297DRIVER_MODULE(igb, pci, igb_driver, igb_devclass, 0, 0);
298MODULE_DEPEND(igb, pci, 1, 1, 1);
299MODULE_DEPEND(igb, ether, 1, 1, 1);
300
301/*********************************************************************
302 *  Tunable default values.
303 *********************************************************************/
304
305static SYSCTL_NODE(_hw, OID_AUTO, igb, CTLFLAG_RD, 0, "IGB driver parameters");
306
307/* Descriptor defaults */
308static int igb_rxd = IGB_DEFAULT_RXD;
309static int igb_txd = IGB_DEFAULT_TXD;
310TUNABLE_INT("hw.igb.rxd", &igb_rxd);
311TUNABLE_INT("hw.igb.txd", &igb_txd);
312SYSCTL_INT(_hw_igb, OID_AUTO, rxd, CTLFLAG_RDTUN, &igb_rxd, 0,
313    "Number of receive descriptors per queue");
314SYSCTL_INT(_hw_igb, OID_AUTO, txd, CTLFLAG_RDTUN, &igb_txd, 0,
315    "Number of transmit descriptors per queue");
316
317/*
318** AIM: Adaptive Interrupt Moderation
319** which means that the interrupt rate
320** is varied over time based on the
321** traffic for that interrupt vector
322*/
323static int igb_enable_aim = TRUE;
324TUNABLE_INT("hw.igb.enable_aim", &igb_enable_aim);
325SYSCTL_INT(_hw_igb, OID_AUTO, enable_aim, CTLFLAG_RW, &igb_enable_aim, 0,
326    "Enable adaptive interrupt moderation");
327
328/*
329 * MSIX should be the default for best performance,
330 * but this allows it to be forced off for testing.
331 */
332static int igb_enable_msix = 1;
333TUNABLE_INT("hw.igb.enable_msix", &igb_enable_msix);
334SYSCTL_INT(_hw_igb, OID_AUTO, enable_msix, CTLFLAG_RDTUN, &igb_enable_msix, 0,
335    "Enable MSI-X interrupts");
336
337/*
338** Tuneable Interrupt rate
339*/
340static int igb_max_interrupt_rate = 8000;
341TUNABLE_INT("hw.igb.max_interrupt_rate", &igb_max_interrupt_rate);
342SYSCTL_INT(_hw_igb, OID_AUTO, max_interrupt_rate, CTLFLAG_RDTUN,
343    &igb_max_interrupt_rate, 0, "Maximum interrupts per second");
344
345/*
346** Header split causes the packet header to
347** be dma'd to a seperate mbuf from the payload.
348** this can have memory alignment benefits. But
349** another plus is that small packets often fit
350** into the header and thus use no cluster. Its
351** a very workload dependent type feature.
352*/
353static int igb_header_split = FALSE;
354TUNABLE_INT("hw.igb.hdr_split", &igb_header_split);
355SYSCTL_INT(_hw_igb, OID_AUTO, header_split, CTLFLAG_RDTUN, &igb_header_split, 0,
356    "Enable receive mbuf header split");
357
358/*
359** This will autoconfigure based on
360** the number of CPUs if left at 0.
361*/
362static int igb_num_queues = 0;
363TUNABLE_INT("hw.igb.num_queues", &igb_num_queues);
364SYSCTL_INT(_hw_igb, OID_AUTO, num_queues, CTLFLAG_RDTUN, &igb_num_queues, 0,
365    "Number of queues to configure, 0 indicates autoconfigure");
366
367/*
368** Global variable to store last used CPU when binding queues
369** to CPUs in igb_allocate_msix.  Starts at CPU_FIRST and increments when a
370** queue is bound to a cpu.
371*/
372static int igb_last_bind_cpu = -1;
373
374/* How many packets rxeof tries to clean at a time */
375static int igb_rx_process_limit = 100;
376TUNABLE_INT("hw.igb.rx_process_limit", &igb_rx_process_limit);
377SYSCTL_INT(_hw_igb, OID_AUTO, rx_process_limit, CTLFLAG_RDTUN,
378    &igb_rx_process_limit, 0,
379    "Maximum number of received packets to process at a time, -1 means unlimited");
380
381#ifdef DEV_NETMAP	/* see ixgbe.c for details */
382#include <dev/netmap/if_igb_netmap.h>
383#endif /* DEV_NETMAP */
384/*********************************************************************
385 *  Device identification routine
386 *
387 *  igb_probe determines if the driver should be loaded on
388 *  adapter based on PCI vendor/device id of the adapter.
389 *
390 *  return BUS_PROBE_DEFAULT on success, positive on failure
391 *********************************************************************/
392
393static int
394igb_probe(device_t dev)
395{
396	char		adapter_name[60];
397	uint16_t	pci_vendor_id = 0;
398	uint16_t	pci_device_id = 0;
399	uint16_t	pci_subvendor_id = 0;
400	uint16_t	pci_subdevice_id = 0;
401	igb_vendor_info_t *ent;
402
403	INIT_DEBUGOUT("igb_probe: begin");
404
405	pci_vendor_id = pci_get_vendor(dev);
406	if (pci_vendor_id != IGB_VENDOR_ID)
407		return (ENXIO);
408
409	pci_device_id = pci_get_device(dev);
410	pci_subvendor_id = pci_get_subvendor(dev);
411	pci_subdevice_id = pci_get_subdevice(dev);
412
413	ent = igb_vendor_info_array;
414	while (ent->vendor_id != 0) {
415		if ((pci_vendor_id == ent->vendor_id) &&
416		    (pci_device_id == ent->device_id) &&
417
418		    ((pci_subvendor_id == ent->subvendor_id) ||
419		    (ent->subvendor_id == PCI_ANY_ID)) &&
420
421		    ((pci_subdevice_id == ent->subdevice_id) ||
422		    (ent->subdevice_id == PCI_ANY_ID))) {
423			sprintf(adapter_name, "%s %s",
424				igb_strings[ent->index],
425				igb_driver_version);
426			device_set_desc_copy(dev, adapter_name);
427			return (BUS_PROBE_DEFAULT);
428		}
429		ent++;
430	}
431
432	return (ENXIO);
433}
434
435/*********************************************************************
436 *  Device initialization routine
437 *
438 *  The attach entry point is called when the driver is being loaded.
439 *  This routine identifies the type of hardware, allocates all resources
440 *  and initializes the hardware.
441 *
442 *  return 0 on success, positive on failure
443 *********************************************************************/
444
445static int
446igb_attach(device_t dev)
447{
448	struct adapter	*adapter;
449	int		error = 0;
450	u16		eeprom_data;
451
452	INIT_DEBUGOUT("igb_attach: begin");
453
454	if (resource_disabled("igb", device_get_unit(dev))) {
455		device_printf(dev, "Disabled by device hint\n");
456		return (ENXIO);
457	}
458
459	adapter = device_get_softc(dev);
460	adapter->dev = adapter->osdep.dev = dev;
461	IGB_CORE_LOCK_INIT(adapter, device_get_nameunit(dev));
462
463	/* SYSCTL stuff */
464	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
465	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
466	    OID_AUTO, "nvm", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
467	    igb_sysctl_nvm_info, "I", "NVM Information");
468
469	igb_set_sysctl_value(adapter, "enable_aim",
470	    "Interrupt Moderation", &adapter->enable_aim,
471	    igb_enable_aim);
472
473	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
474	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
475	    OID_AUTO, "fc", CTLTYPE_INT|CTLFLAG_RW,
476	    adapter, 0, igb_set_flowcntl, "I", "Flow Control");
477
478	callout_init_mtx(&adapter->timer, &adapter->core_mtx, 0);
479
480	/* Determine hardware and mac info */
481	igb_identify_hardware(adapter);
482
483	/* Setup PCI resources */
484	if (igb_allocate_pci_resources(adapter)) {
485		device_printf(dev, "Allocation of PCI resources failed\n");
486		error = ENXIO;
487		goto err_pci;
488	}
489
490	/* Do Shared Code initialization */
491	if (e1000_setup_init_funcs(&adapter->hw, TRUE)) {
492		device_printf(dev, "Setup of Shared code failed\n");
493		error = ENXIO;
494		goto err_pci;
495	}
496
497	e1000_get_bus_info(&adapter->hw);
498
499	/* Sysctl for limiting the amount of work done in the taskqueue */
500	igb_set_sysctl_value(adapter, "rx_processing_limit",
501	    "max number of rx packets to process",
502	    &adapter->rx_process_limit, igb_rx_process_limit);
503
504	/*
505	 * Validate number of transmit and receive descriptors. It
506	 * must not exceed hardware maximum, and must be multiple
507	 * of E1000_DBA_ALIGN.
508	 */
509	if (((igb_txd * sizeof(struct e1000_tx_desc)) % IGB_DBA_ALIGN) != 0 ||
510	    (igb_txd > IGB_MAX_TXD) || (igb_txd < IGB_MIN_TXD)) {
511		device_printf(dev, "Using %d TX descriptors instead of %d!\n",
512		    IGB_DEFAULT_TXD, igb_txd);
513		adapter->num_tx_desc = IGB_DEFAULT_TXD;
514	} else
515		adapter->num_tx_desc = igb_txd;
516	if (((igb_rxd * sizeof(struct e1000_rx_desc)) % IGB_DBA_ALIGN) != 0 ||
517	    (igb_rxd > IGB_MAX_RXD) || (igb_rxd < IGB_MIN_RXD)) {
518		device_printf(dev, "Using %d RX descriptors instead of %d!\n",
519		    IGB_DEFAULT_RXD, igb_rxd);
520		adapter->num_rx_desc = IGB_DEFAULT_RXD;
521	} else
522		adapter->num_rx_desc = igb_rxd;
523
524	adapter->hw.mac.autoneg = DO_AUTO_NEG;
525	adapter->hw.phy.autoneg_wait_to_complete = FALSE;
526	adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
527
528	/* Copper options */
529	if (adapter->hw.phy.media_type == e1000_media_type_copper) {
530		adapter->hw.phy.mdix = AUTO_ALL_MODES;
531		adapter->hw.phy.disable_polarity_correction = FALSE;
532		adapter->hw.phy.ms_type = IGB_MASTER_SLAVE;
533	}
534
535	/*
536	 * Set the frame limits assuming
537	 * standard ethernet sized frames.
538	 */
539	adapter->max_frame_size = ETHERMTU + ETHER_HDR_LEN + ETHERNET_FCS_SIZE;
540	adapter->min_frame_size = ETH_ZLEN + ETHERNET_FCS_SIZE;
541
542	/*
543	** Allocate and Setup Queues
544	*/
545	if (igb_allocate_queues(adapter)) {
546		error = ENOMEM;
547		goto err_pci;
548	}
549
550	/* Allocate the appropriate stats memory */
551	if (adapter->vf_ifp) {
552		adapter->stats =
553		    (struct e1000_vf_stats *)malloc(sizeof \
554		    (struct e1000_vf_stats), M_DEVBUF, M_NOWAIT | M_ZERO);
555		igb_vf_init_stats(adapter);
556	} else
557		adapter->stats =
558		    (struct e1000_hw_stats *)malloc(sizeof \
559		    (struct e1000_hw_stats), M_DEVBUF, M_NOWAIT | M_ZERO);
560	if (adapter->stats == NULL) {
561		device_printf(dev, "Can not allocate stats memory\n");
562		error = ENOMEM;
563		goto err_late;
564	}
565
566	/* Allocate multicast array memory. */
567	adapter->mta = malloc(sizeof(u8) * ETH_ADDR_LEN *
568	    MAX_NUM_MULTICAST_ADDRESSES, M_DEVBUF, M_NOWAIT);
569	if (adapter->mta == NULL) {
570		device_printf(dev, "Can not allocate multicast setup array\n");
571		error = ENOMEM;
572		goto err_late;
573	}
574
575	/* Some adapter-specific advanced features */
576	if (adapter->hw.mac.type >= e1000_i350) {
577		SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
578		    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
579		    OID_AUTO, "dmac", CTLTYPE_INT|CTLFLAG_RW,
580		    adapter, 0, igb_sysctl_dmac, "I", "DMA Coalesce");
581		igb_set_sysctl_value(adapter, "eee_disabled",
582		    "enable Energy Efficient Ethernet",
583		    &adapter->hw.dev_spec._82575.eee_disable,
584		    TRUE);
585		e1000_set_eee_i350(&adapter->hw);
586	}
587
588	/*
589	** Start from a known state, this is
590	** important in reading the nvm and
591	** mac from that.
592	*/
593	e1000_reset_hw(&adapter->hw);
594
595	/* Make sure we have a good EEPROM before we read from it */
596	if (e1000_validate_nvm_checksum(&adapter->hw) < 0) {
597		/*
598		** Some PCI-E parts fail the first check due to
599		** the link being in sleep state, call it again,
600		** if it fails a second time its a real issue.
601		*/
602		if (e1000_validate_nvm_checksum(&adapter->hw) < 0) {
603			device_printf(dev,
604			    "The EEPROM Checksum Is Not Valid\n");
605			error = EIO;
606			goto err_late;
607		}
608	}
609
610	/*
611	** Copy the permanent MAC address out of the EEPROM
612	*/
613	if (e1000_read_mac_addr(&adapter->hw) < 0) {
614		device_printf(dev, "EEPROM read error while reading MAC"
615		    " address\n");
616		error = EIO;
617		goto err_late;
618	}
619	/* Check its sanity */
620	if (!igb_is_valid_ether_addr(adapter->hw.mac.addr)) {
621		device_printf(dev, "Invalid MAC address\n");
622		error = EIO;
623		goto err_late;
624	}
625
626	/* Setup OS specific network interface */
627	if (igb_setup_interface(dev, adapter) != 0)
628		goto err_late;
629
630	/* Now get a good starting state */
631	igb_reset(adapter);
632
633	/* Initialize statistics */
634	igb_update_stats_counters(adapter);
635
636	adapter->hw.mac.get_link_status = 1;
637	igb_update_link_status(adapter);
638
639	/* Indicate SOL/IDER usage */
640	if (e1000_check_reset_block(&adapter->hw))
641		device_printf(dev,
642		    "PHY reset is blocked due to SOL/IDER session.\n");
643
644	/* Determine if we have to control management hardware */
645	adapter->has_manage = e1000_enable_mng_pass_thru(&adapter->hw);
646
647	/*
648	 * Setup Wake-on-Lan
649	 */
650	/* APME bit in EEPROM is mapped to WUC.APME */
651	eeprom_data = E1000_READ_REG(&adapter->hw, E1000_WUC) & E1000_WUC_APME;
652	if (eeprom_data)
653		adapter->wol = E1000_WUFC_MAG;
654
655	/* Register for VLAN events */
656	adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
657	     igb_register_vlan, adapter, EVENTHANDLER_PRI_FIRST);
658	adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
659	     igb_unregister_vlan, adapter, EVENTHANDLER_PRI_FIRST);
660
661	igb_add_hw_stats(adapter);
662
663	/* Tell the stack that the interface is not active */
664	adapter->ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
665	adapter->ifp->if_drv_flags |=  IFF_DRV_OACTIVE;
666
667	adapter->led_dev = led_create(igb_led_func, adapter,
668	    device_get_nameunit(dev));
669
670	/*
671	** Configure Interrupts
672	*/
673	if ((adapter->msix > 1) && (igb_enable_msix))
674		error = igb_allocate_msix(adapter);
675	else /* MSI or Legacy */
676		error = igb_allocate_legacy(adapter);
677	if (error)
678		goto err_late;
679
680#ifdef DEV_NETMAP
681	igb_netmap_attach(adapter);
682#endif /* DEV_NETMAP */
683	INIT_DEBUGOUT("igb_attach: end");
684
685	return (0);
686
687err_late:
688	igb_detach(dev);
689	igb_free_transmit_structures(adapter);
690	igb_free_receive_structures(adapter);
691	igb_release_hw_control(adapter);
692err_pci:
693	igb_free_pci_resources(adapter);
694	if (adapter->ifp != NULL)
695		if_free(adapter->ifp);
696	free(adapter->mta, M_DEVBUF);
697	IGB_CORE_LOCK_DESTROY(adapter);
698
699	return (error);
700}
701
702/*********************************************************************
703 *  Device removal routine
704 *
705 *  The detach entry point is called when the driver is being removed.
706 *  This routine stops the adapter and deallocates all the resources
707 *  that were allocated for driver operation.
708 *
709 *  return 0 on success, positive on failure
710 *********************************************************************/
711
712static int
713igb_detach(device_t dev)
714{
715	struct adapter	*adapter = device_get_softc(dev);
716	struct ifnet	*ifp = adapter->ifp;
717
718	INIT_DEBUGOUT("igb_detach: begin");
719
720	/* Make sure VLANS are not using driver */
721	if (adapter->ifp->if_vlantrunk != NULL) {
722		device_printf(dev,"Vlan in use, detach first\n");
723		return (EBUSY);
724	}
725
726	ether_ifdetach(adapter->ifp);
727
728	if (adapter->led_dev != NULL)
729		led_destroy(adapter->led_dev);
730
731#ifdef DEVICE_POLLING
732	if (ifp->if_capenable & IFCAP_POLLING)
733		ether_poll_deregister(ifp);
734#endif
735
736	IGB_CORE_LOCK(adapter);
737	adapter->in_detach = 1;
738	igb_stop(adapter);
739	IGB_CORE_UNLOCK(adapter);
740
741	e1000_phy_hw_reset(&adapter->hw);
742
743	/* Give control back to firmware */
744	igb_release_manageability(adapter);
745	igb_release_hw_control(adapter);
746
747	if (adapter->wol) {
748		E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
749		E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
750		igb_enable_wakeup(dev);
751	}
752
753	/* Unregister VLAN events */
754	if (adapter->vlan_attach != NULL)
755		EVENTHANDLER_DEREGISTER(vlan_config, adapter->vlan_attach);
756	if (adapter->vlan_detach != NULL)
757		EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach);
758
759	callout_drain(&adapter->timer);
760
761#ifdef DEV_NETMAP
762	netmap_detach(adapter->ifp);
763#endif /* DEV_NETMAP */
764	igb_free_pci_resources(adapter);
765	bus_generic_detach(dev);
766	if_free(ifp);
767
768	igb_free_transmit_structures(adapter);
769	igb_free_receive_structures(adapter);
770	if (adapter->mta != NULL)
771		free(adapter->mta, M_DEVBUF);
772
773	IGB_CORE_LOCK_DESTROY(adapter);
774
775	return (0);
776}
777
778/*********************************************************************
779 *
780 *  Shutdown entry point
781 *
782 **********************************************************************/
783
784static int
785igb_shutdown(device_t dev)
786{
787	return igb_suspend(dev);
788}
789
790/*
791 * Suspend/resume device methods.
792 */
793static int
794igb_suspend(device_t dev)
795{
796	struct adapter *adapter = device_get_softc(dev);
797
798	IGB_CORE_LOCK(adapter);
799
800	igb_stop(adapter);
801
802        igb_release_manageability(adapter);
803	igb_release_hw_control(adapter);
804
805        if (adapter->wol) {
806                E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
807                E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
808                igb_enable_wakeup(dev);
809        }
810
811	IGB_CORE_UNLOCK(adapter);
812
813	return bus_generic_suspend(dev);
814}
815
816static int
817igb_resume(device_t dev)
818{
819	struct adapter *adapter = device_get_softc(dev);
820	struct tx_ring	*txr = adapter->tx_rings;
821	struct ifnet *ifp = adapter->ifp;
822
823	IGB_CORE_LOCK(adapter);
824	igb_init_locked(adapter);
825	igb_init_manageability(adapter);
826
827	if ((ifp->if_flags & IFF_UP) &&
828	    (ifp->if_drv_flags & IFF_DRV_RUNNING) && adapter->link_active) {
829		for (int i = 0; i < adapter->num_queues; i++, txr++) {
830			IGB_TX_LOCK(txr);
831#if __FreeBSD_version >= 800000
832			/* Process the stack queue only if not depleted */
833			if (((txr->queue_status & IGB_QUEUE_DEPLETED) == 0) &&
834			    !drbr_empty(ifp, txr->br))
835				igb_mq_start_locked(ifp, txr, NULL);
836#else
837			if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
838				igb_start_locked(txr, ifp);
839#endif
840			IGB_TX_UNLOCK(txr);
841		}
842	}
843	IGB_CORE_UNLOCK(adapter);
844
845	return bus_generic_resume(dev);
846}
847
848
849/*********************************************************************
850 *  Transmit entry point
851 *
852 *  igb_start is called by the stack to initiate a transmit.
853 *  The driver will remain in this routine as long as there are
854 *  packets to transmit and transmit resources are available.
855 *  In case resources are not available stack is notified and
856 *  the packet is requeued.
857 **********************************************************************/
858
859static void
860igb_start_locked(struct tx_ring *txr, struct ifnet *ifp)
861{
862	struct adapter	*adapter = ifp->if_softc;
863	struct mbuf	*m_head;
864
865	IGB_TX_LOCK_ASSERT(txr);
866
867	if ((ifp->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
868	    IFF_DRV_RUNNING)
869		return;
870	if (!adapter->link_active)
871		return;
872
873	/* Call cleanup if number of TX descriptors low */
874	if (txr->tx_avail <= IGB_TX_CLEANUP_THRESHOLD)
875		igb_txeof(txr);
876
877	while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
878		if (txr->tx_avail <= IGB_MAX_SCATTER) {
879			txr->queue_status |= IGB_QUEUE_DEPLETED;
880			break;
881		}
882		IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
883		if (m_head == NULL)
884			break;
885		/*
886		 *  Encapsulation can modify our pointer, and or make it
887		 *  NULL on failure.  In that event, we can't requeue.
888		 */
889		if (igb_xmit(txr, &m_head)) {
890			if (m_head != NULL)
891				IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
892			if (txr->tx_avail <= IGB_MAX_SCATTER)
893				txr->queue_status |= IGB_QUEUE_DEPLETED;
894			break;
895		}
896
897		/* Send a copy of the frame to the BPF listener */
898		ETHER_BPF_MTAP(ifp, m_head);
899
900		/* Set watchdog on */
901		txr->watchdog_time = ticks;
902		txr->queue_status |= IGB_QUEUE_WORKING;
903	}
904}
905
906/*
907 * Legacy TX driver routine, called from the
908 * stack, always uses tx[0], and spins for it.
909 * Should not be used with multiqueue tx
910 */
911static void
912igb_start(struct ifnet *ifp)
913{
914	struct adapter	*adapter = ifp->if_softc;
915	struct tx_ring	*txr = adapter->tx_rings;
916
917	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
918		IGB_TX_LOCK(txr);
919		igb_start_locked(txr, ifp);
920		IGB_TX_UNLOCK(txr);
921	}
922	return;
923}
924
925#if __FreeBSD_version >= 800000
926/*
927** Multiqueue Transmit driver
928**
929*/
930static int
931igb_mq_start(struct ifnet *ifp, struct mbuf *m)
932{
933	struct adapter		*adapter = ifp->if_softc;
934	struct igb_queue	*que;
935	struct tx_ring		*txr;
936	int 			i, err = 0;
937	bool			moveable = TRUE;
938
939	/* Which queue to use */
940	if ((m->m_flags & M_FLOWID) != 0) {
941		i = m->m_pkthdr.flowid % adapter->num_queues;
942		moveable = FALSE;
943	} else
944		i = curcpu % adapter->num_queues;
945
946	txr = &adapter->tx_rings[i];
947	que = &adapter->queues[i];
948	if (((txr->queue_status & IGB_QUEUE_DEPLETED) == 0) &&
949	    IGB_TX_TRYLOCK(txr)) {
950		err = igb_mq_start_locked(ifp, txr, m);
951		IGB_TX_UNLOCK(txr);
952	} else {
953		err = drbr_enqueue(ifp, txr->br, m);
954		taskqueue_enqueue(que->tq, &txr->txq_task);
955	}
956
957	return (err);
958}
959
960static int
961igb_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr, struct mbuf *m)
962{
963	struct adapter  *adapter = txr->adapter;
964        struct mbuf     *next;
965        int             err = 0, enq;
966
967	IGB_TX_LOCK_ASSERT(txr);
968
969	if (((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) ||
970	    (txr->queue_status == IGB_QUEUE_DEPLETED) ||
971	    adapter->link_active == 0) {
972		if (m != NULL)
973			err = drbr_enqueue(ifp, txr->br, m);
974		return (err);
975	}
976
977	enq = 0;
978	if (m == NULL) {
979		next = drbr_dequeue(ifp, txr->br);
980	} else if (drbr_needs_enqueue(ifp, txr->br)) {
981		if ((err = drbr_enqueue(ifp, txr->br, m)) != 0)
982			return (err);
983		next = drbr_dequeue(ifp, txr->br);
984	} else
985		next = m;
986
987	/* Process the queue */
988	while (next != NULL) {
989		if ((err = igb_xmit(txr, &next)) != 0) {
990			if (next != NULL)
991				err = drbr_enqueue(ifp, txr->br, next);
992			break;
993		}
994		enq++;
995		drbr_stats_update(ifp, next->m_pkthdr.len, next->m_flags);
996		ETHER_BPF_MTAP(ifp, next);
997		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
998			break;
999		next = drbr_dequeue(ifp, txr->br);
1000	}
1001	if (enq > 0) {
1002		/* Set the watchdog */
1003		txr->queue_status |= IGB_QUEUE_WORKING;
1004		txr->watchdog_time = ticks;
1005	}
1006	if (txr->tx_avail <= IGB_TX_CLEANUP_THRESHOLD)
1007		igb_txeof(txr);
1008	if (txr->tx_avail <= IGB_MAX_SCATTER)
1009		txr->queue_status |= IGB_QUEUE_DEPLETED;
1010	return (err);
1011}
1012
1013/*
1014 * Called from a taskqueue to drain queued transmit packets.
1015 */
1016static void
1017igb_deferred_mq_start(void *arg, int pending)
1018{
1019	struct tx_ring *txr = arg;
1020	struct adapter *adapter = txr->adapter;
1021	struct ifnet *ifp = adapter->ifp;
1022
1023	IGB_TX_LOCK(txr);
1024	if (!drbr_empty(ifp, txr->br))
1025		igb_mq_start_locked(ifp, txr, NULL);
1026	IGB_TX_UNLOCK(txr);
1027}
1028
1029/*
1030** Flush all ring buffers
1031*/
1032static void
1033igb_qflush(struct ifnet *ifp)
1034{
1035	struct adapter	*adapter = ifp->if_softc;
1036	struct tx_ring	*txr = adapter->tx_rings;
1037	struct mbuf	*m;
1038
1039	for (int i = 0; i < adapter->num_queues; i++, txr++) {
1040		IGB_TX_LOCK(txr);
1041		while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
1042			m_freem(m);
1043		IGB_TX_UNLOCK(txr);
1044	}
1045	if_qflush(ifp);
1046}
1047#endif /* __FreeBSD_version >= 800000 */
1048
1049/*********************************************************************
1050 *  Ioctl entry point
1051 *
1052 *  igb_ioctl is called when the user wants to configure the
1053 *  interface.
1054 *
1055 *  return 0 on success, positive on failure
1056 **********************************************************************/
1057
1058static int
1059igb_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
1060{
1061	struct adapter	*adapter = ifp->if_softc;
1062	struct ifreq	*ifr = (struct ifreq *)data;
1063#if defined(INET) || defined(INET6)
1064	struct ifaddr	*ifa = (struct ifaddr *)data;
1065#endif
1066	bool		avoid_reset = FALSE;
1067	int		error = 0;
1068
1069	if (adapter->in_detach)
1070		return (error);
1071
1072	switch (command) {
1073	case SIOCSIFADDR:
1074#ifdef INET
1075		if (ifa->ifa_addr->sa_family == AF_INET)
1076			avoid_reset = TRUE;
1077#endif
1078#ifdef INET6
1079		if (ifa->ifa_addr->sa_family == AF_INET6)
1080			avoid_reset = TRUE;
1081#endif
1082		/*
1083		** Calling init results in link renegotiation,
1084		** so we avoid doing it when possible.
1085		*/
1086		if (avoid_reset) {
1087			ifp->if_flags |= IFF_UP;
1088			if (!(ifp->if_drv_flags & IFF_DRV_RUNNING))
1089				igb_init(adapter);
1090#ifdef INET
1091			if (!(ifp->if_flags & IFF_NOARP))
1092				arp_ifinit(ifp, ifa);
1093#endif
1094		} else
1095			error = ether_ioctl(ifp, command, data);
1096		break;
1097	case SIOCSIFMTU:
1098	    {
1099		int max_frame_size;
1100
1101		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)");
1102
1103		IGB_CORE_LOCK(adapter);
1104		max_frame_size = 9234;
1105		if (ifr->ifr_mtu > max_frame_size - ETHER_HDR_LEN -
1106		    ETHER_CRC_LEN) {
1107			IGB_CORE_UNLOCK(adapter);
1108			error = EINVAL;
1109			break;
1110		}
1111
1112		ifp->if_mtu = ifr->ifr_mtu;
1113		adapter->max_frame_size =
1114		    ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
1115		igb_init_locked(adapter);
1116		IGB_CORE_UNLOCK(adapter);
1117		break;
1118	    }
1119	case SIOCSIFFLAGS:
1120		IOCTL_DEBUGOUT("ioctl rcv'd:\
1121		    SIOCSIFFLAGS (Set Interface Flags)");
1122		IGB_CORE_LOCK(adapter);
1123		if (ifp->if_flags & IFF_UP) {
1124			if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) {
1125				if ((ifp->if_flags ^ adapter->if_flags) &
1126				    (IFF_PROMISC | IFF_ALLMULTI)) {
1127					igb_disable_promisc(adapter);
1128					igb_set_promisc(adapter);
1129				}
1130			} else
1131				igb_init_locked(adapter);
1132		} else
1133			if (ifp->if_drv_flags & IFF_DRV_RUNNING)
1134				igb_stop(adapter);
1135		adapter->if_flags = ifp->if_flags;
1136		IGB_CORE_UNLOCK(adapter);
1137		break;
1138	case SIOCADDMULTI:
1139	case SIOCDELMULTI:
1140		IOCTL_DEBUGOUT("ioctl rcv'd: SIOC(ADD|DEL)MULTI");
1141		if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1142			IGB_CORE_LOCK(adapter);
1143			igb_disable_intr(adapter);
1144			igb_set_multi(adapter);
1145#ifdef DEVICE_POLLING
1146			if (!(ifp->if_capenable & IFCAP_POLLING))
1147#endif
1148				igb_enable_intr(adapter);
1149			IGB_CORE_UNLOCK(adapter);
1150		}
1151		break;
1152	case SIOCSIFMEDIA:
1153		/* Check SOL/IDER usage */
1154		IGB_CORE_LOCK(adapter);
1155		if (e1000_check_reset_block(&adapter->hw)) {
1156			IGB_CORE_UNLOCK(adapter);
1157			device_printf(adapter->dev, "Media change is"
1158			    " blocked due to SOL/IDER session.\n");
1159			break;
1160		}
1161		IGB_CORE_UNLOCK(adapter);
1162	case SIOCGIFMEDIA:
1163		IOCTL_DEBUGOUT("ioctl rcv'd: \
1164		    SIOCxIFMEDIA (Get/Set Interface Media)");
1165		error = ifmedia_ioctl(ifp, ifr, &adapter->media, command);
1166		break;
1167	case SIOCSIFCAP:
1168	    {
1169		int mask, reinit;
1170
1171		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFCAP (Set Capabilities)");
1172		reinit = 0;
1173		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
1174#ifdef DEVICE_POLLING
1175		if (mask & IFCAP_POLLING) {
1176			if (ifr->ifr_reqcap & IFCAP_POLLING) {
1177				error = ether_poll_register(igb_poll, ifp);
1178				if (error)
1179					return (error);
1180				IGB_CORE_LOCK(adapter);
1181				igb_disable_intr(adapter);
1182				ifp->if_capenable |= IFCAP_POLLING;
1183				IGB_CORE_UNLOCK(adapter);
1184			} else {
1185				error = ether_poll_deregister(ifp);
1186				/* Enable interrupt even in error case */
1187				IGB_CORE_LOCK(adapter);
1188				igb_enable_intr(adapter);
1189				ifp->if_capenable &= ~IFCAP_POLLING;
1190				IGB_CORE_UNLOCK(adapter);
1191			}
1192		}
1193#endif
1194		if (mask & IFCAP_HWCSUM) {
1195			ifp->if_capenable ^= IFCAP_HWCSUM;
1196			reinit = 1;
1197		}
1198		if (mask & IFCAP_TSO4) {
1199			ifp->if_capenable ^= IFCAP_TSO4;
1200			reinit = 1;
1201		}
1202		if (mask & IFCAP_VLAN_HWTAGGING) {
1203			ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
1204			reinit = 1;
1205		}
1206		if (mask & IFCAP_VLAN_HWFILTER) {
1207			ifp->if_capenable ^= IFCAP_VLAN_HWFILTER;
1208			reinit = 1;
1209		}
1210		if (mask & IFCAP_VLAN_HWTSO) {
1211			ifp->if_capenable ^= IFCAP_VLAN_HWTSO;
1212			reinit = 1;
1213		}
1214		if (mask & IFCAP_LRO) {
1215			ifp->if_capenable ^= IFCAP_LRO;
1216			reinit = 1;
1217		}
1218		if (reinit && (ifp->if_drv_flags & IFF_DRV_RUNNING))
1219			igb_init(adapter);
1220		VLAN_CAPABILITIES(ifp);
1221		break;
1222	    }
1223
1224	default:
1225		error = ether_ioctl(ifp, command, data);
1226		break;
1227	}
1228
1229	return (error);
1230}
1231
1232
1233/*********************************************************************
1234 *  Init entry point
1235 *
1236 *  This routine is used in two ways. It is used by the stack as
1237 *  init entry point in network interface structure. It is also used
1238 *  by the driver as a hw/sw initialization routine to get to a
1239 *  consistent state.
1240 *
1241 *  return 0 on success, positive on failure
1242 **********************************************************************/
1243
1244static void
1245igb_init_locked(struct adapter *adapter)
1246{
1247	struct ifnet	*ifp = adapter->ifp;
1248	device_t	dev = adapter->dev;
1249
1250	INIT_DEBUGOUT("igb_init: begin");
1251
1252	IGB_CORE_LOCK_ASSERT(adapter);
1253
1254	igb_disable_intr(adapter);
1255	callout_stop(&adapter->timer);
1256
1257	/* Get the latest mac address, User can use a LAA */
1258        bcopy(IF_LLADDR(adapter->ifp), adapter->hw.mac.addr,
1259              ETHER_ADDR_LEN);
1260
1261	/* Put the address into the Receive Address Array */
1262	e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);
1263
1264	igb_reset(adapter);
1265	igb_update_link_status(adapter);
1266
1267	E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);
1268
1269	/* Set hardware offload abilities */
1270	ifp->if_hwassist = 0;
1271	if (ifp->if_capenable & IFCAP_TXCSUM) {
1272		ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
1273#if __FreeBSD_version >= 800000
1274		if (adapter->hw.mac.type == e1000_82576)
1275			ifp->if_hwassist |= CSUM_SCTP;
1276#endif
1277	}
1278
1279	if (ifp->if_capenable & IFCAP_TSO4)
1280		ifp->if_hwassist |= CSUM_TSO;
1281
1282	/* Configure for OS presence */
1283	igb_init_manageability(adapter);
1284
1285	/* Prepare transmit descriptors and buffers */
1286	igb_setup_transmit_structures(adapter);
1287	igb_initialize_transmit_units(adapter);
1288
1289	/* Setup Multicast table */
1290	igb_set_multi(adapter);
1291
1292	/*
1293	** Figure out the desired mbuf pool
1294	** for doing jumbo/packetsplit
1295	*/
1296	if (adapter->max_frame_size <= 2048)
1297		adapter->rx_mbuf_sz = MCLBYTES;
1298	else if (adapter->max_frame_size <= 4096)
1299		adapter->rx_mbuf_sz = MJUMPAGESIZE;
1300	else
1301		adapter->rx_mbuf_sz = MJUM9BYTES;
1302
1303	/* Prepare receive descriptors and buffers */
1304	if (igb_setup_receive_structures(adapter)) {
1305		device_printf(dev, "Could not setup receive structures\n");
1306		return;
1307	}
1308	igb_initialize_receive_units(adapter);
1309
1310        /* Enable VLAN support */
1311	if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING)
1312		igb_setup_vlan_hw_support(adapter);
1313
1314	/* Don't lose promiscuous settings */
1315	igb_set_promisc(adapter);
1316
1317	ifp->if_drv_flags |= IFF_DRV_RUNNING;
1318	ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
1319
1320	callout_reset(&adapter->timer, hz, igb_local_timer, adapter);
1321	e1000_clear_hw_cntrs_base_generic(&adapter->hw);
1322
1323	if (adapter->msix > 1) /* Set up queue routing */
1324		igb_configure_queues(adapter);
1325
1326	/* this clears any pending interrupts */
1327	E1000_READ_REG(&adapter->hw, E1000_ICR);
1328#ifdef DEVICE_POLLING
1329	/*
1330	 * Only enable interrupts if we are not polling, make sure
1331	 * they are off otherwise.
1332	 */
1333	if (ifp->if_capenable & IFCAP_POLLING)
1334		igb_disable_intr(adapter);
1335	else
1336#endif /* DEVICE_POLLING */
1337	{
1338		igb_enable_intr(adapter);
1339		E1000_WRITE_REG(&adapter->hw, E1000_ICS, E1000_ICS_LSC);
1340	}
1341
1342	/* Set Energy Efficient Ethernet */
1343
1344	e1000_set_eee_i350(&adapter->hw);
1345}
1346
1347static void
1348igb_init(void *arg)
1349{
1350	struct adapter *adapter = arg;
1351
1352	IGB_CORE_LOCK(adapter);
1353	igb_init_locked(adapter);
1354	IGB_CORE_UNLOCK(adapter);
1355}
1356
1357
1358static void
1359igb_handle_que(void *context, int pending)
1360{
1361	struct igb_queue *que = context;
1362	struct adapter *adapter = que->adapter;
1363	struct tx_ring *txr = que->txr;
1364	struct ifnet	*ifp = adapter->ifp;
1365
1366	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1367		bool	more;
1368
1369		more = igb_rxeof(que, adapter->rx_process_limit, NULL);
1370
1371		IGB_TX_LOCK(txr);
1372		igb_txeof(txr);
1373#if __FreeBSD_version >= 800000
1374		/* Process the stack queue only if not depleted */
1375		if (((txr->queue_status & IGB_QUEUE_DEPLETED) == 0) &&
1376		    !drbr_empty(ifp, txr->br))
1377			igb_mq_start_locked(ifp, txr, NULL);
1378#else
1379		if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1380			igb_start_locked(txr, ifp);
1381#endif
1382		IGB_TX_UNLOCK(txr);
1383		/* Do we need another? */
1384		if (more) {
1385			taskqueue_enqueue(que->tq, &que->que_task);
1386			return;
1387		}
1388	}
1389
1390#ifdef DEVICE_POLLING
1391	if (ifp->if_capenable & IFCAP_POLLING)
1392		return;
1393#endif
1394	/* Reenable this interrupt */
1395	if (que->eims)
1396		E1000_WRITE_REG(&adapter->hw, E1000_EIMS, que->eims);
1397	else
1398		igb_enable_intr(adapter);
1399}
1400
1401/* Deal with link in a sleepable context */
1402static void
1403igb_handle_link(void *context, int pending)
1404{
1405	struct adapter *adapter = context;
1406
1407	IGB_CORE_LOCK(adapter);
1408	igb_handle_link_locked(adapter);
1409	IGB_CORE_UNLOCK(adapter);
1410}
1411
1412static void
1413igb_handle_link_locked(struct adapter *adapter)
1414{
1415	struct tx_ring	*txr = adapter->tx_rings;
1416	struct ifnet *ifp = adapter->ifp;
1417
1418	IGB_CORE_LOCK_ASSERT(adapter);
1419	adapter->hw.mac.get_link_status = 1;
1420	igb_update_link_status(adapter);
1421	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) && adapter->link_active) {
1422		for (int i = 0; i < adapter->num_queues; i++, txr++) {
1423			IGB_TX_LOCK(txr);
1424#if __FreeBSD_version >= 800000
1425			/* Process the stack queue only if not depleted */
1426			if (((txr->queue_status & IGB_QUEUE_DEPLETED) == 0) &&
1427			    !drbr_empty(ifp, txr->br))
1428				igb_mq_start_locked(ifp, txr, NULL);
1429#else
1430			if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1431				igb_start_locked(txr, ifp);
1432#endif
1433			IGB_TX_UNLOCK(txr);
1434		}
1435	}
1436}
1437
1438/*********************************************************************
1439 *
1440 *  MSI/Legacy Deferred
1441 *  Interrupt Service routine
1442 *
1443 *********************************************************************/
1444static int
1445igb_irq_fast(void *arg)
1446{
1447	struct adapter		*adapter = arg;
1448	struct igb_queue	*que = adapter->queues;
1449	u32			reg_icr;
1450
1451
1452	reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1453
1454	/* Hot eject?  */
1455	if (reg_icr == 0xffffffff)
1456		return FILTER_STRAY;
1457
1458	/* Definitely not our interrupt.  */
1459	if (reg_icr == 0x0)
1460		return FILTER_STRAY;
1461
1462	if ((reg_icr & E1000_ICR_INT_ASSERTED) == 0)
1463		return FILTER_STRAY;
1464
1465	/*
1466	 * Mask interrupts until the taskqueue is finished running.  This is
1467	 * cheap, just assume that it is needed.  This also works around the
1468	 * MSI message reordering errata on certain systems.
1469	 */
1470	igb_disable_intr(adapter);
1471	taskqueue_enqueue(que->tq, &que->que_task);
1472
1473	/* Link status change */
1474	if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC))
1475		taskqueue_enqueue(que->tq, &adapter->link_task);
1476
1477	if (reg_icr & E1000_ICR_RXO)
1478		adapter->rx_overruns++;
1479	return FILTER_HANDLED;
1480}
1481
1482#ifdef DEVICE_POLLING
1483/*********************************************************************
1484 *
1485 *  Legacy polling routine : if using this code you MUST be sure that
1486 *  multiqueue is not defined, ie, set igb_num_queues to 1.
1487 *
1488 *********************************************************************/
1489#if __FreeBSD_version >= 800000
1490#define POLL_RETURN_COUNT(a) (a)
1491static int
1492#else
1493#define POLL_RETURN_COUNT(a)
1494static void
1495#endif
1496igb_poll(struct ifnet *ifp, enum poll_cmd cmd, int count)
1497{
1498	struct adapter		*adapter = ifp->if_softc;
1499	struct igb_queue	*que = adapter->queues;
1500	struct tx_ring		*txr = adapter->tx_rings;
1501	u32			reg_icr, rx_done = 0;
1502	u32			loop = IGB_MAX_LOOP;
1503	bool			more;
1504
1505	IGB_CORE_LOCK(adapter);
1506	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
1507		IGB_CORE_UNLOCK(adapter);
1508		return POLL_RETURN_COUNT(rx_done);
1509	}
1510
1511	if (cmd == POLL_AND_CHECK_STATUS) {
1512		reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1513		/* Link status change */
1514		if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC))
1515			igb_handle_link_locked(adapter);
1516
1517		if (reg_icr & E1000_ICR_RXO)
1518			adapter->rx_overruns++;
1519	}
1520	IGB_CORE_UNLOCK(adapter);
1521
1522	igb_rxeof(que, count, &rx_done);
1523
1524	IGB_TX_LOCK(txr);
1525	do {
1526		more = igb_txeof(txr);
1527	} while (loop-- && more);
1528#if __FreeBSD_version >= 800000
1529	if (!drbr_empty(ifp, txr->br))
1530		igb_mq_start_locked(ifp, txr, NULL);
1531#else
1532	if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1533		igb_start_locked(txr, ifp);
1534#endif
1535	IGB_TX_UNLOCK(txr);
1536	return POLL_RETURN_COUNT(rx_done);
1537}
1538#endif /* DEVICE_POLLING */
1539
1540/*********************************************************************
1541 *
1542 *  MSIX Que Interrupt Service routine
1543 *
1544 **********************************************************************/
1545static void
1546igb_msix_que(void *arg)
1547{
1548	struct igb_queue *que = arg;
1549	struct adapter *adapter = que->adapter;
1550	struct ifnet   *ifp = adapter->ifp;
1551	struct tx_ring *txr = que->txr;
1552	struct rx_ring *rxr = que->rxr;
1553	u32		newitr = 0;
1554	bool		more_rx;
1555
1556	E1000_WRITE_REG(&adapter->hw, E1000_EIMC, que->eims);
1557	++que->irqs;
1558
1559	IGB_TX_LOCK(txr);
1560	igb_txeof(txr);
1561#if __FreeBSD_version >= 800000
1562	/* Process the stack queue only if not depleted */
1563	if (((txr->queue_status & IGB_QUEUE_DEPLETED) == 0) &&
1564	    !drbr_empty(ifp, txr->br))
1565		igb_mq_start_locked(ifp, txr, NULL);
1566#else
1567	if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1568		igb_start_locked(txr, ifp);
1569#endif
1570	IGB_TX_UNLOCK(txr);
1571
1572	more_rx = igb_rxeof(que, adapter->rx_process_limit, NULL);
1573
1574	if (adapter->enable_aim == FALSE)
1575		goto no_calc;
1576	/*
1577	** Do Adaptive Interrupt Moderation:
1578        **  - Write out last calculated setting
1579	**  - Calculate based on average size over
1580	**    the last interval.
1581	*/
1582        if (que->eitr_setting)
1583                E1000_WRITE_REG(&adapter->hw,
1584                    E1000_EITR(que->msix), que->eitr_setting);
1585
1586        que->eitr_setting = 0;
1587
1588        /* Idle, do nothing */
1589        if ((txr->bytes == 0) && (rxr->bytes == 0))
1590                goto no_calc;
1591
1592        /* Used half Default if sub-gig */
1593        if (adapter->link_speed != 1000)
1594                newitr = IGB_DEFAULT_ITR / 2;
1595        else {
1596		if ((txr->bytes) && (txr->packets))
1597                	newitr = txr->bytes/txr->packets;
1598		if ((rxr->bytes) && (rxr->packets))
1599			newitr = max(newitr,
1600			    (rxr->bytes / rxr->packets));
1601                newitr += 24; /* account for hardware frame, crc */
1602		/* set an upper boundary */
1603		newitr = min(newitr, 3000);
1604		/* Be nice to the mid range */
1605                if ((newitr > 300) && (newitr < 1200))
1606                        newitr = (newitr / 3);
1607                else
1608                        newitr = (newitr / 2);
1609        }
1610        newitr &= 0x7FFC;  /* Mask invalid bits */
1611        if (adapter->hw.mac.type == e1000_82575)
1612                newitr |= newitr << 16;
1613        else
1614                newitr |= E1000_EITR_CNT_IGNR;
1615
1616        /* save for next interrupt */
1617        que->eitr_setting = newitr;
1618
1619        /* Reset state */
1620        txr->bytes = 0;
1621        txr->packets = 0;
1622        rxr->bytes = 0;
1623        rxr->packets = 0;
1624
1625no_calc:
1626	/* Schedule a clean task if needed*/
1627	if (more_rx)
1628		taskqueue_enqueue(que->tq, &que->que_task);
1629	else
1630		/* Reenable this interrupt */
1631		E1000_WRITE_REG(&adapter->hw, E1000_EIMS, que->eims);
1632	return;
1633}
1634
1635
1636/*********************************************************************
1637 *
1638 *  MSIX Link Interrupt Service routine
1639 *
1640 **********************************************************************/
1641
1642static void
1643igb_msix_link(void *arg)
1644{
1645	struct adapter	*adapter = arg;
1646	u32       	icr;
1647
1648	++adapter->link_irq;
1649	icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1650	if (!(icr & E1000_ICR_LSC))
1651		goto spurious;
1652	igb_handle_link(adapter, 0);
1653
1654spurious:
1655	/* Rearm */
1656	E1000_WRITE_REG(&adapter->hw, E1000_IMS, E1000_IMS_LSC);
1657	E1000_WRITE_REG(&adapter->hw, E1000_EIMS, adapter->link_mask);
1658	return;
1659}
1660
1661
1662/*********************************************************************
1663 *
1664 *  Media Ioctl callback
1665 *
1666 *  This routine is called whenever the user queries the status of
1667 *  the interface using ifconfig.
1668 *
1669 **********************************************************************/
1670static void
1671igb_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
1672{
1673	struct adapter *adapter = ifp->if_softc;
1674	u_char fiber_type = IFM_1000_SX;
1675
1676	INIT_DEBUGOUT("igb_media_status: begin");
1677
1678	IGB_CORE_LOCK(adapter);
1679	igb_update_link_status(adapter);
1680
1681	ifmr->ifm_status = IFM_AVALID;
1682	ifmr->ifm_active = IFM_ETHER;
1683
1684	if (!adapter->link_active) {
1685		IGB_CORE_UNLOCK(adapter);
1686		return;
1687	}
1688
1689	ifmr->ifm_status |= IFM_ACTIVE;
1690
1691	if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
1692	    (adapter->hw.phy.media_type == e1000_media_type_internal_serdes))
1693		ifmr->ifm_active |= fiber_type | IFM_FDX;
1694	else {
1695		switch (adapter->link_speed) {
1696		case 10:
1697			ifmr->ifm_active |= IFM_10_T;
1698			break;
1699		case 100:
1700			ifmr->ifm_active |= IFM_100_TX;
1701			break;
1702		case 1000:
1703			ifmr->ifm_active |= IFM_1000_T;
1704			break;
1705		}
1706		if (adapter->link_duplex == FULL_DUPLEX)
1707			ifmr->ifm_active |= IFM_FDX;
1708		else
1709			ifmr->ifm_active |= IFM_HDX;
1710	}
1711	IGB_CORE_UNLOCK(adapter);
1712}
1713
1714/*********************************************************************
1715 *
1716 *  Media Ioctl callback
1717 *
1718 *  This routine is called when the user changes speed/duplex using
1719 *  media/mediopt option with ifconfig.
1720 *
1721 **********************************************************************/
1722static int
1723igb_media_change(struct ifnet *ifp)
1724{
1725	struct adapter *adapter = ifp->if_softc;
1726	struct ifmedia  *ifm = &adapter->media;
1727
1728	INIT_DEBUGOUT("igb_media_change: begin");
1729
1730	if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
1731		return (EINVAL);
1732
1733	IGB_CORE_LOCK(adapter);
1734	switch (IFM_SUBTYPE(ifm->ifm_media)) {
1735	case IFM_AUTO:
1736		adapter->hw.mac.autoneg = DO_AUTO_NEG;
1737		adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
1738		break;
1739	case IFM_1000_LX:
1740	case IFM_1000_SX:
1741	case IFM_1000_T:
1742		adapter->hw.mac.autoneg = DO_AUTO_NEG;
1743		adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
1744		break;
1745	case IFM_100_TX:
1746		adapter->hw.mac.autoneg = FALSE;
1747		adapter->hw.phy.autoneg_advertised = 0;
1748		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1749			adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_FULL;
1750		else
1751			adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_HALF;
1752		break;
1753	case IFM_10_T:
1754		adapter->hw.mac.autoneg = FALSE;
1755		adapter->hw.phy.autoneg_advertised = 0;
1756		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1757			adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_FULL;
1758		else
1759			adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_HALF;
1760		break;
1761	default:
1762		device_printf(adapter->dev, "Unsupported media type\n");
1763	}
1764
1765	igb_init_locked(adapter);
1766	IGB_CORE_UNLOCK(adapter);
1767
1768	return (0);
1769}
1770
1771
1772/*********************************************************************
1773 *
1774 *  This routine maps the mbufs to Advanced TX descriptors.
1775 *
1776 **********************************************************************/
1777static int
1778igb_xmit(struct tx_ring *txr, struct mbuf **m_headp)
1779{
1780	struct adapter		*adapter = txr->adapter;
1781	bus_dma_segment_t	segs[IGB_MAX_SCATTER];
1782	bus_dmamap_t		map;
1783	struct igb_tx_buffer	*tx_buffer, *tx_buffer_mapped;
1784	union e1000_adv_tx_desc	*txd = NULL;
1785	struct mbuf		*m_head = *m_headp;
1786	struct ether_vlan_header *eh = NULL;
1787	struct ip		*ip = NULL;
1788	struct tcphdr		*th = NULL;
1789	u32			hdrlen, cmd_type_len, olinfo_status = 0;
1790	int			ehdrlen, poff;
1791	int			nsegs, i, first, last = 0;
1792	int			error, do_tso, remap = 1;
1793
1794	/* Set basic descriptor constants */
1795	cmd_type_len = E1000_ADVTXD_DTYP_DATA;
1796	cmd_type_len |= E1000_ADVTXD_DCMD_IFCS | E1000_ADVTXD_DCMD_DEXT;
1797	if (m_head->m_flags & M_VLANTAG)
1798		cmd_type_len |= E1000_ADVTXD_DCMD_VLE;
1799
1800retry:
1801	m_head = *m_headp;
1802	do_tso = ((m_head->m_pkthdr.csum_flags & CSUM_TSO) != 0);
1803	hdrlen = ehdrlen = poff = 0;
1804
1805	/*
1806	 * Intel recommends entire IP/TCP header length reside in a single
1807	 * buffer. If multiple descriptors are used to describe the IP and
1808	 * TCP header, each descriptor should describe one or more
1809	 * complete headers; descriptors referencing only parts of headers
1810	 * are not supported. If all layer headers are not coalesced into
1811	 * a single buffer, each buffer should not cross a 4KB boundary,
1812	 * or be larger than the maximum read request size.
1813	 * Controller also requires modifing IP/TCP header to make TSO work
1814	 * so we firstly get a writable mbuf chain then coalesce ethernet/
1815	 * IP/TCP header into a single buffer to meet the requirement of
1816	 * controller. This also simplifies IP/TCP/UDP checksum offloading
1817	 * which also has similiar restrictions.
1818	 */
1819	if (do_tso || m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD) {
1820		if (do_tso || (m_head->m_next != NULL &&
1821		    m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD)) {
1822			if (M_WRITABLE(*m_headp) == 0) {
1823				m_head = m_dup(*m_headp, M_DONTWAIT);
1824				m_freem(*m_headp);
1825				if (m_head == NULL) {
1826					*m_headp = NULL;
1827					return (ENOBUFS);
1828				}
1829				*m_headp = m_head;
1830			}
1831		}
1832		/*
1833		 * Assume IPv4, we don't have TSO/checksum offload support
1834		 * for IPv6 yet.
1835		 */
1836		ehdrlen = sizeof(struct ether_header);
1837		m_head = m_pullup(m_head, ehdrlen);
1838		if (m_head == NULL) {
1839			*m_headp = NULL;
1840			return (ENOBUFS);
1841		}
1842		eh = mtod(m_head, struct ether_vlan_header *);
1843		if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
1844			ehdrlen = sizeof(struct ether_vlan_header);
1845			m_head = m_pullup(m_head, ehdrlen);
1846			if (m_head == NULL) {
1847				*m_headp = NULL;
1848				return (ENOBUFS);
1849			}
1850		}
1851		m_head = m_pullup(m_head, ehdrlen + sizeof(struct ip));
1852		if (m_head == NULL) {
1853			*m_headp = NULL;
1854			return (ENOBUFS);
1855		}
1856		ip = (struct ip *)(mtod(m_head, char *) + ehdrlen);
1857		poff = ehdrlen + (ip->ip_hl << 2);
1858		if (do_tso) {
1859			m_head = m_pullup(m_head, poff + sizeof(struct tcphdr));
1860			if (m_head == NULL) {
1861				*m_headp = NULL;
1862				return (ENOBUFS);
1863			}
1864			/*
1865			 * The pseudo TCP checksum does not include TCP payload
1866			 * length so driver should recompute the checksum here
1867			 * what hardware expect to see. This is adherence of
1868			 * Microsoft's Large Send specification.
1869			 */
1870			th = (struct tcphdr *)(mtod(m_head, char *) + poff);
1871			th->th_sum = in_pseudo(ip->ip_src.s_addr,
1872			    ip->ip_dst.s_addr, htons(IPPROTO_TCP));
1873			/* Keep track of the full header length */
1874			hdrlen = poff + (th->th_off << 2);
1875		} else if (m_head->m_pkthdr.csum_flags & CSUM_TCP) {
1876			m_head = m_pullup(m_head, poff + sizeof(struct tcphdr));
1877			if (m_head == NULL) {
1878				*m_headp = NULL;
1879				return (ENOBUFS);
1880			}
1881			th = (struct tcphdr *)(mtod(m_head, char *) + poff);
1882			m_head = m_pullup(m_head, poff + (th->th_off << 2));
1883			if (m_head == NULL) {
1884				*m_headp = NULL;
1885				return (ENOBUFS);
1886			}
1887			ip = (struct ip *)(mtod(m_head, char *) + ehdrlen);
1888			th = (struct tcphdr *)(mtod(m_head, char *) + poff);
1889		} else if (m_head->m_pkthdr.csum_flags & CSUM_UDP) {
1890			m_head = m_pullup(m_head, poff + sizeof(struct udphdr));
1891			if (m_head == NULL) {
1892				*m_headp = NULL;
1893				return (ENOBUFS);
1894			}
1895			ip = (struct ip *)(mtod(m_head, char *) + ehdrlen);
1896		}
1897		*m_headp = m_head;
1898	}
1899
1900	/*
1901	 * Map the packet for DMA
1902	 *
1903	 * Capture the first descriptor index,
1904	 * this descriptor will have the index
1905	 * of the EOP which is the only one that
1906	 * now gets a DONE bit writeback.
1907	 */
1908	first = txr->next_avail_desc;
1909	tx_buffer = &txr->tx_buffers[first];
1910	tx_buffer_mapped = tx_buffer;
1911	map = tx_buffer->map;
1912
1913	error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
1914	    *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
1915
1916	/*
1917	 * There are two types of errors we can (try) to handle:
1918	 * - EFBIG means the mbuf chain was too long and bus_dma ran
1919	 *   out of segments.  Defragment the mbuf chain and try again.
1920	 * - ENOMEM means bus_dma could not obtain enough bounce buffers
1921	 *   at this point in time.  Defer sending and try again later.
1922	 * All other errors, in particular EINVAL, are fatal and prevent the
1923	 * mbuf chain from ever going through.  Drop it and report error.
1924	 */
1925	if (error == EFBIG && remap) {
1926		struct mbuf *m;
1927
1928		m = m_defrag(*m_headp, M_DONTWAIT);
1929		if (m == NULL) {
1930			adapter->mbuf_defrag_failed++;
1931			m_freem(*m_headp);
1932			*m_headp = NULL;
1933			return (ENOBUFS);
1934		}
1935		*m_headp = m;
1936
1937		/* Try it again, but only once */
1938		remap = 0;
1939		goto retry;
1940	} else if (error == ENOMEM) {
1941		adapter->no_tx_dma_setup++;
1942		return (error);
1943	} else if (error != 0) {
1944		adapter->no_tx_dma_setup++;
1945		m_freem(*m_headp);
1946		*m_headp = NULL;
1947		return (error);
1948	}
1949
1950	/*
1951	** Make sure we don't overrun the ring,
1952	** we need nsegs descriptors and one for
1953	** the context descriptor used for the
1954	** offloads.
1955	*/
1956        if ((nsegs + 1) > (txr->tx_avail - 2)) {
1957                txr->no_desc_avail++;
1958		bus_dmamap_unload(txr->txtag, map);
1959		return (ENOBUFS);
1960        }
1961	m_head = *m_headp;
1962
1963	/* Do hardware assists:
1964         * Set up the context descriptor, used
1965         * when any hardware offload is done.
1966         * This includes CSUM, VLAN, and TSO.
1967         * It will use the first descriptor.
1968         */
1969
1970	if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
1971		if (igb_tso_setup(txr, m_head, ehdrlen, ip, th)) {
1972			cmd_type_len |= E1000_ADVTXD_DCMD_TSE;
1973			olinfo_status |= E1000_TXD_POPTS_IXSM << 8;
1974			olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
1975		} else
1976			return (ENXIO);
1977	} else if (igb_tx_ctx_setup(txr, m_head))
1978			olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
1979
1980	/* Calculate payload length */
1981	olinfo_status |= ((m_head->m_pkthdr.len - hdrlen)
1982	    << E1000_ADVTXD_PAYLEN_SHIFT);
1983
1984	/* 82575 needs the queue index added */
1985	if (adapter->hw.mac.type == e1000_82575)
1986		olinfo_status |= txr->me << 4;
1987
1988	/* Set up our transmit descriptors */
1989	i = txr->next_avail_desc;
1990	for (int j = 0; j < nsegs; j++) {
1991		bus_size_t seg_len;
1992		bus_addr_t seg_addr;
1993
1994		tx_buffer = &txr->tx_buffers[i];
1995		txd = (union e1000_adv_tx_desc *)&txr->tx_base[i];
1996		seg_addr = segs[j].ds_addr;
1997		seg_len  = segs[j].ds_len;
1998
1999		txd->read.buffer_addr = htole64(seg_addr);
2000		txd->read.cmd_type_len = htole32(cmd_type_len | seg_len);
2001		txd->read.olinfo_status = htole32(olinfo_status);
2002		last = i;
2003		if (++i == adapter->num_tx_desc)
2004			i = 0;
2005		tx_buffer->m_head = NULL;
2006		tx_buffer->next_eop = -1;
2007	}
2008
2009	txr->next_avail_desc = i;
2010	txr->tx_avail -= nsegs;
2011        tx_buffer->m_head = m_head;
2012
2013	/*
2014	** Here we swap the map so the last descriptor,
2015	** which gets the completion interrupt has the
2016	** real map, and the first descriptor gets the
2017	** unused map from this descriptor.
2018	*/
2019	tx_buffer_mapped->map = tx_buffer->map;
2020	tx_buffer->map = map;
2021        bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);
2022
2023        /*
2024         * Last Descriptor of Packet
2025	 * needs End Of Packet (EOP)
2026	 * and Report Status (RS)
2027         */
2028        txd->read.cmd_type_len |=
2029	    htole32(E1000_ADVTXD_DCMD_EOP | E1000_ADVTXD_DCMD_RS);
2030	/*
2031	 * Keep track in the first buffer which
2032	 * descriptor will be written back
2033	 */
2034	tx_buffer = &txr->tx_buffers[first];
2035	tx_buffer->next_eop = last;
2036	/* Update the watchdog time early and often */
2037	txr->watchdog_time = ticks;
2038
2039	/*
2040	 * Advance the Transmit Descriptor Tail (TDT), this tells the E1000
2041	 * that this frame is available to transmit.
2042	 */
2043	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
2044	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
2045	E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), i);
2046	++txr->tx_packets;
2047
2048	return (0);
2049}
2050static void
2051igb_set_promisc(struct adapter *adapter)
2052{
2053	struct ifnet	*ifp = adapter->ifp;
2054	struct e1000_hw *hw = &adapter->hw;
2055	u32		reg;
2056
2057	if (adapter->vf_ifp) {
2058		e1000_promisc_set_vf(hw, e1000_promisc_enabled);
2059		return;
2060	}
2061
2062	reg = E1000_READ_REG(hw, E1000_RCTL);
2063	if (ifp->if_flags & IFF_PROMISC) {
2064		reg |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
2065		E1000_WRITE_REG(hw, E1000_RCTL, reg);
2066	} else if (ifp->if_flags & IFF_ALLMULTI) {
2067		reg |= E1000_RCTL_MPE;
2068		reg &= ~E1000_RCTL_UPE;
2069		E1000_WRITE_REG(hw, E1000_RCTL, reg);
2070	}
2071}
2072
2073static void
2074igb_disable_promisc(struct adapter *adapter)
2075{
2076	struct e1000_hw *hw = &adapter->hw;
2077	u32		reg;
2078
2079	if (adapter->vf_ifp) {
2080		e1000_promisc_set_vf(hw, e1000_promisc_disabled);
2081		return;
2082	}
2083	reg = E1000_READ_REG(hw, E1000_RCTL);
2084	reg &=  (~E1000_RCTL_UPE);
2085	reg &=  (~E1000_RCTL_MPE);
2086	E1000_WRITE_REG(hw, E1000_RCTL, reg);
2087}
2088
2089
2090/*********************************************************************
2091 *  Multicast Update
2092 *
2093 *  This routine is called whenever multicast address list is updated.
2094 *
2095 **********************************************************************/
2096
2097static void
2098igb_set_multi(struct adapter *adapter)
2099{
2100	struct ifnet	*ifp = adapter->ifp;
2101	struct ifmultiaddr *ifma;
2102	u32 reg_rctl = 0;
2103	u8  *mta;
2104
2105	int mcnt = 0;
2106
2107	IOCTL_DEBUGOUT("igb_set_multi: begin");
2108
2109	mta = adapter->mta;
2110	bzero(mta, sizeof(uint8_t) * ETH_ADDR_LEN *
2111	    MAX_NUM_MULTICAST_ADDRESSES);
2112
2113#if __FreeBSD_version < 800000
2114	IF_ADDR_LOCK(ifp);
2115#else
2116	if_maddr_rlock(ifp);
2117#endif
2118	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
2119		if (ifma->ifma_addr->sa_family != AF_LINK)
2120			continue;
2121
2122		if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
2123			break;
2124
2125		bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
2126		    &mta[mcnt * ETH_ADDR_LEN], ETH_ADDR_LEN);
2127		mcnt++;
2128	}
2129#if __FreeBSD_version < 800000
2130	IF_ADDR_UNLOCK(ifp);
2131#else
2132	if_maddr_runlock(ifp);
2133#endif
2134
2135	if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) {
2136		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2137		reg_rctl |= E1000_RCTL_MPE;
2138		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2139	} else
2140		e1000_update_mc_addr_list(&adapter->hw, mta, mcnt);
2141}
2142
2143
2144/*********************************************************************
2145 *  Timer routine:
2146 *  	This routine checks for link status,
2147 *	updates statistics, and does the watchdog.
2148 *
2149 **********************************************************************/
2150
2151static void
2152igb_local_timer(void *arg)
2153{
2154	struct adapter		*adapter = arg;
2155	device_t		dev = adapter->dev;
2156	struct ifnet		*ifp = adapter->ifp;
2157	struct tx_ring		*txr = adapter->tx_rings;
2158	struct igb_queue	*que = adapter->queues;
2159	int			hung = 0, busy = 0;
2160
2161
2162	IGB_CORE_LOCK_ASSERT(adapter);
2163
2164	igb_update_link_status(adapter);
2165	igb_update_stats_counters(adapter);
2166
2167        /*
2168        ** Check the TX queues status
2169	**	- central locked handling of OACTIVE
2170	**	- watchdog only if all queues show hung
2171        */
2172	for (int i = 0; i < adapter->num_queues; i++, que++, txr++) {
2173		if ((txr->queue_status & IGB_QUEUE_HUNG) &&
2174		    (adapter->pause_frames == 0))
2175			++hung;
2176		if (txr->queue_status & IGB_QUEUE_DEPLETED)
2177			++busy;
2178		if ((txr->queue_status & IGB_QUEUE_IDLE) == 0)
2179			taskqueue_enqueue(que->tq, &que->que_task);
2180	}
2181	if (hung == adapter->num_queues)
2182		goto timeout;
2183	if (busy == adapter->num_queues)
2184		ifp->if_drv_flags |= IFF_DRV_OACTIVE;
2185	else if ((ifp->if_drv_flags & IFF_DRV_OACTIVE) &&
2186	    (busy < adapter->num_queues))
2187		ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
2188
2189	adapter->pause_frames = 0;
2190	callout_reset(&adapter->timer, hz, igb_local_timer, adapter);
2191#ifndef DEVICE_POLLING
2192	/* Schedule all queue interrupts - deadlock protection */
2193	E1000_WRITE_REG(&adapter->hw, E1000_EICS, adapter->que_mask);
2194#endif
2195	return;
2196
2197timeout:
2198	device_printf(adapter->dev, "Watchdog timeout -- resetting\n");
2199	device_printf(dev,"Queue(%d) tdh = %d, hw tdt = %d\n", txr->me,
2200            E1000_READ_REG(&adapter->hw, E1000_TDH(txr->me)),
2201            E1000_READ_REG(&adapter->hw, E1000_TDT(txr->me)));
2202	device_printf(dev,"TX(%d) desc avail = %d,"
2203            "Next TX to Clean = %d\n",
2204            txr->me, txr->tx_avail, txr->next_to_clean);
2205	adapter->ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2206	adapter->watchdog_events++;
2207	igb_init_locked(adapter);
2208}
2209
2210static void
2211igb_update_link_status(struct adapter *adapter)
2212{
2213	struct e1000_hw *hw = &adapter->hw;
2214	struct ifnet *ifp = adapter->ifp;
2215	device_t dev = adapter->dev;
2216	struct tx_ring *txr = adapter->tx_rings;
2217	u32 link_check, thstat, ctrl;
2218
2219	link_check = thstat = ctrl = 0;
2220
2221	/* Get the cached link value or read for real */
2222        switch (hw->phy.media_type) {
2223        case e1000_media_type_copper:
2224                if (hw->mac.get_link_status) {
2225			/* Do the work to read phy */
2226                        e1000_check_for_link(hw);
2227                        link_check = !hw->mac.get_link_status;
2228                } else
2229                        link_check = TRUE;
2230                break;
2231        case e1000_media_type_fiber:
2232                e1000_check_for_link(hw);
2233                link_check = (E1000_READ_REG(hw, E1000_STATUS) &
2234                                 E1000_STATUS_LU);
2235                break;
2236        case e1000_media_type_internal_serdes:
2237                e1000_check_for_link(hw);
2238                link_check = adapter->hw.mac.serdes_has_link;
2239                break;
2240	/* VF device is type_unknown */
2241        case e1000_media_type_unknown:
2242                e1000_check_for_link(hw);
2243		link_check = !hw->mac.get_link_status;
2244		/* Fall thru */
2245        default:
2246                break;
2247        }
2248
2249	/* Check for thermal downshift or shutdown */
2250	if (hw->mac.type == e1000_i350) {
2251		thstat = E1000_READ_REG(hw, E1000_THSTAT);
2252		ctrl = E1000_READ_REG(hw, E1000_CTRL_EXT);
2253	}
2254
2255	/* Now we check if a transition has happened */
2256	if (link_check && (adapter->link_active == 0)) {
2257		e1000_get_speed_and_duplex(&adapter->hw,
2258		    &adapter->link_speed, &adapter->link_duplex);
2259		if (bootverbose)
2260			device_printf(dev, "Link is up %d Mbps %s\n",
2261			    adapter->link_speed,
2262			    ((adapter->link_duplex == FULL_DUPLEX) ?
2263			    "Full Duplex" : "Half Duplex"));
2264		adapter->link_active = 1;
2265		ifp->if_baudrate = adapter->link_speed * 1000000;
2266		if ((ctrl & E1000_CTRL_EXT_LINK_MODE_GMII) &&
2267		    (thstat & E1000_THSTAT_LINK_THROTTLE))
2268			device_printf(dev, "Link: thermal downshift\n");
2269		/* This can sleep */
2270		if_link_state_change(ifp, LINK_STATE_UP);
2271	} else if (!link_check && (adapter->link_active == 1)) {
2272		ifp->if_baudrate = adapter->link_speed = 0;
2273		adapter->link_duplex = 0;
2274		if (bootverbose)
2275			device_printf(dev, "Link is Down\n");
2276		if ((ctrl & E1000_CTRL_EXT_LINK_MODE_GMII) &&
2277		    (thstat & E1000_THSTAT_PWR_DOWN))
2278			device_printf(dev, "Link: thermal shutdown\n");
2279		adapter->link_active = 0;
2280		/* This can sleep */
2281		if_link_state_change(ifp, LINK_STATE_DOWN);
2282		/* Reset queue state */
2283		for (int i = 0; i < adapter->num_queues; i++, txr++)
2284			txr->queue_status = IGB_QUEUE_IDLE;
2285	}
2286}
2287
2288/*********************************************************************
2289 *
2290 *  This routine disables all traffic on the adapter by issuing a
2291 *  global reset on the MAC and deallocates TX/RX buffers.
2292 *
2293 **********************************************************************/
2294
2295static void
2296igb_stop(void *arg)
2297{
2298	struct adapter	*adapter = arg;
2299	struct ifnet	*ifp = adapter->ifp;
2300	struct tx_ring *txr = adapter->tx_rings;
2301
2302	IGB_CORE_LOCK_ASSERT(adapter);
2303
2304	INIT_DEBUGOUT("igb_stop: begin");
2305
2306	igb_disable_intr(adapter);
2307
2308	callout_stop(&adapter->timer);
2309
2310	/* Tell the stack that the interface is no longer active */
2311	ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2312	ifp->if_drv_flags |= IFF_DRV_OACTIVE;
2313
2314	/* Disarm watchdog timer. */
2315	for (int i = 0; i < adapter->num_queues; i++, txr++) {
2316		IGB_TX_LOCK(txr);
2317		txr->queue_status = IGB_QUEUE_IDLE;
2318		IGB_TX_UNLOCK(txr);
2319	}
2320
2321	e1000_reset_hw(&adapter->hw);
2322	E1000_WRITE_REG(&adapter->hw, E1000_WUC, 0);
2323
2324	e1000_led_off(&adapter->hw);
2325	e1000_cleanup_led(&adapter->hw);
2326}
2327
2328
2329/*********************************************************************
2330 *
2331 *  Determine hardware revision.
2332 *
2333 **********************************************************************/
2334static void
2335igb_identify_hardware(struct adapter *adapter)
2336{
2337	device_t dev = adapter->dev;
2338
2339	/* Make sure our PCI config space has the necessary stuff set */
2340	adapter->hw.bus.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2);
2341	if (!((adapter->hw.bus.pci_cmd_word & PCIM_CMD_BUSMASTEREN) &&
2342	    (adapter->hw.bus.pci_cmd_word & PCIM_CMD_MEMEN))) {
2343		INIT_DEBUGOUT("Memory Access and/or Bus Master "
2344		    "bits were not set!\n");
2345		adapter->hw.bus.pci_cmd_word |=
2346		(PCIM_CMD_BUSMASTEREN | PCIM_CMD_MEMEN);
2347		pci_write_config(dev, PCIR_COMMAND,
2348		    adapter->hw.bus.pci_cmd_word, 2);
2349	}
2350
2351	/* Save off the information about this board */
2352	adapter->hw.vendor_id = pci_get_vendor(dev);
2353	adapter->hw.device_id = pci_get_device(dev);
2354	adapter->hw.revision_id = pci_read_config(dev, PCIR_REVID, 1);
2355	adapter->hw.subsystem_vendor_id =
2356	    pci_read_config(dev, PCIR_SUBVEND_0, 2);
2357	adapter->hw.subsystem_device_id =
2358	    pci_read_config(dev, PCIR_SUBDEV_0, 2);
2359
2360	/* Set MAC type early for PCI setup */
2361	e1000_set_mac_type(&adapter->hw);
2362
2363	/* Are we a VF device? */
2364	if ((adapter->hw.mac.type == e1000_vfadapt) ||
2365	    (adapter->hw.mac.type == e1000_vfadapt_i350))
2366		adapter->vf_ifp = 1;
2367	else
2368		adapter->vf_ifp = 0;
2369}
2370
2371static int
2372igb_allocate_pci_resources(struct adapter *adapter)
2373{
2374	device_t	dev = adapter->dev;
2375	int		rid;
2376
2377	rid = PCIR_BAR(0);
2378	adapter->pci_mem = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
2379	    &rid, RF_ACTIVE);
2380	if (adapter->pci_mem == NULL) {
2381		device_printf(dev, "Unable to allocate bus resource: memory\n");
2382		return (ENXIO);
2383	}
2384	adapter->osdep.mem_bus_space_tag =
2385	    rman_get_bustag(adapter->pci_mem);
2386	adapter->osdep.mem_bus_space_handle =
2387	    rman_get_bushandle(adapter->pci_mem);
2388	adapter->hw.hw_addr = (u8 *)&adapter->osdep.mem_bus_space_handle;
2389
2390	adapter->num_queues = 1; /* Defaults for Legacy or MSI */
2391
2392	/* This will setup either MSI/X or MSI */
2393	adapter->msix = igb_setup_msix(adapter);
2394	adapter->hw.back = &adapter->osdep;
2395
2396	return (0);
2397}
2398
2399/*********************************************************************
2400 *
2401 *  Setup the Legacy or MSI Interrupt handler
2402 *
2403 **********************************************************************/
2404static int
2405igb_allocate_legacy(struct adapter *adapter)
2406{
2407	device_t		dev = adapter->dev;
2408	struct igb_queue	*que = adapter->queues;
2409	struct tx_ring		*txr = adapter->tx_rings;
2410	int			error, rid = 0;
2411
2412	/* Turn off all interrupts */
2413	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2414
2415	/* MSI RID is 1 */
2416	if (adapter->msix == 1)
2417		rid = 1;
2418
2419	/* We allocate a single interrupt resource */
2420	adapter->res = bus_alloc_resource_any(dev,
2421	    SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2422	if (adapter->res == NULL) {
2423		device_printf(dev, "Unable to allocate bus resource: "
2424		    "interrupt\n");
2425		return (ENXIO);
2426	}
2427
2428#if __FreeBSD_version >= 800000
2429	TASK_INIT(&txr->txq_task, 0, igb_deferred_mq_start, txr);
2430#endif
2431
2432	/*
2433	 * Try allocating a fast interrupt and the associated deferred
2434	 * processing contexts.
2435	 */
2436	TASK_INIT(&que->que_task, 0, igb_handle_que, que);
2437	/* Make tasklet for deferred link handling */
2438	TASK_INIT(&adapter->link_task, 0, igb_handle_link, adapter);
2439	que->tq = taskqueue_create_fast("igb_taskq", M_NOWAIT,
2440	    taskqueue_thread_enqueue, &que->tq);
2441	taskqueue_start_threads(&que->tq, 1, PI_NET, "%s taskq",
2442	    device_get_nameunit(adapter->dev));
2443	if ((error = bus_setup_intr(dev, adapter->res,
2444	    INTR_TYPE_NET | INTR_MPSAFE, igb_irq_fast, NULL,
2445	    adapter, &adapter->tag)) != 0) {
2446		device_printf(dev, "Failed to register fast interrupt "
2447			    "handler: %d\n", error);
2448		taskqueue_free(que->tq);
2449		que->tq = NULL;
2450		return (error);
2451	}
2452
2453	return (0);
2454}
2455
2456
2457/*********************************************************************
2458 *
2459 *  Setup the MSIX Queue Interrupt handlers:
2460 *
2461 **********************************************************************/
2462static int
2463igb_allocate_msix(struct adapter *adapter)
2464{
2465	device_t		dev = adapter->dev;
2466	struct igb_queue	*que = adapter->queues;
2467	int			error, rid, vector = 0;
2468
2469	/* Be sure to start with all interrupts disabled */
2470	E1000_WRITE_REG(&adapter->hw, E1000_IMC, ~0);
2471	E1000_WRITE_FLUSH(&adapter->hw);
2472
2473	for (int i = 0; i < adapter->num_queues; i++, vector++, que++) {
2474		rid = vector +1;
2475		que->res = bus_alloc_resource_any(dev,
2476		    SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2477		if (que->res == NULL) {
2478			device_printf(dev,
2479			    "Unable to allocate bus resource: "
2480			    "MSIX Queue Interrupt\n");
2481			return (ENXIO);
2482		}
2483		error = bus_setup_intr(dev, que->res,
2484	    	    INTR_TYPE_NET | INTR_MPSAFE, NULL,
2485		    igb_msix_que, que, &que->tag);
2486		if (error) {
2487			que->res = NULL;
2488			device_printf(dev, "Failed to register Queue handler");
2489			return (error);
2490		}
2491#if __FreeBSD_version >= 800504
2492		bus_describe_intr(dev, que->res, que->tag, "que %d", i);
2493#endif
2494		que->msix = vector;
2495		if (adapter->hw.mac.type == e1000_82575)
2496			que->eims = E1000_EICR_TX_QUEUE0 << i;
2497		else
2498			que->eims = 1 << vector;
2499		/*
2500		** Bind the msix vector, and thus the
2501		** rings to the corresponding cpu.
2502		*/
2503		if (adapter->num_queues > 1) {
2504			if (igb_last_bind_cpu < 0)
2505				igb_last_bind_cpu = CPU_FIRST();
2506			bus_bind_intr(dev, que->res, igb_last_bind_cpu);
2507			device_printf(dev,
2508				"Bound queue %d to cpu %d\n",
2509				i,igb_last_bind_cpu);
2510			igb_last_bind_cpu = CPU_NEXT(igb_last_bind_cpu);
2511			igb_last_bind_cpu = igb_last_bind_cpu % mp_ncpus;
2512		}
2513#if __FreeBSD_version >= 800000
2514		TASK_INIT(&que->txr->txq_task, 0, igb_deferred_mq_start,
2515		    que->txr);
2516#endif
2517		/* Make tasklet for deferred handling */
2518		TASK_INIT(&que->que_task, 0, igb_handle_que, que);
2519		que->tq = taskqueue_create("igb_que", M_NOWAIT,
2520		    taskqueue_thread_enqueue, &que->tq);
2521		taskqueue_start_threads(&que->tq, 1, PI_NET, "%s que",
2522		    device_get_nameunit(adapter->dev));
2523	}
2524
2525	/* And Link */
2526	rid = vector + 1;
2527	adapter->res = bus_alloc_resource_any(dev,
2528	    SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2529	if (adapter->res == NULL) {
2530		device_printf(dev,
2531		    "Unable to allocate bus resource: "
2532		    "MSIX Link Interrupt\n");
2533		return (ENXIO);
2534	}
2535	if ((error = bus_setup_intr(dev, adapter->res,
2536	    INTR_TYPE_NET | INTR_MPSAFE, NULL,
2537	    igb_msix_link, adapter, &adapter->tag)) != 0) {
2538		device_printf(dev, "Failed to register Link handler");
2539		return (error);
2540	}
2541#if __FreeBSD_version >= 800504
2542	bus_describe_intr(dev, adapter->res, adapter->tag, "link");
2543#endif
2544	adapter->linkvec = vector;
2545
2546	return (0);
2547}
2548
2549
2550static void
2551igb_configure_queues(struct adapter *adapter)
2552{
2553	struct	e1000_hw	*hw = &adapter->hw;
2554	struct	igb_queue	*que;
2555	u32			tmp, ivar = 0, newitr = 0;
2556
2557	/* First turn on RSS capability */
2558	if (adapter->hw.mac.type != e1000_82575)
2559		E1000_WRITE_REG(hw, E1000_GPIE,
2560		    E1000_GPIE_MSIX_MODE | E1000_GPIE_EIAME |
2561		    E1000_GPIE_PBA | E1000_GPIE_NSICR);
2562
2563	/* Turn on MSIX */
2564	switch (adapter->hw.mac.type) {
2565	case e1000_82580:
2566	case e1000_i350:
2567	case e1000_vfadapt:
2568	case e1000_vfadapt_i350:
2569		/* RX entries */
2570		for (int i = 0; i < adapter->num_queues; i++) {
2571			u32 index = i >> 1;
2572			ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2573			que = &adapter->queues[i];
2574			if (i & 1) {
2575				ivar &= 0xFF00FFFF;
2576				ivar |= (que->msix | E1000_IVAR_VALID) << 16;
2577			} else {
2578				ivar &= 0xFFFFFF00;
2579				ivar |= que->msix | E1000_IVAR_VALID;
2580			}
2581			E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2582		}
2583		/* TX entries */
2584		for (int i = 0; i < adapter->num_queues; i++) {
2585			u32 index = i >> 1;
2586			ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2587			que = &adapter->queues[i];
2588			if (i & 1) {
2589				ivar &= 0x00FFFFFF;
2590				ivar |= (que->msix | E1000_IVAR_VALID) << 24;
2591			} else {
2592				ivar &= 0xFFFF00FF;
2593				ivar |= (que->msix | E1000_IVAR_VALID) << 8;
2594			}
2595			E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2596			adapter->que_mask |= que->eims;
2597		}
2598
2599		/* And for the link interrupt */
2600		ivar = (adapter->linkvec | E1000_IVAR_VALID) << 8;
2601		adapter->link_mask = 1 << adapter->linkvec;
2602		E1000_WRITE_REG(hw, E1000_IVAR_MISC, ivar);
2603		break;
2604	case e1000_82576:
2605		/* RX entries */
2606		for (int i = 0; i < adapter->num_queues; i++) {
2607			u32 index = i & 0x7; /* Each IVAR has two entries */
2608			ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2609			que = &adapter->queues[i];
2610			if (i < 8) {
2611				ivar &= 0xFFFFFF00;
2612				ivar |= que->msix | E1000_IVAR_VALID;
2613			} else {
2614				ivar &= 0xFF00FFFF;
2615				ivar |= (que->msix | E1000_IVAR_VALID) << 16;
2616			}
2617			E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2618			adapter->que_mask |= que->eims;
2619		}
2620		/* TX entries */
2621		for (int i = 0; i < adapter->num_queues; i++) {
2622			u32 index = i & 0x7; /* Each IVAR has two entries */
2623			ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2624			que = &adapter->queues[i];
2625			if (i < 8) {
2626				ivar &= 0xFFFF00FF;
2627				ivar |= (que->msix | E1000_IVAR_VALID) << 8;
2628			} else {
2629				ivar &= 0x00FFFFFF;
2630				ivar |= (que->msix | E1000_IVAR_VALID) << 24;
2631			}
2632			E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2633			adapter->que_mask |= que->eims;
2634		}
2635
2636		/* And for the link interrupt */
2637		ivar = (adapter->linkvec | E1000_IVAR_VALID) << 8;
2638		adapter->link_mask = 1 << adapter->linkvec;
2639		E1000_WRITE_REG(hw, E1000_IVAR_MISC, ivar);
2640		break;
2641
2642	case e1000_82575:
2643                /* enable MSI-X support*/
2644		tmp = E1000_READ_REG(hw, E1000_CTRL_EXT);
2645                tmp |= E1000_CTRL_EXT_PBA_CLR;
2646                /* Auto-Mask interrupts upon ICR read. */
2647                tmp |= E1000_CTRL_EXT_EIAME;
2648                tmp |= E1000_CTRL_EXT_IRCA;
2649                E1000_WRITE_REG(hw, E1000_CTRL_EXT, tmp);
2650
2651		/* Queues */
2652		for (int i = 0; i < adapter->num_queues; i++) {
2653			que = &adapter->queues[i];
2654			tmp = E1000_EICR_RX_QUEUE0 << i;
2655			tmp |= E1000_EICR_TX_QUEUE0 << i;
2656			que->eims = tmp;
2657			E1000_WRITE_REG_ARRAY(hw, E1000_MSIXBM(0),
2658			    i, que->eims);
2659			adapter->que_mask |= que->eims;
2660		}
2661
2662		/* Link */
2663		E1000_WRITE_REG(hw, E1000_MSIXBM(adapter->linkvec),
2664		    E1000_EIMS_OTHER);
2665		adapter->link_mask |= E1000_EIMS_OTHER;
2666	default:
2667		break;
2668	}
2669
2670	/* Set the starting interrupt rate */
2671	if (igb_max_interrupt_rate > 0)
2672		newitr = (4000000 / igb_max_interrupt_rate) & 0x7FFC;
2673
2674        if (hw->mac.type == e1000_82575)
2675                newitr |= newitr << 16;
2676        else
2677                newitr |= E1000_EITR_CNT_IGNR;
2678
2679	for (int i = 0; i < adapter->num_queues; i++) {
2680		que = &adapter->queues[i];
2681		E1000_WRITE_REG(hw, E1000_EITR(que->msix), newitr);
2682	}
2683
2684	return;
2685}
2686
2687
2688static void
2689igb_free_pci_resources(struct adapter *adapter)
2690{
2691	struct		igb_queue *que = adapter->queues;
2692	device_t	dev = adapter->dev;
2693	int		rid;
2694
2695	/*
2696	** There is a slight possibility of a failure mode
2697	** in attach that will result in entering this function
2698	** before interrupt resources have been initialized, and
2699	** in that case we do not want to execute the loops below
2700	** We can detect this reliably by the state of the adapter
2701	** res pointer.
2702	*/
2703	if (adapter->res == NULL)
2704		goto mem;
2705
2706	/*
2707	 * First release all the interrupt resources:
2708	 */
2709	for (int i = 0; i < adapter->num_queues; i++, que++) {
2710		rid = que->msix + 1;
2711		if (que->tag != NULL) {
2712			bus_teardown_intr(dev, que->res, que->tag);
2713			que->tag = NULL;
2714		}
2715		if (que->res != NULL)
2716			bus_release_resource(dev,
2717			    SYS_RES_IRQ, rid, que->res);
2718	}
2719
2720	/* Clean the Legacy or Link interrupt last */
2721	if (adapter->linkvec) /* we are doing MSIX */
2722		rid = adapter->linkvec + 1;
2723	else
2724		(adapter->msix != 0) ? (rid = 1):(rid = 0);
2725
2726	que = adapter->queues;
2727	if (adapter->tag != NULL) {
2728		taskqueue_drain(que->tq, &adapter->link_task);
2729		bus_teardown_intr(dev, adapter->res, adapter->tag);
2730		adapter->tag = NULL;
2731	}
2732	if (adapter->res != NULL)
2733		bus_release_resource(dev, SYS_RES_IRQ, rid, adapter->res);
2734
2735	for (int i = 0; i < adapter->num_queues; i++, que++) {
2736		if (que->tq != NULL) {
2737#if __FreeBSD_version >= 800000
2738			taskqueue_drain(que->tq, &que->txr->txq_task);
2739#endif
2740			taskqueue_drain(que->tq, &que->que_task);
2741			taskqueue_free(que->tq);
2742		}
2743	}
2744mem:
2745	if (adapter->msix)
2746		pci_release_msi(dev);
2747
2748	if (adapter->msix_mem != NULL)
2749		bus_release_resource(dev, SYS_RES_MEMORY,
2750		    PCIR_BAR(IGB_MSIX_BAR), adapter->msix_mem);
2751
2752	if (adapter->pci_mem != NULL)
2753		bus_release_resource(dev, SYS_RES_MEMORY,
2754		    PCIR_BAR(0), adapter->pci_mem);
2755
2756}
2757
2758/*
2759 * Setup Either MSI/X or MSI
2760 */
2761static int
2762igb_setup_msix(struct adapter *adapter)
2763{
2764	device_t dev = adapter->dev;
2765	int rid, want, queues, msgs;
2766
2767	/* tuneable override */
2768	if (igb_enable_msix == 0)
2769		goto msi;
2770
2771	/* First try MSI/X */
2772	rid = PCIR_BAR(IGB_MSIX_BAR);
2773	adapter->msix_mem = bus_alloc_resource_any(dev,
2774	    SYS_RES_MEMORY, &rid, RF_ACTIVE);
2775       	if (!adapter->msix_mem) {
2776		/* May not be enabled */
2777		device_printf(adapter->dev,
2778		    "Unable to map MSIX table \n");
2779		goto msi;
2780	}
2781
2782	msgs = pci_msix_count(dev);
2783	if (msgs == 0) { /* system has msix disabled */
2784		bus_release_resource(dev, SYS_RES_MEMORY,
2785		    PCIR_BAR(IGB_MSIX_BAR), adapter->msix_mem);
2786		adapter->msix_mem = NULL;
2787		goto msi;
2788	}
2789
2790	/* Figure out a reasonable auto config value */
2791	queues = (mp_ncpus > (msgs-1)) ? (msgs-1) : mp_ncpus;
2792
2793	/* Manual override */
2794	if (igb_num_queues != 0)
2795		queues = igb_num_queues;
2796	if (queues > 8)  /* max queues */
2797		queues = 8;
2798
2799	/* Can have max of 4 queues on 82575 */
2800	if ((adapter->hw.mac.type == e1000_82575) && (queues > 4))
2801		queues = 4;
2802
2803	/* Limit the VF devices to one queue */
2804	if (adapter->vf_ifp)
2805		queues = 1;
2806
2807	/*
2808	** One vector (RX/TX pair) per queue
2809	** plus an additional for Link interrupt
2810	*/
2811	want = queues + 1;
2812	if (msgs >= want)
2813		msgs = want;
2814	else {
2815               	device_printf(adapter->dev,
2816		    "MSIX Configuration Problem, "
2817		    "%d vectors configured, but %d queues wanted!\n",
2818		    msgs, want);
2819		return (0);
2820	}
2821	if ((msgs) && pci_alloc_msix(dev, &msgs) == 0) {
2822               	device_printf(adapter->dev,
2823		    "Using MSIX interrupts with %d vectors\n", msgs);
2824		adapter->num_queues = queues;
2825		return (msgs);
2826	}
2827msi:
2828       	msgs = pci_msi_count(dev);
2829	if (msgs == 1 && pci_alloc_msi(dev, &msgs) == 0) {
2830		device_printf(adapter->dev," Using MSI interrupt\n");
2831		return (msgs);
2832	}
2833	return (0);
2834}
2835
2836/*********************************************************************
2837 *
2838 *  Set up an fresh starting state
2839 *
2840 **********************************************************************/
2841static void
2842igb_reset(struct adapter *adapter)
2843{
2844	device_t	dev = adapter->dev;
2845	struct e1000_hw *hw = &adapter->hw;
2846	struct e1000_fc_info *fc = &hw->fc;
2847	struct ifnet	*ifp = adapter->ifp;
2848	u32		pba = 0;
2849	u16		hwm;
2850
2851	INIT_DEBUGOUT("igb_reset: begin");
2852
2853	/* Let the firmware know the OS is in control */
2854	igb_get_hw_control(adapter);
2855
2856	/*
2857	 * Packet Buffer Allocation (PBA)
2858	 * Writing PBA sets the receive portion of the buffer
2859	 * the remainder is used for the transmit buffer.
2860	 */
2861	switch (hw->mac.type) {
2862	case e1000_82575:
2863		pba = E1000_PBA_32K;
2864		break;
2865	case e1000_82576:
2866	case e1000_vfadapt:
2867		pba = E1000_READ_REG(hw, E1000_RXPBS);
2868		pba &= E1000_RXPBS_SIZE_MASK_82576;
2869		break;
2870	case e1000_82580:
2871	case e1000_i350:
2872	case e1000_vfadapt_i350:
2873		pba = E1000_READ_REG(hw, E1000_RXPBS);
2874		pba = e1000_rxpbs_adjust_82580(pba);
2875		break;
2876	default:
2877		break;
2878	}
2879
2880	/* Special needs in case of Jumbo frames */
2881	if ((hw->mac.type == e1000_82575) && (ifp->if_mtu > ETHERMTU)) {
2882		u32 tx_space, min_tx, min_rx;
2883		pba = E1000_READ_REG(hw, E1000_PBA);
2884		tx_space = pba >> 16;
2885		pba &= 0xffff;
2886		min_tx = (adapter->max_frame_size +
2887		    sizeof(struct e1000_tx_desc) - ETHERNET_FCS_SIZE) * 2;
2888		min_tx = roundup2(min_tx, 1024);
2889		min_tx >>= 10;
2890                min_rx = adapter->max_frame_size;
2891                min_rx = roundup2(min_rx, 1024);
2892                min_rx >>= 10;
2893		if (tx_space < min_tx &&
2894		    ((min_tx - tx_space) < pba)) {
2895			pba = pba - (min_tx - tx_space);
2896			/*
2897                         * if short on rx space, rx wins
2898                         * and must trump tx adjustment
2899			 */
2900                        if (pba < min_rx)
2901                                pba = min_rx;
2902		}
2903		E1000_WRITE_REG(hw, E1000_PBA, pba);
2904	}
2905
2906	INIT_DEBUGOUT1("igb_init: pba=%dK",pba);
2907
2908	/*
2909	 * These parameters control the automatic generation (Tx) and
2910	 * response (Rx) to Ethernet PAUSE frames.
2911	 * - High water mark should allow for at least two frames to be
2912	 *   received after sending an XOFF.
2913	 * - Low water mark works best when it is very near the high water mark.
2914	 *   This allows the receiver to restart by sending XON when it has
2915	 *   drained a bit.
2916	 */
2917	hwm = min(((pba << 10) * 9 / 10),
2918	    ((pba << 10) - 2 * adapter->max_frame_size));
2919
2920	if (hw->mac.type < e1000_82576) {
2921		fc->high_water = hwm & 0xFFF8;  /* 8-byte granularity */
2922		fc->low_water = fc->high_water - 8;
2923	} else {
2924		fc->high_water = hwm & 0xFFF0;  /* 16-byte granularity */
2925		fc->low_water = fc->high_water - 16;
2926	}
2927
2928	fc->pause_time = IGB_FC_PAUSE_TIME;
2929	fc->send_xon = TRUE;
2930	if (adapter->fc)
2931		fc->requested_mode = adapter->fc;
2932	else
2933		fc->requested_mode = e1000_fc_default;
2934
2935	/* Issue a global reset */
2936	e1000_reset_hw(hw);
2937	E1000_WRITE_REG(hw, E1000_WUC, 0);
2938
2939	if (e1000_init_hw(hw) < 0)
2940		device_printf(dev, "Hardware Initialization Failed\n");
2941
2942	/* Setup DMA Coalescing */
2943	if (hw->mac.type == e1000_i350) {
2944		u32 reg = ~E1000_DMACR_DMAC_EN;
2945
2946		if (adapter->dmac == 0) { /* Disabling it */
2947			E1000_WRITE_REG(hw, E1000_DMACR, reg);
2948			goto reset_out;
2949		}
2950
2951		hwm = (pba - 4) << 10;
2952		reg = (((pba-6) << E1000_DMACR_DMACTHR_SHIFT)
2953		    & E1000_DMACR_DMACTHR_MASK);
2954
2955		/* transition to L0x or L1 if available..*/
2956		reg |= (E1000_DMACR_DMAC_EN | E1000_DMACR_DMAC_LX_MASK);
2957
2958		/* timer = value in adapter->dmac in 32usec intervals */
2959		reg |= (adapter->dmac >> 5);
2960		E1000_WRITE_REG(hw, E1000_DMACR, reg);
2961
2962		/* No lower threshold */
2963		E1000_WRITE_REG(hw, E1000_DMCRTRH, 0);
2964
2965		/* set hwm to PBA -  2 * max frame size */
2966		E1000_WRITE_REG(hw, E1000_FCRTC, hwm);
2967
2968		/* Set the interval before transition */
2969		reg = E1000_READ_REG(hw, E1000_DMCTLX);
2970		reg |= 0x800000FF; /* 255 usec */
2971		E1000_WRITE_REG(hw, E1000_DMCTLX, reg);
2972
2973		/* free space in tx packet buffer to wake from DMA coal */
2974		E1000_WRITE_REG(hw, E1000_DMCTXTH,
2975		    (20480 - (2 * adapter->max_frame_size)) >> 6);
2976
2977		/* make low power state decision controlled by DMA coal */
2978		reg = E1000_READ_REG(hw, E1000_PCIEMISC);
2979		E1000_WRITE_REG(hw, E1000_PCIEMISC,
2980		    reg | E1000_PCIEMISC_LX_DECISION);
2981		device_printf(dev, "DMA Coalescing enabled\n");
2982	}
2983
2984reset_out:
2985	E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);
2986	e1000_get_phy_info(hw);
2987	e1000_check_for_link(hw);
2988	return;
2989}
2990
2991/*********************************************************************
2992 *
2993 *  Setup networking device structure and register an interface.
2994 *
2995 **********************************************************************/
2996static int
2997igb_setup_interface(device_t dev, struct adapter *adapter)
2998{
2999	struct ifnet   *ifp;
3000
3001	INIT_DEBUGOUT("igb_setup_interface: begin");
3002
3003	ifp = adapter->ifp = if_alloc(IFT_ETHER);
3004	if (ifp == NULL) {
3005		device_printf(dev, "can not allocate ifnet structure\n");
3006		return (-1);
3007	}
3008	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
3009	ifp->if_init =  igb_init;
3010	ifp->if_softc = adapter;
3011	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
3012	ifp->if_ioctl = igb_ioctl;
3013	ifp->if_start = igb_start;
3014#if __FreeBSD_version >= 800000
3015	ifp->if_transmit = igb_mq_start;
3016	ifp->if_qflush = igb_qflush;
3017#endif
3018	IFQ_SET_MAXLEN(&ifp->if_snd, adapter->num_tx_desc - 1);
3019	ifp->if_snd.ifq_drv_maxlen = adapter->num_tx_desc - 1;
3020	IFQ_SET_READY(&ifp->if_snd);
3021
3022	ether_ifattach(ifp, adapter->hw.mac.addr);
3023
3024	ifp->if_capabilities = ifp->if_capenable = 0;
3025
3026	ifp->if_capabilities = IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM;
3027	ifp->if_capabilities |= IFCAP_TSO4;
3028	ifp->if_capabilities |= IFCAP_JUMBO_MTU;
3029	ifp->if_capenable = ifp->if_capabilities;
3030
3031	/* Don't enable LRO by default */
3032	ifp->if_capabilities |= IFCAP_LRO;
3033
3034#ifdef DEVICE_POLLING
3035	ifp->if_capabilities |= IFCAP_POLLING;
3036#endif
3037
3038	/*
3039	 * Tell the upper layer(s) we
3040	 * support full VLAN capability.
3041	 */
3042	ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);
3043	ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING
3044			     |  IFCAP_VLAN_HWTSO
3045			     |  IFCAP_VLAN_MTU;
3046	ifp->if_capenable |= IFCAP_VLAN_HWTAGGING
3047			  |  IFCAP_VLAN_HWTSO
3048			  |  IFCAP_VLAN_MTU;
3049
3050	/*
3051	** Don't turn this on by default, if vlans are
3052	** created on another pseudo device (eg. lagg)
3053	** then vlan events are not passed thru, breaking
3054	** operation, but with HW FILTER off it works. If
3055	** using vlans directly on the igb driver you can
3056	** enable this and get full hardware tag filtering.
3057	*/
3058	ifp->if_capabilities |= IFCAP_VLAN_HWFILTER;
3059
3060	/*
3061	 * Specify the media types supported by this adapter and register
3062	 * callbacks to update media and link information
3063	 */
3064	ifmedia_init(&adapter->media, IFM_IMASK,
3065	    igb_media_change, igb_media_status);
3066	if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
3067	    (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
3068		ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_SX | IFM_FDX,
3069			    0, NULL);
3070		ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_SX, 0, NULL);
3071	} else {
3072		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T, 0, NULL);
3073		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T | IFM_FDX,
3074			    0, NULL);
3075		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX,
3076			    0, NULL);
3077		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
3078			    0, NULL);
3079		if (adapter->hw.phy.type != e1000_phy_ife) {
3080			ifmedia_add(&adapter->media,
3081				IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
3082			ifmedia_add(&adapter->media,
3083				IFM_ETHER | IFM_1000_T, 0, NULL);
3084		}
3085	}
3086	ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
3087	ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
3088	return (0);
3089}
3090
3091
3092/*
3093 * Manage DMA'able memory.
3094 */
3095static void
3096igb_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
3097{
3098	if (error)
3099		return;
3100	*(bus_addr_t *) arg = segs[0].ds_addr;
3101}
3102
3103static int
3104igb_dma_malloc(struct adapter *adapter, bus_size_t size,
3105        struct igb_dma_alloc *dma, int mapflags)
3106{
3107	int error;
3108
3109	error = bus_dma_tag_create(bus_get_dma_tag(adapter->dev), /* parent */
3110				IGB_DBA_ALIGN, 0,	/* alignment, bounds */
3111				BUS_SPACE_MAXADDR,	/* lowaddr */
3112				BUS_SPACE_MAXADDR,	/* highaddr */
3113				NULL, NULL,		/* filter, filterarg */
3114				size,			/* maxsize */
3115				1,			/* nsegments */
3116				size,			/* maxsegsize */
3117				0,			/* flags */
3118				NULL,			/* lockfunc */
3119				NULL,			/* lockarg */
3120				&dma->dma_tag);
3121	if (error) {
3122		device_printf(adapter->dev,
3123		    "%s: bus_dma_tag_create failed: %d\n",
3124		    __func__, error);
3125		goto fail_0;
3126	}
3127
3128	error = bus_dmamem_alloc(dma->dma_tag, (void**) &dma->dma_vaddr,
3129	    BUS_DMA_NOWAIT | BUS_DMA_COHERENT, &dma->dma_map);
3130	if (error) {
3131		device_printf(adapter->dev,
3132		    "%s: bus_dmamem_alloc(%ju) failed: %d\n",
3133		    __func__, (uintmax_t)size, error);
3134		goto fail_2;
3135	}
3136
3137	dma->dma_paddr = 0;
3138	error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
3139	    size, igb_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT);
3140	if (error || dma->dma_paddr == 0) {
3141		device_printf(adapter->dev,
3142		    "%s: bus_dmamap_load failed: %d\n",
3143		    __func__, error);
3144		goto fail_3;
3145	}
3146
3147	return (0);
3148
3149fail_3:
3150	bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3151fail_2:
3152	bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
3153	bus_dma_tag_destroy(dma->dma_tag);
3154fail_0:
3155	dma->dma_map = NULL;
3156	dma->dma_tag = NULL;
3157
3158	return (error);
3159}
3160
3161static void
3162igb_dma_free(struct adapter *adapter, struct igb_dma_alloc *dma)
3163{
3164	if (dma->dma_tag == NULL)
3165		return;
3166	if (dma->dma_map != NULL) {
3167		bus_dmamap_sync(dma->dma_tag, dma->dma_map,
3168		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
3169		bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3170		bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
3171		dma->dma_map = NULL;
3172	}
3173	bus_dma_tag_destroy(dma->dma_tag);
3174	dma->dma_tag = NULL;
3175}
3176
3177
3178/*********************************************************************
3179 *
3180 *  Allocate memory for the transmit and receive rings, and then
3181 *  the descriptors associated with each, called only once at attach.
3182 *
3183 **********************************************************************/
3184static int
3185igb_allocate_queues(struct adapter *adapter)
3186{
3187	device_t dev = adapter->dev;
3188	struct igb_queue	*que = NULL;
3189	struct tx_ring		*txr = NULL;
3190	struct rx_ring		*rxr = NULL;
3191	int rsize, tsize, error = E1000_SUCCESS;
3192	int txconf = 0, rxconf = 0;
3193
3194	/* First allocate the top level queue structs */
3195	if (!(adapter->queues =
3196	    (struct igb_queue *) malloc(sizeof(struct igb_queue) *
3197	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3198		device_printf(dev, "Unable to allocate queue memory\n");
3199		error = ENOMEM;
3200		goto fail;
3201	}
3202
3203	/* Next allocate the TX ring struct memory */
3204	if (!(adapter->tx_rings =
3205	    (struct tx_ring *) malloc(sizeof(struct tx_ring) *
3206	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3207		device_printf(dev, "Unable to allocate TX ring memory\n");
3208		error = ENOMEM;
3209		goto tx_fail;
3210	}
3211
3212	/* Now allocate the RX */
3213	if (!(adapter->rx_rings =
3214	    (struct rx_ring *) malloc(sizeof(struct rx_ring) *
3215	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3216		device_printf(dev, "Unable to allocate RX ring memory\n");
3217		error = ENOMEM;
3218		goto rx_fail;
3219	}
3220
3221	tsize = roundup2(adapter->num_tx_desc *
3222	    sizeof(union e1000_adv_tx_desc), IGB_DBA_ALIGN);
3223	/*
3224	 * Now set up the TX queues, txconf is needed to handle the
3225	 * possibility that things fail midcourse and we need to
3226	 * undo memory gracefully
3227	 */
3228	for (int i = 0; i < adapter->num_queues; i++, txconf++) {
3229		/* Set up some basics */
3230		txr = &adapter->tx_rings[i];
3231		txr->adapter = adapter;
3232		txr->me = i;
3233
3234		/* Initialize the TX lock */
3235		snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
3236		    device_get_nameunit(dev), txr->me);
3237		mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF);
3238
3239		if (igb_dma_malloc(adapter, tsize,
3240			&txr->txdma, BUS_DMA_NOWAIT)) {
3241			device_printf(dev,
3242			    "Unable to allocate TX Descriptor memory\n");
3243			error = ENOMEM;
3244			goto err_tx_desc;
3245		}
3246		txr->tx_base = (struct e1000_tx_desc *)txr->txdma.dma_vaddr;
3247		bzero((void *)txr->tx_base, tsize);
3248
3249        	/* Now allocate transmit buffers for the ring */
3250        	if (igb_allocate_transmit_buffers(txr)) {
3251			device_printf(dev,
3252			    "Critical Failure setting up transmit buffers\n");
3253			error = ENOMEM;
3254			goto err_tx_desc;
3255        	}
3256#if __FreeBSD_version >= 800000
3257		/* Allocate a buf ring */
3258		txr->br = buf_ring_alloc(IGB_BR_SIZE, M_DEVBUF,
3259		    M_WAITOK, &txr->tx_mtx);
3260#endif
3261	}
3262
3263	/*
3264	 * Next the RX queues...
3265	 */
3266	rsize = roundup2(adapter->num_rx_desc *
3267	    sizeof(union e1000_adv_rx_desc), IGB_DBA_ALIGN);
3268	for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
3269		rxr = &adapter->rx_rings[i];
3270		rxr->adapter = adapter;
3271		rxr->me = i;
3272
3273		/* Initialize the RX lock */
3274		snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
3275		    device_get_nameunit(dev), txr->me);
3276		mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF);
3277
3278		if (igb_dma_malloc(adapter, rsize,
3279			&rxr->rxdma, BUS_DMA_NOWAIT)) {
3280			device_printf(dev,
3281			    "Unable to allocate RxDescriptor memory\n");
3282			error = ENOMEM;
3283			goto err_rx_desc;
3284		}
3285		rxr->rx_base = (union e1000_adv_rx_desc *)rxr->rxdma.dma_vaddr;
3286		bzero((void *)rxr->rx_base, rsize);
3287
3288        	/* Allocate receive buffers for the ring*/
3289		if (igb_allocate_receive_buffers(rxr)) {
3290			device_printf(dev,
3291			    "Critical Failure setting up receive buffers\n");
3292			error = ENOMEM;
3293			goto err_rx_desc;
3294		}
3295	}
3296
3297	/*
3298	** Finally set up the queue holding structs
3299	*/
3300	for (int i = 0; i < adapter->num_queues; i++) {
3301		que = &adapter->queues[i];
3302		que->adapter = adapter;
3303		que->txr = &adapter->tx_rings[i];
3304		que->rxr = &adapter->rx_rings[i];
3305	}
3306
3307	return (0);
3308
3309err_rx_desc:
3310	for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
3311		igb_dma_free(adapter, &rxr->rxdma);
3312err_tx_desc:
3313	for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
3314		igb_dma_free(adapter, &txr->txdma);
3315	free(adapter->rx_rings, M_DEVBUF);
3316rx_fail:
3317#if __FreeBSD_version >= 800000
3318	buf_ring_free(txr->br, M_DEVBUF);
3319#endif
3320	free(adapter->tx_rings, M_DEVBUF);
3321tx_fail:
3322	free(adapter->queues, M_DEVBUF);
3323fail:
3324	return (error);
3325}
3326
3327/*********************************************************************
3328 *
3329 *  Allocate memory for tx_buffer structures. The tx_buffer stores all
3330 *  the information needed to transmit a packet on the wire. This is
3331 *  called only once at attach, setup is done every reset.
3332 *
3333 **********************************************************************/
3334static int
3335igb_allocate_transmit_buffers(struct tx_ring *txr)
3336{
3337	struct adapter *adapter = txr->adapter;
3338	device_t dev = adapter->dev;
3339	struct igb_tx_buffer *txbuf;
3340	int error, i;
3341
3342	/*
3343	 * Setup DMA descriptor areas.
3344	 */
3345	if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
3346			       1, 0,			/* alignment, bounds */
3347			       BUS_SPACE_MAXADDR,	/* lowaddr */
3348			       BUS_SPACE_MAXADDR,	/* highaddr */
3349			       NULL, NULL,		/* filter, filterarg */
3350			       IGB_TSO_SIZE,		/* maxsize */
3351			       IGB_MAX_SCATTER,		/* nsegments */
3352			       PAGE_SIZE,		/* maxsegsize */
3353			       0,			/* flags */
3354			       NULL,			/* lockfunc */
3355			       NULL,			/* lockfuncarg */
3356			       &txr->txtag))) {
3357		device_printf(dev,"Unable to allocate TX DMA tag\n");
3358		goto fail;
3359	}
3360
3361	if (!(txr->tx_buffers =
3362	    (struct igb_tx_buffer *) malloc(sizeof(struct igb_tx_buffer) *
3363	    adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3364		device_printf(dev, "Unable to allocate tx_buffer memory\n");
3365		error = ENOMEM;
3366		goto fail;
3367	}
3368
3369        /* Create the descriptor buffer dma maps */
3370	txbuf = txr->tx_buffers;
3371	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3372		error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
3373		if (error != 0) {
3374			device_printf(dev, "Unable to create TX DMA map\n");
3375			goto fail;
3376		}
3377	}
3378
3379	return 0;
3380fail:
3381	/* We free all, it handles case where we are in the middle */
3382	igb_free_transmit_structures(adapter);
3383	return (error);
3384}
3385
3386/*********************************************************************
3387 *
3388 *  Initialize a transmit ring.
3389 *
3390 **********************************************************************/
3391static void
3392igb_setup_transmit_ring(struct tx_ring *txr)
3393{
3394	struct adapter *adapter = txr->adapter;
3395	struct igb_tx_buffer *txbuf;
3396	int i;
3397#ifdef DEV_NETMAP
3398	struct netmap_adapter *na = NA(adapter->ifp);
3399	struct netmap_slot *slot;
3400#endif /* DEV_NETMAP */
3401
3402	/* Clear the old descriptor contents */
3403	IGB_TX_LOCK(txr);
3404#ifdef DEV_NETMAP
3405	slot = netmap_reset(na, NR_TX, txr->me, 0);
3406#endif /* DEV_NETMAP */
3407	bzero((void *)txr->tx_base,
3408	      (sizeof(union e1000_adv_tx_desc)) * adapter->num_tx_desc);
3409	/* Reset indices */
3410	txr->next_avail_desc = 0;
3411	txr->next_to_clean = 0;
3412
3413	/* Free any existing tx buffers. */
3414        txbuf = txr->tx_buffers;
3415	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3416		if (txbuf->m_head != NULL) {
3417			bus_dmamap_sync(txr->txtag, txbuf->map,
3418			    BUS_DMASYNC_POSTWRITE);
3419			bus_dmamap_unload(txr->txtag, txbuf->map);
3420			m_freem(txbuf->m_head);
3421			txbuf->m_head = NULL;
3422		}
3423#ifdef DEV_NETMAP
3424		if (slot) {
3425			int si = netmap_idx_n2k(&na->tx_rings[txr->me], i);
3426			/* no need to set the address */
3427			netmap_load_map(txr->txtag, txbuf->map, NMB(slot + si));
3428		}
3429#endif /* DEV_NETMAP */
3430		/* clear the watch index */
3431		txbuf->next_eop = -1;
3432        }
3433
3434	/* Set number of descriptors available */
3435	txr->tx_avail = adapter->num_tx_desc;
3436
3437	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3438	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3439	IGB_TX_UNLOCK(txr);
3440}
3441
3442/*********************************************************************
3443 *
3444 *  Initialize all transmit rings.
3445 *
3446 **********************************************************************/
3447static void
3448igb_setup_transmit_structures(struct adapter *adapter)
3449{
3450	struct tx_ring *txr = adapter->tx_rings;
3451
3452	for (int i = 0; i < adapter->num_queues; i++, txr++)
3453		igb_setup_transmit_ring(txr);
3454
3455	return;
3456}
3457
3458/*********************************************************************
3459 *
3460 *  Enable transmit unit.
3461 *
3462 **********************************************************************/
3463static void
3464igb_initialize_transmit_units(struct adapter *adapter)
3465{
3466	struct tx_ring	*txr = adapter->tx_rings;
3467	struct e1000_hw *hw = &adapter->hw;
3468	u32		tctl, txdctl;
3469
3470	INIT_DEBUGOUT("igb_initialize_transmit_units: begin");
3471	tctl = txdctl = 0;
3472
3473	/* Setup the Tx Descriptor Rings */
3474	for (int i = 0; i < adapter->num_queues; i++, txr++) {
3475		u64 bus_addr = txr->txdma.dma_paddr;
3476
3477		E1000_WRITE_REG(hw, E1000_TDLEN(i),
3478		    adapter->num_tx_desc * sizeof(struct e1000_tx_desc));
3479		E1000_WRITE_REG(hw, E1000_TDBAH(i),
3480		    (uint32_t)(bus_addr >> 32));
3481		E1000_WRITE_REG(hw, E1000_TDBAL(i),
3482		    (uint32_t)bus_addr);
3483
3484		/* Setup the HW Tx Head and Tail descriptor pointers */
3485		E1000_WRITE_REG(hw, E1000_TDT(i), 0);
3486		E1000_WRITE_REG(hw, E1000_TDH(i), 0);
3487
3488		HW_DEBUGOUT2("Base = %x, Length = %x\n",
3489		    E1000_READ_REG(hw, E1000_TDBAL(i)),
3490		    E1000_READ_REG(hw, E1000_TDLEN(i)));
3491
3492		txr->queue_status = IGB_QUEUE_IDLE;
3493
3494		txdctl |= IGB_TX_PTHRESH;
3495		txdctl |= IGB_TX_HTHRESH << 8;
3496		txdctl |= IGB_TX_WTHRESH << 16;
3497		txdctl |= E1000_TXDCTL_QUEUE_ENABLE;
3498		E1000_WRITE_REG(hw, E1000_TXDCTL(i), txdctl);
3499	}
3500
3501	if (adapter->vf_ifp)
3502		return;
3503
3504	e1000_config_collision_dist(hw);
3505
3506	/* Program the Transmit Control Register */
3507	tctl = E1000_READ_REG(hw, E1000_TCTL);
3508	tctl &= ~E1000_TCTL_CT;
3509	tctl |= (E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN |
3510		   (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT));
3511
3512	/* This write will effectively turn on the transmit unit. */
3513	E1000_WRITE_REG(hw, E1000_TCTL, tctl);
3514}
3515
3516/*********************************************************************
3517 *
3518 *  Free all transmit rings.
3519 *
3520 **********************************************************************/
3521static void
3522igb_free_transmit_structures(struct adapter *adapter)
3523{
3524	struct tx_ring *txr = adapter->tx_rings;
3525
3526	for (int i = 0; i < adapter->num_queues; i++, txr++) {
3527		IGB_TX_LOCK(txr);
3528		igb_free_transmit_buffers(txr);
3529		igb_dma_free(adapter, &txr->txdma);
3530		IGB_TX_UNLOCK(txr);
3531		IGB_TX_LOCK_DESTROY(txr);
3532	}
3533	free(adapter->tx_rings, M_DEVBUF);
3534}
3535
3536/*********************************************************************
3537 *
3538 *  Free transmit ring related data structures.
3539 *
3540 **********************************************************************/
3541static void
3542igb_free_transmit_buffers(struct tx_ring *txr)
3543{
3544	struct adapter *adapter = txr->adapter;
3545	struct igb_tx_buffer *tx_buffer;
3546	int             i;
3547
3548	INIT_DEBUGOUT("free_transmit_ring: begin");
3549
3550	if (txr->tx_buffers == NULL)
3551		return;
3552
3553	tx_buffer = txr->tx_buffers;
3554	for (i = 0; i < adapter->num_tx_desc; i++, tx_buffer++) {
3555		if (tx_buffer->m_head != NULL) {
3556			bus_dmamap_sync(txr->txtag, tx_buffer->map,
3557			    BUS_DMASYNC_POSTWRITE);
3558			bus_dmamap_unload(txr->txtag,
3559			    tx_buffer->map);
3560			m_freem(tx_buffer->m_head);
3561			tx_buffer->m_head = NULL;
3562			if (tx_buffer->map != NULL) {
3563				bus_dmamap_destroy(txr->txtag,
3564				    tx_buffer->map);
3565				tx_buffer->map = NULL;
3566			}
3567		} else if (tx_buffer->map != NULL) {
3568			bus_dmamap_unload(txr->txtag,
3569			    tx_buffer->map);
3570			bus_dmamap_destroy(txr->txtag,
3571			    tx_buffer->map);
3572			tx_buffer->map = NULL;
3573		}
3574	}
3575#if __FreeBSD_version >= 800000
3576	if (txr->br != NULL)
3577		buf_ring_free(txr->br, M_DEVBUF);
3578#endif
3579	if (txr->tx_buffers != NULL) {
3580		free(txr->tx_buffers, M_DEVBUF);
3581		txr->tx_buffers = NULL;
3582	}
3583	if (txr->txtag != NULL) {
3584		bus_dma_tag_destroy(txr->txtag);
3585		txr->txtag = NULL;
3586	}
3587	return;
3588}
3589
3590/**********************************************************************
3591 *
3592 *  Setup work for hardware segmentation offload (TSO)
3593 *
3594 **********************************************************************/
3595static bool
3596igb_tso_setup(struct tx_ring *txr, struct mbuf *mp, int ehdrlen,
3597	struct ip *ip, struct tcphdr *th)
3598{
3599	struct adapter *adapter = txr->adapter;
3600	struct e1000_adv_tx_context_desc *TXD;
3601	struct igb_tx_buffer        *tx_buffer;
3602	u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0;
3603	u32 mss_l4len_idx = 0;
3604	u16 vtag = 0;
3605	int ctxd, ip_hlen, tcp_hlen;
3606
3607	ctxd = txr->next_avail_desc;
3608	tx_buffer = &txr->tx_buffers[ctxd];
3609	TXD = (struct e1000_adv_tx_context_desc *) &txr->tx_base[ctxd];
3610
3611	ip->ip_sum = 0;
3612	ip_hlen = ip->ip_hl << 2;
3613	tcp_hlen = th->th_off << 2;
3614
3615	/* VLAN MACLEN IPLEN */
3616	if (mp->m_flags & M_VLANTAG) {
3617		vtag = htole16(mp->m_pkthdr.ether_vtag);
3618		vlan_macip_lens |= (vtag << E1000_ADVTXD_VLAN_SHIFT);
3619	}
3620
3621	vlan_macip_lens |= (ehdrlen << E1000_ADVTXD_MACLEN_SHIFT);
3622	vlan_macip_lens |= ip_hlen;
3623	TXD->vlan_macip_lens |= htole32(vlan_macip_lens);
3624
3625	/* ADV DTYPE TUCMD */
3626	type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
3627	type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP;
3628	type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4;
3629	TXD->type_tucmd_mlhl |= htole32(type_tucmd_mlhl);
3630
3631	/* MSS L4LEN IDX */
3632	mss_l4len_idx |= (mp->m_pkthdr.tso_segsz << E1000_ADVTXD_MSS_SHIFT);
3633	mss_l4len_idx |= (tcp_hlen << E1000_ADVTXD_L4LEN_SHIFT);
3634	/* 82575 needs the queue index added */
3635	if (adapter->hw.mac.type == e1000_82575)
3636		mss_l4len_idx |= txr->me << 4;
3637	TXD->mss_l4len_idx = htole32(mss_l4len_idx);
3638
3639	TXD->seqnum_seed = htole32(0);
3640	tx_buffer->m_head = NULL;
3641	tx_buffer->next_eop = -1;
3642
3643	if (++ctxd == adapter->num_tx_desc)
3644		ctxd = 0;
3645
3646	txr->tx_avail--;
3647	txr->next_avail_desc = ctxd;
3648	return TRUE;
3649}
3650
3651
3652/*********************************************************************
3653 *
3654 *  Context Descriptor setup for VLAN or CSUM
3655 *
3656 **********************************************************************/
3657
3658static bool
3659igb_tx_ctx_setup(struct tx_ring *txr, struct mbuf *mp)
3660{
3661	struct adapter *adapter = txr->adapter;
3662	struct e1000_adv_tx_context_desc *TXD;
3663	struct igb_tx_buffer        *tx_buffer;
3664	u32 vlan_macip_lens, type_tucmd_mlhl, mss_l4len_idx;
3665	struct ether_vlan_header *eh;
3666	struct ip *ip = NULL;
3667	struct ip6_hdr *ip6;
3668	int  ehdrlen, ctxd, ip_hlen = 0;
3669	u16	etype, vtag = 0;
3670	u8	ipproto = 0;
3671	bool	offload = TRUE;
3672
3673	if ((mp->m_pkthdr.csum_flags & CSUM_OFFLOAD) == 0)
3674		offload = FALSE;
3675
3676	vlan_macip_lens = type_tucmd_mlhl = mss_l4len_idx = 0;
3677	ctxd = txr->next_avail_desc;
3678	tx_buffer = &txr->tx_buffers[ctxd];
3679	TXD = (struct e1000_adv_tx_context_desc *) &txr->tx_base[ctxd];
3680
3681	/*
3682	** In advanced descriptors the vlan tag must
3683	** be placed into the context descriptor, thus
3684	** we need to be here just for that setup.
3685	*/
3686	if (mp->m_flags & M_VLANTAG) {
3687		vtag = htole16(mp->m_pkthdr.ether_vtag);
3688		vlan_macip_lens |= (vtag << E1000_ADVTXD_VLAN_SHIFT);
3689	} else if (offload == FALSE)
3690		return FALSE;
3691
3692	/*
3693	 * Determine where frame payload starts.
3694	 * Jump over vlan headers if already present,
3695	 * helpful for QinQ too.
3696	 */
3697	eh = mtod(mp, struct ether_vlan_header *);
3698	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
3699		etype = ntohs(eh->evl_proto);
3700		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3701	} else {
3702		etype = ntohs(eh->evl_encap_proto);
3703		ehdrlen = ETHER_HDR_LEN;
3704	}
3705
3706	/* Set the ether header length */
3707	vlan_macip_lens |= ehdrlen << E1000_ADVTXD_MACLEN_SHIFT;
3708
3709	switch (etype) {
3710		case ETHERTYPE_IP:
3711			ip = (struct ip *)(mp->m_data + ehdrlen);
3712			ip_hlen = ip->ip_hl << 2;
3713			if (mp->m_len < ehdrlen + ip_hlen) {
3714				offload = FALSE;
3715				break;
3716			}
3717			ipproto = ip->ip_p;
3718			type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4;
3719			break;
3720		case ETHERTYPE_IPV6:
3721			ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
3722			ip_hlen = sizeof(struct ip6_hdr);
3723			ipproto = ip6->ip6_nxt;
3724			type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV6;
3725			break;
3726		default:
3727			offload = FALSE;
3728			break;
3729	}
3730
3731	vlan_macip_lens |= ip_hlen;
3732	type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
3733
3734	switch (ipproto) {
3735		case IPPROTO_TCP:
3736			if (mp->m_pkthdr.csum_flags & CSUM_TCP)
3737				type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP;
3738			break;
3739		case IPPROTO_UDP:
3740			if (mp->m_pkthdr.csum_flags & CSUM_UDP)
3741				type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_UDP;
3742			break;
3743#if __FreeBSD_version >= 800000
3744		case IPPROTO_SCTP:
3745			if (mp->m_pkthdr.csum_flags & CSUM_SCTP)
3746				type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_SCTP;
3747			break;
3748#endif
3749		default:
3750			offload = FALSE;
3751			break;
3752	}
3753
3754	/* 82575 needs the queue index added */
3755	if (adapter->hw.mac.type == e1000_82575)
3756		mss_l4len_idx = txr->me << 4;
3757
3758	/* Now copy bits into descriptor */
3759	TXD->vlan_macip_lens |= htole32(vlan_macip_lens);
3760	TXD->type_tucmd_mlhl |= htole32(type_tucmd_mlhl);
3761	TXD->seqnum_seed = htole32(0);
3762	TXD->mss_l4len_idx = htole32(mss_l4len_idx);
3763
3764	tx_buffer->m_head = NULL;
3765	tx_buffer->next_eop = -1;
3766
3767	/* We've consumed the first desc, adjust counters */
3768	if (++ctxd == adapter->num_tx_desc)
3769		ctxd = 0;
3770	txr->next_avail_desc = ctxd;
3771	--txr->tx_avail;
3772
3773        return (offload);
3774}
3775
3776
3777/**********************************************************************
3778 *
3779 *  Examine each tx_buffer in the used queue. If the hardware is done
3780 *  processing the packet then free associated resources. The
3781 *  tx_buffer is put back on the free queue.
3782 *
3783 *  TRUE return means there's work in the ring to clean, FALSE its empty.
3784 **********************************************************************/
3785static bool
3786igb_txeof(struct tx_ring *txr)
3787{
3788	struct adapter	*adapter = txr->adapter;
3789        int first, last, done, processed;
3790        struct igb_tx_buffer *tx_buffer;
3791        struct e1000_tx_desc   *tx_desc, *eop_desc;
3792	struct ifnet   *ifp = adapter->ifp;
3793
3794	IGB_TX_LOCK_ASSERT(txr);
3795
3796#ifdef DEV_NETMAP
3797	if (ifp->if_capenable & IFCAP_NETMAP) {
3798		struct netmap_adapter *na = NA(ifp);
3799
3800		selwakeuppri(&na->tx_rings[txr->me].si, PI_NET);
3801		IGB_TX_UNLOCK(txr);
3802		IGB_CORE_LOCK(adapter);
3803		selwakeuppri(&na->tx_si, PI_NET);
3804		IGB_CORE_UNLOCK(adapter);
3805		IGB_TX_LOCK(txr);
3806		return FALSE;
3807	}
3808#endif /* DEV_NETMAP */
3809        if (txr->tx_avail == adapter->num_tx_desc) {
3810		txr->queue_status = IGB_QUEUE_IDLE;
3811                return FALSE;
3812	}
3813
3814	processed = 0;
3815        first = txr->next_to_clean;
3816        tx_desc = &txr->tx_base[first];
3817        tx_buffer = &txr->tx_buffers[first];
3818	last = tx_buffer->next_eop;
3819        eop_desc = &txr->tx_base[last];
3820
3821	/*
3822	 * What this does is get the index of the
3823	 * first descriptor AFTER the EOP of the
3824	 * first packet, that way we can do the
3825	 * simple comparison on the inner while loop.
3826	 */
3827	if (++last == adapter->num_tx_desc)
3828 		last = 0;
3829	done = last;
3830
3831        bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3832            BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
3833
3834        while (eop_desc->upper.fields.status & E1000_TXD_STAT_DD) {
3835		/* We clean the range of the packet */
3836		while (first != done) {
3837                	tx_desc->upper.data = 0;
3838                	tx_desc->lower.data = 0;
3839                	tx_desc->buffer_addr = 0;
3840                	++txr->tx_avail;
3841			++processed;
3842
3843			if (tx_buffer->m_head) {
3844				txr->bytes +=
3845				    tx_buffer->m_head->m_pkthdr.len;
3846				bus_dmamap_sync(txr->txtag,
3847				    tx_buffer->map,
3848				    BUS_DMASYNC_POSTWRITE);
3849				bus_dmamap_unload(txr->txtag,
3850				    tx_buffer->map);
3851
3852                        	m_freem(tx_buffer->m_head);
3853                        	tx_buffer->m_head = NULL;
3854                	}
3855			tx_buffer->next_eop = -1;
3856			txr->watchdog_time = ticks;
3857
3858	                if (++first == adapter->num_tx_desc)
3859				first = 0;
3860
3861	                tx_buffer = &txr->tx_buffers[first];
3862			tx_desc = &txr->tx_base[first];
3863		}
3864		++txr->packets;
3865		++ifp->if_opackets;
3866		/* See if we can continue to the next packet */
3867		last = tx_buffer->next_eop;
3868		if (last != -1) {
3869        		eop_desc = &txr->tx_base[last];
3870			/* Get new done point */
3871			if (++last == adapter->num_tx_desc) last = 0;
3872			done = last;
3873		} else
3874			break;
3875        }
3876        bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3877            BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3878
3879        txr->next_to_clean = first;
3880
3881	/*
3882	** Watchdog calculation, we know there's
3883	** work outstanding or the first return
3884	** would have been taken, so none processed
3885	** for too long indicates a hang.
3886	*/
3887	if ((!processed) && ((ticks - txr->watchdog_time) > IGB_WATCHDOG))
3888		txr->queue_status |= IGB_QUEUE_HUNG;
3889        /*
3890         * If we have a minimum free,
3891         * clear depleted state bit
3892         */
3893        if (txr->tx_avail >= IGB_QUEUE_THRESHOLD)
3894                txr->queue_status &= ~IGB_QUEUE_DEPLETED;
3895
3896	/* All clean, turn off the watchdog */
3897	if (txr->tx_avail == adapter->num_tx_desc) {
3898		txr->queue_status = IGB_QUEUE_IDLE;
3899		return (FALSE);
3900        }
3901
3902	return (TRUE);
3903}
3904
3905/*********************************************************************
3906 *
3907 *  Refresh mbuf buffers for RX descriptor rings
3908 *   - now keeps its own state so discards due to resource
3909 *     exhaustion are unnecessary, if an mbuf cannot be obtained
3910 *     it just returns, keeping its placeholder, thus it can simply
3911 *     be recalled to try again.
3912 *
3913 **********************************************************************/
3914static void
3915igb_refresh_mbufs(struct rx_ring *rxr, int limit)
3916{
3917	struct adapter		*adapter = rxr->adapter;
3918	bus_dma_segment_t	hseg[1];
3919	bus_dma_segment_t	pseg[1];
3920	struct igb_rx_buf	*rxbuf;
3921	struct mbuf		*mh, *mp;
3922	int			i, j, nsegs, error;
3923	bool			refreshed = FALSE;
3924
3925	i = j = rxr->next_to_refresh;
3926	/*
3927	** Get one descriptor beyond
3928	** our work mark to control
3929	** the loop.
3930        */
3931	if (++j == adapter->num_rx_desc)
3932		j = 0;
3933
3934	while (j != limit) {
3935		rxbuf = &rxr->rx_buffers[i];
3936		/* No hdr mbuf used with header split off */
3937		if (rxr->hdr_split == FALSE)
3938			goto no_split;
3939		if (rxbuf->m_head == NULL) {
3940			mh = m_gethdr(M_DONTWAIT, MT_DATA);
3941			if (mh == NULL)
3942				goto update;
3943		} else
3944			mh = rxbuf->m_head;
3945
3946		mh->m_pkthdr.len = mh->m_len = MHLEN;
3947		mh->m_len = MHLEN;
3948		mh->m_flags |= M_PKTHDR;
3949		/* Get the memory mapping */
3950		error = bus_dmamap_load_mbuf_sg(rxr->htag,
3951		    rxbuf->hmap, mh, hseg, &nsegs, BUS_DMA_NOWAIT);
3952		if (error != 0) {
3953			printf("Refresh mbufs: hdr dmamap load"
3954			    " failure - %d\n", error);
3955			m_free(mh);
3956			rxbuf->m_head = NULL;
3957			goto update;
3958		}
3959		rxbuf->m_head = mh;
3960		bus_dmamap_sync(rxr->htag, rxbuf->hmap,
3961		    BUS_DMASYNC_PREREAD);
3962		rxr->rx_base[i].read.hdr_addr =
3963		    htole64(hseg[0].ds_addr);
3964no_split:
3965		if (rxbuf->m_pack == NULL) {
3966			mp = m_getjcl(M_DONTWAIT, MT_DATA,
3967			    M_PKTHDR, adapter->rx_mbuf_sz);
3968			if (mp == NULL)
3969				goto update;
3970		} else
3971			mp = rxbuf->m_pack;
3972
3973		mp->m_pkthdr.len = mp->m_len = adapter->rx_mbuf_sz;
3974		/* Get the memory mapping */
3975		error = bus_dmamap_load_mbuf_sg(rxr->ptag,
3976		    rxbuf->pmap, mp, pseg, &nsegs, BUS_DMA_NOWAIT);
3977		if (error != 0) {
3978			printf("Refresh mbufs: payload dmamap load"
3979			    " failure - %d\n", error);
3980			m_free(mp);
3981			rxbuf->m_pack = NULL;
3982			goto update;
3983		}
3984		rxbuf->m_pack = mp;
3985		bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
3986		    BUS_DMASYNC_PREREAD);
3987		rxr->rx_base[i].read.pkt_addr =
3988		    htole64(pseg[0].ds_addr);
3989		refreshed = TRUE; /* I feel wefreshed :) */
3990
3991		i = j; /* our next is precalculated */
3992		rxr->next_to_refresh = i;
3993		if (++j == adapter->num_rx_desc)
3994			j = 0;
3995	}
3996update:
3997	if (refreshed) /* update tail */
3998		E1000_WRITE_REG(&adapter->hw,
3999		    E1000_RDT(rxr->me), rxr->next_to_refresh);
4000	return;
4001}
4002
4003
4004/*********************************************************************
4005 *
4006 *  Allocate memory for rx_buffer structures. Since we use one
4007 *  rx_buffer per received packet, the maximum number of rx_buffer's
4008 *  that we'll need is equal to the number of receive descriptors
4009 *  that we've allocated.
4010 *
4011 **********************************************************************/
4012static int
4013igb_allocate_receive_buffers(struct rx_ring *rxr)
4014{
4015	struct	adapter 	*adapter = rxr->adapter;
4016	device_t 		dev = adapter->dev;
4017	struct igb_rx_buf	*rxbuf;
4018	int             	i, bsize, error;
4019
4020	bsize = sizeof(struct igb_rx_buf) * adapter->num_rx_desc;
4021	if (!(rxr->rx_buffers =
4022	    (struct igb_rx_buf *) malloc(bsize,
4023	    M_DEVBUF, M_NOWAIT | M_ZERO))) {
4024		device_printf(dev, "Unable to allocate rx_buffer memory\n");
4025		error = ENOMEM;
4026		goto fail;
4027	}
4028
4029	if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
4030				   1, 0,		/* alignment, bounds */
4031				   BUS_SPACE_MAXADDR,	/* lowaddr */
4032				   BUS_SPACE_MAXADDR,	/* highaddr */
4033				   NULL, NULL,		/* filter, filterarg */
4034				   MSIZE,		/* maxsize */
4035				   1,			/* nsegments */
4036				   MSIZE,		/* maxsegsize */
4037				   0,			/* flags */
4038				   NULL,		/* lockfunc */
4039				   NULL,		/* lockfuncarg */
4040				   &rxr->htag))) {
4041		device_printf(dev, "Unable to create RX DMA tag\n");
4042		goto fail;
4043	}
4044
4045	if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
4046				   1, 0,		/* alignment, bounds */
4047				   BUS_SPACE_MAXADDR,	/* lowaddr */
4048				   BUS_SPACE_MAXADDR,	/* highaddr */
4049				   NULL, NULL,		/* filter, filterarg */
4050				   MJUM9BYTES,		/* maxsize */
4051				   1,			/* nsegments */
4052				   MJUM9BYTES,		/* maxsegsize */
4053				   0,			/* flags */
4054				   NULL,		/* lockfunc */
4055				   NULL,		/* lockfuncarg */
4056				   &rxr->ptag))) {
4057		device_printf(dev, "Unable to create RX payload DMA tag\n");
4058		goto fail;
4059	}
4060
4061	for (i = 0; i < adapter->num_rx_desc; i++) {
4062		rxbuf = &rxr->rx_buffers[i];
4063		error = bus_dmamap_create(rxr->htag,
4064		    BUS_DMA_NOWAIT, &rxbuf->hmap);
4065		if (error) {
4066			device_printf(dev,
4067			    "Unable to create RX head DMA maps\n");
4068			goto fail;
4069		}
4070		error = bus_dmamap_create(rxr->ptag,
4071		    BUS_DMA_NOWAIT, &rxbuf->pmap);
4072		if (error) {
4073			device_printf(dev,
4074			    "Unable to create RX packet DMA maps\n");
4075			goto fail;
4076		}
4077	}
4078
4079	return (0);
4080
4081fail:
4082	/* Frees all, but can handle partial completion */
4083	igb_free_receive_structures(adapter);
4084	return (error);
4085}
4086
4087
4088static void
4089igb_free_receive_ring(struct rx_ring *rxr)
4090{
4091	struct	adapter		*adapter = rxr->adapter;
4092	struct igb_rx_buf	*rxbuf;
4093
4094
4095	for (int i = 0; i < adapter->num_rx_desc; i++) {
4096		rxbuf = &rxr->rx_buffers[i];
4097		if (rxbuf->m_head != NULL) {
4098			bus_dmamap_sync(rxr->htag, rxbuf->hmap,
4099			    BUS_DMASYNC_POSTREAD);
4100			bus_dmamap_unload(rxr->htag, rxbuf->hmap);
4101			rxbuf->m_head->m_flags |= M_PKTHDR;
4102			m_freem(rxbuf->m_head);
4103		}
4104		if (rxbuf->m_pack != NULL) {
4105			bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
4106			    BUS_DMASYNC_POSTREAD);
4107			bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
4108			rxbuf->m_pack->m_flags |= M_PKTHDR;
4109			m_freem(rxbuf->m_pack);
4110		}
4111		rxbuf->m_head = NULL;
4112		rxbuf->m_pack = NULL;
4113	}
4114}
4115
4116
4117/*********************************************************************
4118 *
4119 *  Initialize a receive ring and its buffers.
4120 *
4121 **********************************************************************/
4122static int
4123igb_setup_receive_ring(struct rx_ring *rxr)
4124{
4125	struct	adapter		*adapter;
4126	struct  ifnet		*ifp;
4127	device_t		dev;
4128	struct igb_rx_buf	*rxbuf;
4129	bus_dma_segment_t	pseg[1], hseg[1];
4130	struct lro_ctrl		*lro = &rxr->lro;
4131	int			rsize, nsegs, error = 0;
4132#ifdef DEV_NETMAP
4133	struct netmap_adapter *na = NA(rxr->adapter->ifp);
4134	struct netmap_slot *slot;
4135#endif /* DEV_NETMAP */
4136
4137	adapter = rxr->adapter;
4138	dev = adapter->dev;
4139	ifp = adapter->ifp;
4140
4141	/* Clear the ring contents */
4142	IGB_RX_LOCK(rxr);
4143#ifdef DEV_NETMAP
4144	slot = netmap_reset(na, NR_RX, rxr->me, 0);
4145#endif /* DEV_NETMAP */
4146	rsize = roundup2(adapter->num_rx_desc *
4147	    sizeof(union e1000_adv_rx_desc), IGB_DBA_ALIGN);
4148	bzero((void *)rxr->rx_base, rsize);
4149
4150	/*
4151	** Free current RX buffer structures and their mbufs
4152	*/
4153	igb_free_receive_ring(rxr);
4154
4155	/* Configure for header split? */
4156	if (igb_header_split)
4157		rxr->hdr_split = TRUE;
4158
4159        /* Now replenish the ring mbufs */
4160	for (int j = 0; j < adapter->num_rx_desc; ++j) {
4161		struct mbuf	*mh, *mp;
4162
4163		rxbuf = &rxr->rx_buffers[j];
4164#ifdef DEV_NETMAP
4165		if (slot) {
4166			/* slot sj is mapped to the i-th NIC-ring entry */
4167			int sj = netmap_idx_n2k(&na->rx_rings[rxr->me], j);
4168			uint64_t paddr;
4169			void *addr;
4170
4171			addr = PNMB(slot + sj, &paddr);
4172			netmap_load_map(rxr->ptag, rxbuf->pmap, addr);
4173			/* Update descriptor */
4174			rxr->rx_base[j].read.pkt_addr = htole64(paddr);
4175			continue;
4176		}
4177#endif /* DEV_NETMAP */
4178		if (rxr->hdr_split == FALSE)
4179			goto skip_head;
4180
4181		/* First the header */
4182		rxbuf->m_head = m_gethdr(M_DONTWAIT, MT_DATA);
4183		if (rxbuf->m_head == NULL) {
4184			error = ENOBUFS;
4185                        goto fail;
4186		}
4187		m_adj(rxbuf->m_head, ETHER_ALIGN);
4188		mh = rxbuf->m_head;
4189		mh->m_len = mh->m_pkthdr.len = MHLEN;
4190		mh->m_flags |= M_PKTHDR;
4191		/* Get the memory mapping */
4192		error = bus_dmamap_load_mbuf_sg(rxr->htag,
4193		    rxbuf->hmap, rxbuf->m_head, hseg,
4194		    &nsegs, BUS_DMA_NOWAIT);
4195		if (error != 0) /* Nothing elegant to do here */
4196                        goto fail;
4197		bus_dmamap_sync(rxr->htag,
4198		    rxbuf->hmap, BUS_DMASYNC_PREREAD);
4199		/* Update descriptor */
4200		rxr->rx_base[j].read.hdr_addr = htole64(hseg[0].ds_addr);
4201
4202skip_head:
4203		/* Now the payload cluster */
4204		rxbuf->m_pack = m_getjcl(M_DONTWAIT, MT_DATA,
4205		    M_PKTHDR, adapter->rx_mbuf_sz);
4206		if (rxbuf->m_pack == NULL) {
4207			error = ENOBUFS;
4208                        goto fail;
4209		}
4210		mp = rxbuf->m_pack;
4211		mp->m_pkthdr.len = mp->m_len = adapter->rx_mbuf_sz;
4212		/* Get the memory mapping */
4213		error = bus_dmamap_load_mbuf_sg(rxr->ptag,
4214		    rxbuf->pmap, mp, pseg,
4215		    &nsegs, BUS_DMA_NOWAIT);
4216		if (error != 0)
4217                        goto fail;
4218		bus_dmamap_sync(rxr->ptag,
4219		    rxbuf->pmap, BUS_DMASYNC_PREREAD);
4220		/* Update descriptor */
4221		rxr->rx_base[j].read.pkt_addr = htole64(pseg[0].ds_addr);
4222        }
4223
4224	/* Setup our descriptor indices */
4225	rxr->next_to_check = 0;
4226	rxr->next_to_refresh = adapter->num_rx_desc - 1;
4227	rxr->lro_enabled = FALSE;
4228	rxr->rx_split_packets = 0;
4229	rxr->rx_bytes = 0;
4230
4231	rxr->fmp = NULL;
4232	rxr->lmp = NULL;
4233	rxr->discard = FALSE;
4234
4235	bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4236	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4237
4238	/*
4239	** Now set up the LRO interface, we
4240	** also only do head split when LRO
4241	** is enabled, since so often they
4242	** are undesireable in similar setups.
4243	*/
4244	if (ifp->if_capenable & IFCAP_LRO) {
4245		error = tcp_lro_init(lro);
4246		if (error) {
4247			device_printf(dev, "LRO Initialization failed!\n");
4248			goto fail;
4249		}
4250		INIT_DEBUGOUT("RX LRO Initialized\n");
4251		rxr->lro_enabled = TRUE;
4252		lro->ifp = adapter->ifp;
4253	}
4254
4255	IGB_RX_UNLOCK(rxr);
4256	return (0);
4257
4258fail:
4259	igb_free_receive_ring(rxr);
4260	IGB_RX_UNLOCK(rxr);
4261	return (error);
4262}
4263
4264
4265/*********************************************************************
4266 *
4267 *  Initialize all receive rings.
4268 *
4269 **********************************************************************/
4270static int
4271igb_setup_receive_structures(struct adapter *adapter)
4272{
4273	struct rx_ring *rxr = adapter->rx_rings;
4274	int i;
4275
4276	for (i = 0; i < adapter->num_queues; i++, rxr++)
4277		if (igb_setup_receive_ring(rxr))
4278			goto fail;
4279
4280	return (0);
4281fail:
4282	/*
4283	 * Free RX buffers allocated so far, we will only handle
4284	 * the rings that completed, the failing case will have
4285	 * cleaned up for itself. 'i' is the endpoint.
4286	 */
4287	for (int j = 0; j > i; ++j) {
4288		rxr = &adapter->rx_rings[i];
4289		IGB_RX_LOCK(rxr);
4290		igb_free_receive_ring(rxr);
4291		IGB_RX_UNLOCK(rxr);
4292	}
4293
4294	return (ENOBUFS);
4295}
4296
4297/*********************************************************************
4298 *
4299 *  Enable receive unit.
4300 *
4301 **********************************************************************/
4302static void
4303igb_initialize_receive_units(struct adapter *adapter)
4304{
4305	struct rx_ring	*rxr = adapter->rx_rings;
4306	struct ifnet	*ifp = adapter->ifp;
4307	struct e1000_hw *hw = &adapter->hw;
4308	u32		rctl, rxcsum, psize, srrctl = 0;
4309
4310	INIT_DEBUGOUT("igb_initialize_receive_unit: begin");
4311
4312	/*
4313	 * Make sure receives are disabled while setting
4314	 * up the descriptor ring
4315	 */
4316	rctl = E1000_READ_REG(hw, E1000_RCTL);
4317	E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
4318
4319	/*
4320	** Set up for header split
4321	*/
4322	if (igb_header_split) {
4323		/* Use a standard mbuf for the header */
4324		srrctl |= IGB_HDR_BUF << E1000_SRRCTL_BSIZEHDRSIZE_SHIFT;
4325		srrctl |= E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
4326	} else
4327		srrctl |= E1000_SRRCTL_DESCTYPE_ADV_ONEBUF;
4328
4329	/*
4330	** Set up for jumbo frames
4331	*/
4332	if (ifp->if_mtu > ETHERMTU) {
4333		rctl |= E1000_RCTL_LPE;
4334		if (adapter->rx_mbuf_sz == MJUMPAGESIZE) {
4335			srrctl |= 4096 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
4336			rctl |= E1000_RCTL_SZ_4096 | E1000_RCTL_BSEX;
4337		} else if (adapter->rx_mbuf_sz > MJUMPAGESIZE) {
4338			srrctl |= 8192 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
4339			rctl |= E1000_RCTL_SZ_8192 | E1000_RCTL_BSEX;
4340		}
4341		/* Set maximum packet len */
4342		psize = adapter->max_frame_size;
4343		/* are we on a vlan? */
4344		if (adapter->ifp->if_vlantrunk != NULL)
4345			psize += VLAN_TAG_SIZE;
4346		E1000_WRITE_REG(&adapter->hw, E1000_RLPML, psize);
4347	} else {
4348		rctl &= ~E1000_RCTL_LPE;
4349		srrctl |= 2048 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
4350		rctl |= E1000_RCTL_SZ_2048;
4351	}
4352
4353	/* Setup the Base and Length of the Rx Descriptor Rings */
4354	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4355		u64 bus_addr = rxr->rxdma.dma_paddr;
4356		u32 rxdctl;
4357
4358		E1000_WRITE_REG(hw, E1000_RDLEN(i),
4359		    adapter->num_rx_desc * sizeof(struct e1000_rx_desc));
4360		E1000_WRITE_REG(hw, E1000_RDBAH(i),
4361		    (uint32_t)(bus_addr >> 32));
4362		E1000_WRITE_REG(hw, E1000_RDBAL(i),
4363		    (uint32_t)bus_addr);
4364		E1000_WRITE_REG(hw, E1000_SRRCTL(i), srrctl);
4365		/* Enable this Queue */
4366		rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(i));
4367		rxdctl |= E1000_RXDCTL_QUEUE_ENABLE;
4368		rxdctl &= 0xFFF00000;
4369		rxdctl |= IGB_RX_PTHRESH;
4370		rxdctl |= IGB_RX_HTHRESH << 8;
4371		rxdctl |= IGB_RX_WTHRESH << 16;
4372		E1000_WRITE_REG(hw, E1000_RXDCTL(i), rxdctl);
4373	}
4374
4375	/*
4376	** Setup for RX MultiQueue
4377	*/
4378	rxcsum = E1000_READ_REG(hw, E1000_RXCSUM);
4379	if (adapter->num_queues >1) {
4380		u32 random[10], mrqc, shift = 0;
4381		union igb_reta {
4382			u32 dword;
4383			u8  bytes[4];
4384		} reta;
4385
4386		arc4rand(&random, sizeof(random), 0);
4387		if (adapter->hw.mac.type == e1000_82575)
4388			shift = 6;
4389		/* Warning FM follows */
4390		for (int i = 0; i < 128; i++) {
4391			reta.bytes[i & 3] =
4392			    (i % adapter->num_queues) << shift;
4393			if ((i & 3) == 3)
4394				E1000_WRITE_REG(hw,
4395				    E1000_RETA(i >> 2), reta.dword);
4396		}
4397		/* Now fill in hash table */
4398		mrqc = E1000_MRQC_ENABLE_RSS_4Q;
4399		for (int i = 0; i < 10; i++)
4400			E1000_WRITE_REG_ARRAY(hw,
4401			    E1000_RSSRK(0), i, random[i]);
4402
4403		mrqc |= (E1000_MRQC_RSS_FIELD_IPV4 |
4404		    E1000_MRQC_RSS_FIELD_IPV4_TCP);
4405		mrqc |= (E1000_MRQC_RSS_FIELD_IPV6 |
4406		    E1000_MRQC_RSS_FIELD_IPV6_TCP);
4407		mrqc |=( E1000_MRQC_RSS_FIELD_IPV4_UDP |
4408		    E1000_MRQC_RSS_FIELD_IPV6_UDP);
4409		mrqc |=( E1000_MRQC_RSS_FIELD_IPV6_UDP_EX |
4410		    E1000_MRQC_RSS_FIELD_IPV6_TCP_EX);
4411
4412		E1000_WRITE_REG(hw, E1000_MRQC, mrqc);
4413
4414		/*
4415		** NOTE: Receive Full-Packet Checksum Offload
4416		** is mutually exclusive with Multiqueue. However
4417		** this is not the same as TCP/IP checksums which
4418		** still work.
4419		*/
4420		rxcsum |= E1000_RXCSUM_PCSD;
4421#if __FreeBSD_version >= 800000
4422		/* For SCTP Offload */
4423		if ((hw->mac.type == e1000_82576)
4424		    && (ifp->if_capenable & IFCAP_RXCSUM))
4425			rxcsum |= E1000_RXCSUM_CRCOFL;
4426#endif
4427	} else {
4428		/* Non RSS setup */
4429		if (ifp->if_capenable & IFCAP_RXCSUM) {
4430			rxcsum |= E1000_RXCSUM_IPPCSE;
4431#if __FreeBSD_version >= 800000
4432			if (adapter->hw.mac.type == e1000_82576)
4433				rxcsum |= E1000_RXCSUM_CRCOFL;
4434#endif
4435		} else
4436			rxcsum &= ~E1000_RXCSUM_TUOFL;
4437	}
4438	E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum);
4439
4440	/* Setup the Receive Control Register */
4441	rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
4442	rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_LBM_NO |
4443		   E1000_RCTL_RDMTS_HALF |
4444		   (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
4445	/* Strip CRC bytes. */
4446	rctl |= E1000_RCTL_SECRC;
4447	/* Make sure VLAN Filters are off */
4448	rctl &= ~E1000_RCTL_VFE;
4449	/* Don't store bad packets */
4450	rctl &= ~E1000_RCTL_SBP;
4451
4452	/* Enable Receives */
4453	E1000_WRITE_REG(hw, E1000_RCTL, rctl);
4454
4455	/*
4456	 * Setup the HW Rx Head and Tail Descriptor Pointers
4457	 *   - needs to be after enable
4458	 */
4459	for (int i = 0; i < adapter->num_queues; i++) {
4460		rxr = &adapter->rx_rings[i];
4461		E1000_WRITE_REG(hw, E1000_RDH(i), rxr->next_to_check);
4462#ifdef DEV_NETMAP
4463		/*
4464		 * an init() while a netmap client is active must
4465		 * preserve the rx buffers passed to userspace.
4466		 * In this driver it means we adjust RDT to
4467		 * somthing different from next_to_refresh
4468		 * (which is not used in netmap mode).
4469		 */
4470		if (ifp->if_capenable & IFCAP_NETMAP) {
4471			struct netmap_adapter *na = NA(adapter->ifp);
4472			struct netmap_kring *kring = &na->rx_rings[i];
4473			int t = rxr->next_to_refresh - kring->nr_hwavail;
4474
4475			if (t >= adapter->num_rx_desc)
4476				t -= adapter->num_rx_desc;
4477			else if (t < 0)
4478				t += adapter->num_rx_desc;
4479			E1000_WRITE_REG(hw, E1000_RDT(i), t);
4480		} else
4481#endif /* DEV_NETMAP */
4482		E1000_WRITE_REG(hw, E1000_RDT(i), rxr->next_to_refresh);
4483	}
4484	return;
4485}
4486
4487/*********************************************************************
4488 *
4489 *  Free receive rings.
4490 *
4491 **********************************************************************/
4492static void
4493igb_free_receive_structures(struct adapter *adapter)
4494{
4495	struct rx_ring *rxr = adapter->rx_rings;
4496
4497	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4498		struct lro_ctrl	*lro = &rxr->lro;
4499		igb_free_receive_buffers(rxr);
4500		tcp_lro_free(lro);
4501		igb_dma_free(adapter, &rxr->rxdma);
4502	}
4503
4504	free(adapter->rx_rings, M_DEVBUF);
4505}
4506
4507/*********************************************************************
4508 *
4509 *  Free receive ring data structures.
4510 *
4511 **********************************************************************/
4512static void
4513igb_free_receive_buffers(struct rx_ring *rxr)
4514{
4515	struct adapter		*adapter = rxr->adapter;
4516	struct igb_rx_buf	*rxbuf;
4517	int i;
4518
4519	INIT_DEBUGOUT("free_receive_structures: begin");
4520
4521	/* Cleanup any existing buffers */
4522	if (rxr->rx_buffers != NULL) {
4523		for (i = 0; i < adapter->num_rx_desc; i++) {
4524			rxbuf = &rxr->rx_buffers[i];
4525			if (rxbuf->m_head != NULL) {
4526				bus_dmamap_sync(rxr->htag, rxbuf->hmap,
4527				    BUS_DMASYNC_POSTREAD);
4528				bus_dmamap_unload(rxr->htag, rxbuf->hmap);
4529				rxbuf->m_head->m_flags |= M_PKTHDR;
4530				m_freem(rxbuf->m_head);
4531			}
4532			if (rxbuf->m_pack != NULL) {
4533				bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
4534				    BUS_DMASYNC_POSTREAD);
4535				bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
4536				rxbuf->m_pack->m_flags |= M_PKTHDR;
4537				m_freem(rxbuf->m_pack);
4538			}
4539			rxbuf->m_head = NULL;
4540			rxbuf->m_pack = NULL;
4541			if (rxbuf->hmap != NULL) {
4542				bus_dmamap_destroy(rxr->htag, rxbuf->hmap);
4543				rxbuf->hmap = NULL;
4544			}
4545			if (rxbuf->pmap != NULL) {
4546				bus_dmamap_destroy(rxr->ptag, rxbuf->pmap);
4547				rxbuf->pmap = NULL;
4548			}
4549		}
4550		if (rxr->rx_buffers != NULL) {
4551			free(rxr->rx_buffers, M_DEVBUF);
4552			rxr->rx_buffers = NULL;
4553		}
4554	}
4555
4556	if (rxr->htag != NULL) {
4557		bus_dma_tag_destroy(rxr->htag);
4558		rxr->htag = NULL;
4559	}
4560	if (rxr->ptag != NULL) {
4561		bus_dma_tag_destroy(rxr->ptag);
4562		rxr->ptag = NULL;
4563	}
4564}
4565
4566static __inline void
4567igb_rx_discard(struct rx_ring *rxr, int i)
4568{
4569	struct igb_rx_buf	*rbuf;
4570
4571	rbuf = &rxr->rx_buffers[i];
4572
4573	/* Partially received? Free the chain */
4574	if (rxr->fmp != NULL) {
4575		rxr->fmp->m_flags |= M_PKTHDR;
4576		m_freem(rxr->fmp);
4577		rxr->fmp = NULL;
4578		rxr->lmp = NULL;
4579	}
4580
4581	/*
4582	** With advanced descriptors the writeback
4583	** clobbers the buffer addrs, so its easier
4584	** to just free the existing mbufs and take
4585	** the normal refresh path to get new buffers
4586	** and mapping.
4587	*/
4588	if (rbuf->m_head) {
4589		m_free(rbuf->m_head);
4590		rbuf->m_head = NULL;
4591	}
4592
4593	if (rbuf->m_pack) {
4594		m_free(rbuf->m_pack);
4595		rbuf->m_pack = NULL;
4596	}
4597
4598	return;
4599}
4600
4601static __inline void
4602igb_rx_input(struct rx_ring *rxr, struct ifnet *ifp, struct mbuf *m, u32 ptype)
4603{
4604
4605	/*
4606	 * ATM LRO is only for IPv4/TCP packets and TCP checksum of the packet
4607	 * should be computed by hardware. Also it should not have VLAN tag in
4608	 * ethernet header.
4609	 */
4610	if (rxr->lro_enabled &&
4611	    (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 &&
4612	    (ptype & E1000_RXDADV_PKTTYPE_ETQF) == 0 &&
4613	    (ptype & (E1000_RXDADV_PKTTYPE_IPV4 | E1000_RXDADV_PKTTYPE_TCP)) ==
4614	    (E1000_RXDADV_PKTTYPE_IPV4 | E1000_RXDADV_PKTTYPE_TCP) &&
4615	    (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) ==
4616	    (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) {
4617		/*
4618		 * Send to the stack if:
4619		 **  - LRO not enabled, or
4620		 **  - no LRO resources, or
4621		 **  - lro enqueue fails
4622		 */
4623		if (rxr->lro.lro_cnt != 0)
4624			if (tcp_lro_rx(&rxr->lro, m, 0) == 0)
4625				return;
4626	}
4627	IGB_RX_UNLOCK(rxr);
4628	(*ifp->if_input)(ifp, m);
4629	IGB_RX_LOCK(rxr);
4630}
4631
4632/*********************************************************************
4633 *
4634 *  This routine executes in interrupt context. It replenishes
4635 *  the mbufs in the descriptor and sends data which has been
4636 *  dma'ed into host memory to upper layer.
4637 *
4638 *  We loop at most count times if count is > 0, or until done if
4639 *  count < 0.
4640 *
4641 *  Return TRUE if more to clean, FALSE otherwise
4642 *********************************************************************/
4643static bool
4644igb_rxeof(struct igb_queue *que, int count, int *done)
4645{
4646	struct adapter		*adapter = que->adapter;
4647	struct rx_ring		*rxr = que->rxr;
4648	struct ifnet		*ifp = adapter->ifp;
4649	struct lro_ctrl		*lro = &rxr->lro;
4650	struct lro_entry	*queued;
4651	int			i, processed = 0, rxdone = 0;
4652	u32			ptype, staterr = 0;
4653	union e1000_adv_rx_desc	*cur;
4654
4655	IGB_RX_LOCK(rxr);
4656	/* Sync the ring. */
4657	bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4658	    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
4659
4660#ifdef DEV_NETMAP
4661	if (ifp->if_capenable & IFCAP_NETMAP) {
4662		struct netmap_adapter *na = NA(ifp);
4663
4664		na->rx_rings[rxr->me].nr_kflags |= NKR_PENDINTR;
4665		selwakeuppri(&na->rx_rings[rxr->me].si, PI_NET);
4666		IGB_RX_UNLOCK(rxr);
4667		IGB_CORE_LOCK(adapter);
4668		selwakeuppri(&na->rx_si, PI_NET);
4669		IGB_CORE_UNLOCK(adapter);
4670		return (0);
4671	}
4672#endif /* DEV_NETMAP */
4673
4674	/* Main clean loop */
4675	for (i = rxr->next_to_check; count != 0;) {
4676		struct mbuf		*sendmp, *mh, *mp;
4677		struct igb_rx_buf	*rxbuf;
4678		u16			hlen, plen, hdr, vtag;
4679		bool			eop = FALSE;
4680
4681		cur = &rxr->rx_base[i];
4682		staterr = le32toh(cur->wb.upper.status_error);
4683		if ((staterr & E1000_RXD_STAT_DD) == 0)
4684			break;
4685		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
4686			break;
4687		count--;
4688		sendmp = mh = mp = NULL;
4689		cur->wb.upper.status_error = 0;
4690		rxbuf = &rxr->rx_buffers[i];
4691		plen = le16toh(cur->wb.upper.length);
4692		ptype = le32toh(cur->wb.lower.lo_dword.data) & IGB_PKTTYPE_MASK;
4693		if ((adapter->hw.mac.type == e1000_i350) &&
4694		    (staterr & E1000_RXDEXT_STATERR_LB))
4695			vtag = be16toh(cur->wb.upper.vlan);
4696		else
4697			vtag = le16toh(cur->wb.upper.vlan);
4698		hdr = le16toh(cur->wb.lower.lo_dword.hs_rss.hdr_info);
4699		eop = ((staterr & E1000_RXD_STAT_EOP) == E1000_RXD_STAT_EOP);
4700
4701		/* Make sure all segments of a bad packet are discarded */
4702		if (((staterr & E1000_RXDEXT_ERR_FRAME_ERR_MASK) != 0) ||
4703		    (rxr->discard)) {
4704			ifp->if_ierrors++;
4705			++rxr->rx_discarded;
4706			if (!eop) /* Catch subsequent segs */
4707				rxr->discard = TRUE;
4708			else
4709				rxr->discard = FALSE;
4710			igb_rx_discard(rxr, i);
4711			goto next_desc;
4712		}
4713
4714		/*
4715		** The way the hardware is configured to
4716		** split, it will ONLY use the header buffer
4717		** when header split is enabled, otherwise we
4718		** get normal behavior, ie, both header and
4719		** payload are DMA'd into the payload buffer.
4720		**
4721		** The fmp test is to catch the case where a
4722		** packet spans multiple descriptors, in that
4723		** case only the first header is valid.
4724		*/
4725		if (rxr->hdr_split && rxr->fmp == NULL) {
4726			hlen = (hdr & E1000_RXDADV_HDRBUFLEN_MASK) >>
4727			    E1000_RXDADV_HDRBUFLEN_SHIFT;
4728			if (hlen > IGB_HDR_BUF)
4729				hlen = IGB_HDR_BUF;
4730			mh = rxr->rx_buffers[i].m_head;
4731			mh->m_len = hlen;
4732			/* clear buf pointer for refresh */
4733			rxbuf->m_head = NULL;
4734			/*
4735			** Get the payload length, this
4736			** could be zero if its a small
4737			** packet.
4738			*/
4739			if (plen > 0) {
4740				mp = rxr->rx_buffers[i].m_pack;
4741				mp->m_len = plen;
4742				mh->m_next = mp;
4743				/* clear buf pointer */
4744				rxbuf->m_pack = NULL;
4745				rxr->rx_split_packets++;
4746			}
4747		} else {
4748			/*
4749			** Either no header split, or a
4750			** secondary piece of a fragmented
4751			** split packet.
4752			*/
4753			mh = rxr->rx_buffers[i].m_pack;
4754			mh->m_len = plen;
4755			/* clear buf info for refresh */
4756			rxbuf->m_pack = NULL;
4757		}
4758
4759		++processed; /* So we know when to refresh */
4760
4761		/* Initial frame - setup */
4762		if (rxr->fmp == NULL) {
4763			mh->m_pkthdr.len = mh->m_len;
4764			/* Save the head of the chain */
4765			rxr->fmp = mh;
4766			rxr->lmp = mh;
4767			if (mp != NULL) {
4768				/* Add payload if split */
4769				mh->m_pkthdr.len += mp->m_len;
4770				rxr->lmp = mh->m_next;
4771			}
4772		} else {
4773			/* Chain mbuf's together */
4774			rxr->lmp->m_next = mh;
4775			rxr->lmp = rxr->lmp->m_next;
4776			rxr->fmp->m_pkthdr.len += mh->m_len;
4777		}
4778
4779		if (eop) {
4780			rxr->fmp->m_pkthdr.rcvif = ifp;
4781			ifp->if_ipackets++;
4782			rxr->rx_packets++;
4783			/* capture data for AIM */
4784			rxr->packets++;
4785			rxr->bytes += rxr->fmp->m_pkthdr.len;
4786			rxr->rx_bytes += rxr->fmp->m_pkthdr.len;
4787
4788			if ((ifp->if_capenable & IFCAP_RXCSUM) != 0)
4789				igb_rx_checksum(staterr, rxr->fmp, ptype);
4790
4791			if ((ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 &&
4792			    (staterr & E1000_RXD_STAT_VP) != 0) {
4793				rxr->fmp->m_pkthdr.ether_vtag = vtag;
4794				rxr->fmp->m_flags |= M_VLANTAG;
4795			}
4796#if __FreeBSD_version >= 800000
4797			rxr->fmp->m_pkthdr.flowid = que->msix;
4798			rxr->fmp->m_flags |= M_FLOWID;
4799#endif
4800			sendmp = rxr->fmp;
4801			/* Make sure to set M_PKTHDR. */
4802			sendmp->m_flags |= M_PKTHDR;
4803			rxr->fmp = NULL;
4804			rxr->lmp = NULL;
4805		}
4806
4807next_desc:
4808		bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4809		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4810
4811		/* Advance our pointers to the next descriptor. */
4812		if (++i == adapter->num_rx_desc)
4813			i = 0;
4814		/*
4815		** Send to the stack or LRO
4816		*/
4817		if (sendmp != NULL) {
4818			rxr->next_to_check = i;
4819			igb_rx_input(rxr, ifp, sendmp, ptype);
4820			i = rxr->next_to_check;
4821			rxdone++;
4822		}
4823
4824		/* Every 8 descriptors we go to refresh mbufs */
4825		if (processed == 8) {
4826                        igb_refresh_mbufs(rxr, i);
4827                        processed = 0;
4828		}
4829	}
4830
4831	/* Catch any remainders */
4832	if (igb_rx_unrefreshed(rxr))
4833		igb_refresh_mbufs(rxr, i);
4834
4835	rxr->next_to_check = i;
4836
4837	/*
4838	 * Flush any outstanding LRO work
4839	 */
4840	while ((queued = SLIST_FIRST(&lro->lro_active)) != NULL) {
4841		SLIST_REMOVE_HEAD(&lro->lro_active, next);
4842		tcp_lro_flush(lro, queued);
4843	}
4844
4845	if (done != NULL)
4846		*done = rxdone;
4847
4848	IGB_RX_UNLOCK(rxr);
4849	return ((staterr & E1000_RXD_STAT_DD) ? TRUE : FALSE);
4850}
4851
4852/*********************************************************************
4853 *
4854 *  Verify that the hardware indicated that the checksum is valid.
4855 *  Inform the stack about the status of checksum so that stack
4856 *  doesn't spend time verifying the checksum.
4857 *
4858 *********************************************************************/
4859static void
4860igb_rx_checksum(u32 staterr, struct mbuf *mp, u32 ptype)
4861{
4862	u16 status = (u16)staterr;
4863	u8  errors = (u8) (staterr >> 24);
4864	int sctp;
4865
4866	/* Ignore Checksum bit is set */
4867	if (status & E1000_RXD_STAT_IXSM) {
4868		mp->m_pkthdr.csum_flags = 0;
4869		return;
4870	}
4871
4872	if ((ptype & E1000_RXDADV_PKTTYPE_ETQF) == 0 &&
4873	    (ptype & E1000_RXDADV_PKTTYPE_SCTP) != 0)
4874		sctp = 1;
4875	else
4876		sctp = 0;
4877	if (status & E1000_RXD_STAT_IPCS) {
4878		/* Did it pass? */
4879		if (!(errors & E1000_RXD_ERR_IPE)) {
4880			/* IP Checksum Good */
4881			mp->m_pkthdr.csum_flags = CSUM_IP_CHECKED;
4882			mp->m_pkthdr.csum_flags |= CSUM_IP_VALID;
4883		} else
4884			mp->m_pkthdr.csum_flags = 0;
4885	}
4886
4887	if (status & (E1000_RXD_STAT_TCPCS | E1000_RXD_STAT_UDPCS)) {
4888		u16 type = (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
4889#if __FreeBSD_version >= 800000
4890		if (sctp) /* reassign */
4891			type = CSUM_SCTP_VALID;
4892#endif
4893		/* Did it pass? */
4894		if (!(errors & E1000_RXD_ERR_TCPE)) {
4895			mp->m_pkthdr.csum_flags |= type;
4896			if (sctp == 0)
4897				mp->m_pkthdr.csum_data = htons(0xffff);
4898		}
4899	}
4900	return;
4901}
4902
4903/*
4904 * This routine is run via an vlan
4905 * config EVENT
4906 */
4907static void
4908igb_register_vlan(void *arg, struct ifnet *ifp, u16 vtag)
4909{
4910	struct adapter	*adapter = ifp->if_softc;
4911	u32		index, bit;
4912
4913	if (ifp->if_softc !=  arg)   /* Not our event */
4914		return;
4915
4916	if ((vtag == 0) || (vtag > 4095))       /* Invalid */
4917                return;
4918
4919	IGB_CORE_LOCK(adapter);
4920	index = (vtag >> 5) & 0x7F;
4921	bit = vtag & 0x1F;
4922	adapter->shadow_vfta[index] |= (1 << bit);
4923	++adapter->num_vlans;
4924	/* Change hw filter setting */
4925	if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
4926		igb_setup_vlan_hw_support(adapter);
4927	IGB_CORE_UNLOCK(adapter);
4928}
4929
4930/*
4931 * This routine is run via an vlan
4932 * unconfig EVENT
4933 */
4934static void
4935igb_unregister_vlan(void *arg, struct ifnet *ifp, u16 vtag)
4936{
4937	struct adapter	*adapter = ifp->if_softc;
4938	u32		index, bit;
4939
4940	if (ifp->if_softc !=  arg)
4941		return;
4942
4943	if ((vtag == 0) || (vtag > 4095))       /* Invalid */
4944                return;
4945
4946	IGB_CORE_LOCK(adapter);
4947	index = (vtag >> 5) & 0x7F;
4948	bit = vtag & 0x1F;
4949	adapter->shadow_vfta[index] &= ~(1 << bit);
4950	--adapter->num_vlans;
4951	/* Change hw filter setting */
4952	if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
4953		igb_setup_vlan_hw_support(adapter);
4954	IGB_CORE_UNLOCK(adapter);
4955}
4956
4957static void
4958igb_setup_vlan_hw_support(struct adapter *adapter)
4959{
4960	struct e1000_hw *hw = &adapter->hw;
4961	struct ifnet	*ifp = adapter->ifp;
4962	u32             reg;
4963
4964	if (adapter->vf_ifp) {
4965		e1000_rlpml_set_vf(hw,
4966		    adapter->max_frame_size + VLAN_TAG_SIZE);
4967		return;
4968	}
4969
4970	reg = E1000_READ_REG(hw, E1000_CTRL);
4971	reg |= E1000_CTRL_VME;
4972	E1000_WRITE_REG(hw, E1000_CTRL, reg);
4973
4974	/* Enable the Filter Table */
4975	if (ifp->if_capenable & IFCAP_VLAN_HWFILTER) {
4976		reg = E1000_READ_REG(hw, E1000_RCTL);
4977		reg &= ~E1000_RCTL_CFIEN;
4978		reg |= E1000_RCTL_VFE;
4979		E1000_WRITE_REG(hw, E1000_RCTL, reg);
4980	}
4981
4982	/* Update the frame size */
4983	E1000_WRITE_REG(&adapter->hw, E1000_RLPML,
4984	    adapter->max_frame_size + VLAN_TAG_SIZE);
4985
4986	/* Don't bother with table if no vlans */
4987	if ((adapter->num_vlans == 0) ||
4988	    ((ifp->if_capenable & IFCAP_VLAN_HWFILTER) == 0))
4989                return;
4990	/*
4991	** A soft reset zero's out the VFTA, so
4992	** we need to repopulate it now.
4993	*/
4994	for (int i = 0; i < IGB_VFTA_SIZE; i++)
4995                if (adapter->shadow_vfta[i] != 0) {
4996			if (adapter->vf_ifp)
4997				e1000_vfta_set_vf(hw,
4998				    adapter->shadow_vfta[i], TRUE);
4999			else
5000				e1000_write_vfta(hw,
5001				    i, adapter->shadow_vfta[i]);
5002		}
5003}
5004
5005static void
5006igb_enable_intr(struct adapter *adapter)
5007{
5008	/* With RSS set up what to auto clear */
5009	if (adapter->msix_mem) {
5010		u32 mask = (adapter->que_mask | adapter->link_mask);
5011		E1000_WRITE_REG(&adapter->hw, E1000_EIAC, mask);
5012		E1000_WRITE_REG(&adapter->hw, E1000_EIAM, mask);
5013		E1000_WRITE_REG(&adapter->hw, E1000_EIMS, mask);
5014		E1000_WRITE_REG(&adapter->hw, E1000_IMS,
5015		    E1000_IMS_LSC);
5016	} else {
5017		E1000_WRITE_REG(&adapter->hw, E1000_IMS,
5018		    IMS_ENABLE_MASK);
5019	}
5020	E1000_WRITE_FLUSH(&adapter->hw);
5021
5022	return;
5023}
5024
5025static void
5026igb_disable_intr(struct adapter *adapter)
5027{
5028	if (adapter->msix_mem) {
5029		E1000_WRITE_REG(&adapter->hw, E1000_EIMC, ~0);
5030		E1000_WRITE_REG(&adapter->hw, E1000_EIAC, 0);
5031	}
5032	E1000_WRITE_REG(&adapter->hw, E1000_IMC, ~0);
5033	E1000_WRITE_FLUSH(&adapter->hw);
5034	return;
5035}
5036
5037/*
5038 * Bit of a misnomer, what this really means is
5039 * to enable OS management of the system... aka
5040 * to disable special hardware management features
5041 */
5042static void
5043igb_init_manageability(struct adapter *adapter)
5044{
5045	if (adapter->has_manage) {
5046		int manc2h = E1000_READ_REG(&adapter->hw, E1000_MANC2H);
5047		int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
5048
5049		/* disable hardware interception of ARP */
5050		manc &= ~(E1000_MANC_ARP_EN);
5051
5052                /* enable receiving management packets to the host */
5053		manc |= E1000_MANC_EN_MNG2HOST;
5054		manc2h |= 1 << 5;  /* Mng Port 623 */
5055		manc2h |= 1 << 6;  /* Mng Port 664 */
5056		E1000_WRITE_REG(&adapter->hw, E1000_MANC2H, manc2h);
5057		E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
5058	}
5059}
5060
5061/*
5062 * Give control back to hardware management
5063 * controller if there is one.
5064 */
5065static void
5066igb_release_manageability(struct adapter *adapter)
5067{
5068	if (adapter->has_manage) {
5069		int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
5070
5071		/* re-enable hardware interception of ARP */
5072		manc |= E1000_MANC_ARP_EN;
5073		manc &= ~E1000_MANC_EN_MNG2HOST;
5074
5075		E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
5076	}
5077}
5078
5079/*
5080 * igb_get_hw_control sets CTRL_EXT:DRV_LOAD bit.
5081 * For ASF and Pass Through versions of f/w this means that
5082 * the driver is loaded.
5083 *
5084 */
5085static void
5086igb_get_hw_control(struct adapter *adapter)
5087{
5088	u32 ctrl_ext;
5089
5090	if (adapter->vf_ifp)
5091		return;
5092
5093	/* Let firmware know the driver has taken over */
5094	ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
5095	E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
5096	    ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
5097}
5098
5099/*
5100 * igb_release_hw_control resets CTRL_EXT:DRV_LOAD bit.
5101 * For ASF and Pass Through versions of f/w this means that the
5102 * driver is no longer loaded.
5103 *
5104 */
5105static void
5106igb_release_hw_control(struct adapter *adapter)
5107{
5108	u32 ctrl_ext;
5109
5110	if (adapter->vf_ifp)
5111		return;
5112
5113	/* Let firmware taken over control of h/w */
5114	ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
5115	E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
5116	    ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
5117}
5118
5119static int
5120igb_is_valid_ether_addr(uint8_t *addr)
5121{
5122	char zero_addr[6] = { 0, 0, 0, 0, 0, 0 };
5123
5124	if ((addr[0] & 1) || (!bcmp(addr, zero_addr, ETHER_ADDR_LEN))) {
5125		return (FALSE);
5126	}
5127
5128	return (TRUE);
5129}
5130
5131
5132/*
5133 * Enable PCI Wake On Lan capability
5134 */
5135static void
5136igb_enable_wakeup(device_t dev)
5137{
5138	u16     cap, status;
5139	u8      id;
5140
5141	/* First find the capabilities pointer*/
5142	cap = pci_read_config(dev, PCIR_CAP_PTR, 2);
5143	/* Read the PM Capabilities */
5144	id = pci_read_config(dev, cap, 1);
5145	if (id != PCIY_PMG)     /* Something wrong */
5146		return;
5147	/* OK, we have the power capabilities, so
5148	   now get the status register */
5149	cap += PCIR_POWER_STATUS;
5150	status = pci_read_config(dev, cap, 2);
5151	status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE;
5152	pci_write_config(dev, cap, status, 2);
5153	return;
5154}
5155
5156static void
5157igb_led_func(void *arg, int onoff)
5158{
5159	struct adapter	*adapter = arg;
5160
5161	IGB_CORE_LOCK(adapter);
5162	if (onoff) {
5163		e1000_setup_led(&adapter->hw);
5164		e1000_led_on(&adapter->hw);
5165	} else {
5166		e1000_led_off(&adapter->hw);
5167		e1000_cleanup_led(&adapter->hw);
5168	}
5169	IGB_CORE_UNLOCK(adapter);
5170}
5171
5172/**********************************************************************
5173 *
5174 *  Update the board statistics counters.
5175 *
5176 **********************************************************************/
5177static void
5178igb_update_stats_counters(struct adapter *adapter)
5179{
5180	struct ifnet		*ifp;
5181        struct e1000_hw		*hw = &adapter->hw;
5182	struct e1000_hw_stats	*stats;
5183
5184	/*
5185	** The virtual function adapter has only a
5186	** small controlled set of stats, do only
5187	** those and return.
5188	*/
5189	if (adapter->vf_ifp) {
5190		igb_update_vf_stats_counters(adapter);
5191		return;
5192	}
5193
5194	stats = (struct e1000_hw_stats	*)adapter->stats;
5195
5196	if(adapter->hw.phy.media_type == e1000_media_type_copper ||
5197	   (E1000_READ_REG(hw, E1000_STATUS) & E1000_STATUS_LU)) {
5198		stats->symerrs +=
5199		    E1000_READ_REG(hw,E1000_SYMERRS);
5200		stats->sec += E1000_READ_REG(hw, E1000_SEC);
5201	}
5202
5203	stats->crcerrs += E1000_READ_REG(hw, E1000_CRCERRS);
5204	stats->mpc += E1000_READ_REG(hw, E1000_MPC);
5205	stats->scc += E1000_READ_REG(hw, E1000_SCC);
5206	stats->ecol += E1000_READ_REG(hw, E1000_ECOL);
5207
5208	stats->mcc += E1000_READ_REG(hw, E1000_MCC);
5209	stats->latecol += E1000_READ_REG(hw, E1000_LATECOL);
5210	stats->colc += E1000_READ_REG(hw, E1000_COLC);
5211	stats->dc += E1000_READ_REG(hw, E1000_DC);
5212	stats->rlec += E1000_READ_REG(hw, E1000_RLEC);
5213	stats->xonrxc += E1000_READ_REG(hw, E1000_XONRXC);
5214	stats->xontxc += E1000_READ_REG(hw, E1000_XONTXC);
5215	/*
5216	** For watchdog management we need to know if we have been
5217	** paused during the last interval, so capture that here.
5218	*/
5219        adapter->pause_frames = E1000_READ_REG(&adapter->hw, E1000_XOFFRXC);
5220        stats->xoffrxc += adapter->pause_frames;
5221	stats->xofftxc += E1000_READ_REG(hw, E1000_XOFFTXC);
5222	stats->fcruc += E1000_READ_REG(hw, E1000_FCRUC);
5223	stats->prc64 += E1000_READ_REG(hw, E1000_PRC64);
5224	stats->prc127 += E1000_READ_REG(hw, E1000_PRC127);
5225	stats->prc255 += E1000_READ_REG(hw, E1000_PRC255);
5226	stats->prc511 += E1000_READ_REG(hw, E1000_PRC511);
5227	stats->prc1023 += E1000_READ_REG(hw, E1000_PRC1023);
5228	stats->prc1522 += E1000_READ_REG(hw, E1000_PRC1522);
5229	stats->gprc += E1000_READ_REG(hw, E1000_GPRC);
5230	stats->bprc += E1000_READ_REG(hw, E1000_BPRC);
5231	stats->mprc += E1000_READ_REG(hw, E1000_MPRC);
5232	stats->gptc += E1000_READ_REG(hw, E1000_GPTC);
5233
5234	/* For the 64-bit byte counters the low dword must be read first. */
5235	/* Both registers clear on the read of the high dword */
5236
5237	stats->gorc += E1000_READ_REG(hw, E1000_GORCL) +
5238	    ((u64)E1000_READ_REG(hw, E1000_GORCH) << 32);
5239	stats->gotc += E1000_READ_REG(hw, E1000_GOTCL) +
5240	    ((u64)E1000_READ_REG(hw, E1000_GOTCH) << 32);
5241
5242	stats->rnbc += E1000_READ_REG(hw, E1000_RNBC);
5243	stats->ruc += E1000_READ_REG(hw, E1000_RUC);
5244	stats->rfc += E1000_READ_REG(hw, E1000_RFC);
5245	stats->roc += E1000_READ_REG(hw, E1000_ROC);
5246	stats->rjc += E1000_READ_REG(hw, E1000_RJC);
5247
5248	stats->tor += E1000_READ_REG(hw, E1000_TORH);
5249	stats->tot += E1000_READ_REG(hw, E1000_TOTH);
5250
5251	stats->tpr += E1000_READ_REG(hw, E1000_TPR);
5252	stats->tpt += E1000_READ_REG(hw, E1000_TPT);
5253	stats->ptc64 += E1000_READ_REG(hw, E1000_PTC64);
5254	stats->ptc127 += E1000_READ_REG(hw, E1000_PTC127);
5255	stats->ptc255 += E1000_READ_REG(hw, E1000_PTC255);
5256	stats->ptc511 += E1000_READ_REG(hw, E1000_PTC511);
5257	stats->ptc1023 += E1000_READ_REG(hw, E1000_PTC1023);
5258	stats->ptc1522 += E1000_READ_REG(hw, E1000_PTC1522);
5259	stats->mptc += E1000_READ_REG(hw, E1000_MPTC);
5260	stats->bptc += E1000_READ_REG(hw, E1000_BPTC);
5261
5262	/* Interrupt Counts */
5263
5264	stats->iac += E1000_READ_REG(hw, E1000_IAC);
5265	stats->icrxptc += E1000_READ_REG(hw, E1000_ICRXPTC);
5266	stats->icrxatc += E1000_READ_REG(hw, E1000_ICRXATC);
5267	stats->ictxptc += E1000_READ_REG(hw, E1000_ICTXPTC);
5268	stats->ictxatc += E1000_READ_REG(hw, E1000_ICTXATC);
5269	stats->ictxqec += E1000_READ_REG(hw, E1000_ICTXQEC);
5270	stats->ictxqmtc += E1000_READ_REG(hw, E1000_ICTXQMTC);
5271	stats->icrxdmtc += E1000_READ_REG(hw, E1000_ICRXDMTC);
5272	stats->icrxoc += E1000_READ_REG(hw, E1000_ICRXOC);
5273
5274	/* Host to Card Statistics */
5275
5276	stats->cbtmpc += E1000_READ_REG(hw, E1000_CBTMPC);
5277	stats->htdpmc += E1000_READ_REG(hw, E1000_HTDPMC);
5278	stats->cbrdpc += E1000_READ_REG(hw, E1000_CBRDPC);
5279	stats->cbrmpc += E1000_READ_REG(hw, E1000_CBRMPC);
5280	stats->rpthc += E1000_READ_REG(hw, E1000_RPTHC);
5281	stats->hgptc += E1000_READ_REG(hw, E1000_HGPTC);
5282	stats->htcbdpc += E1000_READ_REG(hw, E1000_HTCBDPC);
5283	stats->hgorc += (E1000_READ_REG(hw, E1000_HGORCL) +
5284	    ((u64)E1000_READ_REG(hw, E1000_HGORCH) << 32));
5285	stats->hgotc += (E1000_READ_REG(hw, E1000_HGOTCL) +
5286	    ((u64)E1000_READ_REG(hw, E1000_HGOTCH) << 32));
5287	stats->lenerrs += E1000_READ_REG(hw, E1000_LENERRS);
5288	stats->scvpc += E1000_READ_REG(hw, E1000_SCVPC);
5289	stats->hrmpc += E1000_READ_REG(hw, E1000_HRMPC);
5290
5291	stats->algnerrc += E1000_READ_REG(hw, E1000_ALGNERRC);
5292	stats->rxerrc += E1000_READ_REG(hw, E1000_RXERRC);
5293	stats->tncrs += E1000_READ_REG(hw, E1000_TNCRS);
5294	stats->cexterr += E1000_READ_REG(hw, E1000_CEXTERR);
5295	stats->tsctc += E1000_READ_REG(hw, E1000_TSCTC);
5296	stats->tsctfc += E1000_READ_REG(hw, E1000_TSCTFC);
5297
5298	ifp = adapter->ifp;
5299	ifp->if_collisions = stats->colc;
5300
5301	/* Rx Errors */
5302	ifp->if_ierrors = adapter->dropped_pkts + stats->rxerrc +
5303	    stats->crcerrs + stats->algnerrc +
5304	    stats->ruc + stats->roc + stats->mpc + stats->cexterr;
5305
5306	/* Tx Errors */
5307	ifp->if_oerrors = stats->ecol +
5308	    stats->latecol + adapter->watchdog_events;
5309
5310	/* Driver specific counters */
5311	adapter->device_control = E1000_READ_REG(hw, E1000_CTRL);
5312	adapter->rx_control = E1000_READ_REG(hw, E1000_RCTL);
5313	adapter->int_mask = E1000_READ_REG(hw, E1000_IMS);
5314	adapter->eint_mask = E1000_READ_REG(hw, E1000_EIMS);
5315	adapter->packet_buf_alloc_tx =
5316	    ((E1000_READ_REG(hw, E1000_PBA) & 0xffff0000) >> 16);
5317	adapter->packet_buf_alloc_rx =
5318	    (E1000_READ_REG(hw, E1000_PBA) & 0xffff);
5319}
5320
5321
5322/**********************************************************************
5323 *
5324 *  Initialize the VF board statistics counters.
5325 *
5326 **********************************************************************/
5327static void
5328igb_vf_init_stats(struct adapter *adapter)
5329{
5330        struct e1000_hw *hw = &adapter->hw;
5331	struct e1000_vf_stats	*stats;
5332
5333	stats = (struct e1000_vf_stats	*)adapter->stats;
5334	if (stats == NULL)
5335		return;
5336        stats->last_gprc = E1000_READ_REG(hw, E1000_VFGPRC);
5337        stats->last_gorc = E1000_READ_REG(hw, E1000_VFGORC);
5338        stats->last_gptc = E1000_READ_REG(hw, E1000_VFGPTC);
5339        stats->last_gotc = E1000_READ_REG(hw, E1000_VFGOTC);
5340        stats->last_mprc = E1000_READ_REG(hw, E1000_VFMPRC);
5341}
5342
5343/**********************************************************************
5344 *
5345 *  Update the VF board statistics counters.
5346 *
5347 **********************************************************************/
5348static void
5349igb_update_vf_stats_counters(struct adapter *adapter)
5350{
5351	struct e1000_hw *hw = &adapter->hw;
5352	struct e1000_vf_stats	*stats;
5353
5354	if (adapter->link_speed == 0)
5355		return;
5356
5357	stats = (struct e1000_vf_stats	*)adapter->stats;
5358
5359	UPDATE_VF_REG(E1000_VFGPRC,
5360	    stats->last_gprc, stats->gprc);
5361	UPDATE_VF_REG(E1000_VFGORC,
5362	    stats->last_gorc, stats->gorc);
5363	UPDATE_VF_REG(E1000_VFGPTC,
5364	    stats->last_gptc, stats->gptc);
5365	UPDATE_VF_REG(E1000_VFGOTC,
5366	    stats->last_gotc, stats->gotc);
5367	UPDATE_VF_REG(E1000_VFMPRC,
5368	    stats->last_mprc, stats->mprc);
5369}
5370
5371/* Export a single 32-bit register via a read-only sysctl. */
5372static int
5373igb_sysctl_reg_handler(SYSCTL_HANDLER_ARGS)
5374{
5375	struct adapter *adapter;
5376	u_int val;
5377
5378	adapter = oidp->oid_arg1;
5379	val = E1000_READ_REG(&adapter->hw, oidp->oid_arg2);
5380	return (sysctl_handle_int(oidp, &val, 0, req));
5381}
5382
5383/*
5384**  Tuneable interrupt rate handler
5385*/
5386static int
5387igb_sysctl_interrupt_rate_handler(SYSCTL_HANDLER_ARGS)
5388{
5389	struct igb_queue	*que = ((struct igb_queue *)oidp->oid_arg1);
5390	int			error;
5391	u32			reg, usec, rate;
5392
5393	reg = E1000_READ_REG(&que->adapter->hw, E1000_EITR(que->msix));
5394	usec = ((reg & 0x7FFC) >> 2);
5395	if (usec > 0)
5396		rate = 1000000 / usec;
5397	else
5398		rate = 0;
5399	error = sysctl_handle_int(oidp, &rate, 0, req);
5400	if (error || !req->newptr)
5401		return error;
5402	return 0;
5403}
5404
5405/*
5406 * Add sysctl variables, one per statistic, to the system.
5407 */
5408static void
5409igb_add_hw_stats(struct adapter *adapter)
5410{
5411	device_t dev = adapter->dev;
5412
5413	struct tx_ring *txr = adapter->tx_rings;
5414	struct rx_ring *rxr = adapter->rx_rings;
5415
5416	struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev);
5417	struct sysctl_oid *tree = device_get_sysctl_tree(dev);
5418	struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree);
5419	struct e1000_hw_stats *stats = adapter->stats;
5420
5421	struct sysctl_oid *stat_node, *queue_node, *int_node, *host_node;
5422	struct sysctl_oid_list *stat_list, *queue_list, *int_list, *host_list;
5423
5424#define QUEUE_NAME_LEN 32
5425	char namebuf[QUEUE_NAME_LEN];
5426
5427	/* Driver Statistics */
5428	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "link_irq",
5429			CTLFLAG_RD, &adapter->link_irq, 0,
5430			"Link MSIX IRQ Handled");
5431	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "dropped",
5432			CTLFLAG_RD, &adapter->dropped_pkts,
5433			"Driver dropped packets");
5434	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_dma_fail",
5435			CTLFLAG_RD, &adapter->no_tx_dma_setup,
5436			"Driver tx dma failure in xmit");
5437	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_overruns",
5438			CTLFLAG_RD, &adapter->rx_overruns,
5439			"RX overruns");
5440	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "watchdog_timeouts",
5441			CTLFLAG_RD, &adapter->watchdog_events,
5442			"Watchdog timeouts");
5443
5444	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "device_control",
5445			CTLFLAG_RD, &adapter->device_control,
5446			"Device Control Register");
5447	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_control",
5448			CTLFLAG_RD, &adapter->rx_control,
5449			"Receiver Control Register");
5450	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "interrupt_mask",
5451			CTLFLAG_RD, &adapter->int_mask,
5452			"Interrupt Mask");
5453	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "extended_int_mask",
5454			CTLFLAG_RD, &adapter->eint_mask,
5455			"Extended Interrupt Mask");
5456	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_buf_alloc",
5457			CTLFLAG_RD, &adapter->packet_buf_alloc_tx,
5458			"Transmit Buffer Packet Allocation");
5459	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_buf_alloc",
5460			CTLFLAG_RD, &adapter->packet_buf_alloc_rx,
5461			"Receive Buffer Packet Allocation");
5462	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_high_water",
5463			CTLFLAG_RD, &adapter->hw.fc.high_water, 0,
5464			"Flow Control High Watermark");
5465	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_low_water",
5466			CTLFLAG_RD, &adapter->hw.fc.low_water, 0,
5467			"Flow Control Low Watermark");
5468
5469	for (int i = 0; i < adapter->num_queues; i++, rxr++, txr++) {
5470		struct lro_ctrl *lro = &rxr->lro;
5471
5472		snprintf(namebuf, QUEUE_NAME_LEN, "queue%d", i);
5473		queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
5474					    CTLFLAG_RD, NULL, "Queue Name");
5475		queue_list = SYSCTL_CHILDREN(queue_node);
5476
5477		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "interrupt_rate",
5478				CTLFLAG_RD, &adapter->queues[i],
5479				sizeof(&adapter->queues[i]),
5480				igb_sysctl_interrupt_rate_handler,
5481				"IU", "Interrupt Rate");
5482
5483		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_head",
5484				CTLFLAG_RD, adapter, E1000_TDH(txr->me),
5485				igb_sysctl_reg_handler, "IU",
5486 				"Transmit Descriptor Head");
5487		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_tail",
5488				CTLFLAG_RD, adapter, E1000_TDT(txr->me),
5489				igb_sysctl_reg_handler, "IU",
5490 				"Transmit Descriptor Tail");
5491		SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "no_desc_avail",
5492				CTLFLAG_RD, &txr->no_desc_avail,
5493				"Queue No Descriptor Available");
5494		SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "tx_packets",
5495				CTLFLAG_RD, &txr->tx_packets,
5496				"Queue Packets Transmitted");
5497
5498		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_head",
5499				CTLFLAG_RD, adapter, E1000_RDH(rxr->me),
5500				igb_sysctl_reg_handler, "IU",
5501				"Receive Descriptor Head");
5502		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_tail",
5503				CTLFLAG_RD, adapter, E1000_RDT(rxr->me),
5504				igb_sysctl_reg_handler, "IU",
5505				"Receive Descriptor Tail");
5506		SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "rx_packets",
5507				CTLFLAG_RD, &rxr->rx_packets,
5508				"Queue Packets Received");
5509		SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "rx_bytes",
5510				CTLFLAG_RD, &rxr->rx_bytes,
5511				"Queue Bytes Received");
5512		SYSCTL_ADD_UINT(ctx, queue_list, OID_AUTO, "lro_queued",
5513				CTLFLAG_RD, &lro->lro_queued, 0,
5514				"LRO Queued");
5515		SYSCTL_ADD_UINT(ctx, queue_list, OID_AUTO, "lro_flushed",
5516				CTLFLAG_RD, &lro->lro_flushed, 0,
5517				"LRO Flushed");
5518	}
5519
5520	/* MAC stats get their own sub node */
5521
5522	stat_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "mac_stats",
5523				    CTLFLAG_RD, NULL, "MAC Statistics");
5524	stat_list = SYSCTL_CHILDREN(stat_node);
5525
5526	/*
5527	** VF adapter has a very limited set of stats
5528	** since its not managing the metal, so to speak.
5529	*/
5530	if (adapter->vf_ifp) {
5531	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd",
5532			CTLFLAG_RD, &stats->gprc,
5533			"Good Packets Received");
5534	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
5535			CTLFLAG_RD, &stats->gptc,
5536			"Good Packets Transmitted");
5537 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd",
5538 			CTLFLAG_RD, &stats->gorc,
5539 			"Good Octets Received");
5540 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_txd",
5541 			CTLFLAG_RD, &stats->gotc,
5542 			"Good Octets Transmitted");
5543	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd",
5544			CTLFLAG_RD, &stats->mprc,
5545			"Multicast Packets Received");
5546		return;
5547	}
5548
5549	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "excess_coll",
5550			CTLFLAG_RD, &stats->ecol,
5551			"Excessive collisions");
5552	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "single_coll",
5553			CTLFLAG_RD, &stats->scc,
5554			"Single collisions");
5555	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "multiple_coll",
5556			CTLFLAG_RD, &stats->mcc,
5557			"Multiple collisions");
5558	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "late_coll",
5559			CTLFLAG_RD, &stats->latecol,
5560			"Late collisions");
5561	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "collision_count",
5562			CTLFLAG_RD, &stats->colc,
5563			"Collision Count");
5564	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "symbol_errors",
5565			CTLFLAG_RD, &stats->symerrs,
5566			"Symbol Errors");
5567	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "sequence_errors",
5568			CTLFLAG_RD, &stats->sec,
5569			"Sequence Errors");
5570	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "defer_count",
5571			CTLFLAG_RD, &stats->dc,
5572			"Defer Count");
5573	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "missed_packets",
5574			CTLFLAG_RD, &stats->mpc,
5575			"Missed Packets");
5576	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_no_buff",
5577			CTLFLAG_RD, &stats->rnbc,
5578			"Receive No Buffers");
5579	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_undersize",
5580			CTLFLAG_RD, &stats->ruc,
5581			"Receive Undersize");
5582	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_fragmented",
5583			CTLFLAG_RD, &stats->rfc,
5584			"Fragmented Packets Received ");
5585	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_oversize",
5586			CTLFLAG_RD, &stats->roc,
5587			"Oversized Packets Received");
5588	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_jabber",
5589			CTLFLAG_RD, &stats->rjc,
5590			"Recevied Jabber");
5591	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_errs",
5592			CTLFLAG_RD, &stats->rxerrc,
5593			"Receive Errors");
5594	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "crc_errs",
5595			CTLFLAG_RD, &stats->crcerrs,
5596			"CRC errors");
5597	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "alignment_errs",
5598			CTLFLAG_RD, &stats->algnerrc,
5599			"Alignment Errors");
5600	/* On 82575 these are collision counts */
5601	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "coll_ext_errs",
5602			CTLFLAG_RD, &stats->cexterr,
5603			"Collision/Carrier extension errors");
5604	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xon_recvd",
5605			CTLFLAG_RD, &stats->xonrxc,
5606			"XON Received");
5607	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xon_txd",
5608			CTLFLAG_RD, &stats->xontxc,
5609			"XON Transmitted");
5610	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xoff_recvd",
5611			CTLFLAG_RD, &stats->xoffrxc,
5612			"XOFF Received");
5613	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xoff_txd",
5614			CTLFLAG_RD, &stats->xofftxc,
5615			"XOFF Transmitted");
5616	/* Packet Reception Stats */
5617	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "total_pkts_recvd",
5618			CTLFLAG_RD, &stats->tpr,
5619			"Total Packets Received ");
5620	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd",
5621			CTLFLAG_RD, &stats->gprc,
5622			"Good Packets Received");
5623	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_recvd",
5624			CTLFLAG_RD, &stats->bprc,
5625			"Broadcast Packets Received");
5626	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd",
5627			CTLFLAG_RD, &stats->mprc,
5628			"Multicast Packets Received");
5629	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_64",
5630			CTLFLAG_RD, &stats->prc64,
5631			"64 byte frames received ");
5632	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_65_127",
5633			CTLFLAG_RD, &stats->prc127,
5634			"65-127 byte frames received");
5635	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_128_255",
5636			CTLFLAG_RD, &stats->prc255,
5637			"128-255 byte frames received");
5638	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_256_511",
5639			CTLFLAG_RD, &stats->prc511,
5640			"256-511 byte frames received");
5641	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_512_1023",
5642			CTLFLAG_RD, &stats->prc1023,
5643			"512-1023 byte frames received");
5644	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_1024_1522",
5645			CTLFLAG_RD, &stats->prc1522,
5646			"1023-1522 byte frames received");
5647 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd",
5648 			CTLFLAG_RD, &stats->gorc,
5649 			"Good Octets Received");
5650
5651	/* Packet Transmission Stats */
5652 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_txd",
5653 			CTLFLAG_RD, &stats->gotc,
5654 			"Good Octets Transmitted");
5655	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "total_pkts_txd",
5656			CTLFLAG_RD, &stats->tpt,
5657			"Total Packets Transmitted");
5658	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
5659			CTLFLAG_RD, &stats->gptc,
5660			"Good Packets Transmitted");
5661	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_txd",
5662			CTLFLAG_RD, &stats->bptc,
5663			"Broadcast Packets Transmitted");
5664	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_txd",
5665			CTLFLAG_RD, &stats->mptc,
5666			"Multicast Packets Transmitted");
5667	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_64",
5668			CTLFLAG_RD, &stats->ptc64,
5669			"64 byte frames transmitted ");
5670	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_65_127",
5671			CTLFLAG_RD, &stats->ptc127,
5672			"65-127 byte frames transmitted");
5673	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_128_255",
5674			CTLFLAG_RD, &stats->ptc255,
5675			"128-255 byte frames transmitted");
5676	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_256_511",
5677			CTLFLAG_RD, &stats->ptc511,
5678			"256-511 byte frames transmitted");
5679	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_512_1023",
5680			CTLFLAG_RD, &stats->ptc1023,
5681			"512-1023 byte frames transmitted");
5682	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_1024_1522",
5683			CTLFLAG_RD, &stats->ptc1522,
5684			"1024-1522 byte frames transmitted");
5685	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tso_txd",
5686			CTLFLAG_RD, &stats->tsctc,
5687			"TSO Contexts Transmitted");
5688	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tso_ctx_fail",
5689			CTLFLAG_RD, &stats->tsctfc,
5690			"TSO Contexts Failed");
5691
5692
5693	/* Interrupt Stats */
5694
5695	int_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "interrupts",
5696				    CTLFLAG_RD, NULL, "Interrupt Statistics");
5697	int_list = SYSCTL_CHILDREN(int_node);
5698
5699	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "asserts",
5700			CTLFLAG_RD, &stats->iac,
5701			"Interrupt Assertion Count");
5702
5703	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_pkt_timer",
5704			CTLFLAG_RD, &stats->icrxptc,
5705			"Interrupt Cause Rx Pkt Timer Expire Count");
5706
5707	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_abs_timer",
5708			CTLFLAG_RD, &stats->icrxatc,
5709			"Interrupt Cause Rx Abs Timer Expire Count");
5710
5711	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_pkt_timer",
5712			CTLFLAG_RD, &stats->ictxptc,
5713			"Interrupt Cause Tx Pkt Timer Expire Count");
5714
5715	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_abs_timer",
5716			CTLFLAG_RD, &stats->ictxatc,
5717			"Interrupt Cause Tx Abs Timer Expire Count");
5718
5719	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_queue_empty",
5720			CTLFLAG_RD, &stats->ictxqec,
5721			"Interrupt Cause Tx Queue Empty Count");
5722
5723	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_queue_min_thresh",
5724			CTLFLAG_RD, &stats->ictxqmtc,
5725			"Interrupt Cause Tx Queue Min Thresh Count");
5726
5727	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_desc_min_thresh",
5728			CTLFLAG_RD, &stats->icrxdmtc,
5729			"Interrupt Cause Rx Desc Min Thresh Count");
5730
5731	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_overrun",
5732			CTLFLAG_RD, &stats->icrxoc,
5733			"Interrupt Cause Receiver Overrun Count");
5734
5735	/* Host to Card Stats */
5736
5737	host_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "host",
5738				    CTLFLAG_RD, NULL,
5739				    "Host to Card Statistics");
5740
5741	host_list = SYSCTL_CHILDREN(host_node);
5742
5743	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_tx_pkt",
5744			CTLFLAG_RD, &stats->cbtmpc,
5745			"Circuit Breaker Tx Packet Count");
5746
5747	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "host_tx_pkt_discard",
5748			CTLFLAG_RD, &stats->htdpmc,
5749			"Host Transmit Discarded Packets");
5750
5751	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "rx_pkt",
5752			CTLFLAG_RD, &stats->rpthc,
5753			"Rx Packets To Host");
5754
5755	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_rx_pkts",
5756			CTLFLAG_RD, &stats->cbrmpc,
5757			"Circuit Breaker Rx Packet Count");
5758
5759	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_rx_pkt_drop",
5760			CTLFLAG_RD, &stats->cbrdpc,
5761			"Circuit Breaker Rx Dropped Count");
5762
5763	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "tx_good_pkt",
5764			CTLFLAG_RD, &stats->hgptc,
5765			"Host Good Packets Tx Count");
5766
5767	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_tx_pkt_drop",
5768			CTLFLAG_RD, &stats->htcbdpc,
5769			"Host Tx Circuit Breaker Dropped Count");
5770
5771	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "rx_good_bytes",
5772			CTLFLAG_RD, &stats->hgorc,
5773			"Host Good Octets Received Count");
5774
5775	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "tx_good_bytes",
5776			CTLFLAG_RD, &stats->hgotc,
5777			"Host Good Octets Transmit Count");
5778
5779	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "length_errors",
5780			CTLFLAG_RD, &stats->lenerrs,
5781			"Length Errors");
5782
5783	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "serdes_violation_pkt",
5784			CTLFLAG_RD, &stats->scvpc,
5785			"SerDes/SGMII Code Violation Pkt Count");
5786
5787	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "header_redir_missed",
5788			CTLFLAG_RD, &stats->hrmpc,
5789			"Header Redirection Missed Packet Count");
5790}
5791
5792
5793/**********************************************************************
5794 *
5795 *  This routine provides a way to dump out the adapter eeprom,
5796 *  often a useful debug/service tool. This only dumps the first
5797 *  32 words, stuff that matters is in that extent.
5798 *
5799 **********************************************************************/
5800static int
5801igb_sysctl_nvm_info(SYSCTL_HANDLER_ARGS)
5802{
5803	struct adapter *adapter;
5804	int error;
5805	int result;
5806
5807	result = -1;
5808	error = sysctl_handle_int(oidp, &result, 0, req);
5809
5810	if (error || !req->newptr)
5811		return (error);
5812
5813	/*
5814	 * This value will cause a hex dump of the
5815	 * first 32 16-bit words of the EEPROM to
5816	 * the screen.
5817	 */
5818	if (result == 1) {
5819		adapter = (struct adapter *)arg1;
5820		igb_print_nvm_info(adapter);
5821        }
5822
5823	return (error);
5824}
5825
5826static void
5827igb_print_nvm_info(struct adapter *adapter)
5828{
5829	u16	eeprom_data;
5830	int	i, j, row = 0;
5831
5832	/* Its a bit crude, but it gets the job done */
5833	printf("\nInterface EEPROM Dump:\n");
5834	printf("Offset\n0x0000  ");
5835	for (i = 0, j = 0; i < 32; i++, j++) {
5836		if (j == 8) { /* Make the offset block */
5837			j = 0; ++row;
5838			printf("\n0x00%x0  ",row);
5839		}
5840		e1000_read_nvm(&adapter->hw, i, 1, &eeprom_data);
5841		printf("%04x ", eeprom_data);
5842	}
5843	printf("\n");
5844}
5845
5846static void
5847igb_set_sysctl_value(struct adapter *adapter, const char *name,
5848	const char *description, int *limit, int value)
5849{
5850	*limit = value;
5851	SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
5852	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
5853	    OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW, limit, value, description);
5854}
5855
5856/*
5857** Set flow control using sysctl:
5858** Flow control values:
5859** 	0 - off
5860**	1 - rx pause
5861**	2 - tx pause
5862**	3 - full
5863*/
5864static int
5865igb_set_flowcntl(SYSCTL_HANDLER_ARGS)
5866{
5867	int		error;
5868	static int	input = 3; /* default is full */
5869	struct adapter	*adapter = (struct adapter *) arg1;
5870
5871	error = sysctl_handle_int(oidp, &input, 0, req);
5872
5873	if ((error) || (req->newptr == NULL))
5874		return (error);
5875
5876	switch (input) {
5877		case e1000_fc_rx_pause:
5878		case e1000_fc_tx_pause:
5879		case e1000_fc_full:
5880		case e1000_fc_none:
5881			adapter->hw.fc.requested_mode = input;
5882			adapter->fc = input;
5883			break;
5884		default:
5885			/* Do nothing */
5886			return (error);
5887	}
5888
5889	adapter->hw.fc.current_mode = adapter->hw.fc.requested_mode;
5890	e1000_force_mac_fc(&adapter->hw);
5891	return (error);
5892}
5893
5894/*
5895** Manage DMA Coalesce:
5896** Control values:
5897** 	0/1 - off/on
5898**	Legal timer values are:
5899**	250,500,1000-10000 in thousands
5900*/
5901static int
5902igb_sysctl_dmac(SYSCTL_HANDLER_ARGS)
5903{
5904	struct adapter *adapter = (struct adapter *) arg1;
5905	int		error;
5906
5907	error = sysctl_handle_int(oidp, &adapter->dmac, 0, req);
5908
5909	if ((error) || (req->newptr == NULL))
5910		return (error);
5911
5912	switch (adapter->dmac) {
5913		case 0:
5914			/*Disabling */
5915			break;
5916		case 1: /* Just enable and use default */
5917			adapter->dmac = 1000;
5918			break;
5919		case 250:
5920		case 500:
5921		case 1000:
5922		case 2000:
5923		case 3000:
5924		case 4000:
5925		case 5000:
5926		case 6000:
5927		case 7000:
5928		case 8000:
5929		case 9000:
5930		case 10000:
5931			/* Legal values - allow */
5932			break;
5933		default:
5934			/* Do nothing, illegal value */
5935			adapter->dmac = 0;
5936			return (error);
5937	}
5938	/* Reinit the interface */
5939	igb_init(adapter);
5940	return (error);
5941}
5942