if_igb.c revision 238151
1/******************************************************************************
2
3  Copyright (c) 2001-2012, Intel Corporation
4  All rights reserved.
5
6  Redistribution and use in source and binary forms, with or without
7  modification, are permitted provided that the following conditions are met:
8
9   1. Redistributions of source code must retain the above copyright notice,
10      this list of conditions and the following disclaimer.
11
12   2. Redistributions in binary form must reproduce the above copyright
13      notice, this list of conditions and the following disclaimer in the
14      documentation and/or other materials provided with the distribution.
15
16   3. Neither the name of the Intel Corporation nor the names of its
17      contributors may be used to endorse or promote products derived from
18      this software without specific prior written permission.
19
20  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
24  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30  POSSIBILITY OF SUCH DAMAGE.
31
32******************************************************************************/
33/*$FreeBSD: head/sys/dev/e1000/if_igb.c 238151 2012-07-05 23:36:17Z jfv $*/
34
35
36#ifdef HAVE_KERNEL_OPTION_HEADERS
37#include "opt_device_polling.h"
38#include "opt_inet.h"
39#include "opt_inet6.h"
40#include "opt_altq.h"
41#endif
42
43#include <sys/param.h>
44#include <sys/systm.h>
45#if __FreeBSD_version >= 800000
46#include <sys/buf_ring.h>
47#endif
48#include <sys/bus.h>
49#include <sys/endian.h>
50#include <sys/kernel.h>
51#include <sys/kthread.h>
52#include <sys/malloc.h>
53#include <sys/mbuf.h>
54#include <sys/module.h>
55#include <sys/rman.h>
56#include <sys/socket.h>
57#include <sys/sockio.h>
58#include <sys/sysctl.h>
59#include <sys/taskqueue.h>
60#include <sys/eventhandler.h>
61#include <sys/pcpu.h>
62#include <sys/smp.h>
63#include <machine/smp.h>
64#include <machine/bus.h>
65#include <machine/resource.h>
66
67#include <net/bpf.h>
68#include <net/ethernet.h>
69#include <net/if.h>
70#include <net/if_arp.h>
71#include <net/if_dl.h>
72#include <net/if_media.h>
73
74#include <net/if_types.h>
75#include <net/if_vlan_var.h>
76
77#include <netinet/in_systm.h>
78#include <netinet/in.h>
79#include <netinet/if_ether.h>
80#include <netinet/ip.h>
81#include <netinet/ip6.h>
82#include <netinet/tcp.h>
83#include <netinet/tcp_lro.h>
84#include <netinet/udp.h>
85
86#include <machine/in_cksum.h>
87#include <dev/led/led.h>
88#include <dev/pci/pcivar.h>
89#include <dev/pci/pcireg.h>
90
91#include "e1000_api.h"
92#include "e1000_82575.h"
93#include "if_igb.h"
94
95/*********************************************************************
96 *  Set this to one to display debug statistics
97 *********************************************************************/
98int	igb_display_debug_stats = 0;
99
100/*********************************************************************
101 *  Driver version:
102 *********************************************************************/
103char igb_driver_version[] = "version - 2.3.4";
104
105
106/*********************************************************************
107 *  PCI Device ID Table
108 *
109 *  Used by the probe routine to select which devices to attach to.
110 *  The last field stores an index into igb_strings.
111 *  Last entry must be all 0s
112 *
113 *  { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
114 *********************************************************************/
115
116static igb_vendor_info_t igb_vendor_info_array[] =
117{
118	{ 0x8086, E1000_DEV_ID_82575EB_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
119	{ 0x8086, E1000_DEV_ID_82575EB_FIBER_SERDES,
120						PCI_ANY_ID, PCI_ANY_ID, 0},
121	{ 0x8086, E1000_DEV_ID_82575GB_QUAD_COPPER,
122						PCI_ANY_ID, PCI_ANY_ID, 0},
123	{ 0x8086, E1000_DEV_ID_82576,		PCI_ANY_ID, PCI_ANY_ID, 0},
124	{ 0x8086, E1000_DEV_ID_82576_NS,	PCI_ANY_ID, PCI_ANY_ID, 0},
125	{ 0x8086, E1000_DEV_ID_82576_NS_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
126	{ 0x8086, E1000_DEV_ID_82576_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
127	{ 0x8086, E1000_DEV_ID_82576_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
128	{ 0x8086, E1000_DEV_ID_82576_SERDES_QUAD,
129						PCI_ANY_ID, PCI_ANY_ID, 0},
130	{ 0x8086, E1000_DEV_ID_82576_QUAD_COPPER,
131						PCI_ANY_ID, PCI_ANY_ID, 0},
132	{ 0x8086, E1000_DEV_ID_82576_QUAD_COPPER_ET2,
133						PCI_ANY_ID, PCI_ANY_ID, 0},
134	{ 0x8086, E1000_DEV_ID_82576_VF,	PCI_ANY_ID, PCI_ANY_ID, 0},
135	{ 0x8086, E1000_DEV_ID_82580_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
136	{ 0x8086, E1000_DEV_ID_82580_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
137	{ 0x8086, E1000_DEV_ID_82580_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
138	{ 0x8086, E1000_DEV_ID_82580_SGMII,	PCI_ANY_ID, PCI_ANY_ID, 0},
139	{ 0x8086, E1000_DEV_ID_82580_COPPER_DUAL,
140						PCI_ANY_ID, PCI_ANY_ID, 0},
141	{ 0x8086, E1000_DEV_ID_82580_QUAD_FIBER,
142						PCI_ANY_ID, PCI_ANY_ID, 0},
143	{ 0x8086, E1000_DEV_ID_DH89XXCC_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
144	{ 0x8086, E1000_DEV_ID_DH89XXCC_SGMII,	PCI_ANY_ID, PCI_ANY_ID, 0},
145	{ 0x8086, E1000_DEV_ID_DH89XXCC_SFP,	PCI_ANY_ID, PCI_ANY_ID, 0},
146	{ 0x8086, E1000_DEV_ID_DH89XXCC_BACKPLANE,
147						PCI_ANY_ID, PCI_ANY_ID, 0},
148	{ 0x8086, E1000_DEV_ID_I350_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
149	{ 0x8086, E1000_DEV_ID_I350_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
150	{ 0x8086, E1000_DEV_ID_I350_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
151	{ 0x8086, E1000_DEV_ID_I350_SGMII,	PCI_ANY_ID, PCI_ANY_ID, 0},
152	{ 0x8086, E1000_DEV_ID_I350_VF,		PCI_ANY_ID, PCI_ANY_ID, 0},
153	{ 0x8086, E1000_DEV_ID_I210_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
154	{ 0x8086, E1000_DEV_ID_I210_COPPER_IT,	PCI_ANY_ID, PCI_ANY_ID, 0},
155	{ 0x8086, E1000_DEV_ID_I210_COPPER_OEM1,
156						PCI_ANY_ID, PCI_ANY_ID, 0},
157	{ 0x8086, E1000_DEV_ID_I210_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
158	{ 0x8086, E1000_DEV_ID_I210_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
159	{ 0x8086, E1000_DEV_ID_I210_SGMII,	PCI_ANY_ID, PCI_ANY_ID, 0},
160	{ 0x8086, E1000_DEV_ID_I211_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
161	/* required last entry */
162	{ 0, 0, 0, 0, 0}
163};
164
165/*********************************************************************
166 *  Table of branding strings for all supported NICs.
167 *********************************************************************/
168
169static char *igb_strings[] = {
170	"Intel(R) PRO/1000 Network Connection"
171};
172
173/*********************************************************************
174 *  Function prototypes
175 *********************************************************************/
176static int	igb_probe(device_t);
177static int	igb_attach(device_t);
178static int	igb_detach(device_t);
179static int	igb_shutdown(device_t);
180static int	igb_suspend(device_t);
181static int	igb_resume(device_t);
182#if __FreeBSD_version >= 800000
183static int	igb_mq_start(struct ifnet *, struct mbuf *);
184static int	igb_mq_start_locked(struct ifnet *,
185		    struct tx_ring *, struct mbuf *);
186static void	igb_qflush(struct ifnet *);
187static void	igb_deferred_mq_start(void *, int);
188#else
189static void	igb_start(struct ifnet *);
190static void	igb_start_locked(struct tx_ring *, struct ifnet *ifp);
191#endif
192static int	igb_ioctl(struct ifnet *, u_long, caddr_t);
193static void	igb_init(void *);
194static void	igb_init_locked(struct adapter *);
195static void	igb_stop(void *);
196static void	igb_media_status(struct ifnet *, struct ifmediareq *);
197static int	igb_media_change(struct ifnet *);
198static void	igb_identify_hardware(struct adapter *);
199static int	igb_allocate_pci_resources(struct adapter *);
200static int	igb_allocate_msix(struct adapter *);
201static int	igb_allocate_legacy(struct adapter *);
202static int	igb_setup_msix(struct adapter *);
203static void	igb_free_pci_resources(struct adapter *);
204static void	igb_local_timer(void *);
205static void	igb_reset(struct adapter *);
206static int	igb_setup_interface(device_t, struct adapter *);
207static int	igb_allocate_queues(struct adapter *);
208static void	igb_configure_queues(struct adapter *);
209
210static int	igb_allocate_transmit_buffers(struct tx_ring *);
211static void	igb_setup_transmit_structures(struct adapter *);
212static void	igb_setup_transmit_ring(struct tx_ring *);
213static void	igb_initialize_transmit_units(struct adapter *);
214static void	igb_free_transmit_structures(struct adapter *);
215static void	igb_free_transmit_buffers(struct tx_ring *);
216
217static int	igb_allocate_receive_buffers(struct rx_ring *);
218static int	igb_setup_receive_structures(struct adapter *);
219static int	igb_setup_receive_ring(struct rx_ring *);
220static void	igb_initialize_receive_units(struct adapter *);
221static void	igb_free_receive_structures(struct adapter *);
222static void	igb_free_receive_buffers(struct rx_ring *);
223static void	igb_free_receive_ring(struct rx_ring *);
224
225static void	igb_enable_intr(struct adapter *);
226static void	igb_disable_intr(struct adapter *);
227static void	igb_update_stats_counters(struct adapter *);
228static bool	igb_txeof(struct tx_ring *);
229
230static __inline	void igb_rx_discard(struct rx_ring *, int);
231static __inline void igb_rx_input(struct rx_ring *,
232		    struct ifnet *, struct mbuf *, u32);
233
234static bool	igb_rxeof(struct igb_queue *, int, int *);
235static void	igb_rx_checksum(u32, struct mbuf *, u32);
236static bool	igb_tx_ctx_setup(struct tx_ring *, struct mbuf *);
237static bool	igb_tso_setup(struct tx_ring *, struct mbuf *, int,
238		    struct ip *, struct tcphdr *);
239static void	igb_set_promisc(struct adapter *);
240static void	igb_disable_promisc(struct adapter *);
241static void	igb_set_multi(struct adapter *);
242static void	igb_update_link_status(struct adapter *);
243static void	igb_refresh_mbufs(struct rx_ring *, int);
244
245static void	igb_register_vlan(void *, struct ifnet *, u16);
246static void	igb_unregister_vlan(void *, struct ifnet *, u16);
247static void	igb_setup_vlan_hw_support(struct adapter *);
248
249static int	igb_xmit(struct tx_ring *, struct mbuf **);
250static int	igb_dma_malloc(struct adapter *, bus_size_t,
251		    struct igb_dma_alloc *, int);
252static void	igb_dma_free(struct adapter *, struct igb_dma_alloc *);
253static int	igb_sysctl_nvm_info(SYSCTL_HANDLER_ARGS);
254static void	igb_print_nvm_info(struct adapter *);
255static int 	igb_is_valid_ether_addr(u8 *);
256static void     igb_add_hw_stats(struct adapter *);
257
258static void	igb_vf_init_stats(struct adapter *);
259static void	igb_update_vf_stats_counters(struct adapter *);
260
261/* Management and WOL Support */
262static void	igb_init_manageability(struct adapter *);
263static void	igb_release_manageability(struct adapter *);
264static void     igb_get_hw_control(struct adapter *);
265static void     igb_release_hw_control(struct adapter *);
266static void     igb_enable_wakeup(device_t);
267static void     igb_led_func(void *, int);
268
269static int	igb_irq_fast(void *);
270static void	igb_msix_que(void *);
271static void	igb_msix_link(void *);
272static void	igb_handle_que(void *context, int pending);
273static void	igb_handle_link(void *context, int pending);
274static void	igb_handle_link_locked(struct adapter *);
275
276static void	igb_set_sysctl_value(struct adapter *, const char *,
277		    const char *, int *, int);
278static int	igb_set_flowcntl(SYSCTL_HANDLER_ARGS);
279static int	igb_sysctl_dmac(SYSCTL_HANDLER_ARGS);
280
281#ifdef DEVICE_POLLING
282static poll_handler_t igb_poll;
283#endif /* DEVICE_POLLING */
284
285/*********************************************************************
286 *  FreeBSD Device Interface Entry Points
287 *********************************************************************/
288
289static device_method_t igb_methods[] = {
290	/* Device interface */
291	DEVMETHOD(device_probe, igb_probe),
292	DEVMETHOD(device_attach, igb_attach),
293	DEVMETHOD(device_detach, igb_detach),
294	DEVMETHOD(device_shutdown, igb_shutdown),
295	DEVMETHOD(device_suspend, igb_suspend),
296	DEVMETHOD(device_resume, igb_resume),
297	{0, 0}
298};
299
300static driver_t igb_driver = {
301	"igb", igb_methods, sizeof(struct adapter),
302};
303
304static devclass_t igb_devclass;
305DRIVER_MODULE(igb, pci, igb_driver, igb_devclass, 0, 0);
306MODULE_DEPEND(igb, pci, 1, 1, 1);
307MODULE_DEPEND(igb, ether, 1, 1, 1);
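
/*
** (Added note) DRIVER_MODULE() registers this driver on the pci bus under
** the name "igb", and the MODULE_DEPEND() lines above declare load-order
** dependencies on the pci and ether modules.
*/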
308
309/*********************************************************************
310 *  Tunable default values.
311 *********************************************************************/
312
313static SYSCTL_NODE(_hw, OID_AUTO, igb, CTLFLAG_RD, 0, "IGB driver parameters");
314
315/* Descriptor defaults */
316static int igb_rxd = IGB_DEFAULT_RXD;
317static int igb_txd = IGB_DEFAULT_TXD;
318TUNABLE_INT("hw.igb.rxd", &igb_rxd);
319TUNABLE_INT("hw.igb.txd", &igb_txd);
320SYSCTL_INT(_hw_igb, OID_AUTO, rxd, CTLFLAG_RDTUN, &igb_rxd, 0,
321    "Number of receive descriptors per queue");
322SYSCTL_INT(_hw_igb, OID_AUTO, txd, CTLFLAG_RDTUN, &igb_txd, 0,
323    "Number of transmit descriptors per queue");
324
325/*
326** AIM: Adaptive Interrupt Moderation
327** which means that the interrupt rate
328** is varied over time based on the
329** traffic for that interrupt vector.
330*/
331static int igb_enable_aim = TRUE;
332TUNABLE_INT("hw.igb.enable_aim", &igb_enable_aim);
333SYSCTL_INT(_hw_igb, OID_AUTO, enable_aim, CTLFLAG_RW, &igb_enable_aim, 0,
334    "Enable adaptive interrupt moderation");
335
336/*
337 * MSIX should be the default for best performance,
338 * but this allows it to be forced off for testing.
339 */
340static int igb_enable_msix = 1;
341TUNABLE_INT("hw.igb.enable_msix", &igb_enable_msix);
342SYSCTL_INT(_hw_igb, OID_AUTO, enable_msix, CTLFLAG_RDTUN, &igb_enable_msix, 0,
343    "Enable MSI-X interrupts");
344
345/*
346** Tunable interrupt rate
347*/
348static int igb_max_interrupt_rate = 8000;
349TUNABLE_INT("hw.igb.max_interrupt_rate", &igb_max_interrupt_rate);
350SYSCTL_INT(_hw_igb, OID_AUTO, max_interrupt_rate, CTLFLAG_RDTUN,
351    &igb_max_interrupt_rate, 0, "Maximum interrupts per second");
352
353/*
354** Header split causes the packet header to
355** be DMA'd to a separate mbuf from the payload.
356** This can have memory alignment benefits. Another
357** plus is that small packets often fit entirely
358** into the header mbuf and thus use no cluster. It is
359** a very workload-dependent feature.
360*/
361static int igb_header_split = FALSE;
362TUNABLE_INT("hw.igb.hdr_split", &igb_header_split);
363SYSCTL_INT(_hw_igb, OID_AUTO, header_split, CTLFLAG_RDTUN, &igb_header_split, 0,
364    "Enable receive mbuf header split");
365
366/*
367** This will autoconfigure based on
368** the number of CPUs if left at 0.
369*/
370static int igb_num_queues = 0;
371TUNABLE_INT("hw.igb.num_queues", &igb_num_queues);
372SYSCTL_INT(_hw_igb, OID_AUTO, num_queues, CTLFLAG_RDTUN, &igb_num_queues, 0,
373    "Number of queues to configure, 0 indicates autoconfigure");
374
375/*
376** Global variable to store last used CPU when binding queues
377** to CPUs in igb_allocate_msix.  Starts at CPU_FIRST and increments when a
378** queue is bound to a cpu.
379*/
380static int igb_last_bind_cpu = -1;
381
382/* How many packets rxeof tries to clean at a time */
383static int igb_rx_process_limit = 100;
384TUNABLE_INT("hw.igb.rx_process_limit", &igb_rx_process_limit);
385SYSCTL_INT(_hw_igb, OID_AUTO, rx_process_limit, CTLFLAG_RDTUN,
386    &igb_rx_process_limit, 0,
387    "Maximum number of received packets to process at a time, -1 means unlimited");
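
/*
** Illustrative example (added, not part of the original driver): the
** CTLFLAG_RDTUN tunables above are read at boot, so they would typically
** be set from /boot/loader.conf, e.g.:
**
**   hw.igb.rxd=2048
**   hw.igb.txd=2048
**   hw.igb.enable_msix=1
**   hw.igb.max_interrupt_rate=8000
**   hw.igb.num_queues=0            (0 = autoconfigure from CPU count)
**   hw.igb.rx_process_limit=100
**
** The values shown are examples only; the defaults are defined above.
*/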
388
389#ifdef DEV_NETMAP	/* see ixgbe.c for details */
390#include <dev/netmap/if_igb_netmap.h>
391#endif /* DEV_NETMAP */
392/*********************************************************************
393 *  Device identification routine
394 *
395 *  igb_probe determines whether the driver should be loaded on an
396 *  adapter based on the adapter's PCI vendor/device ID.
397 *
398 *  return BUS_PROBE_DEFAULT on success, positive on failure
399 *********************************************************************/
400
401static int
402igb_probe(device_t dev)
403{
404	char		adapter_name[60];
405	uint16_t	pci_vendor_id = 0;
406	uint16_t	pci_device_id = 0;
407	uint16_t	pci_subvendor_id = 0;
408	uint16_t	pci_subdevice_id = 0;
409	igb_vendor_info_t *ent;
410
411	INIT_DEBUGOUT("igb_probe: begin");
412
413	pci_vendor_id = pci_get_vendor(dev);
414	if (pci_vendor_id != IGB_VENDOR_ID)
415		return (ENXIO);
416
417	pci_device_id = pci_get_device(dev);
418	pci_subvendor_id = pci_get_subvendor(dev);
419	pci_subdevice_id = pci_get_subdevice(dev);
420
421	ent = igb_vendor_info_array;
422	while (ent->vendor_id != 0) {
423		if ((pci_vendor_id == ent->vendor_id) &&
424		    (pci_device_id == ent->device_id) &&
425
426		    ((pci_subvendor_id == ent->subvendor_id) ||
427		    (ent->subvendor_id == PCI_ANY_ID)) &&
428
429		    ((pci_subdevice_id == ent->subdevice_id) ||
430		    (ent->subdevice_id == PCI_ANY_ID))) {
431			sprintf(adapter_name, "%s %s",
432				igb_strings[ent->index],
433				igb_driver_version);
434			device_set_desc_copy(dev, adapter_name);
435			return (BUS_PROBE_DEFAULT);
436		}
437		ent++;
438	}
439
440	return (ENXIO);
441}
442
443/*********************************************************************
444 *  Device initialization routine
445 *
446 *  The attach entry point is called when the driver is being loaded.
447 *  This routine identifies the type of hardware, allocates all resources
448 *  and initializes the hardware.
449 *
450 *  return 0 on success, positive on failure
451 *********************************************************************/
452
453static int
454igb_attach(device_t dev)
455{
456	struct adapter	*adapter;
457	int		error = 0;
458	u16		eeprom_data;
459
460	INIT_DEBUGOUT("igb_attach: begin");
461
462	if (resource_disabled("igb", device_get_unit(dev))) {
463		device_printf(dev, "Disabled by device hint\n");
464		return (ENXIO);
465	}
466
467	adapter = device_get_softc(dev);
468	adapter->dev = adapter->osdep.dev = dev;
469	IGB_CORE_LOCK_INIT(adapter, device_get_nameunit(dev));
470
471	/* SYSCTL stuff */
472	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
473	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
474	    OID_AUTO, "nvm", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
475	    igb_sysctl_nvm_info, "I", "NVM Information");
476
477	igb_set_sysctl_value(adapter, "enable_aim",
478	    "Interrupt Moderation", &adapter->enable_aim,
479	    igb_enable_aim);
480
481	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
482	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
483	    OID_AUTO, "fc", CTLTYPE_INT|CTLFLAG_RW,
484	    adapter, 0, igb_set_flowcntl, "I", "Flow Control");
485
486	callout_init_mtx(&adapter->timer, &adapter->core_mtx, 0);
487
488	/* Determine hardware and mac info */
489	igb_identify_hardware(adapter);
490
491	/* Setup PCI resources */
492	if (igb_allocate_pci_resources(adapter)) {
493		device_printf(dev, "Allocation of PCI resources failed\n");
494		error = ENXIO;
495		goto err_pci;
496	}
497
498	/* Do Shared Code initialization */
499	if (e1000_setup_init_funcs(&adapter->hw, TRUE)) {
500		device_printf(dev, "Setup of Shared code failed\n");
501		error = ENXIO;
502		goto err_pci;
503	}
504
505	e1000_get_bus_info(&adapter->hw);
506
507	/* Sysctl for limiting the amount of work done in the taskqueue */
508	igb_set_sysctl_value(adapter, "rx_processing_limit",
509	    "max number of rx packets to process",
510	    &adapter->rx_process_limit, igb_rx_process_limit);
511
512	/*
513	 * Validate the number of transmit and receive descriptors. They
514	 * must not exceed the hardware maximum and must be a multiple
515	 * of IGB_DBA_ALIGN.
516	 */
517	if (((igb_txd * sizeof(struct e1000_tx_desc)) % IGB_DBA_ALIGN) != 0 ||
518	    (igb_txd > IGB_MAX_TXD) || (igb_txd < IGB_MIN_TXD)) {
519		device_printf(dev, "Using %d TX descriptors instead of %d!\n",
520		    IGB_DEFAULT_TXD, igb_txd);
521		adapter->num_tx_desc = IGB_DEFAULT_TXD;
522	} else
523		adapter->num_tx_desc = igb_txd;
524	if (((igb_rxd * sizeof(struct e1000_rx_desc)) % IGB_DBA_ALIGN) != 0 ||
525	    (igb_rxd > IGB_MAX_RXD) || (igb_rxd < IGB_MIN_RXD)) {
526		device_printf(dev, "Using %d RX descriptors instead of %d!\n",
527		    IGB_DEFAULT_RXD, igb_rxd);
528		adapter->num_rx_desc = IGB_DEFAULT_RXD;
529	} else
530		adapter->num_rx_desc = igb_rxd;
531
532	adapter->hw.mac.autoneg = DO_AUTO_NEG;
533	adapter->hw.phy.autoneg_wait_to_complete = FALSE;
534	adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
535
536	/* Copper options */
537	if (adapter->hw.phy.media_type == e1000_media_type_copper) {
538		adapter->hw.phy.mdix = AUTO_ALL_MODES;
539		adapter->hw.phy.disable_polarity_correction = FALSE;
540		adapter->hw.phy.ms_type = IGB_MASTER_SLAVE;
541	}
542
543	/*
544	 * Set the frame limits assuming
545	 * standard ethernet sized frames.
546	 */
547	adapter->max_frame_size = ETHERMTU + ETHER_HDR_LEN + ETHERNET_FCS_SIZE;
548	adapter->min_frame_size = ETH_ZLEN + ETHERNET_FCS_SIZE;
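	/*
	 * (Added note) With standard Ethernet sizes this works out to a
	 * 1518-byte maximum (1500 MTU + 14-byte header + 4-byte FCS) and a
	 * 64-byte minimum (60-byte ETH_ZLEN + 4-byte FCS).
	 */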
549
550	/*
551	** Allocate and Setup Queues
552	*/
553	if (igb_allocate_queues(adapter)) {
554		error = ENOMEM;
555		goto err_pci;
556	}
557
558	/* Allocate the appropriate stats memory */
559	if (adapter->vf_ifp) {
560		adapter->stats =
561		    (struct e1000_vf_stats *)malloc(sizeof \
562		    (struct e1000_vf_stats), M_DEVBUF, M_NOWAIT | M_ZERO);
563		igb_vf_init_stats(adapter);
564	} else
565		adapter->stats =
566		    (struct e1000_hw_stats *)malloc(sizeof \
567		    (struct e1000_hw_stats), M_DEVBUF, M_NOWAIT | M_ZERO);
568	if (adapter->stats == NULL) {
569		device_printf(dev, "Can not allocate stats memory\n");
570		error = ENOMEM;
571		goto err_late;
572	}
573
574	/* Allocate multicast array memory. */
575	adapter->mta = malloc(sizeof(u8) * ETH_ADDR_LEN *
576	    MAX_NUM_MULTICAST_ADDRESSES, M_DEVBUF, M_NOWAIT);
577	if (adapter->mta == NULL) {
578		device_printf(dev, "Can not allocate multicast setup array\n");
579		error = ENOMEM;
580		goto err_late;
581	}
582
583	/* Some adapter-specific advanced features */
584	if (adapter->hw.mac.type >= e1000_i350) {
585		SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
586		    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
587		    OID_AUTO, "dmac", CTLTYPE_INT|CTLFLAG_RW,
588		    adapter, 0, igb_sysctl_dmac, "I", "DMA Coalesce");
589		igb_set_sysctl_value(adapter, "eee_disabled",
590		    "enable Energy Efficient Ethernet",
591		    (int *)&adapter->hw.dev_spec._82575.eee_disable,
592		    TRUE);
593		if (adapter->hw.phy.media_type == e1000_media_type_copper)
594			e1000_set_eee_i350(&adapter->hw);
595	}
596
597	/*
598	** Start from a known state; this is
599	** important when reading the NVM and
600	** MAC address from it.
601	*/
602	e1000_reset_hw(&adapter->hw);
603
604	/* Make sure we have a good EEPROM before we read from it */
605	if (((adapter->hw.mac.type != e1000_i210) &&
606	    (adapter->hw.mac.type != e1000_i211)) &&
607	    (e1000_validate_nvm_checksum(&adapter->hw) < 0)) {
608		/*
609		** Some PCI-E parts fail the first check due to
610		** the link being in a sleep state; call it again
611		** and, if it fails a second time, it is a real issue.
612		*/
613		if (e1000_validate_nvm_checksum(&adapter->hw) < 0) {
614			device_printf(dev,
615			    "The EEPROM Checksum Is Not Valid\n");
616			error = EIO;
617			goto err_late;
618		}
619	}
620
621	/*
622	** Copy the permanent MAC address out of the EEPROM
623	*/
624	if (e1000_read_mac_addr(&adapter->hw) < 0) {
625		device_printf(dev, "EEPROM read error while reading MAC"
626		    " address\n");
627		error = EIO;
628		goto err_late;
629	}
630	/* Check its sanity */
631	if (!igb_is_valid_ether_addr(adapter->hw.mac.addr)) {
632		device_printf(dev, "Invalid MAC address\n");
633		error = EIO;
634		goto err_late;
635	}
636
637	/* Setup OS specific network interface */
638	if (igb_setup_interface(dev, adapter) != 0)
639		goto err_late;
640
641	/* Now get a good starting state */
642	igb_reset(adapter);
643
644	/* Initialize statistics */
645	igb_update_stats_counters(adapter);
646
647	adapter->hw.mac.get_link_status = 1;
648	igb_update_link_status(adapter);
649
650	/* Indicate SOL/IDER usage */
651	if (e1000_check_reset_block(&adapter->hw))
652		device_printf(dev,
653		    "PHY reset is blocked due to SOL/IDER session.\n");
654
655	/* Determine if we have to control management hardware */
656	adapter->has_manage = e1000_enable_mng_pass_thru(&adapter->hw);
657
658	/*
659	 * Setup Wake-on-Lan
660	 */
661	/* APME bit in EEPROM is mapped to WUC.APME */
662	eeprom_data = E1000_READ_REG(&adapter->hw, E1000_WUC) & E1000_WUC_APME;
663	if (eeprom_data)
664		adapter->wol = E1000_WUFC_MAG;
665
666	/* Register for VLAN events */
667	adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
668	     igb_register_vlan, adapter, EVENTHANDLER_PRI_FIRST);
669	adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
670	     igb_unregister_vlan, adapter, EVENTHANDLER_PRI_FIRST);
671
672	igb_add_hw_stats(adapter);
673
674	/* Tell the stack that the interface is not active */
675	adapter->ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
676	adapter->ifp->if_drv_flags |=  IFF_DRV_OACTIVE;
677
678	adapter->led_dev = led_create(igb_led_func, adapter,
679	    device_get_nameunit(dev));
680
681	/*
682	** Configure Interrupts
683	*/
684	if ((adapter->msix > 1) && (igb_enable_msix))
685		error = igb_allocate_msix(adapter);
686	else /* MSI or Legacy */
687		error = igb_allocate_legacy(adapter);
688	if (error)
689		goto err_late;
690
691#ifdef DEV_NETMAP
692	igb_netmap_attach(adapter);
693#endif /* DEV_NETMAP */
694	INIT_DEBUGOUT("igb_attach: end");
695
696	return (0);
697
698err_late:
699	igb_detach(dev);
700	igb_free_transmit_structures(adapter);
701	igb_free_receive_structures(adapter);
702	igb_release_hw_control(adapter);
703err_pci:
704	igb_free_pci_resources(adapter);
705	if (adapter->ifp != NULL)
706		if_free(adapter->ifp);
707	free(adapter->mta, M_DEVBUF);
708	IGB_CORE_LOCK_DESTROY(adapter);
709
710	return (error);
711}
712
713/*********************************************************************
714 *  Device removal routine
715 *
716 *  The detach entry point is called when the driver is being removed.
717 *  This routine stops the adapter and deallocates all the resources
718 *  that were allocated for driver operation.
719 *
720 *  return 0 on success, positive on failure
721 *********************************************************************/
722
723static int
724igb_detach(device_t dev)
725{
726	struct adapter	*adapter = device_get_softc(dev);
727	struct ifnet	*ifp = adapter->ifp;
728
729	INIT_DEBUGOUT("igb_detach: begin");
730
731	/* Make sure VLANS are not using driver */
732	if (adapter->ifp->if_vlantrunk != NULL) {
733		device_printf(dev,"Vlan in use, detach first\n");
734		return (EBUSY);
735	}
736
737	ether_ifdetach(adapter->ifp);
738
739	if (adapter->led_dev != NULL)
740		led_destroy(adapter->led_dev);
741
742#ifdef DEVICE_POLLING
743	if (ifp->if_capenable & IFCAP_POLLING)
744		ether_poll_deregister(ifp);
745#endif
746
747	IGB_CORE_LOCK(adapter);
748	adapter->in_detach = 1;
749	igb_stop(adapter);
750	IGB_CORE_UNLOCK(adapter);
751
752	e1000_phy_hw_reset(&adapter->hw);
753
754	/* Give control back to firmware */
755	igb_release_manageability(adapter);
756	igb_release_hw_control(adapter);
757
758	if (adapter->wol) {
759		E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
760		E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
761		igb_enable_wakeup(dev);
762	}
763
764	/* Unregister VLAN events */
765	if (adapter->vlan_attach != NULL)
766		EVENTHANDLER_DEREGISTER(vlan_config, adapter->vlan_attach);
767	if (adapter->vlan_detach != NULL)
768		EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach);
769
770	callout_drain(&adapter->timer);
771
772#ifdef DEV_NETMAP
773	netmap_detach(adapter->ifp);
774#endif /* DEV_NETMAP */
775	igb_free_pci_resources(adapter);
776	bus_generic_detach(dev);
777	if_free(ifp);
778
779	igb_free_transmit_structures(adapter);
780	igb_free_receive_structures(adapter);
781	if (adapter->mta != NULL)
782		free(adapter->mta, M_DEVBUF);
783
784	IGB_CORE_LOCK_DESTROY(adapter);
785
786	return (0);
787}
788
789/*********************************************************************
790 *
791 *  Shutdown entry point
792 *
793 **********************************************************************/
794
795static int
796igb_shutdown(device_t dev)
797{
798	return igb_suspend(dev);
799}
800
801/*
802 * Suspend/resume device methods.
803 */
804static int
805igb_suspend(device_t dev)
806{
807	struct adapter *adapter = device_get_softc(dev);
808
809	IGB_CORE_LOCK(adapter);
810
811	igb_stop(adapter);
812
813	igb_release_manageability(adapter);
814	igb_release_hw_control(adapter);
815
816	if (adapter->wol) {
817		E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
818		E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
819		igb_enable_wakeup(dev);
820	}
821
822	IGB_CORE_UNLOCK(adapter);
823
824	return bus_generic_suspend(dev);
825}
826
827static int
828igb_resume(device_t dev)
829{
830	struct adapter *adapter = device_get_softc(dev);
831	struct tx_ring	*txr = adapter->tx_rings;
832	struct ifnet *ifp = adapter->ifp;
833
834	IGB_CORE_LOCK(adapter);
835	igb_init_locked(adapter);
836	igb_init_manageability(adapter);
837
838	if ((ifp->if_flags & IFF_UP) &&
839	    (ifp->if_drv_flags & IFF_DRV_RUNNING) && adapter->link_active) {
840		for (int i = 0; i < adapter->num_queues; i++, txr++) {
841			IGB_TX_LOCK(txr);
842#if __FreeBSD_version >= 800000
843			/* Process the stack queue only if not depleted */
844			if (((txr->queue_status & IGB_QUEUE_DEPLETED) == 0) &&
845			    !drbr_empty(ifp, txr->br))
846				igb_mq_start_locked(ifp, txr, NULL);
847#else
848			if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
849				igb_start_locked(txr, ifp);
850#endif
851			IGB_TX_UNLOCK(txr);
852		}
853	}
854	IGB_CORE_UNLOCK(adapter);
855
856	return bus_generic_resume(dev);
857}
858
859
860#if __FreeBSD_version < 800000
861
862/*********************************************************************
863 *  Transmit entry point
864 *
865 *  igb_start is called by the stack to initiate a transmit.
866 *  The driver will remain in this routine as long as there are
867 *  packets to transmit and transmit resources are available.
868 *  If transmit resources are not available, the stack is notified
869 *  and the packet is requeued.
870 **********************************************************************/
871
872static void
873igb_start_locked(struct tx_ring *txr, struct ifnet *ifp)
874{
875	struct adapter	*adapter = ifp->if_softc;
876	struct mbuf	*m_head;
877
878	IGB_TX_LOCK_ASSERT(txr);
879
880	if ((ifp->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
881	    IFF_DRV_RUNNING)
882		return;
883	if (!adapter->link_active)
884		return;
885
886	/* Call cleanup if number of TX descriptors low */
887	if (txr->tx_avail <= IGB_TX_CLEANUP_THRESHOLD)
888		igb_txeof(txr);
889
890	while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
891		if (txr->tx_avail <= IGB_MAX_SCATTER) {
892			txr->queue_status |= IGB_QUEUE_DEPLETED;
893			break;
894		}
895		IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
896		if (m_head == NULL)
897			break;
898		/*
899		 *  Encapsulation can modify our pointer, and/or make it
900		 *  NULL on failure.  In that event, we can't requeue.
901		 */
902		if (igb_xmit(txr, &m_head)) {
903			if (m_head != NULL)
904				IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
905			if (txr->tx_avail <= IGB_MAX_SCATTER)
906				txr->queue_status |= IGB_QUEUE_DEPLETED;
907			break;
908		}
909
910		/* Send a copy of the frame to the BPF listener */
911		ETHER_BPF_MTAP(ifp, m_head);
912
913		/* Set watchdog on */
914		txr->watchdog_time = ticks;
915		txr->queue_status |= IGB_QUEUE_WORKING;
916	}
917}
918
919/*
920 * Legacy TX driver routine, called from the
921 * stack, always uses tx[0], and spins for it.
922 * Should not be used with multiqueue tx
923 */
924static void
925igb_start(struct ifnet *ifp)
926{
927	struct adapter	*adapter = ifp->if_softc;
928	struct tx_ring	*txr = adapter->tx_rings;
929
930	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
931		IGB_TX_LOCK(txr);
932		igb_start_locked(txr, ifp);
933		IGB_TX_UNLOCK(txr);
934	}
935	return;
936}
937
938#else /* __FreeBSD_version >= 800000 */
939
940/*
941** Multiqueue Transmit driver
942**
943*/
944static int
945igb_mq_start(struct ifnet *ifp, struct mbuf *m)
946{
947	struct adapter		*adapter = ifp->if_softc;
948	struct igb_queue	*que;
949	struct tx_ring		*txr;
950	int 			i, err = 0;
951
952	/* Which queue to use */
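	/*
	** (Added note) M_FLOWID means the mbuf carries a valid flowid,
	** typically the RSS hash computed by the NIC, so the packet is
	** steered to the queue derived from that hash; otherwise the
	** current CPU index is used to spread load across the queues.
	*/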
953	if ((m->m_flags & M_FLOWID) != 0)
954		i = m->m_pkthdr.flowid % adapter->num_queues;
955	else
956		i = curcpu % adapter->num_queues;
957
958	txr = &adapter->tx_rings[i];
959	que = &adapter->queues[i];
960	if (((txr->queue_status & IGB_QUEUE_DEPLETED) == 0) &&
961	    IGB_TX_TRYLOCK(txr)) {
962		err = igb_mq_start_locked(ifp, txr, m);
963		IGB_TX_UNLOCK(txr);
964	} else {
965		err = drbr_enqueue(ifp, txr->br, m);
966		taskqueue_enqueue(que->tq, &txr->txq_task);
967	}
968
969	return (err);
970}
971
972static int
973igb_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr, struct mbuf *m)
974{
975	struct adapter  *adapter = txr->adapter;
976	struct mbuf	*next;
977	int		err = 0, enq;
978
979	IGB_TX_LOCK_ASSERT(txr);
980
981	if (((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) ||
982	    (txr->queue_status == IGB_QUEUE_DEPLETED) ||
983	    adapter->link_active == 0) {
984		if (m != NULL)
985			err = drbr_enqueue(ifp, txr->br, m);
986		return (err);
987	}
988
989	enq = 0;
990	if (m == NULL) {
991		next = drbr_dequeue(ifp, txr->br);
992	} else if (drbr_needs_enqueue(ifp, txr->br)) {
993		if ((err = drbr_enqueue(ifp, txr->br, m)) != 0)
994			return (err);
995		next = drbr_dequeue(ifp, txr->br);
996	} else
997		next = m;
998
999	/* Process the queue */
1000	while (next != NULL) {
1001		if ((err = igb_xmit(txr, &next)) != 0) {
1002			if (next != NULL)
1003				err = drbr_enqueue(ifp, txr->br, next);
1004			break;
1005		}
1006		enq++;
1007		drbr_stats_update(ifp, next->m_pkthdr.len, next->m_flags);
1008		ETHER_BPF_MTAP(ifp, next);
1009		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
1010			break;
1011		next = drbr_dequeue(ifp, txr->br);
1012	}
1013	if (enq > 0) {
1014		/* Set the watchdog */
1015		txr->queue_status |= IGB_QUEUE_WORKING;
1016		txr->watchdog_time = ticks;
1017	}
1018	if (txr->tx_avail <= IGB_TX_CLEANUP_THRESHOLD)
1019		igb_txeof(txr);
1020	if (txr->tx_avail <= IGB_MAX_SCATTER)
1021		txr->queue_status |= IGB_QUEUE_DEPLETED;
1022	return (err);
1023}
1024
1025/*
1026 * Called from a taskqueue to drain queued transmit packets.
1027 */
1028static void
1029igb_deferred_mq_start(void *arg, int pending)
1030{
1031	struct tx_ring *txr = arg;
1032	struct adapter *adapter = txr->adapter;
1033	struct ifnet *ifp = adapter->ifp;
1034
1035	IGB_TX_LOCK(txr);
1036	if (!drbr_empty(ifp, txr->br))
1037		igb_mq_start_locked(ifp, txr, NULL);
1038	IGB_TX_UNLOCK(txr);
1039}
1040
1041/*
1042** Flush all ring buffers
1043*/
1044static void
1045igb_qflush(struct ifnet *ifp)
1046{
1047	struct adapter	*adapter = ifp->if_softc;
1048	struct tx_ring	*txr = adapter->tx_rings;
1049	struct mbuf	*m;
1050
1051	for (int i = 0; i < adapter->num_queues; i++, txr++) {
1052		IGB_TX_LOCK(txr);
1053		while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
1054			m_freem(m);
1055		IGB_TX_UNLOCK(txr);
1056	}
1057	if_qflush(ifp);
1058}
1059#endif /* __FreeBSD_version >= 800000 */
1060
1061/*********************************************************************
1062 *  Ioctl entry point
1063 *
1064 *  igb_ioctl is called when the user wants to configure the
1065 *  interface.
1066 *
1067 *  return 0 on success, positive on failure
1068 **********************************************************************/
1069
1070static int
1071igb_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
1072{
1073	struct adapter	*adapter = ifp->if_softc;
1074	struct ifreq	*ifr = (struct ifreq *)data;
1075#if defined(INET) || defined(INET6)
1076	struct ifaddr	*ifa = (struct ifaddr *)data;
1077#endif
1078	bool		avoid_reset = FALSE;
1079	int		error = 0;
1080
1081	if (adapter->in_detach)
1082		return (error);
1083
1084	switch (command) {
1085	case SIOCSIFADDR:
1086#ifdef INET
1087		if (ifa->ifa_addr->sa_family == AF_INET)
1088			avoid_reset = TRUE;
1089#endif
1090#ifdef INET6
1091		if (ifa->ifa_addr->sa_family == AF_INET6)
1092			avoid_reset = TRUE;
1093#endif
1094		/*
1095		** Calling init results in link renegotiation,
1096		** so we avoid doing it when possible.
1097		*/
1098		if (avoid_reset) {
1099			ifp->if_flags |= IFF_UP;
1100			if (!(ifp->if_drv_flags & IFF_DRV_RUNNING))
1101				igb_init(adapter);
1102#ifdef INET
1103			if (!(ifp->if_flags & IFF_NOARP))
1104				arp_ifinit(ifp, ifa);
1105#endif
1106		} else
1107			error = ether_ioctl(ifp, command, data);
1108		break;
1109	case SIOCSIFMTU:
1110	    {
1111		int max_frame_size;
1112
1113		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)");
1114
1115		IGB_CORE_LOCK(adapter);
1116		max_frame_size = 9234;
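		/*
		 * (Added note) 9234 appears to be the largest supported frame:
		 * a 9216-byte (9K) jumbo payload plus the 14-byte Ethernet
		 * header and 4-byte CRC.
		 */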
1117		if (ifr->ifr_mtu > max_frame_size - ETHER_HDR_LEN -
1118		    ETHER_CRC_LEN) {
1119			IGB_CORE_UNLOCK(adapter);
1120			error = EINVAL;
1121			break;
1122		}
1123
1124		ifp->if_mtu = ifr->ifr_mtu;
1125		adapter->max_frame_size =
1126		    ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
1127		igb_init_locked(adapter);
1128		IGB_CORE_UNLOCK(adapter);
1129		break;
1130	    }
1131	case SIOCSIFFLAGS:
1132		IOCTL_DEBUGOUT("ioctl rcv'd:\
1133		    SIOCSIFFLAGS (Set Interface Flags)");
1134		IGB_CORE_LOCK(adapter);
1135		if (ifp->if_flags & IFF_UP) {
1136			if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) {
1137				if ((ifp->if_flags ^ adapter->if_flags) &
1138				    (IFF_PROMISC | IFF_ALLMULTI)) {
1139					igb_disable_promisc(adapter);
1140					igb_set_promisc(adapter);
1141				}
1142			} else
1143				igb_init_locked(adapter);
1144		} else
1145			if (ifp->if_drv_flags & IFF_DRV_RUNNING)
1146				igb_stop(adapter);
1147		adapter->if_flags = ifp->if_flags;
1148		IGB_CORE_UNLOCK(adapter);
1149		break;
1150	case SIOCADDMULTI:
1151	case SIOCDELMULTI:
1152		IOCTL_DEBUGOUT("ioctl rcv'd: SIOC(ADD|DEL)MULTI");
1153		if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1154			IGB_CORE_LOCK(adapter);
1155			igb_disable_intr(adapter);
1156			igb_set_multi(adapter);
1157#ifdef DEVICE_POLLING
1158			if (!(ifp->if_capenable & IFCAP_POLLING))
1159#endif
1160				igb_enable_intr(adapter);
1161			IGB_CORE_UNLOCK(adapter);
1162		}
1163		break;
1164	case SIOCSIFMEDIA:
1165		/* Check SOL/IDER usage */
1166		IGB_CORE_LOCK(adapter);
1167		if (e1000_check_reset_block(&adapter->hw)) {
1168			IGB_CORE_UNLOCK(adapter);
1169			device_printf(adapter->dev, "Media change is"
1170			    " blocked due to SOL/IDER session.\n");
1171			break;
1172		}
1173		IGB_CORE_UNLOCK(adapter);
1174	case SIOCGIFMEDIA:
1175		IOCTL_DEBUGOUT("ioctl rcv'd: \
1176		    SIOCxIFMEDIA (Get/Set Interface Media)");
1177		error = ifmedia_ioctl(ifp, ifr, &adapter->media, command);
1178		break;
1179	case SIOCSIFCAP:
1180	    {
1181		int mask, reinit;
1182
1183		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFCAP (Set Capabilities)");
1184		reinit = 0;
1185		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
1186#ifdef DEVICE_POLLING
1187		if (mask & IFCAP_POLLING) {
1188			if (ifr->ifr_reqcap & IFCAP_POLLING) {
1189				error = ether_poll_register(igb_poll, ifp);
1190				if (error)
1191					return (error);
1192				IGB_CORE_LOCK(adapter);
1193				igb_disable_intr(adapter);
1194				ifp->if_capenable |= IFCAP_POLLING;
1195				IGB_CORE_UNLOCK(adapter);
1196			} else {
1197				error = ether_poll_deregister(ifp);
1198				/* Enable interrupt even in error case */
1199				IGB_CORE_LOCK(adapter);
1200				igb_enable_intr(adapter);
1201				ifp->if_capenable &= ~IFCAP_POLLING;
1202				IGB_CORE_UNLOCK(adapter);
1203			}
1204		}
1205#endif
1206		if (mask & IFCAP_HWCSUM) {
1207			ifp->if_capenable ^= IFCAP_HWCSUM;
1208			reinit = 1;
1209		}
1210		if (mask & IFCAP_TSO4) {
1211			ifp->if_capenable ^= IFCAP_TSO4;
1212			reinit = 1;
1213		}
1214		if (mask & IFCAP_VLAN_HWTAGGING) {
1215			ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
1216			reinit = 1;
1217		}
1218		if (mask & IFCAP_VLAN_HWFILTER) {
1219			ifp->if_capenable ^= IFCAP_VLAN_HWFILTER;
1220			reinit = 1;
1221		}
1222		if (mask & IFCAP_VLAN_HWTSO) {
1223			ifp->if_capenable ^= IFCAP_VLAN_HWTSO;
1224			reinit = 1;
1225		}
1226		if (mask & IFCAP_LRO) {
1227			ifp->if_capenable ^= IFCAP_LRO;
1228			reinit = 1;
1229		}
1230		if (reinit && (ifp->if_drv_flags & IFF_DRV_RUNNING))
1231			igb_init(adapter);
1232		VLAN_CAPABILITIES(ifp);
1233		break;
1234	    }
1235
1236	default:
1237		error = ether_ioctl(ifp, command, data);
1238		break;
1239	}
1240
1241	return (error);
1242}
1243
1244
1245/*********************************************************************
1246 *  Init entry point
1247 *
1248 *  This routine is used in two ways. It is used by the stack as
1249 *  the init entry point in the network interface structure. It is also
1250 *  used by the driver as a hw/sw initialization routine to get to a
1251 *  consistent state.
1252 *
1253 *  These routines return nothing; failures are reported via device_printf().
1254 **********************************************************************/
1255
1256static void
1257igb_init_locked(struct adapter *adapter)
1258{
1259	struct ifnet	*ifp = adapter->ifp;
1260	device_t	dev = adapter->dev;
1261
1262	INIT_DEBUGOUT("igb_init: begin");
1263
1264	IGB_CORE_LOCK_ASSERT(adapter);
1265
1266	igb_disable_intr(adapter);
1267	callout_stop(&adapter->timer);
1268
1269	/* Get the latest mac address, User can use a LAA */
1270        bcopy(IF_LLADDR(adapter->ifp), adapter->hw.mac.addr,
1271              ETHER_ADDR_LEN);
1272
1273	/* Put the address into the Receive Address Array */
1274	e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);
1275
1276	igb_reset(adapter);
1277	igb_update_link_status(adapter);
1278
1279	E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);
1280
1281	/* Set hardware offload abilities */
1282	ifp->if_hwassist = 0;
1283	if (ifp->if_capenable & IFCAP_TXCSUM) {
1284		ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
1285#if __FreeBSD_version >= 800000
1286		if (adapter->hw.mac.type == e1000_82576)
1287			ifp->if_hwassist |= CSUM_SCTP;
1288#endif
1289	}
1290
1291	if (ifp->if_capenable & IFCAP_TSO4)
1292		ifp->if_hwassist |= CSUM_TSO;
1293
1294	/* Configure for OS presence */
1295	igb_init_manageability(adapter);
1296
1297	/* Prepare transmit descriptors and buffers */
1298	igb_setup_transmit_structures(adapter);
1299	igb_initialize_transmit_units(adapter);
1300
1301	/* Setup Multicast table */
1302	igb_set_multi(adapter);
1303
1304	/*
1305	** Figure out the desired mbuf cluster pool for
1306	** jumbo frames / packet split: 2K, page-size (4K), or 9K clusters.
1307	*/
1308	if (adapter->max_frame_size <= 2048)
1309		adapter->rx_mbuf_sz = MCLBYTES;
1310	else if (adapter->max_frame_size <= 4096)
1311		adapter->rx_mbuf_sz = MJUMPAGESIZE;
1312	else
1313		adapter->rx_mbuf_sz = MJUM9BYTES;
1314
1315	/* Prepare receive descriptors and buffers */
1316	if (igb_setup_receive_structures(adapter)) {
1317		device_printf(dev, "Could not setup receive structures\n");
1318		return;
1319	}
1320	igb_initialize_receive_units(adapter);
1321
1322        /* Enable VLAN support */
1323	if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING)
1324		igb_setup_vlan_hw_support(adapter);
1325
1326	/* Don't lose promiscuous settings */
1327	igb_set_promisc(adapter);
1328
1329	ifp->if_drv_flags |= IFF_DRV_RUNNING;
1330	ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
1331
1332	callout_reset(&adapter->timer, hz, igb_local_timer, adapter);
1333	e1000_clear_hw_cntrs_base_generic(&adapter->hw);
1334
1335	if (adapter->msix > 1) /* Set up queue routing */
1336		igb_configure_queues(adapter);
1337
1338	/* this clears any pending interrupts */
1339	E1000_READ_REG(&adapter->hw, E1000_ICR);
1340#ifdef DEVICE_POLLING
1341	/*
1342	 * Only enable interrupts if we are not polling, make sure
1343	 * they are off otherwise.
1344	 */
1345	if (ifp->if_capenable & IFCAP_POLLING)
1346		igb_disable_intr(adapter);
1347	else
1348#endif /* DEVICE_POLLING */
1349	{
1350		igb_enable_intr(adapter);
1351		E1000_WRITE_REG(&adapter->hw, E1000_ICS, E1000_ICS_LSC);
1352	}
1353
1354	/* Set Energy Efficient Ethernet */
1355	if (adapter->hw.phy.media_type == e1000_media_type_copper)
1356		e1000_set_eee_i350(&adapter->hw);
1357}
1358
1359static void
1360igb_init(void *arg)
1361{
1362	struct adapter *adapter = arg;
1363
1364	IGB_CORE_LOCK(adapter);
1365	igb_init_locked(adapter);
1366	IGB_CORE_UNLOCK(adapter);
1367}
1368
1369
1370static void
1371igb_handle_que(void *context, int pending)
1372{
1373	struct igb_queue *que = context;
1374	struct adapter *adapter = que->adapter;
1375	struct tx_ring *txr = que->txr;
1376	struct ifnet	*ifp = adapter->ifp;
1377
1378	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1379		bool	more;
1380
1381		more = igb_rxeof(que, adapter->rx_process_limit, NULL);
1382
1383		IGB_TX_LOCK(txr);
1384		igb_txeof(txr);
1385#if __FreeBSD_version >= 800000
1386		/* Process the stack queue only if not depleted */
1387		if (((txr->queue_status & IGB_QUEUE_DEPLETED) == 0) &&
1388		    !drbr_empty(ifp, txr->br))
1389			igb_mq_start_locked(ifp, txr, NULL);
1390#else
1391		if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1392			igb_start_locked(txr, ifp);
1393#endif
1394		IGB_TX_UNLOCK(txr);
1395		/* Do we need another? */
1396		if (more) {
1397			taskqueue_enqueue(que->tq, &que->que_task);
1398			return;
1399		}
1400	}
1401
1402#ifdef DEVICE_POLLING
1403	if (ifp->if_capenable & IFCAP_POLLING)
1404		return;
1405#endif
1406	/* Reenable this interrupt */
1407	if (que->eims)
1408		E1000_WRITE_REG(&adapter->hw, E1000_EIMS, que->eims);
1409	else
1410		igb_enable_intr(adapter);
1411}
1412
1413/* Deal with link in a sleepable context */
1414static void
1415igb_handle_link(void *context, int pending)
1416{
1417	struct adapter *adapter = context;
1418
1419	IGB_CORE_LOCK(adapter);
1420	igb_handle_link_locked(adapter);
1421	IGB_CORE_UNLOCK(adapter);
1422}
1423
1424static void
1425igb_handle_link_locked(struct adapter *adapter)
1426{
1427	struct tx_ring	*txr = adapter->tx_rings;
1428	struct ifnet *ifp = adapter->ifp;
1429
1430	IGB_CORE_LOCK_ASSERT(adapter);
1431	adapter->hw.mac.get_link_status = 1;
1432	igb_update_link_status(adapter);
1433	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) && adapter->link_active) {
1434		for (int i = 0; i < adapter->num_queues; i++, txr++) {
1435			IGB_TX_LOCK(txr);
1436#if __FreeBSD_version >= 800000
1437			/* Process the stack queue only if not depleted */
1438			if (((txr->queue_status & IGB_QUEUE_DEPLETED) == 0) &&
1439			    !drbr_empty(ifp, txr->br))
1440				igb_mq_start_locked(ifp, txr, NULL);
1441#else
1442			if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1443				igb_start_locked(txr, ifp);
1444#endif
1445			IGB_TX_UNLOCK(txr);
1446		}
1447	}
1448}
1449
1450/*********************************************************************
1451 *
1452 *  MSI/Legacy Deferred
1453 *  Interrupt Service routine
1454 *
1455 *********************************************************************/
1456static int
1457igb_irq_fast(void *arg)
1458{
1459	struct adapter		*adapter = arg;
1460	struct igb_queue	*que = adapter->queues;
1461	u32			reg_icr;
1462
1463
1464	reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1465
1466	/* Hot eject?  */
1467	if (reg_icr == 0xffffffff)
1468		return FILTER_STRAY;
1469
1470	/* Definitely not our interrupt.  */
1471	if (reg_icr == 0x0)
1472		return FILTER_STRAY;
1473
1474	if ((reg_icr & E1000_ICR_INT_ASSERTED) == 0)
1475		return FILTER_STRAY;
1476
1477	/*
1478	 * Mask interrupts until the taskqueue is finished running.  This is
1479	 * cheap, just assume that it is needed.  This also works around the
1480	 * MSI message reordering errata on certain systems.
1481	 */
1482	igb_disable_intr(adapter);
1483	taskqueue_enqueue(que->tq, &que->que_task);
1484
1485	/* Link status change */
1486	if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC))
1487		taskqueue_enqueue(que->tq, &adapter->link_task);
1488
1489	if (reg_icr & E1000_ICR_RXO)
1490		adapter->rx_overruns++;
1491	return FILTER_HANDLED;
1492}
1493
1494#ifdef DEVICE_POLLING
1495/*********************************************************************
1496 *
1497 *  Legacy polling routine: if using this code you MUST be sure that
1498 *  multiqueue is not in use, i.e., set igb_num_queues to 1.
1499 *
1500 *********************************************************************/
1501#if __FreeBSD_version >= 800000
1502#define POLL_RETURN_COUNT(a) (a)
1503static int
1504#else
1505#define POLL_RETURN_COUNT(a)
1506static void
1507#endif
1508igb_poll(struct ifnet *ifp, enum poll_cmd cmd, int count)
1509{
1510	struct adapter		*adapter = ifp->if_softc;
1511	struct igb_queue	*que = adapter->queues;
1512	struct tx_ring		*txr = adapter->tx_rings;
1513	u32			reg_icr, rx_done = 0;
1514	u32			loop = IGB_MAX_LOOP;
1515	bool			more;
1516
1517	IGB_CORE_LOCK(adapter);
1518	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
1519		IGB_CORE_UNLOCK(adapter);
1520		return POLL_RETURN_COUNT(rx_done);
1521	}
1522
1523	if (cmd == POLL_AND_CHECK_STATUS) {
1524		reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1525		/* Link status change */
1526		if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC))
1527			igb_handle_link_locked(adapter);
1528
1529		if (reg_icr & E1000_ICR_RXO)
1530			adapter->rx_overruns++;
1531	}
1532	IGB_CORE_UNLOCK(adapter);
1533
1534	igb_rxeof(que, count, &rx_done);
1535
1536	IGB_TX_LOCK(txr);
1537	do {
1538		more = igb_txeof(txr);
1539	} while (loop-- && more);
1540#if __FreeBSD_version >= 800000
1541	if (!drbr_empty(ifp, txr->br))
1542		igb_mq_start_locked(ifp, txr, NULL);
1543#else
1544	if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1545		igb_start_locked(txr, ifp);
1546#endif
1547	IGB_TX_UNLOCK(txr);
1548	return POLL_RETURN_COUNT(rx_done);
1549}
1550#endif /* DEVICE_POLLING */
1551
1552/*********************************************************************
1553 *
1554 *  MSIX Que Interrupt Service routine
1555 *
1556 **********************************************************************/
1557static void
1558igb_msix_que(void *arg)
1559{
1560	struct igb_queue *que = arg;
1561	struct adapter *adapter = que->adapter;
1562	struct ifnet   *ifp = adapter->ifp;
1563	struct tx_ring *txr = que->txr;
1564	struct rx_ring *rxr = que->rxr;
1565	u32		newitr = 0;
1566	bool		more_rx;
1567
1568	E1000_WRITE_REG(&adapter->hw, E1000_EIMC, que->eims);
1569	++que->irqs;
1570
1571	IGB_TX_LOCK(txr);
1572	igb_txeof(txr);
1573#if __FreeBSD_version >= 800000
1574	/* Process the stack queue only if not depleted */
1575	if (((txr->queue_status & IGB_QUEUE_DEPLETED) == 0) &&
1576	    !drbr_empty(ifp, txr->br))
1577		igb_mq_start_locked(ifp, txr, NULL);
1578#else
1579	if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1580		igb_start_locked(txr, ifp);
1581#endif
1582	IGB_TX_UNLOCK(txr);
1583
1584	more_rx = igb_rxeof(que, adapter->rx_process_limit, NULL);
1585
1586	if (adapter->enable_aim == FALSE)
1587		goto no_calc;
1588	/*
1589	** Do Adaptive Interrupt Moderation:
1590        **  - Write out last calculated setting
1591	**  - Calculate based on average size over
1592	**    the last interval.
1593	*/
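	/*
	** Worked example (illustrative, assumed traffic mix): with an average
	** of ~1500 bytes per packet on a gigabit link, newitr becomes
	** 1500 + 24 = 1524; that is above the 300-1200 mid range, so it is
	** halved to 762 and masked with 0x7FFC, giving 760 as the value
	** written to EITR on the next interrupt.
	*/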
1594        if (que->eitr_setting)
1595                E1000_WRITE_REG(&adapter->hw,
1596                    E1000_EITR(que->msix), que->eitr_setting);
1597
1598        que->eitr_setting = 0;
1599
1600        /* Idle, do nothing */
1601        if ((txr->bytes == 0) && (rxr->bytes == 0))
1602                goto no_calc;
1603
1604        /* Use half the default if link is below gigabit speed */
1605        if (adapter->link_speed != 1000)
1606                newitr = IGB_DEFAULT_ITR / 2;
1607        else {
1608		if ((txr->bytes) && (txr->packets))
1609                	newitr = txr->bytes/txr->packets;
1610		if ((rxr->bytes) && (rxr->packets))
1611			newitr = max(newitr,
1612			    (rxr->bytes / rxr->packets));
1613                newitr += 24; /* account for hardware frame, crc */
1614		/* set an upper boundary */
1615		newitr = min(newitr, 3000);
1616		/* Be nice to the mid range */
1617                if ((newitr > 300) && (newitr < 1200))
1618                        newitr = (newitr / 3);
1619                else
1620                        newitr = (newitr / 2);
1621        }
1622        newitr &= 0x7FFC;  /* Mask invalid bits */
1623        if (adapter->hw.mac.type == e1000_82575)
1624                newitr |= newitr << 16;
1625        else
1626                newitr |= E1000_EITR_CNT_IGNR;
1627
1628        /* save for next interrupt */
1629        que->eitr_setting = newitr;
1630
1631        /* Reset state */
1632        txr->bytes = 0;
1633        txr->packets = 0;
1634        rxr->bytes = 0;
1635        rxr->packets = 0;
1636
1637no_calc:
1638	/* Schedule a clean task if needed*/
1639	if (more_rx)
1640		taskqueue_enqueue(que->tq, &que->que_task);
1641	else
1642		/* Reenable this interrupt */
1643		E1000_WRITE_REG(&adapter->hw, E1000_EIMS, que->eims);
1644	return;
1645}
1646
1647
1648/*********************************************************************
1649 *
1650 *  MSIX Link Interrupt Service routine
1651 *
1652 **********************************************************************/
1653
1654static void
1655igb_msix_link(void *arg)
1656{
1657	struct adapter	*adapter = arg;
1658	u32       	icr;
1659
1660	++adapter->link_irq;
1661	icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1662	if (!(icr & E1000_ICR_LSC))
1663		goto spurious;
1664	igb_handle_link(adapter, 0);
1665
1666spurious:
1667	/* Rearm */
1668	E1000_WRITE_REG(&adapter->hw, E1000_IMS, E1000_IMS_LSC);
1669	E1000_WRITE_REG(&adapter->hw, E1000_EIMS, adapter->link_mask);
1670	return;
1671}
1672
1673
1674/*********************************************************************
1675 *
1676 *  Media Ioctl callback
1677 *
1678 *  This routine is called whenever the user queries the status of
1679 *  the interface using ifconfig.
1680 *
1681 **********************************************************************/
1682static void
1683igb_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
1684{
1685	struct adapter *adapter = ifp->if_softc;
1686	u_char fiber_type = IFM_1000_SX;
1687
1688	INIT_DEBUGOUT("igb_media_status: begin");
1689
1690	IGB_CORE_LOCK(adapter);
1691	igb_update_link_status(adapter);
1692
1693	ifmr->ifm_status = IFM_AVALID;
1694	ifmr->ifm_active = IFM_ETHER;
1695
1696	if (!adapter->link_active) {
1697		IGB_CORE_UNLOCK(adapter);
1698		return;
1699	}
1700
1701	ifmr->ifm_status |= IFM_ACTIVE;
1702
1703	if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
1704	    (adapter->hw.phy.media_type == e1000_media_type_internal_serdes))
1705		ifmr->ifm_active |= fiber_type | IFM_FDX;
1706	else {
1707		switch (adapter->link_speed) {
1708		case 10:
1709			ifmr->ifm_active |= IFM_10_T;
1710			break;
1711		case 100:
1712			ifmr->ifm_active |= IFM_100_TX;
1713			break;
1714		case 1000:
1715			ifmr->ifm_active |= IFM_1000_T;
1716			break;
1717		}
1718		if (adapter->link_duplex == FULL_DUPLEX)
1719			ifmr->ifm_active |= IFM_FDX;
1720		else
1721			ifmr->ifm_active |= IFM_HDX;
1722	}
1723	IGB_CORE_UNLOCK(adapter);
1724}
1725
1726/*********************************************************************
1727 *
1728 *  Media Ioctl callback
1729 *
1730 *  This routine is called when the user changes speed/duplex using
1731 *  media/mediaopt options with ifconfig.
1732 *
1733 **********************************************************************/
1734static int
1735igb_media_change(struct ifnet *ifp)
1736{
1737	struct adapter *adapter = ifp->if_softc;
1738	struct ifmedia  *ifm = &adapter->media;
1739
1740	INIT_DEBUGOUT("igb_media_change: begin");
1741
1742	if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
1743		return (EINVAL);
1744
1745	IGB_CORE_LOCK(adapter);
1746	switch (IFM_SUBTYPE(ifm->ifm_media)) {
1747	case IFM_AUTO:
1748		adapter->hw.mac.autoneg = DO_AUTO_NEG;
1749		adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
1750		break;
1751	case IFM_1000_LX:
1752	case IFM_1000_SX:
1753	case IFM_1000_T:
1754		adapter->hw.mac.autoneg = DO_AUTO_NEG;
1755		adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
1756		break;
1757	case IFM_100_TX:
1758		adapter->hw.mac.autoneg = FALSE;
1759		adapter->hw.phy.autoneg_advertised = 0;
1760		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1761			adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_FULL;
1762		else
1763			adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_HALF;
1764		break;
1765	case IFM_10_T:
1766		adapter->hw.mac.autoneg = FALSE;
1767		adapter->hw.phy.autoneg_advertised = 0;
1768		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1769			adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_FULL;
1770		else
1771			adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_HALF;
1772		break;
1773	default:
1774		device_printf(adapter->dev, "Unsupported media type\n");
1775	}
1776
1777	igb_init_locked(adapter);
1778	IGB_CORE_UNLOCK(adapter);
1779
1780	return (0);
1781}
1782
1783
1784/*********************************************************************
1785 *
1786 *  This routine maps the mbufs to Advanced TX descriptors.
1787 *
1788 **********************************************************************/
1789static int
1790igb_xmit(struct tx_ring *txr, struct mbuf **m_headp)
1791{
1792	struct adapter		*adapter = txr->adapter;
1793	bus_dma_segment_t	segs[IGB_MAX_SCATTER];
1794	bus_dmamap_t		map;
1795	struct igb_tx_buffer	*tx_buffer, *tx_buffer_mapped;
1796	union e1000_adv_tx_desc	*txd = NULL;
1797	struct mbuf		*m_head = *m_headp;
1798	struct ether_vlan_header *eh = NULL;
1799	struct ip		*ip = NULL;
1800	struct tcphdr		*th = NULL;
1801	u32			hdrlen, cmd_type_len, olinfo_status = 0;
1802	int			ehdrlen, poff;
1803	int			nsegs, i, first, last = 0;
1804	int			error, do_tso, remap = 1;
1805
1806	/* Set basic descriptor constants */
1807	cmd_type_len = E1000_ADVTXD_DTYP_DATA;
1808	cmd_type_len |= E1000_ADVTXD_DCMD_IFCS | E1000_ADVTXD_DCMD_DEXT;
1809	if (m_head->m_flags & M_VLANTAG)
1810		cmd_type_len |= E1000_ADVTXD_DCMD_VLE;
1811
1812retry:
1813	m_head = *m_headp;
1814	do_tso = ((m_head->m_pkthdr.csum_flags & CSUM_TSO) != 0);
1815	hdrlen = ehdrlen = poff = 0;
1816
1817	/*
1818	 * Intel recommends that the entire IP/TCP header reside in a single
1819	 * buffer. If multiple descriptors are used to describe the IP and
1820	 * TCP header, each descriptor should describe one or more
1821	 * complete headers; descriptors referencing only parts of headers
1822	 * are not supported. If all layer headers are not coalesced into
1823	 * a single buffer, each buffer should not cross a 4KB boundary,
1824	 * or be larger than the maximum read request size.
1825	 * Controller also requires modifing IP/TCP header to make TSO work
1826	 * so we firstly get a writable mbuf chain then coalesce ethernet/
1827	 * IP/TCP header into a single buffer to meet the requirement of
1828	 * controller. This also simplifies IP/TCP/UDP checksum offloading
1829	 * which also has similiar restrictions.
1830	 */
1831	if (do_tso || m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD) {
1832		if (do_tso || (m_head->m_next != NULL &&
1833		    m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD)) {
1834			if (M_WRITABLE(*m_headp) == 0) {
1835				m_head = m_dup(*m_headp, M_DONTWAIT);
1836				m_freem(*m_headp);
1837				if (m_head == NULL) {
1838					*m_headp = NULL;
1839					return (ENOBUFS);
1840				}
1841				*m_headp = m_head;
1842			}
1843		}
1844		/*
1845		 * Assume IPv4; we don't have TSO/checksum offload support
1846		 * for IPv6 yet.
1847		 */
1848		ehdrlen = sizeof(struct ether_header);
1849		m_head = m_pullup(m_head, ehdrlen);
1850		if (m_head == NULL) {
1851			*m_headp = NULL;
1852			return (ENOBUFS);
1853		}
1854		eh = mtod(m_head, struct ether_vlan_header *);
1855		if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
1856			ehdrlen = sizeof(struct ether_vlan_header);
1857			m_head = m_pullup(m_head, ehdrlen);
1858			if (m_head == NULL) {
1859				*m_headp = NULL;
1860				return (ENOBUFS);
1861			}
1862		}
1863		m_head = m_pullup(m_head, ehdrlen + sizeof(struct ip));
1864		if (m_head == NULL) {
1865			*m_headp = NULL;
1866			return (ENOBUFS);
1867		}
1868		ip = (struct ip *)(mtod(m_head, char *) + ehdrlen);
1869		poff = ehdrlen + (ip->ip_hl << 2);
1870		if (do_tso) {
1871			m_head = m_pullup(m_head, poff + sizeof(struct tcphdr));
1872			if (m_head == NULL) {
1873				*m_headp = NULL;
1874				return (ENOBUFS);
1875			}
1876			/*
1877			 * The pseudo TCP checksum does not include TCP payload
1878			 * length so driver should recompute the checksum here
1879			 * what hardware expect to see. This is adherence of
1880			 * Microsoft's Large Send specification.
1881			 */
1882			th = (struct tcphdr *)(mtod(m_head, char *) + poff);
1883			th->th_sum = in_pseudo(ip->ip_src.s_addr,
1884			    ip->ip_dst.s_addr, htons(IPPROTO_TCP));
1885			/* Keep track of the full header length */
1886			hdrlen = poff + (th->th_off << 2);
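			/*
			 * Worked example (illustrative, not in the original
			 * source): a plain Ethernet + IPv4 + TCP frame with
			 * no IP or TCP options has ehdrlen = 14, poff =
			 * 14 + 20 = 34 and th_off = 5, so hdrlen = 34 + 20 =
			 * 54 header bytes, which are excluded from PAYLEN
			 * below.
			 */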
1887		} else if (m_head->m_pkthdr.csum_flags & CSUM_TCP) {
1888			m_head = m_pullup(m_head, poff + sizeof(struct tcphdr));
1889			if (m_head == NULL) {
1890				*m_headp = NULL;
1891				return (ENOBUFS);
1892			}
1893			th = (struct tcphdr *)(mtod(m_head, char *) + poff);
1894			m_head = m_pullup(m_head, poff + (th->th_off << 2));
1895			if (m_head == NULL) {
1896				*m_headp = NULL;
1897				return (ENOBUFS);
1898			}
1899			ip = (struct ip *)(mtod(m_head, char *) + ehdrlen);
1900			th = (struct tcphdr *)(mtod(m_head, char *) + poff);
1901		} else if (m_head->m_pkthdr.csum_flags & CSUM_UDP) {
1902			m_head = m_pullup(m_head, poff + sizeof(struct udphdr));
1903			if (m_head == NULL) {
1904				*m_headp = NULL;
1905				return (ENOBUFS);
1906			}
1907			ip = (struct ip *)(mtod(m_head, char *) + ehdrlen);
1908		}
1909		*m_headp = m_head;
1910	}
1911
1912	/*
1913	 * Map the packet for DMA
1914	 *
1915	 * Capture the first descriptor index;
1916	 * this descriptor will have the index
1917	 * of the EOP which is the only one that
1918	 * now gets a DONE bit writeback.
1919	 */
1920	first = txr->next_avail_desc;
1921	tx_buffer = &txr->tx_buffers[first];
1922	tx_buffer_mapped = tx_buffer;
1923	map = tx_buffer->map;
1924
1925	error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
1926	    *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
1927
1928	/*
1929	 * There are two types of errors we can (try) to handle:
1930	 * - EFBIG means the mbuf chain was too long and bus_dma ran
1931	 *   out of segments.  Defragment the mbuf chain and try again.
1932	 * - ENOMEM means bus_dma could not obtain enough bounce buffers
1933	 *   at this point in time.  Defer sending and try again later.
1934	 * All other errors, in particular EINVAL, are fatal and prevent the
1935	 * mbuf chain from ever going through.  Drop it and report error.
1936	 */
1937	if (error == EFBIG && remap) {
1938		struct mbuf *m;
1939
1940		m = m_defrag(*m_headp, M_DONTWAIT);
1941		if (m == NULL) {
1942			adapter->mbuf_defrag_failed++;
1943			m_freem(*m_headp);
1944			*m_headp = NULL;
1945			return (ENOBUFS);
1946		}
1947		*m_headp = m;
1948
1949		/* Try it again, but only once */
1950		remap = 0;
1951		goto retry;
1952	} else if (error == ENOMEM) {
1953		adapter->no_tx_dma_setup++;
1954		return (error);
1955	} else if (error != 0) {
1956		adapter->no_tx_dma_setup++;
1957		m_freem(*m_headp);
1958		*m_headp = NULL;
1959		return (error);
1960	}
1961
1962	/*
1963	** Make sure we don't overrun the ring;
1964	** we need nsegs descriptors and one for
1965	** the context descriptor used for the
1966	** offloads.
1967	*/
1968        if ((nsegs + 1) > (txr->tx_avail - 2)) {
1969                txr->no_desc_avail++;
1970		bus_dmamap_unload(txr->txtag, map);
1971		return (ENOBUFS);
1972        }
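	/*
	 * Example of the check above (illustrative): with nsegs = 5 the
	 * frame needs 5 data descriptors plus 1 context descriptor, and the
	 * test keeps 2 descriptors of slack, so tx_avail must be at least 8
	 * for the transmit to proceed.
	 */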
1973	m_head = *m_headp;
1974
1975	/* Do hardware assists:
1976         * Set up the context descriptor, used
1977         * when any hardware offload is done.
1978         * This includes CSUM, VLAN, and TSO.
1979         * It will use the first descriptor.
1980         */
1981
1982	if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
1983		if (igb_tso_setup(txr, m_head, ehdrlen, ip, th)) {
1984			cmd_type_len |= E1000_ADVTXD_DCMD_TSE;
1985			olinfo_status |= E1000_TXD_POPTS_IXSM << 8;
1986			olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
1987		} else
1988			return (ENXIO);
1989	} else if (igb_tx_ctx_setup(txr, m_head))
1990			olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
1991
1992	/* Calculate payload length */
1993	olinfo_status |= ((m_head->m_pkthdr.len - hdrlen)
1994	    << E1000_ADVTXD_PAYLEN_SHIFT);
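	/*
	 * Added note: hdrlen is only non-zero on the TSO path above, so for
	 * ordinary frames PAYLEN is the full packet length, while for TSO it
	 * is the TCP payload only; e.g. (illustrative) a 7354-byte TSO chain
	 * with 54 bytes of Ethernet/IP/TCP headers yields PAYLEN = 7300.
	 */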
1995
1996	/* 82575 needs the queue index added */
1997	if (adapter->hw.mac.type == e1000_82575)
1998		olinfo_status |= txr->me << 4;
1999
2000	/* Set up our transmit descriptors */
2001	i = txr->next_avail_desc;
2002	for (int j = 0; j < nsegs; j++) {
2003		bus_size_t seg_len;
2004		bus_addr_t seg_addr;
2005
2006		tx_buffer = &txr->tx_buffers[i];
2007		txd = (union e1000_adv_tx_desc *)&txr->tx_base[i];
2008		seg_addr = segs[j].ds_addr;
2009		seg_len  = segs[j].ds_len;
2010
2011		txd->read.buffer_addr = htole64(seg_addr);
2012		txd->read.cmd_type_len = htole32(cmd_type_len | seg_len);
2013		txd->read.olinfo_status = htole32(olinfo_status);
2014		last = i;
2015		if (++i == adapter->num_tx_desc)
2016			i = 0;
2017		tx_buffer->m_head = NULL;
2018		tx_buffer->next_eop = -1;
2019	}
2020
2021	txr->next_avail_desc = i;
2022	txr->tx_avail -= nsegs;
2023        tx_buffer->m_head = m_head;
2024
2025	/*
2026	** Here we swap the map so the last descriptor,
2027	** which gets the completion interrupt, has the
2028	** real map, and the first descriptor gets the
2029	** unused map from this descriptor.
2030	*/
2031	tx_buffer_mapped->map = tx_buffer->map;
2032	tx_buffer->map = map;
2033        bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);
2034
2035        /*
2036         * Last Descriptor of Packet
2037	 * needs End Of Packet (EOP)
2038	 * and Report Status (RS)
2039         */
2040        txd->read.cmd_type_len |=
2041	    htole32(E1000_ADVTXD_DCMD_EOP | E1000_ADVTXD_DCMD_RS);
2042	/*
2043	 * Keep track in the first buffer which
2044	 * descriptor will be written back
2045	 */
2046	tx_buffer = &txr->tx_buffers[first];
2047	tx_buffer->next_eop = last;
2048	/* Update the watchdog time early and often */
2049	txr->watchdog_time = ticks;
2050
2051	/*
2052	 * Advance the Transmit Descriptor Tail (TDT), this tells the E1000
2053	 * that this frame is available to transmit.
2054	 */
2055	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
2056	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
2057	E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), i);
2058	++txr->tx_packets;
2059
2060	return (0);
2061}
2062static void
2063igb_set_promisc(struct adapter *adapter)
2064{
2065	struct ifnet	*ifp = adapter->ifp;
2066	struct e1000_hw *hw = &adapter->hw;
2067	u32		reg;
2068
2069	if (adapter->vf_ifp) {
2070		e1000_promisc_set_vf(hw, e1000_promisc_enabled);
2071		return;
2072	}
2073
2074	reg = E1000_READ_REG(hw, E1000_RCTL);
2075	if (ifp->if_flags & IFF_PROMISC) {
2076		reg |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
2077		E1000_WRITE_REG(hw, E1000_RCTL, reg);
2078	} else if (ifp->if_flags & IFF_ALLMULTI) {
2079		reg |= E1000_RCTL_MPE;
2080		reg &= ~E1000_RCTL_UPE;
2081		E1000_WRITE_REG(hw, E1000_RCTL, reg);
2082	}
2083}
2084
2085static void
2086igb_disable_promisc(struct adapter *adapter)
2087{
2088	struct e1000_hw *hw = &adapter->hw;
2089	u32		reg;
2090
2091	if (adapter->vf_ifp) {
2092		e1000_promisc_set_vf(hw, e1000_promisc_disabled);
2093		return;
2094	}
2095	reg = E1000_READ_REG(hw, E1000_RCTL);
2096	reg &=  (~E1000_RCTL_UPE);
2097	reg &=  (~E1000_RCTL_MPE);
2098	E1000_WRITE_REG(hw, E1000_RCTL, reg);
2099}
2100
2101
2102/*********************************************************************
2103 *  Multicast Update
2104 *
2105 *  This routine is called whenever the multicast address list is updated.
2106 *
2107 **********************************************************************/
2108
2109static void
2110igb_set_multi(struct adapter *adapter)
2111{
2112	struct ifnet	*ifp = adapter->ifp;
2113	struct ifmultiaddr *ifma;
2114	u32 reg_rctl = 0;
2115	u8  *mta;
2116
2117	int mcnt = 0;
2118
2119	IOCTL_DEBUGOUT("igb_set_multi: begin");
2120
2121	mta = adapter->mta;
2122	bzero(mta, sizeof(uint8_t) * ETH_ADDR_LEN *
2123	    MAX_NUM_MULTICAST_ADDRESSES);
2124
2125#if __FreeBSD_version < 800000
2126	IF_ADDR_LOCK(ifp);
2127#else
2128	if_maddr_rlock(ifp);
2129#endif
2130	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
2131		if (ifma->ifma_addr->sa_family != AF_LINK)
2132			continue;
2133
2134		if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
2135			break;
2136
2137		bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
2138		    &mta[mcnt * ETH_ADDR_LEN], ETH_ADDR_LEN);
2139		mcnt++;
2140	}
2141#if __FreeBSD_version < 800000
2142	IF_ADDR_UNLOCK(ifp);
2143#else
2144	if_maddr_runlock(ifp);
2145#endif
2146
2147	if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) {
2148		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2149		reg_rctl |= E1000_RCTL_MPE;
2150		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2151	} else
2152		e1000_update_mc_addr_list(&adapter->hw, mta, mcnt);
2153}
2154
2155
2156/*********************************************************************
2157 *  Timer routine:
2158 *  	This routine checks for link status,
2159 *	updates statistics, and does the watchdog.
2160 *
2161 **********************************************************************/
2162
2163static void
2164igb_local_timer(void *arg)
2165{
2166	struct adapter		*adapter = arg;
2167	device_t		dev = adapter->dev;
2168	struct ifnet		*ifp = adapter->ifp;
2169	struct tx_ring		*txr = adapter->tx_rings;
2170	struct igb_queue	*que = adapter->queues;
2171	int			hung = 0, busy = 0;
2172
2173
2174	IGB_CORE_LOCK_ASSERT(adapter);
2175
2176	igb_update_link_status(adapter);
2177	igb_update_stats_counters(adapter);
2178
2179        /*
2180        ** Check the TX queues status
2181	**	- central locked handling of OACTIVE
2182	**	- watchdog only if all queues show hung
2183        */
2184	for (int i = 0; i < adapter->num_queues; i++, que++, txr++) {
2185		if ((txr->queue_status & IGB_QUEUE_HUNG) &&
2186		    (adapter->pause_frames == 0))
2187			++hung;
2188		if (txr->queue_status & IGB_QUEUE_DEPLETED)
2189			++busy;
2190		if ((txr->queue_status & IGB_QUEUE_IDLE) == 0)
2191			taskqueue_enqueue(que->tq, &que->que_task);
2192	}
2193	if (hung == adapter->num_queues)
2194		goto timeout;
2195	if (busy == adapter->num_queues)
2196		ifp->if_drv_flags |= IFF_DRV_OACTIVE;
2197	else if ((ifp->if_drv_flags & IFF_DRV_OACTIVE) &&
2198	    (busy < adapter->num_queues))
2199		ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
2200
2201	adapter->pause_frames = 0;
2202	callout_reset(&adapter->timer, hz, igb_local_timer, adapter);
2203#ifndef DEVICE_POLLING
2204	/* Schedule all queue interrupts - deadlock protection */
2205	E1000_WRITE_REG(&adapter->hw, E1000_EICS, adapter->que_mask);
2206#endif
2207	return;
2208
2209timeout:
2210	device_printf(adapter->dev, "Watchdog timeout -- resetting\n");
2211	device_printf(dev,"Queue(%d) tdh = %d, hw tdt = %d\n", txr->me,
2212            E1000_READ_REG(&adapter->hw, E1000_TDH(txr->me)),
2213            E1000_READ_REG(&adapter->hw, E1000_TDT(txr->me)));
2214	device_printf(dev,"TX(%d) desc avail = %d,"
2215            "Next TX to Clean = %d\n",
2216            txr->me, txr->tx_avail, txr->next_to_clean);
2217	adapter->ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2218	adapter->watchdog_events++;
2219	igb_init_locked(adapter);
2220}
2221
2222static void
2223igb_update_link_status(struct adapter *adapter)
2224{
2225	struct e1000_hw *hw = &adapter->hw;
2226	struct ifnet *ifp = adapter->ifp;
2227	device_t dev = adapter->dev;
2228	struct tx_ring *txr = adapter->tx_rings;
2229	u32 link_check, thstat, ctrl;
2230
2231	link_check = thstat = ctrl = 0;
2232
2233	/* Get the cached link value or read for real */
2234        switch (hw->phy.media_type) {
2235        case e1000_media_type_copper:
2236                if (hw->mac.get_link_status) {
2237			/* Do the work to read phy */
2238                        e1000_check_for_link(hw);
2239                        link_check = !hw->mac.get_link_status;
2240                } else
2241                        link_check = TRUE;
2242                break;
2243        case e1000_media_type_fiber:
2244                e1000_check_for_link(hw);
2245                link_check = (E1000_READ_REG(hw, E1000_STATUS) &
2246                                 E1000_STATUS_LU);
2247                break;
2248        case e1000_media_type_internal_serdes:
2249                e1000_check_for_link(hw);
2250                link_check = adapter->hw.mac.serdes_has_link;
2251                break;
2252	/* VF device is type_unknown */
2253        case e1000_media_type_unknown:
2254                e1000_check_for_link(hw);
2255		link_check = !hw->mac.get_link_status;
2256		/* Fall thru */
2257        default:
2258                break;
2259        }
2260
2261	/* Check for thermal downshift or shutdown */
2262	if (hw->mac.type == e1000_i350) {
2263		thstat = E1000_READ_REG(hw, E1000_THSTAT);
2264		ctrl = E1000_READ_REG(hw, E1000_CTRL_EXT);
2265	}
2266
2267	/* Now we check if a transition has happened */
2268	if (link_check && (adapter->link_active == 0)) {
2269		e1000_get_speed_and_duplex(&adapter->hw,
2270		    &adapter->link_speed, &adapter->link_duplex);
2271		if (bootverbose)
2272			device_printf(dev, "Link is up %d Mbps %s\n",
2273			    adapter->link_speed,
2274			    ((adapter->link_duplex == FULL_DUPLEX) ?
2275			    "Full Duplex" : "Half Duplex"));
2276		adapter->link_active = 1;
2277		ifp->if_baudrate = adapter->link_speed * 1000000;
2278		if ((ctrl & E1000_CTRL_EXT_LINK_MODE_GMII) &&
2279		    (thstat & E1000_THSTAT_LINK_THROTTLE))
2280			device_printf(dev, "Link: thermal downshift\n");
2281		/* This can sleep */
2282		if_link_state_change(ifp, LINK_STATE_UP);
2283	} else if (!link_check && (adapter->link_active == 1)) {
2284		ifp->if_baudrate = adapter->link_speed = 0;
2285		adapter->link_duplex = 0;
2286		if (bootverbose)
2287			device_printf(dev, "Link is Down\n");
2288		if ((ctrl & E1000_CTRL_EXT_LINK_MODE_GMII) &&
2289		    (thstat & E1000_THSTAT_PWR_DOWN))
2290			device_printf(dev, "Link: thermal shutdown\n");
2291		adapter->link_active = 0;
2292		/* This can sleep */
2293		if_link_state_change(ifp, LINK_STATE_DOWN);
2294		/* Reset queue state */
2295		for (int i = 0; i < adapter->num_queues; i++, txr++)
2296			txr->queue_status = IGB_QUEUE_IDLE;
2297	}
2298}
2299
2300/*********************************************************************
2301 *
2302 *  This routine disables all traffic on the adapter by issuing a
2303 *  global reset on the MAC and deallocates TX/RX buffers.
2304 *
2305 **********************************************************************/
2306
2307static void
2308igb_stop(void *arg)
2309{
2310	struct adapter	*adapter = arg;
2311	struct ifnet	*ifp = adapter->ifp;
2312	struct tx_ring *txr = adapter->tx_rings;
2313
2314	IGB_CORE_LOCK_ASSERT(adapter);
2315
2316	INIT_DEBUGOUT("igb_stop: begin");
2317
2318	igb_disable_intr(adapter);
2319
2320	callout_stop(&adapter->timer);
2321
2322	/* Tell the stack that the interface is no longer active */
2323	ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2324	ifp->if_drv_flags |= IFF_DRV_OACTIVE;
2325
2326	/* Disarm watchdog timer. */
2327	for (int i = 0; i < adapter->num_queues; i++, txr++) {
2328		IGB_TX_LOCK(txr);
2329		txr->queue_status = IGB_QUEUE_IDLE;
2330		IGB_TX_UNLOCK(txr);
2331	}
2332
2333	e1000_reset_hw(&adapter->hw);
2334	E1000_WRITE_REG(&adapter->hw, E1000_WUC, 0);
2335
2336	e1000_led_off(&adapter->hw);
2337	e1000_cleanup_led(&adapter->hw);
2338}
2339
2340
2341/*********************************************************************
2342 *
2343 *  Determine hardware revision.
2344 *
2345 **********************************************************************/
2346static void
2347igb_identify_hardware(struct adapter *adapter)
2348{
2349	device_t dev = adapter->dev;
2350
2351	/* Make sure our PCI config space has the necessary stuff set */
2352	adapter->hw.bus.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2);
2353	if (!((adapter->hw.bus.pci_cmd_word & PCIM_CMD_BUSMASTEREN) &&
2354	    (adapter->hw.bus.pci_cmd_word & PCIM_CMD_MEMEN))) {
2355		INIT_DEBUGOUT("Memory Access and/or Bus Master "
2356		    "bits were not set!\n");
2357		adapter->hw.bus.pci_cmd_word |=
2358		(PCIM_CMD_BUSMASTEREN | PCIM_CMD_MEMEN);
2359		pci_write_config(dev, PCIR_COMMAND,
2360		    adapter->hw.bus.pci_cmd_word, 2);
2361	}
2362
2363	/* Save off the information about this board */
2364	adapter->hw.vendor_id = pci_get_vendor(dev);
2365	adapter->hw.device_id = pci_get_device(dev);
2366	adapter->hw.revision_id = pci_read_config(dev, PCIR_REVID, 1);
2367	adapter->hw.subsystem_vendor_id =
2368	    pci_read_config(dev, PCIR_SUBVEND_0, 2);
2369	adapter->hw.subsystem_device_id =
2370	    pci_read_config(dev, PCIR_SUBDEV_0, 2);
2371
2372	/* Set MAC type early for PCI setup */
2373	e1000_set_mac_type(&adapter->hw);
2374
2375	/* Are we a VF device? */
2376	if ((adapter->hw.mac.type == e1000_vfadapt) ||
2377	    (adapter->hw.mac.type == e1000_vfadapt_i350))
2378		adapter->vf_ifp = 1;
2379	else
2380		adapter->vf_ifp = 0;
2381}
2382
2383static int
2384igb_allocate_pci_resources(struct adapter *adapter)
2385{
2386	device_t	dev = adapter->dev;
2387	int		rid;
2388
2389	rid = PCIR_BAR(0);
2390	adapter->pci_mem = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
2391	    &rid, RF_ACTIVE);
2392	if (adapter->pci_mem == NULL) {
2393		device_printf(dev, "Unable to allocate bus resource: memory\n");
2394		return (ENXIO);
2395	}
2396	adapter->osdep.mem_bus_space_tag =
2397	    rman_get_bustag(adapter->pci_mem);
2398	adapter->osdep.mem_bus_space_handle =
2399	    rman_get_bushandle(adapter->pci_mem);
2400	adapter->hw.hw_addr = (u8 *)&adapter->osdep.mem_bus_space_handle;
2401
2402	adapter->num_queues = 1; /* Defaults for Legacy or MSI */
2403
2404	/* This will setup either MSI/X or MSI */
2405	adapter->msix = igb_setup_msix(adapter);
2406	adapter->hw.back = &adapter->osdep;
2407
2408	return (0);
2409}
2410
2411/*********************************************************************
2412 *
2413 *  Setup the Legacy or MSI Interrupt handler
2414 *
2415 **********************************************************************/
2416static int
2417igb_allocate_legacy(struct adapter *adapter)
2418{
2419	device_t		dev = adapter->dev;
2420	struct igb_queue	*que = adapter->queues;
2421	struct tx_ring		*txr = adapter->tx_rings;
2422	int			error, rid = 0;
2423
2424	/* Turn off all interrupts */
2425	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2426
2427	/* MSI RID is 1 */
2428	if (adapter->msix == 1)
2429		rid = 1;
2430
2431	/* We allocate a single interrupt resource */
2432	adapter->res = bus_alloc_resource_any(dev,
2433	    SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2434	if (adapter->res == NULL) {
2435		device_printf(dev, "Unable to allocate bus resource: "
2436		    "interrupt\n");
2437		return (ENXIO);
2438	}
2439
2440#if __FreeBSD_version >= 800000
2441	TASK_INIT(&txr->txq_task, 0, igb_deferred_mq_start, txr);
2442#endif
2443
2444	/*
2445	 * Try allocating a fast interrupt and the associated deferred
2446	 * processing contexts.
2447	 */
2448	TASK_INIT(&que->que_task, 0, igb_handle_que, que);
2449	/* Make tasklet for deferred link handling */
2450	TASK_INIT(&adapter->link_task, 0, igb_handle_link, adapter);
2451	que->tq = taskqueue_create_fast("igb_taskq", M_NOWAIT,
2452	    taskqueue_thread_enqueue, &que->tq);
2453	taskqueue_start_threads(&que->tq, 1, PI_NET, "%s taskq",
2454	    device_get_nameunit(adapter->dev));
2455	if ((error = bus_setup_intr(dev, adapter->res,
2456	    INTR_TYPE_NET | INTR_MPSAFE, igb_irq_fast, NULL,
2457	    adapter, &adapter->tag)) != 0) {
2458		device_printf(dev, "Failed to register fast interrupt "
2459			    "handler: %d\n", error);
2460		taskqueue_free(que->tq);
2461		que->tq = NULL;
2462		return (error);
2463	}
2464
2465	return (0);
2466}
2467
2468
2469/*********************************************************************
2470 *
2471 *  Setup the MSIX Queue Interrupt handlers:
2472 *
2473 **********************************************************************/
2474static int
2475igb_allocate_msix(struct adapter *adapter)
2476{
2477	device_t		dev = adapter->dev;
2478	struct igb_queue	*que = adapter->queues;
2479	int			error, rid, vector = 0;
2480
2481	/* Be sure to start with all interrupts disabled */
2482	E1000_WRITE_REG(&adapter->hw, E1000_IMC, ~0);
2483	E1000_WRITE_FLUSH(&adapter->hw);
2484
2485	for (int i = 0; i < adapter->num_queues; i++, vector++, que++) {
2486		rid = vector +1;
2487		que->res = bus_alloc_resource_any(dev,
2488		    SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2489		if (que->res == NULL) {
2490			device_printf(dev,
2491			    "Unable to allocate bus resource: "
2492			    "MSIX Queue Interrupt\n");
2493			return (ENXIO);
2494		}
2495		error = bus_setup_intr(dev, que->res,
2496	    	    INTR_TYPE_NET | INTR_MPSAFE, NULL,
2497		    igb_msix_que, que, &que->tag);
2498		if (error) {
2499			que->res = NULL;
2500			device_printf(dev, "Failed to register Queue handler");
2501			return (error);
2502		}
2503#if __FreeBSD_version >= 800504
2504		bus_describe_intr(dev, que->res, que->tag, "que %d", i);
2505#endif
2506		que->msix = vector;
2507		if (adapter->hw.mac.type == e1000_82575)
2508			que->eims = E1000_EICR_TX_QUEUE0 << i;
2509		else
2510			que->eims = 1 << vector;
2511		/*
2512		** Bind the msix vector, and thus the
2513		** rings to the corresponding cpu.
2514		*/
2515		if (adapter->num_queues > 1) {
2516			if (igb_last_bind_cpu < 0)
2517				igb_last_bind_cpu = CPU_FIRST();
2518			bus_bind_intr(dev, que->res, igb_last_bind_cpu);
2519			device_printf(dev,
2520				"Bound queue %d to cpu %d\n",
2521				i,igb_last_bind_cpu);
2522			igb_last_bind_cpu = CPU_NEXT(igb_last_bind_cpu);
2523			igb_last_bind_cpu = igb_last_bind_cpu % mp_ncpus;
2524		}
2525#if __FreeBSD_version >= 800000
2526		TASK_INIT(&que->txr->txq_task, 0, igb_deferred_mq_start,
2527		    que->txr);
2528#endif
2529		/* Make tasklet for deferred handling */
2530		TASK_INIT(&que->que_task, 0, igb_handle_que, que);
2531		que->tq = taskqueue_create("igb_que", M_NOWAIT,
2532		    taskqueue_thread_enqueue, &que->tq);
2533		taskqueue_start_threads(&que->tq, 1, PI_NET, "%s que",
2534		    device_get_nameunit(adapter->dev));
2535	}
2536
2537	/* And Link */
2538	rid = vector + 1;
2539	adapter->res = bus_alloc_resource_any(dev,
2540	    SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2541	if (adapter->res == NULL) {
2542		device_printf(dev,
2543		    "Unable to allocate bus resource: "
2544		    "MSIX Link Interrupt\n");
2545		return (ENXIO);
2546	}
2547	if ((error = bus_setup_intr(dev, adapter->res,
2548	    INTR_TYPE_NET | INTR_MPSAFE, NULL,
2549	    igb_msix_link, adapter, &adapter->tag)) != 0) {
2550		device_printf(dev, "Failed to register Link handler");
2551		return (error);
2552	}
2553#if __FreeBSD_version >= 800504
2554	bus_describe_intr(dev, adapter->res, adapter->tag, "link");
2555#endif
2556	adapter->linkvec = vector;
2557
2558	return (0);
2559}
2560
2561
2562static void
2563igb_configure_queues(struct adapter *adapter)
2564{
2565	struct	e1000_hw	*hw = &adapter->hw;
2566	struct	igb_queue	*que;
2567	u32			tmp, ivar = 0, newitr = 0;
2568
2569	/* First turn on RSS capability */
2570	if (adapter->hw.mac.type != e1000_82575)
2571		E1000_WRITE_REG(hw, E1000_GPIE,
2572		    E1000_GPIE_MSIX_MODE | E1000_GPIE_EIAME |
2573		    E1000_GPIE_PBA | E1000_GPIE_NSICR);
2574
2575	/* Turn on MSIX */
2576	switch (adapter->hw.mac.type) {
2577	case e1000_82580:
2578	case e1000_i350:
2579	case e1000_i210:
2580	case e1000_i211:
2581	case e1000_vfadapt:
2582	case e1000_vfadapt_i350:
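		/*
		 * Added note: on these MAC types each 32-bit IVAR register
		 * holds the vectors for two queues.  Queue i uses IVAR index
		 * i/2; its RX entry sits in bits 7:0 (even i) or 23:16
		 * (odd i) and its TX entry in bits 15:8 (even i) or 31:24
		 * (odd i), which is what the mask/shift pairs below encode.
		 */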
2583		/* RX entries */
2584		for (int i = 0; i < adapter->num_queues; i++) {
2585			u32 index = i >> 1;
2586			ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2587			que = &adapter->queues[i];
2588			if (i & 1) {
2589				ivar &= 0xFF00FFFF;
2590				ivar |= (que->msix | E1000_IVAR_VALID) << 16;
2591			} else {
2592				ivar &= 0xFFFFFF00;
2593				ivar |= que->msix | E1000_IVAR_VALID;
2594			}
2595			E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2596		}
2597		/* TX entries */
2598		for (int i = 0; i < adapter->num_queues; i++) {
2599			u32 index = i >> 1;
2600			ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2601			que = &adapter->queues[i];
2602			if (i & 1) {
2603				ivar &= 0x00FFFFFF;
2604				ivar |= (que->msix | E1000_IVAR_VALID) << 24;
2605			} else {
2606				ivar &= 0xFFFF00FF;
2607				ivar |= (que->msix | E1000_IVAR_VALID) << 8;
2608			}
2609			E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2610			adapter->que_mask |= que->eims;
2611		}
2612
2613		/* And for the link interrupt */
2614		ivar = (adapter->linkvec | E1000_IVAR_VALID) << 8;
2615		adapter->link_mask = 1 << adapter->linkvec;
2616		E1000_WRITE_REG(hw, E1000_IVAR_MISC, ivar);
2617		break;
2618	case e1000_82576:
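		/*
		 * Added note: the 82576 instead packs queue indices 0-7 into
		 * the low halves of IVAR registers 0-7 (RX in bits 7:0, TX
		 * in bits 15:8) and indices 8-15 into the high halves of the
		 * same registers (RX in bits 23:16, TX in bits 31:24), hence
		 * the "i & 0x7" index and the i < 8 tests below.
		 */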
2619		/* RX entries */
2620		for (int i = 0; i < adapter->num_queues; i++) {
2621			u32 index = i & 0x7; /* Each IVAR has two entries */
2622			ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2623			que = &adapter->queues[i];
2624			if (i < 8) {
2625				ivar &= 0xFFFFFF00;
2626				ivar |= que->msix | E1000_IVAR_VALID;
2627			} else {
2628				ivar &= 0xFF00FFFF;
2629				ivar |= (que->msix | E1000_IVAR_VALID) << 16;
2630			}
2631			E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2632			adapter->que_mask |= que->eims;
2633		}
2634		/* TX entries */
2635		for (int i = 0; i < adapter->num_queues; i++) {
2636			u32 index = i & 0x7; /* Each IVAR has two entries */
2637			ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2638			que = &adapter->queues[i];
2639			if (i < 8) {
2640				ivar &= 0xFFFF00FF;
2641				ivar |= (que->msix | E1000_IVAR_VALID) << 8;
2642			} else {
2643				ivar &= 0x00FFFFFF;
2644				ivar |= (que->msix | E1000_IVAR_VALID) << 24;
2645			}
2646			E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2647			adapter->que_mask |= que->eims;
2648		}
2649
2650		/* And for the link interrupt */
2651		ivar = (adapter->linkvec | E1000_IVAR_VALID) << 8;
2652		adapter->link_mask = 1 << adapter->linkvec;
2653		E1000_WRITE_REG(hw, E1000_IVAR_MISC, ivar);
2654		break;
2655
2656	case e1000_82575:
2657                /* enable MSI-X support*/
2658		tmp = E1000_READ_REG(hw, E1000_CTRL_EXT);
2659                tmp |= E1000_CTRL_EXT_PBA_CLR;
2660                /* Auto-Mask interrupts upon ICR read. */
2661                tmp |= E1000_CTRL_EXT_EIAME;
2662                tmp |= E1000_CTRL_EXT_IRCA;
2663                E1000_WRITE_REG(hw, E1000_CTRL_EXT, tmp);
2664
2665		/* Queues */
2666		for (int i = 0; i < adapter->num_queues; i++) {
2667			que = &adapter->queues[i];
2668			tmp = E1000_EICR_RX_QUEUE0 << i;
2669			tmp |= E1000_EICR_TX_QUEUE0 << i;
2670			que->eims = tmp;
2671			E1000_WRITE_REG_ARRAY(hw, E1000_MSIXBM(0),
2672			    i, que->eims);
2673			adapter->que_mask |= que->eims;
2674		}
2675
2676		/* Link */
2677		E1000_WRITE_REG(hw, E1000_MSIXBM(adapter->linkvec),
2678		    E1000_EIMS_OTHER);
2679		adapter->link_mask |= E1000_EIMS_OTHER;
2680	default:
2681		break;
2682	}
2683
2684	/* Set the starting interrupt rate */
2685	if (igb_max_interrupt_rate > 0)
2686		newitr = (4000000 / igb_max_interrupt_rate) & 0x7FFC;
2687
2688        if (hw->mac.type == e1000_82575)
2689                newitr |= newitr << 16;
2690        else
2691                newitr |= E1000_EITR_CNT_IGNR;
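	/*
	 * Worked example (illustrative tunable value): with
	 * igb_max_interrupt_rate = 8000, newitr = 4000000 / 8000 = 500
	 * (0x1F4); the 0x7FFC mask clears the low two bits, leaving 0x1F4
	 * unchanged, and on the 82575 the value is then mirrored into the
	 * upper 16 bits as well.
	 */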
2692
2693	for (int i = 0; i < adapter->num_queues; i++) {
2694		que = &adapter->queues[i];
2695		E1000_WRITE_REG(hw, E1000_EITR(que->msix), newitr);
2696	}
2697
2698	return;
2699}
2700
2701
2702static void
2703igb_free_pci_resources(struct adapter *adapter)
2704{
2705	struct		igb_queue *que = adapter->queues;
2706	device_t	dev = adapter->dev;
2707	int		rid;
2708
2709	/*
2710	** There is a slight possibility of a failure mode
2711	** in attach that will result in entering this function
2712	** before interrupt resources have been initialized, and
2713	** in that case we do not want to execute the loops below
2714	** We can detect this reliably by the state of the adapter
2715	** res pointer.
2716	*/
2717	if (adapter->res == NULL)
2718		goto mem;
2719
2720	/*
2721	 * First release all the interrupt resources:
2722	 */
2723	for (int i = 0; i < adapter->num_queues; i++, que++) {
2724		rid = que->msix + 1;
2725		if (que->tag != NULL) {
2726			bus_teardown_intr(dev, que->res, que->tag);
2727			que->tag = NULL;
2728		}
2729		if (que->res != NULL)
2730			bus_release_resource(dev,
2731			    SYS_RES_IRQ, rid, que->res);
2732	}
2733
2734	/* Clean the Legacy or Link interrupt last */
2735	if (adapter->linkvec) /* we are doing MSIX */
2736		rid = adapter->linkvec + 1;
2737	else
2738		(adapter->msix != 0) ? (rid = 1):(rid = 0);
2739
2740	que = adapter->queues;
2741	if (adapter->tag != NULL) {
2742		taskqueue_drain(que->tq, &adapter->link_task);
2743		bus_teardown_intr(dev, adapter->res, adapter->tag);
2744		adapter->tag = NULL;
2745	}
2746	if (adapter->res != NULL)
2747		bus_release_resource(dev, SYS_RES_IRQ, rid, adapter->res);
2748
2749	for (int i = 0; i < adapter->num_queues; i++, que++) {
2750		if (que->tq != NULL) {
2751#if __FreeBSD_version >= 800000
2752			taskqueue_drain(que->tq, &que->txr->txq_task);
2753#endif
2754			taskqueue_drain(que->tq, &que->que_task);
2755			taskqueue_free(que->tq);
2756		}
2757	}
2758mem:
2759	if (adapter->msix)
2760		pci_release_msi(dev);
2761
2762	if (adapter->msix_mem != NULL)
2763		bus_release_resource(dev, SYS_RES_MEMORY,
2764		    PCIR_BAR(IGB_MSIX_BAR), adapter->msix_mem);
2765
2766	if (adapter->pci_mem != NULL)
2767		bus_release_resource(dev, SYS_RES_MEMORY,
2768		    PCIR_BAR(0), adapter->pci_mem);
2769
2770}
2771
2772/*
2773 * Setup Either MSI/X or MSI
2774 */
2775static int
2776igb_setup_msix(struct adapter *adapter)
2777{
2778	device_t dev = adapter->dev;
2779	int rid, want, queues, msgs, maxqueues;
2780
2781	/* tuneable override */
2782	if (igb_enable_msix == 0)
2783		goto msi;
2784
2785	/* First try MSI/X */
2786	rid = PCIR_BAR(IGB_MSIX_BAR);
2787	adapter->msix_mem = bus_alloc_resource_any(dev,
2788	    SYS_RES_MEMORY, &rid, RF_ACTIVE);
2789       	if (!adapter->msix_mem) {
2790		/* May not be enabled */
2791		device_printf(adapter->dev,
2792		    "Unable to map MSIX table \n");
2793		goto msi;
2794	}
2795
2796	msgs = pci_msix_count(dev);
2797	if (msgs == 0) { /* system has msix disabled */
2798		bus_release_resource(dev, SYS_RES_MEMORY,
2799		    PCIR_BAR(IGB_MSIX_BAR), adapter->msix_mem);
2800		adapter->msix_mem = NULL;
2801		goto msi;
2802	}
2803
2804	/* Figure out a reasonable auto config value */
2805	queues = (mp_ncpus > (msgs-1)) ? (msgs-1) : mp_ncpus;
2806
2807	/* Manual override */
2808	if (igb_num_queues != 0)
2809		queues = igb_num_queues;
2810
2811	/* Sanity check based on HW */
2812	switch (adapter->hw.mac.type) {
2813		case e1000_82575:
2814			maxqueues = 4;
2815			break;
2816		case e1000_82576:
2817		case e1000_82580:
2818		case e1000_i350:
2819			maxqueues = 8;
2820			break;
2821		case e1000_i210:
2822			maxqueues = 4;
2823			break;
2824		case e1000_i211:
2825			maxqueues = 2;
2826			break;
2827		default:  /* VF interfaces */
2828			maxqueues = 1;
2829			break;
2830	}
2831	if (queues > maxqueues)
2832		queues = maxqueues;
2833
2834	/*
2835	** One vector (RX/TX pair) per queue
2836	** plus an additional one for the link interrupt.
2837	*/
2838	want = queues + 1;
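	/*
	 * Example (illustrative): on an 8-CPU system with an 82576 that
	 * reports 10 MSI-X messages, queues = 8, maxqueues caps it at 8,
	 * and want = 9 vectors (one per queue plus the link vector).
	 */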
2839	if (msgs >= want)
2840		msgs = want;
2841	else {
2842               	device_printf(adapter->dev,
2843		    "MSIX Configuration Problem, "
2844		    "%d vectors configured, but %d queues wanted!\n",
2845		    msgs, want);
2846		return (0);
2847	}
2848	if ((msgs) && pci_alloc_msix(dev, &msgs) == 0) {
2849               	device_printf(adapter->dev,
2850		    "Using MSIX interrupts with %d vectors\n", msgs);
2851		adapter->num_queues = queues;
2852		return (msgs);
2853	}
2854msi:
2855       	msgs = pci_msi_count(dev);
2856	if (msgs == 1 && pci_alloc_msi(dev, &msgs) == 0) {
2857		device_printf(adapter->dev," Using MSI interrupt\n");
2858		return (msgs);
2859	}
2860	return (0);
2861}
2862
2863/*********************************************************************
2864 *
2865 *  Set up a fresh starting state
2866 *
2867 **********************************************************************/
2868static void
2869igb_reset(struct adapter *adapter)
2870{
2871	device_t	dev = adapter->dev;
2872	struct e1000_hw *hw = &adapter->hw;
2873	struct e1000_fc_info *fc = &hw->fc;
2874	struct ifnet	*ifp = adapter->ifp;
2875	u32		pba = 0;
2876	u16		hwm;
2877
2878	INIT_DEBUGOUT("igb_reset: begin");
2879
2880	/* Let the firmware know the OS is in control */
2881	igb_get_hw_control(adapter);
2882
2883	/*
2884	 * Packet Buffer Allocation (PBA)
2885	 * Writing PBA sets the receive portion of the buffer;
2886	 * the remainder is used for the transmit buffer.
2887	 */
2888	switch (hw->mac.type) {
2889	case e1000_82575:
2890		pba = E1000_PBA_32K;
2891		break;
2892	case e1000_82576:
2893	case e1000_vfadapt:
2894		pba = E1000_READ_REG(hw, E1000_RXPBS);
2895		pba &= E1000_RXPBS_SIZE_MASK_82576;
2896		break;
2897	case e1000_82580:
2898	case e1000_i350:
2899	case e1000_vfadapt_i350:
2900		pba = E1000_READ_REG(hw, E1000_RXPBS);
2901		pba = e1000_rxpbs_adjust_82580(pba);
2902		break;
2903	case e1000_i210:
2904	case e1000_i211:
2905		pba = E1000_PBA_34K;
2906	default:
2907		break;
2908	}
2909
2910	/* Special needs in case of Jumbo frames */
2911	if ((hw->mac.type == e1000_82575) && (ifp->if_mtu > ETHERMTU)) {
2912		u32 tx_space, min_tx, min_rx;
2913		pba = E1000_READ_REG(hw, E1000_PBA);
2914		tx_space = pba >> 16;
2915		pba &= 0xffff;
2916		min_tx = (adapter->max_frame_size +
2917		    sizeof(struct e1000_tx_desc) - ETHERNET_FCS_SIZE) * 2;
2918		min_tx = roundup2(min_tx, 1024);
2919		min_tx >>= 10;
2920                min_rx = adapter->max_frame_size;
2921                min_rx = roundup2(min_rx, 1024);
2922                min_rx >>= 10;
2923		if (tx_space < min_tx &&
2924		    ((min_tx - tx_space) < pba)) {
2925			pba = pba - (min_tx - tx_space);
2926			/*
2927                         * if short on rx space, rx wins
2928                         * and must trump tx adjustment
2929			 */
2930                        if (pba < min_rx)
2931                                pba = min_rx;
2932		}
2933		E1000_WRITE_REG(hw, E1000_PBA, pba);
2934	}
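	/*
	 * Illustrative numbers for the rebalancing above (assumed, not read
	 * from hardware): with a 9234-byte max_frame_size, min_tx =
	 * (9234 + 16 - 4) * 2 = 18492 bytes, rounded up to 19KB, and min_rx
	 * rounds up to 10KB; if the TX share read from E1000_PBA is below
	 * 19KB the shortfall is carved out of the RX share, but never below
	 * the 10KB RX minimum.
	 */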
2935
2936	INIT_DEBUGOUT1("igb_init: pba=%dK",pba);
2937
2938	/*
2939	 * These parameters control the automatic generation (Tx) and
2940	 * response (Rx) to Ethernet PAUSE frames.
2941	 * - High water mark should allow for at least two frames to be
2942	 *   received after sending an XOFF.
2943	 * - Low water mark works best when it is very near the high water mark.
2944	 *   This allows the receiver to restart by sending XON when it has
2945	 *   drained a bit.
2946	 */
2947	hwm = min(((pba << 10) * 9 / 10),
2948	    ((pba << 10) - 2 * adapter->max_frame_size));
2949
2950	if (hw->mac.type < e1000_82576) {
2951		fc->high_water = hwm & 0xFFF8;  /* 8-byte granularity */
2952		fc->low_water = fc->high_water - 8;
2953	} else {
2954		fc->high_water = hwm & 0xFFF0;  /* 16-byte granularity */
2955		fc->low_water = fc->high_water - 16;
2956	}
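	/*
	 * Worked example (illustrative): with pba = 32 (KB) and a 1522-byte
	 * max_frame_size, 90% of the buffer is 29491 bytes and the
	 * two-frame headroom limit is 32768 - 3044 = 29724 bytes, so hwm =
	 * 29491; on pre-82576 parts high_water then rounds down to 29488
	 * (8-byte granularity) and low_water becomes 29480.
	 */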
2957
2958	fc->pause_time = IGB_FC_PAUSE_TIME;
2959	fc->send_xon = TRUE;
2960	if (adapter->fc)
2961		fc->requested_mode = adapter->fc;
2962	else
2963		fc->requested_mode = e1000_fc_default;
2964
2965	/* Issue a global reset */
2966	e1000_reset_hw(hw);
2967	E1000_WRITE_REG(hw, E1000_WUC, 0);
2968
2969	if (e1000_init_hw(hw) < 0)
2970		device_printf(dev, "Hardware Initialization Failed\n");
2971
2972	/* Setup DMA Coalescing */
2973	if ((hw->mac.type > e1000_82580) &&
2974	    (hw->mac.type != e1000_i211)) {
2975		u32 dmac;
2976		u32 reg = ~E1000_DMACR_DMAC_EN;
2977
2978		if (adapter->dmac == 0) { /* Disabling it */
2979			E1000_WRITE_REG(hw, E1000_DMACR, reg);
2980			goto reset_out;
2981		}
2982
2983		/* Set starting thresholds */
2984		E1000_WRITE_REG(hw, E1000_DMCTXTH, 0);
2985		E1000_WRITE_REG(hw, E1000_DMCRTRH, 0);
2986
2987		hwm = 64 * pba - adapter->max_frame_size / 16;
2988		if (hwm < 64 * (pba - 6))
2989			hwm = 64 * (pba - 6);
2990		reg = E1000_READ_REG(hw, E1000_FCRTC);
2991		reg &= ~E1000_FCRTC_RTH_COAL_MASK;
2992		reg |= ((hwm << E1000_FCRTC_RTH_COAL_SHIFT)
2993		    & E1000_FCRTC_RTH_COAL_MASK);
2994		E1000_WRITE_REG(hw, E1000_FCRTC, reg);
2995
2996
2997		dmac = pba - adapter->max_frame_size / 512;
2998		if (dmac < pba - 10)
2999			dmac = pba - 10;
3000		reg = E1000_READ_REG(hw, E1000_DMACR);
3001		reg &= ~E1000_DMACR_DMACTHR_MASK;
3002		reg = ((dmac << E1000_DMACR_DMACTHR_SHIFT)
3003		    & E1000_DMACR_DMACTHR_MASK);
3004		/* Transition to L0s or L1 if available */
3005		reg |= (E1000_DMACR_DMAC_EN | E1000_DMACR_DMAC_LX_MASK);
3006		/* timer = value in adapter->dmac in 32usec intervals */
3007		reg |= (adapter->dmac >> 5);
3008		E1000_WRITE_REG(hw, E1000_DMACR, reg);
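		/*
		 * E.g. (illustrative): a dmac setting of 1000 usec programs
		 * a threshold timer field of 1000 >> 5 = 31 intervals of
		 * 32 usec each.
		 */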
3009
3010		/* Set the interval before transition */
3011		reg = E1000_READ_REG(hw, E1000_DMCTLX);
3012		reg |= 0x80000004;
3013		E1000_WRITE_REG(hw, E1000_DMCTLX, reg);
3014
3015		/* free space in tx packet buffer to wake from DMA coal */
3016		E1000_WRITE_REG(hw, E1000_DMCTXTH,
3017		    (20480 - (2 * adapter->max_frame_size)) >> 6);
3018
3019		/* make low power state decision controlled by DMA coal */
3020		reg = E1000_READ_REG(hw, E1000_PCIEMISC);
3021		reg &= ~E1000_PCIEMISC_LX_DECISION;
3022		E1000_WRITE_REG(hw, E1000_PCIEMISC, reg);
3023		device_printf(dev, "DMA Coalescing enabled\n");
3024
3025	} else if (hw->mac.type == e1000_82580) {
3026		u32 reg = E1000_READ_REG(hw, E1000_PCIEMISC);
3027		E1000_WRITE_REG(hw, E1000_DMACR, 0);
3028		E1000_WRITE_REG(hw, E1000_PCIEMISC,
3029		    reg & ~E1000_PCIEMISC_LX_DECISION);
3030	}
3031
3032reset_out:
3033	E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);
3034	e1000_get_phy_info(hw);
3035	e1000_check_for_link(hw);
3036	return;
3037}
3038
3039/*********************************************************************
3040 *
3041 *  Setup networking device structure and register an interface.
3042 *
3043 **********************************************************************/
3044static int
3045igb_setup_interface(device_t dev, struct adapter *adapter)
3046{
3047	struct ifnet   *ifp;
3048
3049	INIT_DEBUGOUT("igb_setup_interface: begin");
3050
3051	ifp = adapter->ifp = if_alloc(IFT_ETHER);
3052	if (ifp == NULL) {
3053		device_printf(dev, "can not allocate ifnet structure\n");
3054		return (-1);
3055	}
3056	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
3057	ifp->if_init =  igb_init;
3058	ifp->if_softc = adapter;
3059	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
3060	ifp->if_ioctl = igb_ioctl;
3061#if __FreeBSD_version >= 800000
3062	ifp->if_transmit = igb_mq_start;
3063	ifp->if_qflush = igb_qflush;
3064#else
3065	ifp->if_start = igb_start;
3066	IFQ_SET_MAXLEN(&ifp->if_snd, adapter->num_tx_desc - 1);
3067	ifp->if_snd.ifq_drv_maxlen = adapter->num_tx_desc - 1;
3068	IFQ_SET_READY(&ifp->if_snd);
3069#endif
3070
3071	ether_ifattach(ifp, adapter->hw.mac.addr);
3072
3073	ifp->if_capabilities = ifp->if_capenable = 0;
3074
3075	ifp->if_capabilities = IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM;
3076	ifp->if_capabilities |= IFCAP_TSO4;
3077	ifp->if_capabilities |= IFCAP_JUMBO_MTU;
3078	ifp->if_capenable = ifp->if_capabilities;
3079
3080	/* Advertise LRO capability, but don't enable it by default */
3081	ifp->if_capabilities |= IFCAP_LRO;
3082
3083#ifdef DEVICE_POLLING
3084	ifp->if_capabilities |= IFCAP_POLLING;
3085#endif
3086
3087	/*
3088	 * Tell the upper layer(s) we
3089	 * support full VLAN capability.
3090	 */
3091	ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);
3092	ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING
3093			     |  IFCAP_VLAN_HWTSO
3094			     |  IFCAP_VLAN_MTU;
3095	ifp->if_capenable |= IFCAP_VLAN_HWTAGGING
3096			  |  IFCAP_VLAN_HWTSO
3097			  |  IFCAP_VLAN_MTU;
3098
3099	/*
3100	** Don't turn this on by default: if vlans are
3101	** created on another pseudo device (e.g. lagg),
3102	** vlan events are not passed through, breaking
3103	** operation, whereas with HW FILTER off it works.
3104	** If using vlans directly on the igb driver you
3105	** can enable this and get full hardware tag filtering.
3106	*/
3107	ifp->if_capabilities |= IFCAP_VLAN_HWFILTER;
3108
3109	/*
3110	 * Specify the media types supported by this adapter and register
3111	 * callbacks to update media and link information
3112	 */
3113	ifmedia_init(&adapter->media, IFM_IMASK,
3114	    igb_media_change, igb_media_status);
3115	if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
3116	    (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
3117		ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_SX | IFM_FDX,
3118			    0, NULL);
3119		ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_SX, 0, NULL);
3120	} else {
3121		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T, 0, NULL);
3122		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T | IFM_FDX,
3123			    0, NULL);
3124		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX,
3125			    0, NULL);
3126		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
3127			    0, NULL);
3128		if (adapter->hw.phy.type != e1000_phy_ife) {
3129			ifmedia_add(&adapter->media,
3130				IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
3131			ifmedia_add(&adapter->media,
3132				IFM_ETHER | IFM_1000_T, 0, NULL);
3133		}
3134	}
3135	ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
3136	ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
3137	return (0);
3138}
3139
3140
3141/*
3142 * Manage DMA'able memory.
3143 */
3144static void
3145igb_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
3146{
3147	if (error)
3148		return;
3149	*(bus_addr_t *) arg = segs[0].ds_addr;
3150}
3151
3152static int
3153igb_dma_malloc(struct adapter *adapter, bus_size_t size,
3154        struct igb_dma_alloc *dma, int mapflags)
3155{
3156	int error;
3157
3158	error = bus_dma_tag_create(bus_get_dma_tag(adapter->dev), /* parent */
3159				IGB_DBA_ALIGN, 0,	/* alignment, bounds */
3160				BUS_SPACE_MAXADDR,	/* lowaddr */
3161				BUS_SPACE_MAXADDR,	/* highaddr */
3162				NULL, NULL,		/* filter, filterarg */
3163				size,			/* maxsize */
3164				1,			/* nsegments */
3165				size,			/* maxsegsize */
3166				0,			/* flags */
3167				NULL,			/* lockfunc */
3168				NULL,			/* lockarg */
3169				&dma->dma_tag);
3170	if (error) {
3171		device_printf(adapter->dev,
3172		    "%s: bus_dma_tag_create failed: %d\n",
3173		    __func__, error);
3174		goto fail_0;
3175	}
3176
3177	error = bus_dmamem_alloc(dma->dma_tag, (void**) &dma->dma_vaddr,
3178	    BUS_DMA_NOWAIT | BUS_DMA_COHERENT, &dma->dma_map);
3179	if (error) {
3180		device_printf(adapter->dev,
3181		    "%s: bus_dmamem_alloc(%ju) failed: %d\n",
3182		    __func__, (uintmax_t)size, error);
3183		goto fail_2;
3184	}
3185
3186	dma->dma_paddr = 0;
3187	error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
3188	    size, igb_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT);
3189	if (error || dma->dma_paddr == 0) {
3190		device_printf(adapter->dev,
3191		    "%s: bus_dmamap_load failed: %d\n",
3192		    __func__, error);
3193		goto fail_3;
3194	}
3195
3196	return (0);
3197
3198fail_3:
3199	bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3200fail_2:
3201	bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
3202	bus_dma_tag_destroy(dma->dma_tag);
3203fail_0:
3204	dma->dma_map = NULL;
3205	dma->dma_tag = NULL;
3206
3207	return (error);
3208}
3209
3210static void
3211igb_dma_free(struct adapter *adapter, struct igb_dma_alloc *dma)
3212{
3213	if (dma->dma_tag == NULL)
3214		return;
3215	if (dma->dma_map != NULL) {
3216		bus_dmamap_sync(dma->dma_tag, dma->dma_map,
3217		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
3218		bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3219		bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
3220		dma->dma_map = NULL;
3221	}
3222	bus_dma_tag_destroy(dma->dma_tag);
3223	dma->dma_tag = NULL;
3224}
3225
3226
3227/*********************************************************************
3228 *
3229 *  Allocate memory for the transmit and receive rings, and then
3230 *  the descriptors associated with each, called only once at attach.
3231 *
3232 **********************************************************************/
3233static int
3234igb_allocate_queues(struct adapter *adapter)
3235{
3236	device_t dev = adapter->dev;
3237	struct igb_queue	*que = NULL;
3238	struct tx_ring		*txr = NULL;
3239	struct rx_ring		*rxr = NULL;
3240	int rsize, tsize, error = E1000_SUCCESS;
3241	int txconf = 0, rxconf = 0;
3242
3243	/* First allocate the top level queue structs */
3244	if (!(adapter->queues =
3245	    (struct igb_queue *) malloc(sizeof(struct igb_queue) *
3246	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3247		device_printf(dev, "Unable to allocate queue memory\n");
3248		error = ENOMEM;
3249		goto fail;
3250	}
3251
3252	/* Next allocate the TX ring struct memory */
3253	if (!(adapter->tx_rings =
3254	    (struct tx_ring *) malloc(sizeof(struct tx_ring) *
3255	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3256		device_printf(dev, "Unable to allocate TX ring memory\n");
3257		error = ENOMEM;
3258		goto tx_fail;
3259	}
3260
3261	/* Now allocate the RX */
3262	if (!(adapter->rx_rings =
3263	    (struct rx_ring *) malloc(sizeof(struct rx_ring) *
3264	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3265		device_printf(dev, "Unable to allocate RX ring memory\n");
3266		error = ENOMEM;
3267		goto rx_fail;
3268	}
3269
3270	tsize = roundup2(adapter->num_tx_desc *
3271	    sizeof(union e1000_adv_tx_desc), IGB_DBA_ALIGN);
3272	/*
3273	 * Now set up the TX queues, txconf is needed to handle the
3274	 * possibility that things fail midcourse and we need to
3275	 * undo memory gracefully
3276	 */
3277	for (int i = 0; i < adapter->num_queues; i++, txconf++) {
3278		/* Set up some basics */
3279		txr = &adapter->tx_rings[i];
3280		txr->adapter = adapter;
3281		txr->me = i;
3282
3283		/* Initialize the TX lock */
3284		snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
3285		    device_get_nameunit(dev), txr->me);
3286		mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF);
3287
3288		if (igb_dma_malloc(adapter, tsize,
3289			&txr->txdma, BUS_DMA_NOWAIT)) {
3290			device_printf(dev,
3291			    "Unable to allocate TX Descriptor memory\n");
3292			error = ENOMEM;
3293			goto err_tx_desc;
3294		}
3295		txr->tx_base = (struct e1000_tx_desc *)txr->txdma.dma_vaddr;
3296		bzero((void *)txr->tx_base, tsize);
3297
3298        	/* Now allocate transmit buffers for the ring */
3299        	if (igb_allocate_transmit_buffers(txr)) {
3300			device_printf(dev,
3301			    "Critical Failure setting up transmit buffers\n");
3302			error = ENOMEM;
3303			goto err_tx_desc;
3304        	}
3305#if __FreeBSD_version >= 800000
3306		/* Allocate a buf ring */
3307		txr->br = buf_ring_alloc(IGB_BR_SIZE, M_DEVBUF,
3308		    M_WAITOK, &txr->tx_mtx);
3309#endif
3310	}
3311
3312	/*
3313	 * Next the RX queues...
3314	 */
3315	rsize = roundup2(adapter->num_rx_desc *
3316	    sizeof(union e1000_adv_rx_desc), IGB_DBA_ALIGN);
3317	for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
3318		rxr = &adapter->rx_rings[i];
3319		rxr->adapter = adapter;
3320		rxr->me = i;
3321
3322		/* Initialize the RX lock */
3323		snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
3324		    device_get_nameunit(dev), rxr->me);
3325		mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF);
3326
3327		if (igb_dma_malloc(adapter, rsize,
3328			&rxr->rxdma, BUS_DMA_NOWAIT)) {
3329			device_printf(dev,
3330			    "Unable to allocate RxDescriptor memory\n");
3331			error = ENOMEM;
3332			goto err_rx_desc;
3333		}
3334		rxr->rx_base = (union e1000_adv_rx_desc *)rxr->rxdma.dma_vaddr;
3335		bzero((void *)rxr->rx_base, rsize);
3336
3337        	/* Allocate receive buffers for the ring*/
3338		if (igb_allocate_receive_buffers(rxr)) {
3339			device_printf(dev,
3340			    "Critical Failure setting up receive buffers\n");
3341			error = ENOMEM;
3342			goto err_rx_desc;
3343		}
3344	}
3345
3346	/*
3347	** Finally set up the queue holding structs
3348	*/
3349	for (int i = 0; i < adapter->num_queues; i++) {
3350		que = &adapter->queues[i];
3351		que->adapter = adapter;
3352		que->txr = &adapter->tx_rings[i];
3353		que->rxr = &adapter->rx_rings[i];
3354	}
3355
3356	return (0);
3357
3358err_rx_desc:
3359	for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
3360		igb_dma_free(adapter, &rxr->rxdma);
3361err_tx_desc:
3362	for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
3363		igb_dma_free(adapter, &txr->txdma);
3364	free(adapter->rx_rings, M_DEVBUF);
3365rx_fail:
3366#if __FreeBSD_version >= 800000
3367	buf_ring_free(txr->br, M_DEVBUF);
3368#endif
3369	free(adapter->tx_rings, M_DEVBUF);
3370tx_fail:
3371	free(adapter->queues, M_DEVBUF);
3372fail:
3373	return (error);
3374}
3375
3376/*********************************************************************
3377 *
3378 *  Allocate memory for tx_buffer structures. The tx_buffer stores all
3379 *  the information needed to transmit a packet on the wire. This is
3380 *  called only once at attach; setup is done every reset.
3381 *
3382 **********************************************************************/
3383static int
3384igb_allocate_transmit_buffers(struct tx_ring *txr)
3385{
3386	struct adapter *adapter = txr->adapter;
3387	device_t dev = adapter->dev;
3388	struct igb_tx_buffer *txbuf;
3389	int error, i;
3390
3391	/*
3392	 * Setup DMA descriptor areas.
3393	 */
3394	if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
3395			       1, 0,			/* alignment, bounds */
3396			       BUS_SPACE_MAXADDR,	/* lowaddr */
3397			       BUS_SPACE_MAXADDR,	/* highaddr */
3398			       NULL, NULL,		/* filter, filterarg */
3399			       IGB_TSO_SIZE,		/* maxsize */
3400			       IGB_MAX_SCATTER,		/* nsegments */
3401			       PAGE_SIZE,		/* maxsegsize */
3402			       0,			/* flags */
3403			       NULL,			/* lockfunc */
3404			       NULL,			/* lockfuncarg */
3405			       &txr->txtag))) {
3406		device_printf(dev,"Unable to allocate TX DMA tag\n");
3407		goto fail;
3408	}
3409
3410	if (!(txr->tx_buffers =
3411	    (struct igb_tx_buffer *) malloc(sizeof(struct igb_tx_buffer) *
3412	    adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3413		device_printf(dev, "Unable to allocate tx_buffer memory\n");
3414		error = ENOMEM;
3415		goto fail;
3416	}
3417
3418        /* Create the descriptor buffer dma maps */
3419	txbuf = txr->tx_buffers;
3420	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3421		error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
3422		if (error != 0) {
3423			device_printf(dev, "Unable to create TX DMA map\n");
3424			goto fail;
3425		}
3426	}
3427
3428	return 0;
3429fail:
3430	/* We free all, it handles case where we are in the middle */
3431	igb_free_transmit_structures(adapter);
3432	return (error);
3433}
3434
3435/*********************************************************************
3436 *
3437 *  Initialize a transmit ring.
3438 *
3439 **********************************************************************/
3440static void
3441igb_setup_transmit_ring(struct tx_ring *txr)
3442{
3443	struct adapter *adapter = txr->adapter;
3444	struct igb_tx_buffer *txbuf;
3445	int i;
3446#ifdef DEV_NETMAP
3447	struct netmap_adapter *na = NA(adapter->ifp);
3448	struct netmap_slot *slot;
3449#endif /* DEV_NETMAP */
3450
3451	/* Clear the old descriptor contents */
3452	IGB_TX_LOCK(txr);
3453#ifdef DEV_NETMAP
3454	slot = netmap_reset(na, NR_TX, txr->me, 0);
3455#endif /* DEV_NETMAP */
3456	bzero((void *)txr->tx_base,
3457	      (sizeof(union e1000_adv_tx_desc)) * adapter->num_tx_desc);
3458	/* Reset indices */
3459	txr->next_avail_desc = 0;
3460	txr->next_to_clean = 0;
3461
3462	/* Free any existing tx buffers. */
3463        txbuf = txr->tx_buffers;
3464	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3465		if (txbuf->m_head != NULL) {
3466			bus_dmamap_sync(txr->txtag, txbuf->map,
3467			    BUS_DMASYNC_POSTWRITE);
3468			bus_dmamap_unload(txr->txtag, txbuf->map);
3469			m_freem(txbuf->m_head);
3470			txbuf->m_head = NULL;
3471		}
3472#ifdef DEV_NETMAP
3473		if (slot) {
3474			int si = netmap_idx_n2k(&na->tx_rings[txr->me], i);
3475			/* no need to set the address */
3476			netmap_load_map(txr->txtag, txbuf->map, NMB(slot + si));
3477		}
3478#endif /* DEV_NETMAP */
3479		/* clear the watch index */
3480		txbuf->next_eop = -1;
3481        }
3482
3483	/* Set number of descriptors available */
3484	txr->tx_avail = adapter->num_tx_desc;
3485
3486	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3487	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3488	IGB_TX_UNLOCK(txr);
3489}
3490
3491/*********************************************************************
3492 *
3493 *  Initialize all transmit rings.
3494 *
3495 **********************************************************************/
3496static void
3497igb_setup_transmit_structures(struct adapter *adapter)
3498{
3499	struct tx_ring *txr = adapter->tx_rings;
3500
3501	for (int i = 0; i < adapter->num_queues; i++, txr++)
3502		igb_setup_transmit_ring(txr);
3503
3504	return;
3505}
3506
3507/*********************************************************************
3508 *
3509 *  Enable transmit unit.
3510 *
3511 **********************************************************************/
3512static void
3513igb_initialize_transmit_units(struct adapter *adapter)
3514{
3515	struct tx_ring	*txr = adapter->tx_rings;
3516	struct e1000_hw *hw = &adapter->hw;
3517	u32		tctl, txdctl;
3518
3519	INIT_DEBUGOUT("igb_initialize_transmit_units: begin");
3520	tctl = txdctl = 0;
3521
3522	/* Setup the Tx Descriptor Rings */
3523	for (int i = 0; i < adapter->num_queues; i++, txr++) {
3524		u64 bus_addr = txr->txdma.dma_paddr;
3525
3526		E1000_WRITE_REG(hw, E1000_TDLEN(i),
3527		    adapter->num_tx_desc * sizeof(struct e1000_tx_desc));
3528		E1000_WRITE_REG(hw, E1000_TDBAH(i),
3529		    (uint32_t)(bus_addr >> 32));
3530		E1000_WRITE_REG(hw, E1000_TDBAL(i),
3531		    (uint32_t)bus_addr);
3532
3533		/* Setup the HW Tx Head and Tail descriptor pointers */
3534		E1000_WRITE_REG(hw, E1000_TDT(i), 0);
3535		E1000_WRITE_REG(hw, E1000_TDH(i), 0);
3536
3537		HW_DEBUGOUT2("Base = %x, Length = %x\n",
3538		    E1000_READ_REG(hw, E1000_TDBAL(i)),
3539		    E1000_READ_REG(hw, E1000_TDLEN(i)));
3540
3541		txr->queue_status = IGB_QUEUE_IDLE;
3542
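		/*
		** TXDCTL layout: prefetch threshold in the low bits,
		** host threshold shifted up 8, write-back threshold
		** shifted up 16, plus the queue enable bit.
		*/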
3543		txdctl |= IGB_TX_PTHRESH;
3544		txdctl |= IGB_TX_HTHRESH << 8;
3545		txdctl |= IGB_TX_WTHRESH << 16;
3546		txdctl |= E1000_TXDCTL_QUEUE_ENABLE;
3547		E1000_WRITE_REG(hw, E1000_TXDCTL(i), txdctl);
3548	}
3549
3550	if (adapter->vf_ifp)
3551		return;
3552
3553	e1000_config_collision_dist(hw);
3554
3555	/* Program the Transmit Control Register */
3556	tctl = E1000_READ_REG(hw, E1000_TCTL);
3557	tctl &= ~E1000_TCTL_CT;
3558	tctl |= (E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN |
3559		   (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT));
3560
3561	/* This write will effectively turn on the transmit unit. */
3562	E1000_WRITE_REG(hw, E1000_TCTL, tctl);
3563}
3564
3565/*********************************************************************
3566 *
3567 *  Free all transmit rings.
3568 *
3569 **********************************************************************/
3570static void
3571igb_free_transmit_structures(struct adapter *adapter)
3572{
3573	struct tx_ring *txr = adapter->tx_rings;
3574
3575	for (int i = 0; i < adapter->num_queues; i++, txr++) {
3576		IGB_TX_LOCK(txr);
3577		igb_free_transmit_buffers(txr);
3578		igb_dma_free(adapter, &txr->txdma);
3579		IGB_TX_UNLOCK(txr);
3580		IGB_TX_LOCK_DESTROY(txr);
3581	}
3582	free(adapter->tx_rings, M_DEVBUF);
3583}
3584
3585/*********************************************************************
3586 *
3587 *  Free transmit ring related data structures.
3588 *
3589 **********************************************************************/
3590static void
3591igb_free_transmit_buffers(struct tx_ring *txr)
3592{
3593	struct adapter *adapter = txr->adapter;
3594	struct igb_tx_buffer *tx_buffer;
3595	int             i;
3596
3597	INIT_DEBUGOUT("free_transmit_ring: begin");
3598
3599	if (txr->tx_buffers == NULL)
3600		return;
3601
3602	tx_buffer = txr->tx_buffers;
3603	for (i = 0; i < adapter->num_tx_desc; i++, tx_buffer++) {
3604		if (tx_buffer->m_head != NULL) {
3605			bus_dmamap_sync(txr->txtag, tx_buffer->map,
3606			    BUS_DMASYNC_POSTWRITE);
3607			bus_dmamap_unload(txr->txtag,
3608			    tx_buffer->map);
3609			m_freem(tx_buffer->m_head);
3610			tx_buffer->m_head = NULL;
3611			if (tx_buffer->map != NULL) {
3612				bus_dmamap_destroy(txr->txtag,
3613				    tx_buffer->map);
3614				tx_buffer->map = NULL;
3615			}
3616		} else if (tx_buffer->map != NULL) {
3617			bus_dmamap_unload(txr->txtag,
3618			    tx_buffer->map);
3619			bus_dmamap_destroy(txr->txtag,
3620			    tx_buffer->map);
3621			tx_buffer->map = NULL;
3622		}
3623	}
3624#if __FreeBSD_version >= 800000
3625	if (txr->br != NULL)
3626		buf_ring_free(txr->br, M_DEVBUF);
3627#endif
3628	if (txr->tx_buffers != NULL) {
3629		free(txr->tx_buffers, M_DEVBUF);
3630		txr->tx_buffers = NULL;
3631	}
3632	if (txr->txtag != NULL) {
3633		bus_dma_tag_destroy(txr->txtag);
3634		txr->txtag = NULL;
3635	}
3636	return;
3637}
3638
3639/**********************************************************************
3640 *
3641 *  Setup work for hardware segmentation offload (TSO)
3642 *
3643 **********************************************************************/
3644static bool
3645igb_tso_setup(struct tx_ring *txr, struct mbuf *mp, int ehdrlen,
3646	struct ip *ip, struct tcphdr *th)
3647{
3648	struct adapter *adapter = txr->adapter;
3649	struct e1000_adv_tx_context_desc *TXD;
3650	struct igb_tx_buffer        *tx_buffer;
3651	u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0;
3652	u32 mss_l4len_idx = 0;
3653	u16 vtag = 0;
3654	int ctxd, ip_hlen, tcp_hlen;
3655
3656	ctxd = txr->next_avail_desc;
3657	tx_buffer = &txr->tx_buffers[ctxd];
3658	TXD = (struct e1000_adv_tx_context_desc *) &txr->tx_base[ctxd];
3659
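	/* ip_sum is zeroed so the hardware can insert it per TSO
	   segment; ip_hl and th_off are counts of 32-bit words */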
3660	ip->ip_sum = 0;
3661	ip_hlen = ip->ip_hl << 2;
3662	tcp_hlen = th->th_off << 2;
3663
3664	/* VLAN MACLEN IPLEN */
3665	if (mp->m_flags & M_VLANTAG) {
3666		vtag = htole16(mp->m_pkthdr.ether_vtag);
3667		vlan_macip_lens |= (vtag << E1000_ADVTXD_VLAN_SHIFT);
3668	}
3669
3670	vlan_macip_lens |= (ehdrlen << E1000_ADVTXD_MACLEN_SHIFT);
3671	vlan_macip_lens |= ip_hlen;
3672	TXD->vlan_macip_lens |= htole32(vlan_macip_lens);
3673
3674	/* ADV DTYPE TUCMD */
3675	type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
3676	type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP;
3677	type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4;
3678	TXD->type_tucmd_mlhl |= htole32(type_tucmd_mlhl);
3679
3680	/* MSS L4LEN IDX */
3681	mss_l4len_idx |= (mp->m_pkthdr.tso_segsz << E1000_ADVTXD_MSS_SHIFT);
3682	mss_l4len_idx |= (tcp_hlen << E1000_ADVTXD_L4LEN_SHIFT);
3683	/* 82575 needs the queue index added */
3684	if (adapter->hw.mac.type == e1000_82575)
3685		mss_l4len_idx |= txr->me << 4;
3686	TXD->mss_l4len_idx = htole32(mss_l4len_idx);
3687
3688	TXD->seqnum_seed = htole32(0);
3689	tx_buffer->m_head = NULL;
3690	tx_buffer->next_eop = -1;
3691
3692	if (++ctxd == adapter->num_tx_desc)
3693		ctxd = 0;
3694
3695	txr->tx_avail--;
3696	txr->next_avail_desc = ctxd;
3697	return TRUE;
3698}
3699
3700
3701/*********************************************************************
3702 *
3703 *  Context Descriptor setup for VLAN or CSUM
3704 *
3705 **********************************************************************/
3706
3707static bool
3708igb_tx_ctx_setup(struct tx_ring *txr, struct mbuf *mp)
3709{
3710	struct adapter *adapter = txr->adapter;
3711	struct e1000_adv_tx_context_desc *TXD;
3712	struct igb_tx_buffer        *tx_buffer;
3713	u32 vlan_macip_lens, type_tucmd_mlhl, mss_l4len_idx;
3714	struct ether_vlan_header *eh;
3715	struct ip *ip = NULL;
3716	struct ip6_hdr *ip6;
3717	int  ehdrlen, ctxd, ip_hlen = 0;
3718	u16	etype, vtag = 0;
3719	u8	ipproto = 0;
3720	bool	offload = TRUE;
3721
3722	if ((mp->m_pkthdr.csum_flags & CSUM_OFFLOAD) == 0)
3723		offload = FALSE;
3724
3725	vlan_macip_lens = type_tucmd_mlhl = mss_l4len_idx = 0;
3726	ctxd = txr->next_avail_desc;
3727	tx_buffer = &txr->tx_buffers[ctxd];
3728	TXD = (struct e1000_adv_tx_context_desc *) &txr->tx_base[ctxd];
3729
3730	/*
3731	** In advanced descriptors the vlan tag must
3732	** be placed into the context descriptor, thus
3733	** we need to be here just for that setup.
3734	*/
3735	if (mp->m_flags & M_VLANTAG) {
3736		vtag = htole16(mp->m_pkthdr.ether_vtag);
3737		vlan_macip_lens |= (vtag << E1000_ADVTXD_VLAN_SHIFT);
3738	} else if (offload == FALSE)
3739		return FALSE;
3740
3741	/*
3742	 * Determine where frame payload starts.
3743	 * Jump over vlan headers if already present,
3744	 * helpful for QinQ too.
3745	 */
3746	eh = mtod(mp, struct ether_vlan_header *);
3747	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
3748		etype = ntohs(eh->evl_proto);
3749		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3750	} else {
3751		etype = ntohs(eh->evl_encap_proto);
3752		ehdrlen = ETHER_HDR_LEN;
3753	}
3754
3755	/* Set the ether header length */
3756	vlan_macip_lens |= ehdrlen << E1000_ADVTXD_MACLEN_SHIFT;
3757
3758	switch (etype) {
3759		case ETHERTYPE_IP:
3760			ip = (struct ip *)(mp->m_data + ehdrlen);
3761			ip_hlen = ip->ip_hl << 2;
3762			if (mp->m_len < ehdrlen + ip_hlen) {
3763				offload = FALSE;
3764				break;
3765			}
3766			ipproto = ip->ip_p;
3767			type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4;
3768			break;
3769		case ETHERTYPE_IPV6:
3770			ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
3771			ip_hlen = sizeof(struct ip6_hdr);
3772			ipproto = ip6->ip6_nxt;
3773			type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV6;
3774			break;
3775		default:
3776			offload = FALSE;
3777			break;
3778	}
3779
3780	vlan_macip_lens |= ip_hlen;
3781	type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
3782
3783	switch (ipproto) {
3784		case IPPROTO_TCP:
3785			if (mp->m_pkthdr.csum_flags & CSUM_TCP)
3786				type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP;
3787			break;
3788		case IPPROTO_UDP:
3789			if (mp->m_pkthdr.csum_flags & CSUM_UDP)
3790				type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_UDP;
3791			break;
3792#if __FreeBSD_version >= 800000
3793		case IPPROTO_SCTP:
3794			if (mp->m_pkthdr.csum_flags & CSUM_SCTP)
3795				type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_SCTP;
3796			break;
3797#endif
3798		default:
3799			offload = FALSE;
3800			break;
3801	}
3802
3803	/* 82575 needs the queue index added */
3804	if (adapter->hw.mac.type == e1000_82575)
3805		mss_l4len_idx = txr->me << 4;
3806
3807	/* Now copy bits into descriptor */
3808	TXD->vlan_macip_lens |= htole32(vlan_macip_lens);
3809	TXD->type_tucmd_mlhl |= htole32(type_tucmd_mlhl);
3810	TXD->seqnum_seed = htole32(0);
3811	TXD->mss_l4len_idx = htole32(mss_l4len_idx);
3812
3813	tx_buffer->m_head = NULL;
3814	tx_buffer->next_eop = -1;
3815
3816	/* We've consumed the first desc, adjust counters */
3817	if (++ctxd == adapter->num_tx_desc)
3818		ctxd = 0;
3819	txr->next_avail_desc = ctxd;
3820	--txr->tx_avail;
3821
3822        return (offload);
3823}
3824
3825
3826/**********************************************************************
3827 *
3828 *  Examine each tx_buffer in the used queue. If the hardware is done
3829 *  processing the packet then free associated resources. The
3830 *  tx_buffer is put back on the free queue.
3831 *
3832 *  A TRUE return means there's work in the ring to clean, FALSE means it's empty.
3833 **********************************************************************/
3834static bool
3835igb_txeof(struct tx_ring *txr)
3836{
3837	struct adapter	*adapter = txr->adapter;
3838        int first, last, done, processed;
3839        struct igb_tx_buffer *tx_buffer;
3840        struct e1000_tx_desc   *tx_desc, *eop_desc;
3841	struct ifnet   *ifp = adapter->ifp;
3842
3843	IGB_TX_LOCK_ASSERT(txr);
3844
3845#ifdef DEV_NETMAP
3846	if (ifp->if_capenable & IFCAP_NETMAP) {
3847		struct netmap_adapter *na = NA(ifp);
3848
3849		selwakeuppri(&na->tx_rings[txr->me].si, PI_NET);
3850		IGB_TX_UNLOCK(txr);
3851		IGB_CORE_LOCK(adapter);
3852		selwakeuppri(&na->tx_si, PI_NET);
3853		IGB_CORE_UNLOCK(adapter);
3854		IGB_TX_LOCK(txr);
3855		return FALSE;
3856	}
3857#endif /* DEV_NETMAP */
3858        if (txr->tx_avail == adapter->num_tx_desc) {
3859		txr->queue_status = IGB_QUEUE_IDLE;
3860                return FALSE;
3861	}
3862
3863	processed = 0;
3864        first = txr->next_to_clean;
3865        tx_desc = &txr->tx_base[first];
3866        tx_buffer = &txr->tx_buffers[first];
3867	last = tx_buffer->next_eop;
3868        eop_desc = &txr->tx_base[last];
3869
3870	/*
3871	 * What this does is get the index of the
3872	 * first descriptor AFTER the EOP of the
3873	 * first packet, so that we can do a
3874	 * simple comparison in the inner while loop.
3875	 */
3876	if (++last == adapter->num_tx_desc)
3877 		last = 0;
3878	done = last;
3879
3880        bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3881            BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
3882
3883        while (eop_desc->upper.fields.status & E1000_TXD_STAT_DD) {
3884		/* We clean the range of the packet */
3885		while (first != done) {
3886                	tx_desc->upper.data = 0;
3887                	tx_desc->lower.data = 0;
3888                	tx_desc->buffer_addr = 0;
3889                	++txr->tx_avail;
3890			++processed;
3891
3892			if (tx_buffer->m_head) {
3893				txr->bytes +=
3894				    tx_buffer->m_head->m_pkthdr.len;
3895				bus_dmamap_sync(txr->txtag,
3896				    tx_buffer->map,
3897				    BUS_DMASYNC_POSTWRITE);
3898				bus_dmamap_unload(txr->txtag,
3899				    tx_buffer->map);
3900
3901                        	m_freem(tx_buffer->m_head);
3902                        	tx_buffer->m_head = NULL;
3903                	}
3904			tx_buffer->next_eop = -1;
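			/* Note forward progress for the watchdog check */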
3905			txr->watchdog_time = ticks;
3906
3907	                if (++first == adapter->num_tx_desc)
3908				first = 0;
3909
3910	                tx_buffer = &txr->tx_buffers[first];
3911			tx_desc = &txr->tx_base[first];
3912		}
3913		++txr->packets;
3914		++ifp->if_opackets;
3915		/* See if we can continue to the next packet */
3916		last = tx_buffer->next_eop;
3917		if (last != -1) {
3918        		eop_desc = &txr->tx_base[last];
3919			/* Get new done point */
3920			if (++last == adapter->num_tx_desc) last = 0;
3921			done = last;
3922		} else
3923			break;
3924        }
3925        bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3926            BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3927
3928        txr->next_to_clean = first;
3929
3930	/*
3931	** Watchdog calculation: we know there's
3932	** work outstanding or the first return
3933	** would have been taken, so nothing processed
3934	** for too long indicates a hang.
3935	*/
3936	if ((!processed) && ((ticks - txr->watchdog_time) > IGB_WATCHDOG))
3937		txr->queue_status |= IGB_QUEUE_HUNG;
3938        /*
3939         * If we have a minimum free,
3940         * clear depleted state bit
3941         */
3942        if (txr->tx_avail >= IGB_QUEUE_THRESHOLD)
3943                txr->queue_status &= ~IGB_QUEUE_DEPLETED;
3944
3945	/* All clean, turn off the watchdog */
3946	if (txr->tx_avail == adapter->num_tx_desc) {
3947		txr->queue_status = IGB_QUEUE_IDLE;
3948		return (FALSE);
3949        }
3950
3951	return (TRUE);
3952}
3953
3954/*********************************************************************
3955 *
3956 *  Refresh mbuf buffers for RX descriptor rings
3957 *   - now keeps its own state so discards due to resource
3958 *     exhaustion are unnecessary, if an mbuf cannot be obtained
3959 *     exhaustion are unnecessary; if an mbuf cannot be obtained
3960 *     it just returns, keeping its placeholder, so it can simply
3961 *     be called again later to retry.
3962 **********************************************************************/
3963static void
3964igb_refresh_mbufs(struct rx_ring *rxr, int limit)
3965{
3966	struct adapter		*adapter = rxr->adapter;
3967	bus_dma_segment_t	hseg[1];
3968	bus_dma_segment_t	pseg[1];
3969	struct igb_rx_buf	*rxbuf;
3970	struct mbuf		*mh, *mp;
3971	int			i, j, nsegs, error;
3972	bool			refreshed = FALSE;
3973
3974	i = j = rxr->next_to_refresh;
3975	/*
3976	** Get one descriptor beyond
3977	** our work mark to control
3978	** the loop.
3979        */
3980	if (++j == adapter->num_rx_desc)
3981		j = 0;
3982
3983	while (j != limit) {
3984		rxbuf = &rxr->rx_buffers[i];
3985		/* No hdr mbuf used with header split off */
3986		if (rxr->hdr_split == FALSE)
3987			goto no_split;
3988		if (rxbuf->m_head == NULL) {
3989			mh = m_gethdr(M_DONTWAIT, MT_DATA);
3990			if (mh == NULL)
3991				goto update;
3992		} else
3993			mh = rxbuf->m_head;
3994
3995		mh->m_pkthdr.len = mh->m_len = MHLEN;
3996		mh->m_len = MHLEN;
3997		mh->m_flags |= M_PKTHDR;
3998		/* Get the memory mapping */
3999		error = bus_dmamap_load_mbuf_sg(rxr->htag,
4000		    rxbuf->hmap, mh, hseg, &nsegs, BUS_DMA_NOWAIT);
4001		if (error != 0) {
4002			printf("Refresh mbufs: hdr dmamap load"
4003			    " failure - %d\n", error);
4004			m_free(mh);
4005			rxbuf->m_head = NULL;
4006			goto update;
4007		}
4008		rxbuf->m_head = mh;
4009		bus_dmamap_sync(rxr->htag, rxbuf->hmap,
4010		    BUS_DMASYNC_PREREAD);
4011		rxr->rx_base[i].read.hdr_addr =
4012		    htole64(hseg[0].ds_addr);
4013no_split:
4014		if (rxbuf->m_pack == NULL) {
4015			mp = m_getjcl(M_DONTWAIT, MT_DATA,
4016			    M_PKTHDR, adapter->rx_mbuf_sz);
4017			if (mp == NULL)
4018				goto update;
4019		} else
4020			mp = rxbuf->m_pack;
4021
4022		mp->m_pkthdr.len = mp->m_len = adapter->rx_mbuf_sz;
4023		/* Get the memory mapping */
4024		error = bus_dmamap_load_mbuf_sg(rxr->ptag,
4025		    rxbuf->pmap, mp, pseg, &nsegs, BUS_DMA_NOWAIT);
4026		if (error != 0) {
4027			printf("Refresh mbufs: payload dmamap load"
4028			    " failure - %d\n", error);
4029			m_free(mp);
4030			rxbuf->m_pack = NULL;
4031			goto update;
4032		}
4033		rxbuf->m_pack = mp;
4034		bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
4035		    BUS_DMASYNC_PREREAD);
4036		rxr->rx_base[i].read.pkt_addr =
4037		    htole64(pseg[0].ds_addr);
4038		refreshed = TRUE; /* I feel wefreshed :) */
4039
4040		i = j; /* our next is precalculated */
4041		rxr->next_to_refresh = i;
4042		if (++j == adapter->num_rx_desc)
4043			j = 0;
4044	}
4045update:
4046	if (refreshed) /* update tail */
4047		E1000_WRITE_REG(&adapter->hw,
4048		    E1000_RDT(rxr->me), rxr->next_to_refresh);
4049	return;
4050}
4051
4052
4053/*********************************************************************
4054 *
4055 *  Allocate memory for rx_buffer structures. Since we use one
4056 *  rx_buffer per received packet, the maximum number of rx_buffers
4057 *  that we'll need is equal to the number of receive descriptors
4058 *  that we've allocated.
4059 *
4060 **********************************************************************/
4061static int
4062igb_allocate_receive_buffers(struct rx_ring *rxr)
4063{
4064	struct	adapter 	*adapter = rxr->adapter;
4065	device_t 		dev = adapter->dev;
4066	struct igb_rx_buf	*rxbuf;
4067	int             	i, bsize, error;
4068
4069	bsize = sizeof(struct igb_rx_buf) * adapter->num_rx_desc;
4070	if (!(rxr->rx_buffers =
4071	    (struct igb_rx_buf *) malloc(bsize,
4072	    M_DEVBUF, M_NOWAIT | M_ZERO))) {
4073		device_printf(dev, "Unable to allocate rx_buffer memory\n");
4074		error = ENOMEM;
4075		goto fail;
4076	}
4077
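	/* DMA tag for the small header buffers used with header split */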
4078	if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
4079				   1, 0,		/* alignment, bounds */
4080				   BUS_SPACE_MAXADDR,	/* lowaddr */
4081				   BUS_SPACE_MAXADDR,	/* highaddr */
4082				   NULL, NULL,		/* filter, filterarg */
4083				   MSIZE,		/* maxsize */
4084				   1,			/* nsegments */
4085				   MSIZE,		/* maxsegsize */
4086				   0,			/* flags */
4087				   NULL,		/* lockfunc */
4088				   NULL,		/* lockfuncarg */
4089				   &rxr->htag))) {
4090		device_printf(dev, "Unable to create RX DMA tag\n");
4091		goto fail;
4092	}
4093
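	/* DMA tag for the payload clusters (up to 9K jumbo) */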
4094	if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
4095				   1, 0,		/* alignment, bounds */
4096				   BUS_SPACE_MAXADDR,	/* lowaddr */
4097				   BUS_SPACE_MAXADDR,	/* highaddr */
4098				   NULL, NULL,		/* filter, filterarg */
4099				   MJUM9BYTES,		/* maxsize */
4100				   1,			/* nsegments */
4101				   MJUM9BYTES,		/* maxsegsize */
4102				   0,			/* flags */
4103				   NULL,		/* lockfunc */
4104				   NULL,		/* lockfuncarg */
4105				   &rxr->ptag))) {
4106		device_printf(dev, "Unable to create RX payload DMA tag\n");
4107		goto fail;
4108	}
4109
4110	for (i = 0; i < adapter->num_rx_desc; i++) {
4111		rxbuf = &rxr->rx_buffers[i];
4112		error = bus_dmamap_create(rxr->htag,
4113		    BUS_DMA_NOWAIT, &rxbuf->hmap);
4114		if (error) {
4115			device_printf(dev,
4116			    "Unable to create RX head DMA maps\n");
4117			goto fail;
4118		}
4119		error = bus_dmamap_create(rxr->ptag,
4120		    BUS_DMA_NOWAIT, &rxbuf->pmap);
4121		if (error) {
4122			device_printf(dev,
4123			    "Unable to create RX packet DMA maps\n");
4124			goto fail;
4125		}
4126	}
4127
4128	return (0);
4129
4130fail:
4131	/* Frees all, but can handle partial completion */
4132	igb_free_receive_structures(adapter);
4133	return (error);
4134}
4135
4136
4137static void
4138igb_free_receive_ring(struct rx_ring *rxr)
4139{
4140	struct	adapter		*adapter = rxr->adapter;
4141	struct igb_rx_buf	*rxbuf;
4142
4143
4144	for (int i = 0; i < adapter->num_rx_desc; i++) {
4145		rxbuf = &rxr->rx_buffers[i];
4146		if (rxbuf->m_head != NULL) {
4147			bus_dmamap_sync(rxr->htag, rxbuf->hmap,
4148			    BUS_DMASYNC_POSTREAD);
4149			bus_dmamap_unload(rxr->htag, rxbuf->hmap);
4150			rxbuf->m_head->m_flags |= M_PKTHDR;
4151			m_freem(rxbuf->m_head);
4152		}
4153		if (rxbuf->m_pack != NULL) {
4154			bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
4155			    BUS_DMASYNC_POSTREAD);
4156			bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
4157			rxbuf->m_pack->m_flags |= M_PKTHDR;
4158			m_freem(rxbuf->m_pack);
4159		}
4160		rxbuf->m_head = NULL;
4161		rxbuf->m_pack = NULL;
4162	}
4163}
4164
4165
4166/*********************************************************************
4167 *
4168 *  Initialize a receive ring and its buffers.
4169 *
4170 **********************************************************************/
4171static int
4172igb_setup_receive_ring(struct rx_ring *rxr)
4173{
4174	struct	adapter		*adapter;
4175	struct  ifnet		*ifp;
4176	device_t		dev;
4177	struct igb_rx_buf	*rxbuf;
4178	bus_dma_segment_t	pseg[1], hseg[1];
4179	struct lro_ctrl		*lro = &rxr->lro;
4180	int			rsize, nsegs, error = 0;
4181#ifdef DEV_NETMAP
4182	struct netmap_adapter *na = NA(rxr->adapter->ifp);
4183	struct netmap_slot *slot;
4184#endif /* DEV_NETMAP */
4185
4186	adapter = rxr->adapter;
4187	dev = adapter->dev;
4188	ifp = adapter->ifp;
4189
4190	/* Clear the ring contents */
4191	IGB_RX_LOCK(rxr);
4192#ifdef DEV_NETMAP
4193	slot = netmap_reset(na, NR_RX, rxr->me, 0);
4194#endif /* DEV_NETMAP */
4195	rsize = roundup2(adapter->num_rx_desc *
4196	    sizeof(union e1000_adv_rx_desc), IGB_DBA_ALIGN);
4197	bzero((void *)rxr->rx_base, rsize);
4198
4199	/*
4200	** Free current RX buffer structures and their mbufs
4201	*/
4202	igb_free_receive_ring(rxr);
4203
4204	/* Configure for header split? */
4205	if (igb_header_split)
4206		rxr->hdr_split = TRUE;
4207
4208        /* Now replenish the ring mbufs */
4209	for (int j = 0; j < adapter->num_rx_desc; ++j) {
4210		struct mbuf	*mh, *mp;
4211
4212		rxbuf = &rxr->rx_buffers[j];
4213#ifdef DEV_NETMAP
4214		if (slot) {
4215			/* slot sj is mapped to the i-th NIC-ring entry */
4216			int sj = netmap_idx_n2k(&na->rx_rings[rxr->me], j);
4217			uint64_t paddr;
4218			void *addr;
4219
4220			addr = PNMB(slot + sj, &paddr);
4221			netmap_load_map(rxr->ptag, rxbuf->pmap, addr);
4222			/* Update descriptor */
4223			rxr->rx_base[j].read.pkt_addr = htole64(paddr);
4224			continue;
4225		}
4226#endif /* DEV_NETMAP */
4227		if (rxr->hdr_split == FALSE)
4228			goto skip_head;
4229
4230		/* First the header */
4231		rxbuf->m_head = m_gethdr(M_DONTWAIT, MT_DATA);
4232		if (rxbuf->m_head == NULL) {
4233			error = ENOBUFS;
4234                        goto fail;
4235		}
4236		m_adj(rxbuf->m_head, ETHER_ALIGN);
4237		mh = rxbuf->m_head;
4238		mh->m_len = mh->m_pkthdr.len = MHLEN;
4239		mh->m_flags |= M_PKTHDR;
4240		/* Get the memory mapping */
4241		error = bus_dmamap_load_mbuf_sg(rxr->htag,
4242		    rxbuf->hmap, rxbuf->m_head, hseg,
4243		    &nsegs, BUS_DMA_NOWAIT);
4244		if (error != 0) /* Nothing elegant to do here */
4245                        goto fail;
4246		bus_dmamap_sync(rxr->htag,
4247		    rxbuf->hmap, BUS_DMASYNC_PREREAD);
4248		/* Update descriptor */
4249		rxr->rx_base[j].read.hdr_addr = htole64(hseg[0].ds_addr);
4250
4251skip_head:
4252		/* Now the payload cluster */
4253		rxbuf->m_pack = m_getjcl(M_DONTWAIT, MT_DATA,
4254		    M_PKTHDR, adapter->rx_mbuf_sz);
4255		if (rxbuf->m_pack == NULL) {
4256			error = ENOBUFS;
4257                        goto fail;
4258		}
4259		mp = rxbuf->m_pack;
4260		mp->m_pkthdr.len = mp->m_len = adapter->rx_mbuf_sz;
4261		/* Get the memory mapping */
4262		error = bus_dmamap_load_mbuf_sg(rxr->ptag,
4263		    rxbuf->pmap, mp, pseg,
4264		    &nsegs, BUS_DMA_NOWAIT);
4265		if (error != 0)
4266                        goto fail;
4267		bus_dmamap_sync(rxr->ptag,
4268		    rxbuf->pmap, BUS_DMASYNC_PREREAD);
4269		/* Update descriptor */
4270		rxr->rx_base[j].read.pkt_addr = htole64(pseg[0].ds_addr);
4271        }
4272
4273	/* Setup our descriptor indices */
4274	rxr->next_to_check = 0;
4275	rxr->next_to_refresh = adapter->num_rx_desc - 1;
4276	rxr->lro_enabled = FALSE;
4277	rxr->rx_split_packets = 0;
4278	rxr->rx_bytes = 0;
4279
4280	rxr->fmp = NULL;
4281	rxr->lmp = NULL;
4282	rxr->discard = FALSE;
4283
4284	bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4285	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4286
4287	/*
4288	** Now set up the LRO interface; we
4289	** also only do header split when LRO
4290	** is enabled, since so often it is
4291	** undesirable in similar setups.
4292	*/
4293	if (ifp->if_capenable & IFCAP_LRO) {
4294		error = tcp_lro_init(lro);
4295		if (error) {
4296			device_printf(dev, "LRO Initialization failed!\n");
4297			goto fail;
4298		}
4299		INIT_DEBUGOUT("RX LRO Initialized\n");
4300		rxr->lro_enabled = TRUE;
4301		lro->ifp = adapter->ifp;
4302	}
4303
4304	IGB_RX_UNLOCK(rxr);
4305	return (0);
4306
4307fail:
4308	igb_free_receive_ring(rxr);
4309	IGB_RX_UNLOCK(rxr);
4310	return (error);
4311}
4312
4313
4314/*********************************************************************
4315 *
4316 *  Initialize all receive rings.
4317 *
4318 **********************************************************************/
4319static int
4320igb_setup_receive_structures(struct adapter *adapter)
4321{
4322	struct rx_ring *rxr = adapter->rx_rings;
4323	int i;
4324
4325	for (i = 0; i < adapter->num_queues; i++, rxr++)
4326		if (igb_setup_receive_ring(rxr))
4327			goto fail;
4328
4329	return (0);
4330fail:
4331	/*
4332	 * Free RX buffers allocated so far; we only handle
4333	 * the rings that completed, since the failing case will have
4334	 * cleaned up for itself. 'i' is the endpoint.
4335	 */
4336	for (int j = 0; j < i; ++j) {
4337		rxr = &adapter->rx_rings[j];
4338		IGB_RX_LOCK(rxr);
4339		igb_free_receive_ring(rxr);
4340		IGB_RX_UNLOCK(rxr);
4341	}
4342
4343	return (ENOBUFS);
4344}
4345
4346/*********************************************************************
4347 *
4348 *  Enable receive unit.
4349 *
4350 **********************************************************************/
4351static void
4352igb_initialize_receive_units(struct adapter *adapter)
4353{
4354	struct rx_ring	*rxr = adapter->rx_rings;
4355	struct ifnet	*ifp = adapter->ifp;
4356	struct e1000_hw *hw = &adapter->hw;
4357	u32		rctl, rxcsum, psize, srrctl = 0;
4358
4359	INIT_DEBUGOUT("igb_initialize_receive_unit: begin");
4360
4361	/*
4362	 * Make sure receives are disabled while setting
4363	 * up the descriptor ring
4364	 */
4365	rctl = E1000_READ_REG(hw, E1000_RCTL);
4366	E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
4367
4368	/*
4369	** Set up for header split
4370	*/
4371	if (igb_header_split) {
4372		/* Use a standard mbuf for the header */
4373		srrctl |= IGB_HDR_BUF << E1000_SRRCTL_BSIZEHDRSIZE_SHIFT;
4374		srrctl |= E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
4375	} else
4376		srrctl |= E1000_SRRCTL_DESCTYPE_ADV_ONEBUF;
4377
4378	/*
4379	** Set up for jumbo frames
4380	*/
4381	if (ifp->if_mtu > ETHERMTU) {
4382		rctl |= E1000_RCTL_LPE;
4383		if (adapter->rx_mbuf_sz == MJUMPAGESIZE) {
4384			srrctl |= 4096 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
4385			rctl |= E1000_RCTL_SZ_4096 | E1000_RCTL_BSEX;
4386		} else if (adapter->rx_mbuf_sz > MJUMPAGESIZE) {
4387			srrctl |= 8192 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
4388			rctl |= E1000_RCTL_SZ_8192 | E1000_RCTL_BSEX;
4389		}
4390		/* Set maximum packet len */
4391		psize = adapter->max_frame_size;
4392		/* are we on a vlan? */
4393		if (adapter->ifp->if_vlantrunk != NULL)
4394			psize += VLAN_TAG_SIZE;
4395		E1000_WRITE_REG(&adapter->hw, E1000_RLPML, psize);
4396	} else {
4397		rctl &= ~E1000_RCTL_LPE;
4398		srrctl |= 2048 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
4399		rctl |= E1000_RCTL_SZ_2048;
4400	}
4401
4402	/* Setup the Base and Length of the Rx Descriptor Rings */
4403	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4404		u64 bus_addr = rxr->rxdma.dma_paddr;
4405		u32 rxdctl;
4406
4407		E1000_WRITE_REG(hw, E1000_RDLEN(i),
4408		    adapter->num_rx_desc * sizeof(struct e1000_rx_desc));
4409		E1000_WRITE_REG(hw, E1000_RDBAH(i),
4410		    (uint32_t)(bus_addr >> 32));
4411		E1000_WRITE_REG(hw, E1000_RDBAL(i),
4412		    (uint32_t)bus_addr);
4413		E1000_WRITE_REG(hw, E1000_SRRCTL(i), srrctl);
4414		/* Enable this Queue */
4415		rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(i));
4416		rxdctl |= E1000_RXDCTL_QUEUE_ENABLE;
4417		rxdctl &= 0xFFF00000;
4418		rxdctl |= IGB_RX_PTHRESH;
4419		rxdctl |= IGB_RX_HTHRESH << 8;
4420		rxdctl |= IGB_RX_WTHRESH << 16;
4421		E1000_WRITE_REG(hw, E1000_RXDCTL(i), rxdctl);
4422	}
4423
4424	/*
4425	** Setup for RX MultiQueue
4426	*/
4427	rxcsum = E1000_READ_REG(hw, E1000_RXCSUM);
4428	if (adapter->num_queues > 1) {
4429		u32 random[10], mrqc, shift = 0;
4430		union igb_reta {
4431			u32 dword;
4432			u8  bytes[4];
4433		} reta;
4434
4435		arc4rand(&random, sizeof(random), 0);
4436		if (adapter->hw.mac.type == e1000_82575)
4437			shift = 6;
4438		/* Warning FM follows */
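		/*
		** Fill the 128-entry redirection table, four entries
		** per register write, spreading RSS buckets round-robin
		** across the queues; the 82575 keeps its queue index in
		** a higher bit position, hence the shift.
		*/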
4439		for (int i = 0; i < 128; i++) {
4440			reta.bytes[i & 3] =
4441			    (i % adapter->num_queues) << shift;
4442			if ((i & 3) == 3)
4443				E1000_WRITE_REG(hw,
4444				    E1000_RETA(i >> 2), reta.dword);
4445		}
4446		/* Now program the RSS hash key */
4447		mrqc = E1000_MRQC_ENABLE_RSS_4Q;
4448		for (int i = 0; i < 10; i++)
4449			E1000_WRITE_REG_ARRAY(hw,
4450			    E1000_RSSRK(0), i, random[i]);
4451
4452		mrqc |= (E1000_MRQC_RSS_FIELD_IPV4 |
4453		    E1000_MRQC_RSS_FIELD_IPV4_TCP);
4454		mrqc |= (E1000_MRQC_RSS_FIELD_IPV6 |
4455		    E1000_MRQC_RSS_FIELD_IPV6_TCP);
4456		mrqc |= (E1000_MRQC_RSS_FIELD_IPV4_UDP |
4457		    E1000_MRQC_RSS_FIELD_IPV6_UDP);
4458		mrqc |= (E1000_MRQC_RSS_FIELD_IPV6_UDP_EX |
4459		    E1000_MRQC_RSS_FIELD_IPV6_TCP_EX);
4460
4461		E1000_WRITE_REG(hw, E1000_MRQC, mrqc);
4462
4463		/*
4464		** NOTE: Receive Full-Packet Checksum Offload
4465		** is mutually exclusive with Multiqueue. However
4466		** this is not the same as TCP/IP checksums which
4467		** still work.
4468		*/
4469		rxcsum |= E1000_RXCSUM_PCSD;
4470#if __FreeBSD_version >= 800000
4471		/* For SCTP Offload */
4472		if ((hw->mac.type == e1000_82576)
4473		    && (ifp->if_capenable & IFCAP_RXCSUM))
4474			rxcsum |= E1000_RXCSUM_CRCOFL;
4475#endif
4476	} else {
4477		/* Non RSS setup */
4478		if (ifp->if_capenable & IFCAP_RXCSUM) {
4479			rxcsum |= E1000_RXCSUM_IPPCSE;
4480#if __FreeBSD_version >= 800000
4481			if (adapter->hw.mac.type == e1000_82576)
4482				rxcsum |= E1000_RXCSUM_CRCOFL;
4483#endif
4484		} else
4485			rxcsum &= ~E1000_RXCSUM_TUOFL;
4486	}
4487	E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum);
4488
4489	/* Setup the Receive Control Register */
4490	rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
4491	rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_LBM_NO |
4492		   E1000_RCTL_RDMTS_HALF |
4493		   (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
4494	/* Strip CRC bytes. */
4495	rctl |= E1000_RCTL_SECRC;
4496	/* Make sure VLAN Filters are off */
4497	rctl &= ~E1000_RCTL_VFE;
4498	/* Don't store bad packets */
4499	rctl &= ~E1000_RCTL_SBP;
4500
4501	/* Enable Receives */
4502	E1000_WRITE_REG(hw, E1000_RCTL, rctl);
4503
4504	/*
4505	 * Setup the HW Rx Head and Tail Descriptor Pointers
4506	 *   - needs to be after enable
4507	 */
4508	for (int i = 0; i < adapter->num_queues; i++) {
4509		rxr = &adapter->rx_rings[i];
4510		E1000_WRITE_REG(hw, E1000_RDH(i), rxr->next_to_check);
4511#ifdef DEV_NETMAP
4512		/*
4513		 * an init() while a netmap client is active must
4514		 * preserve the rx buffers passed to userspace.
4515		 * In this driver it means we adjust RDT to
4516		 * something different from next_to_refresh
4517		 * (which is not used in netmap mode).
4518		 */
4519		if (ifp->if_capenable & IFCAP_NETMAP) {
4520			struct netmap_adapter *na = NA(adapter->ifp);
4521			struct netmap_kring *kring = &na->rx_rings[i];
4522			int t = rxr->next_to_refresh - kring->nr_hwavail;
4523
4524			if (t >= adapter->num_rx_desc)
4525				t -= adapter->num_rx_desc;
4526			else if (t < 0)
4527				t += adapter->num_rx_desc;
4528			E1000_WRITE_REG(hw, E1000_RDT(i), t);
4529		} else
4530#endif /* DEV_NETMAP */
4531		E1000_WRITE_REG(hw, E1000_RDT(i), rxr->next_to_refresh);
4532	}
4533	return;
4534}
4535
4536/*********************************************************************
4537 *
4538 *  Free receive rings.
4539 *
4540 **********************************************************************/
4541static void
4542igb_free_receive_structures(struct adapter *adapter)
4543{
4544	struct rx_ring *rxr = adapter->rx_rings;
4545
4546	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4547		struct lro_ctrl	*lro = &rxr->lro;
4548		igb_free_receive_buffers(rxr);
4549		tcp_lro_free(lro);
4550		igb_dma_free(adapter, &rxr->rxdma);
4551	}
4552
4553	free(adapter->rx_rings, M_DEVBUF);
4554}
4555
4556/*********************************************************************
4557 *
4558 *  Free receive ring data structures.
4559 *
4560 **********************************************************************/
4561static void
4562igb_free_receive_buffers(struct rx_ring *rxr)
4563{
4564	struct adapter		*adapter = rxr->adapter;
4565	struct igb_rx_buf	*rxbuf;
4566	int i;
4567
4568	INIT_DEBUGOUT("free_receive_structures: begin");
4569
4570	/* Cleanup any existing buffers */
4571	if (rxr->rx_buffers != NULL) {
4572		for (i = 0; i < adapter->num_rx_desc; i++) {
4573			rxbuf = &rxr->rx_buffers[i];
4574			if (rxbuf->m_head != NULL) {
4575				bus_dmamap_sync(rxr->htag, rxbuf->hmap,
4576				    BUS_DMASYNC_POSTREAD);
4577				bus_dmamap_unload(rxr->htag, rxbuf->hmap);
4578				rxbuf->m_head->m_flags |= M_PKTHDR;
4579				m_freem(rxbuf->m_head);
4580			}
4581			if (rxbuf->m_pack != NULL) {
4582				bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
4583				    BUS_DMASYNC_POSTREAD);
4584				bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
4585				rxbuf->m_pack->m_flags |= M_PKTHDR;
4586				m_freem(rxbuf->m_pack);
4587			}
4588			rxbuf->m_head = NULL;
4589			rxbuf->m_pack = NULL;
4590			if (rxbuf->hmap != NULL) {
4591				bus_dmamap_destroy(rxr->htag, rxbuf->hmap);
4592				rxbuf->hmap = NULL;
4593			}
4594			if (rxbuf->pmap != NULL) {
4595				bus_dmamap_destroy(rxr->ptag, rxbuf->pmap);
4596				rxbuf->pmap = NULL;
4597			}
4598		}
4599		if (rxr->rx_buffers != NULL) {
4600			free(rxr->rx_buffers, M_DEVBUF);
4601			rxr->rx_buffers = NULL;
4602		}
4603	}
4604
4605	if (rxr->htag != NULL) {
4606		bus_dma_tag_destroy(rxr->htag);
4607		rxr->htag = NULL;
4608	}
4609	if (rxr->ptag != NULL) {
4610		bus_dma_tag_destroy(rxr->ptag);
4611		rxr->ptag = NULL;
4612	}
4613}
4614
4615static __inline void
4616igb_rx_discard(struct rx_ring *rxr, int i)
4617{
4618	struct igb_rx_buf	*rbuf;
4619
4620	rbuf = &rxr->rx_buffers[i];
4621
4622	/* Partially received? Free the chain */
4623	if (rxr->fmp != NULL) {
4624		rxr->fmp->m_flags |= M_PKTHDR;
4625		m_freem(rxr->fmp);
4626		rxr->fmp = NULL;
4627		rxr->lmp = NULL;
4628	}
4629
4630	/*
4631	** With advanced descriptors the writeback
4632	** clobbers the buffer addrs, so it's easier
4633	** to just free the existing mbufs and take
4634	** the normal refresh path to get new buffers
4635	** and mapping.
4636	*/
4637	if (rbuf->m_head) {
4638		m_free(rbuf->m_head);
4639		rbuf->m_head = NULL;
4640	}
4641
4642	if (rbuf->m_pack) {
4643		m_free(rbuf->m_pack);
4644		rbuf->m_pack = NULL;
4645	}
4646
4647	return;
4648}
4649
4650static __inline void
4651igb_rx_input(struct rx_ring *rxr, struct ifnet *ifp, struct mbuf *m, u32 ptype)
4652{
4653
4654	/*
4655	 * ATM LRO is only for IPv4/TCP packets and the TCP checksum of the packet
4656	 * should be computed by hardware. Also it should not have a VLAN tag in
4657	 * the ethernet header.
4658	 */
4659	if (rxr->lro_enabled &&
4660	    (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 &&
4661	    (ptype & E1000_RXDADV_PKTTYPE_ETQF) == 0 &&
4662	    (ptype & (E1000_RXDADV_PKTTYPE_IPV4 | E1000_RXDADV_PKTTYPE_TCP)) ==
4663	    (E1000_RXDADV_PKTTYPE_IPV4 | E1000_RXDADV_PKTTYPE_TCP) &&
4664	    (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) ==
4665	    (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) {
4666		/*
4667		 * Send to the stack if:
4668		 **  - LRO not enabled, or
4669		 **  - no LRO resources, or
4670		 **  - lro enqueue fails
4671		 */
4672		if (rxr->lro.lro_cnt != 0)
4673			if (tcp_lro_rx(&rxr->lro, m, 0) == 0)
4674				return;
4675	}
4676	IGB_RX_UNLOCK(rxr);
4677	(*ifp->if_input)(ifp, m);
4678	IGB_RX_LOCK(rxr);
4679}
4680
4681/*********************************************************************
4682 *
4683 *  This routine executes in interrupt context. It replenishes
4684 *  the mbufs in the descriptor and sends data which has been
4685 *  dma'ed into host memory to upper layer.
4686 *
4687 *  We loop at most count times if count is > 0, or until done if
4688 *  count < 0.
4689 *
4690 *  Return TRUE if more to clean, FALSE otherwise
4691 *********************************************************************/
4692static bool
4693igb_rxeof(struct igb_queue *que, int count, int *done)
4694{
4695	struct adapter		*adapter = que->adapter;
4696	struct rx_ring		*rxr = que->rxr;
4697	struct ifnet		*ifp = adapter->ifp;
4698	struct lro_ctrl		*lro = &rxr->lro;
4699	struct lro_entry	*queued;
4700	int			i, processed = 0, rxdone = 0;
4701	u32			ptype, staterr = 0;
4702	union e1000_adv_rx_desc	*cur;
4703
4704	IGB_RX_LOCK(rxr);
4705	/* Sync the ring. */
4706	bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4707	    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
4708
4709#ifdef DEV_NETMAP
4710	if (ifp->if_capenable & IFCAP_NETMAP) {
4711		struct netmap_adapter *na = NA(ifp);
4712
4713		na->rx_rings[rxr->me].nr_kflags |= NKR_PENDINTR;
4714		selwakeuppri(&na->rx_rings[rxr->me].si, PI_NET);
4715		IGB_RX_UNLOCK(rxr);
4716		IGB_CORE_LOCK(adapter);
4717		selwakeuppri(&na->rx_si, PI_NET);
4718		IGB_CORE_UNLOCK(adapter);
4719		return (0);
4720	}
4721#endif /* DEV_NETMAP */
4722
4723	/* Main clean loop */
4724	for (i = rxr->next_to_check; count != 0;) {
4725		struct mbuf		*sendmp, *mh, *mp;
4726		struct igb_rx_buf	*rxbuf;
4727		u16			hlen, plen, hdr, vtag;
4728		bool			eop = FALSE;
4729
4730		cur = &rxr->rx_base[i];
4731		staterr = le32toh(cur->wb.upper.status_error);
4732		if ((staterr & E1000_RXD_STAT_DD) == 0)
4733			break;
4734		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
4735			break;
4736		count--;
4737		sendmp = mh = mp = NULL;
4738		cur->wb.upper.status_error = 0;
4739		rxbuf = &rxr->rx_buffers[i];
4740		plen = le16toh(cur->wb.upper.length);
4741		ptype = le32toh(cur->wb.lower.lo_dword.data) & IGB_PKTTYPE_MASK;
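		/* i350 loopback (VM to VM) packets carry the VLAN tag big-endian */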
4742		if ((adapter->hw.mac.type == e1000_i350) &&
4743		    (staterr & E1000_RXDEXT_STATERR_LB))
4744			vtag = be16toh(cur->wb.upper.vlan);
4745		else
4746			vtag = le16toh(cur->wb.upper.vlan);
4747		hdr = le16toh(cur->wb.lower.lo_dword.hs_rss.hdr_info);
4748		eop = ((staterr & E1000_RXD_STAT_EOP) == E1000_RXD_STAT_EOP);
4749
4750		/* Make sure all segments of a bad packet are discarded */
4751		if (((staterr & E1000_RXDEXT_ERR_FRAME_ERR_MASK) != 0) ||
4752		    (rxr->discard)) {
4753			ifp->if_ierrors++;
4754			++rxr->rx_discarded;
4755			if (!eop) /* Catch subsequent segs */
4756				rxr->discard = TRUE;
4757			else
4758				rxr->discard = FALSE;
4759			igb_rx_discard(rxr, i);
4760			goto next_desc;
4761		}
4762
4763		/*
4764		** The way the hardware is configured to
4765		** split, it will ONLY use the header buffer
4766		** when header split is enabled; otherwise we
4767		** get normal behavior, i.e., both header and
4768		** payload are DMA'd into the payload buffer.
4769		**
4770		** The fmp test is to catch the case where a
4771		** packet spans multiple descriptors, in that
4772		** case only the first header is valid.
4773		*/
4774		if (rxr->hdr_split && rxr->fmp == NULL) {
4775			hlen = (hdr & E1000_RXDADV_HDRBUFLEN_MASK) >>
4776			    E1000_RXDADV_HDRBUFLEN_SHIFT;
4777			if (hlen > IGB_HDR_BUF)
4778				hlen = IGB_HDR_BUF;
4779			mh = rxr->rx_buffers[i].m_head;
4780			mh->m_len = hlen;
4781			/* clear buf pointer for refresh */
4782			rxbuf->m_head = NULL;
4783			/*
4784			** Get the payload length, this
4785			** could be zero if it's a small
4786			** packet.
4787			*/
4788			if (plen > 0) {
4789				mp = rxr->rx_buffers[i].m_pack;
4790				mp->m_len = plen;
4791				mh->m_next = mp;
4792				/* clear buf pointer */
4793				rxbuf->m_pack = NULL;
4794				rxr->rx_split_packets++;
4795			}
4796		} else {
4797			/*
4798			** Either no header split, or a
4799			** secondary piece of a fragmented
4800			** split packet.
4801			*/
4802			mh = rxr->rx_buffers[i].m_pack;
4803			mh->m_len = plen;
4804			/* clear buf info for refresh */
4805			rxbuf->m_pack = NULL;
4806		}
4807
4808		++processed; /* So we know when to refresh */
4809
4810		/* Initial frame - setup */
4811		if (rxr->fmp == NULL) {
4812			mh->m_pkthdr.len = mh->m_len;
4813			/* Save the head of the chain */
4814			rxr->fmp = mh;
4815			rxr->lmp = mh;
4816			if (mp != NULL) {
4817				/* Add payload if split */
4818				mh->m_pkthdr.len += mp->m_len;
4819				rxr->lmp = mh->m_next;
4820			}
4821		} else {
4822			/* Chain mbuf's together */
4823			rxr->lmp->m_next = mh;
4824			rxr->lmp = rxr->lmp->m_next;
4825			rxr->fmp->m_pkthdr.len += mh->m_len;
4826		}
4827
4828		if (eop) {
4829			rxr->fmp->m_pkthdr.rcvif = ifp;
4830			ifp->if_ipackets++;
4831			rxr->rx_packets++;
4832			/* capture data for AIM */
4833			rxr->packets++;
4834			rxr->bytes += rxr->fmp->m_pkthdr.len;
4835			rxr->rx_bytes += rxr->fmp->m_pkthdr.len;
4836
4837			if ((ifp->if_capenable & IFCAP_RXCSUM) != 0)
4838				igb_rx_checksum(staterr, rxr->fmp, ptype);
4839
4840			if ((ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 &&
4841			    (staterr & E1000_RXD_STAT_VP) != 0) {
4842				rxr->fmp->m_pkthdr.ether_vtag = vtag;
4843				rxr->fmp->m_flags |= M_VLANTAG;
4844			}
4845#if __FreeBSD_version >= 800000
4846			rxr->fmp->m_pkthdr.flowid = que->msix;
4847			rxr->fmp->m_flags |= M_FLOWID;
4848#endif
4849			sendmp = rxr->fmp;
4850			/* Make sure to set M_PKTHDR. */
4851			sendmp->m_flags |= M_PKTHDR;
4852			rxr->fmp = NULL;
4853			rxr->lmp = NULL;
4854		}
4855
4856next_desc:
4857		bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4858		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4859
4860		/* Advance our pointers to the next descriptor. */
4861		if (++i == adapter->num_rx_desc)
4862			i = 0;
4863		/*
4864		** Send to the stack or LRO
4865		*/
4866		if (sendmp != NULL) {
4867			rxr->next_to_check = i;
4868			igb_rx_input(rxr, ifp, sendmp, ptype);
4869			i = rxr->next_to_check;
4870			rxdone++;
4871		}
4872
4873		/* Every 8 descriptors we go to refresh mbufs */
4874		if (processed == 8) {
4875                        igb_refresh_mbufs(rxr, i);
4876                        processed = 0;
4877		}
4878	}
4879
4880	/* Catch any remainders */
4881	if (igb_rx_unrefreshed(rxr))
4882		igb_refresh_mbufs(rxr, i);
4883
4884	rxr->next_to_check = i;
4885
4886	/*
4887	 * Flush any outstanding LRO work
4888	 */
4889	while ((queued = SLIST_FIRST(&lro->lro_active)) != NULL) {
4890		SLIST_REMOVE_HEAD(&lro->lro_active, next);
4891		tcp_lro_flush(lro, queued);
4892	}
4893
4894	if (done != NULL)
4895		*done = rxdone;
4896
4897	IGB_RX_UNLOCK(rxr);
4898	return ((staterr & E1000_RXD_STAT_DD) ? TRUE : FALSE);
4899}
4900
4901/*********************************************************************
4902 *
4903 *  Verify that the hardware indicated that the checksum is valid.
4904 *  Inform the stack about the status of the checksum so that the stack
4905 *  doesn't spend time verifying the checksum.
4906 *
4907 *********************************************************************/
4908static void
4909igb_rx_checksum(u32 staterr, struct mbuf *mp, u32 ptype)
4910{
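	/* Status bits are the low word of staterr, error bits the top byte */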
4911	u16 status = (u16)staterr;
4912	u8  errors = (u8) (staterr >> 24);
4913	int sctp;
4914
4915	/* Ignore Checksum bit is set */
4916	if (status & E1000_RXD_STAT_IXSM) {
4917		mp->m_pkthdr.csum_flags = 0;
4918		return;
4919	}
4920
4921	if ((ptype & E1000_RXDADV_PKTTYPE_ETQF) == 0 &&
4922	    (ptype & E1000_RXDADV_PKTTYPE_SCTP) != 0)
4923		sctp = 1;
4924	else
4925		sctp = 0;
4926	if (status & E1000_RXD_STAT_IPCS) {
4927		/* Did it pass? */
4928		if (!(errors & E1000_RXD_ERR_IPE)) {
4929			/* IP Checksum Good */
4930			mp->m_pkthdr.csum_flags = CSUM_IP_CHECKED;
4931			mp->m_pkthdr.csum_flags |= CSUM_IP_VALID;
4932		} else
4933			mp->m_pkthdr.csum_flags = 0;
4934	}
4935
4936	if (status & (E1000_RXD_STAT_TCPCS | E1000_RXD_STAT_UDPCS)) {
4937		u16 type = (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
4938#if __FreeBSD_version >= 800000
4939		if (sctp) /* reassign */
4940			type = CSUM_SCTP_VALID;
4941#endif
4942		/* Did it pass? */
4943		if (!(errors & E1000_RXD_ERR_TCPE)) {
4944			mp->m_pkthdr.csum_flags |= type;
4945			if (sctp == 0)
4946				mp->m_pkthdr.csum_data = htons(0xffff);
4947		}
4948	}
4949	return;
4950}
4951
4952/*
4953 * This routine is run via a vlan
4954 * config EVENT
4955 */
4956static void
4957igb_register_vlan(void *arg, struct ifnet *ifp, u16 vtag)
4958{
4959	struct adapter	*adapter = ifp->if_softc;
4960	u32		index, bit;
4961
4962	if (ifp->if_softc !=  arg)   /* Not our event */
4963		return;
4964
4965	if ((vtag == 0) || (vtag > 4095))       /* Invalid */
4966                return;
4967
4968	IGB_CORE_LOCK(adapter);
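	/* Each 32-bit VFTA register covers 32 VLAN ids; the upper bits
	   of the tag select the register, the low 5 bits the bit in it */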
4969	index = (vtag >> 5) & 0x7F;
4970	bit = vtag & 0x1F;
4971	adapter->shadow_vfta[index] |= (1 << bit);
4972	++adapter->num_vlans;
4973	/* Change hw filter setting */
4974	if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
4975		igb_setup_vlan_hw_support(adapter);
4976	IGB_CORE_UNLOCK(adapter);
4977}
4978
4979/*
4980 * This routine is run via a vlan
4981 * unconfig EVENT
4982 */
4983static void
4984igb_unregister_vlan(void *arg, struct ifnet *ifp, u16 vtag)
4985{
4986	struct adapter	*adapter = ifp->if_softc;
4987	u32		index, bit;
4988
4989	if (ifp->if_softc !=  arg)
4990		return;
4991
4992	if ((vtag == 0) || (vtag > 4095))       /* Invalid */
4993                return;
4994
4995	IGB_CORE_LOCK(adapter);
4996	index = (vtag >> 5) & 0x7F;
4997	bit = vtag & 0x1F;
4998	adapter->shadow_vfta[index] &= ~(1 << bit);
4999	--adapter->num_vlans;
5000	/* Change hw filter setting */
5001	if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
5002		igb_setup_vlan_hw_support(adapter);
5003	IGB_CORE_UNLOCK(adapter);
5004}
5005
5006static void
5007igb_setup_vlan_hw_support(struct adapter *adapter)
5008{
5009	struct e1000_hw *hw = &adapter->hw;
5010	struct ifnet	*ifp = adapter->ifp;
5011	u32             reg;
5012
5013	if (adapter->vf_ifp) {
5014		e1000_rlpml_set_vf(hw,
5015		    adapter->max_frame_size + VLAN_TAG_SIZE);
5016		return;
5017	}
5018
5019	reg = E1000_READ_REG(hw, E1000_CTRL);
5020	reg |= E1000_CTRL_VME;
5021	E1000_WRITE_REG(hw, E1000_CTRL, reg);
5022
5023	/* Enable the Filter Table */
5024	if (ifp->if_capenable & IFCAP_VLAN_HWFILTER) {
5025		reg = E1000_READ_REG(hw, E1000_RCTL);
5026		reg &= ~E1000_RCTL_CFIEN;
5027		reg |= E1000_RCTL_VFE;
5028		E1000_WRITE_REG(hw, E1000_RCTL, reg);
5029	}
5030
5031	/* Update the frame size */
5032	E1000_WRITE_REG(&adapter->hw, E1000_RLPML,
5033	    adapter->max_frame_size + VLAN_TAG_SIZE);
5034
5035	/* Don't bother with table if no vlans */
5036	if ((adapter->num_vlans == 0) ||
5037	    ((ifp->if_capenable & IFCAP_VLAN_HWFILTER) == 0))
5038                return;
5039	/*
5040	** A soft reset zeroes out the VFTA, so
5041	** we need to repopulate it now.
5042	*/
5043	for (int i = 0; i < IGB_VFTA_SIZE; i++)
5044                if (adapter->shadow_vfta[i] != 0) {
5045			if (adapter->vf_ifp)
5046				e1000_vfta_set_vf(hw,
5047				    adapter->shadow_vfta[i], TRUE);
5048			else
5049				e1000_write_vfta(hw,
5050				    i, adapter->shadow_vfta[i]);
5051		}
5052}
5053
5054static void
5055igb_enable_intr(struct adapter *adapter)
5056{
5057	/* With RSS, set up what to auto clear */
5058	if (adapter->msix_mem) {
5059		u32 mask = (adapter->que_mask | adapter->link_mask);
5060		E1000_WRITE_REG(&adapter->hw, E1000_EIAC, mask);
5061		E1000_WRITE_REG(&adapter->hw, E1000_EIAM, mask);
5062		E1000_WRITE_REG(&adapter->hw, E1000_EIMS, mask);
5063		E1000_WRITE_REG(&adapter->hw, E1000_IMS,
5064		    E1000_IMS_LSC);
5065	} else {
5066		E1000_WRITE_REG(&adapter->hw, E1000_IMS,
5067		    IMS_ENABLE_MASK);
5068	}
5069	E1000_WRITE_FLUSH(&adapter->hw);
5070
5071	return;
5072}
5073
5074static void
5075igb_disable_intr(struct adapter *adapter)
5076{
5077	if (adapter->msix_mem) {
5078		E1000_WRITE_REG(&adapter->hw, E1000_EIMC, ~0);
5079		E1000_WRITE_REG(&adapter->hw, E1000_EIAC, 0);
5080	}
5081	E1000_WRITE_REG(&adapter->hw, E1000_IMC, ~0);
5082	E1000_WRITE_FLUSH(&adapter->hw);
5083	return;
5084}
5085
5086/*
5087 * Bit of a misnomer, what this really means is
5088 * to enable OS management of the system... aka
5089 * to disable special hardware management features
5090 */
5091static void
5092igb_init_manageability(struct adapter *adapter)
5093{
5094	if (adapter->has_manage) {
5095		int manc2h = E1000_READ_REG(&adapter->hw, E1000_MANC2H);
5096		int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
5097
5098		/* disable hardware interception of ARP */
5099		manc &= ~(E1000_MANC_ARP_EN);
5100
5101                /* enable receiving management packets to the host */
5102		manc |= E1000_MANC_EN_MNG2HOST;
5103		manc2h |= 1 << 5;  /* Mng Port 623 */
5104		manc2h |= 1 << 6;  /* Mng Port 664 */
5105		E1000_WRITE_REG(&adapter->hw, E1000_MANC2H, manc2h);
5106		E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
5107	}
5108}
5109
5110/*
5111 * Give control back to hardware management
5112 * controller if there is one.
5113 */
5114static void
5115igb_release_manageability(struct adapter *adapter)
5116{
5117	if (adapter->has_manage) {
5118		int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
5119
5120		/* re-enable hardware interception of ARP */
5121		manc |= E1000_MANC_ARP_EN;
5122		manc &= ~E1000_MANC_EN_MNG2HOST;
5123
5124		E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
5125	}
5126}
5127
5128/*
5129 * igb_get_hw_control sets CTRL_EXT:DRV_LOAD bit.
5130 * For ASF and Pass Through versions of f/w this means that
5131 * the driver is loaded.
5132 *
5133 */
5134static void
5135igb_get_hw_control(struct adapter *adapter)
5136{
5137	u32 ctrl_ext;
5138
5139	if (adapter->vf_ifp)
5140		return;
5141
5142	/* Let firmware know the driver has taken over */
5143	ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
5144	E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
5145	    ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
5146}
5147
5148/*
5149 * igb_release_hw_control resets CTRL_EXT:DRV_LOAD bit.
5150 * For ASF and Pass Through versions of f/w this means that the
5151 * driver is no longer loaded.
5152 *
5153 */
5154static void
5155igb_release_hw_control(struct adapter *adapter)
5156{
5157	u32 ctrl_ext;
5158
5159	if (adapter->vf_ifp)
5160		return;
5161
5162	/* Let firmware take over control of h/w */
5163	ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
5164	E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
5165	    ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
5166}
5167
5168static int
5169igb_is_valid_ether_addr(uint8_t *addr)
5170{
5171	char zero_addr[6] = { 0, 0, 0, 0, 0, 0 };
5172
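	/* Reject multicast/broadcast (low bit of the first octet set)
	   and the all-zero address */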
5173	if ((addr[0] & 1) || (!bcmp(addr, zero_addr, ETHER_ADDR_LEN))) {
5174		return (FALSE);
5175	}
5176
5177	return (TRUE);
5178}
5179
5180
5181/*
5182 * Enable PCI Wake On Lan capability
5183 */
5184static void
5185igb_enable_wakeup(device_t dev)
5186{
5187	u16     cap, status;
5188	u8      id;
5189
5190	/* First find the capabilities pointer */
5191	cap = pci_read_config(dev, PCIR_CAP_PTR, 2);
5192	/* Read the PM Capabilities */
5193	id = pci_read_config(dev, cap, 1);
5194	if (id != PCIY_PMG)     /* Something wrong */
5195		return;
5196	/* OK, we have the power capabilities, so
5197	   now get the status register */
5198	cap += PCIR_POWER_STATUS;
5199	status = pci_read_config(dev, cap, 2);
5200	status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE;
5201	pci_write_config(dev, cap, status, 2);
5202	return;
5203}
5204
5205static void
5206igb_led_func(void *arg, int onoff)
5207{
5208	struct adapter	*adapter = arg;
5209
5210	IGB_CORE_LOCK(adapter);
5211	if (onoff) {
5212		e1000_setup_led(&adapter->hw);
5213		e1000_led_on(&adapter->hw);
5214	} else {
5215		e1000_led_off(&adapter->hw);
5216		e1000_cleanup_led(&adapter->hw);
5217	}
5218	IGB_CORE_UNLOCK(adapter);
5219}
5220
5221/**********************************************************************
5222 *
5223 *  Update the board statistics counters.
5224 *
5225 **********************************************************************/
5226static void
5227igb_update_stats_counters(struct adapter *adapter)
5228{
5229	struct ifnet		*ifp;
5230        struct e1000_hw		*hw = &adapter->hw;
5231	struct e1000_hw_stats	*stats;
5232
5233	/*
5234	** The virtual function adapter has only a
5235	** small controlled set of stats; do only
5236	** those and return.
5237	*/
5238	if (adapter->vf_ifp) {
5239		igb_update_vf_stats_counters(adapter);
5240		return;
5241	}
5242
5243	stats = (struct e1000_hw_stats	*)adapter->stats;
5244
5245	if (adapter->hw.phy.media_type == e1000_media_type_copper ||
5246	   (E1000_READ_REG(hw, E1000_STATUS) & E1000_STATUS_LU)) {
5247		stats->symerrs +=
5248		    E1000_READ_REG(hw,E1000_SYMERRS);
5249		stats->sec += E1000_READ_REG(hw, E1000_SEC);
5250	}
5251
5252	stats->crcerrs += E1000_READ_REG(hw, E1000_CRCERRS);
5253	stats->mpc += E1000_READ_REG(hw, E1000_MPC);
5254	stats->scc += E1000_READ_REG(hw, E1000_SCC);
5255	stats->ecol += E1000_READ_REG(hw, E1000_ECOL);
5256
5257	stats->mcc += E1000_READ_REG(hw, E1000_MCC);
5258	stats->latecol += E1000_READ_REG(hw, E1000_LATECOL);
5259	stats->colc += E1000_READ_REG(hw, E1000_COLC);
5260	stats->dc += E1000_READ_REG(hw, E1000_DC);
5261	stats->rlec += E1000_READ_REG(hw, E1000_RLEC);
5262	stats->xonrxc += E1000_READ_REG(hw, E1000_XONRXC);
5263	stats->xontxc += E1000_READ_REG(hw, E1000_XONTXC);
5264	/*
5265	** For watchdog management we need to know if we have been
5266	** paused during the last interval, so capture that here.
5267	*/
5268        adapter->pause_frames = E1000_READ_REG(&adapter->hw, E1000_XOFFRXC);
5269        stats->xoffrxc += adapter->pause_frames;
5270	stats->xofftxc += E1000_READ_REG(hw, E1000_XOFFTXC);
5271	stats->fcruc += E1000_READ_REG(hw, E1000_FCRUC);
5272	stats->prc64 += E1000_READ_REG(hw, E1000_PRC64);
5273	stats->prc127 += E1000_READ_REG(hw, E1000_PRC127);
5274	stats->prc255 += E1000_READ_REG(hw, E1000_PRC255);
5275	stats->prc511 += E1000_READ_REG(hw, E1000_PRC511);
5276	stats->prc1023 += E1000_READ_REG(hw, E1000_PRC1023);
5277	stats->prc1522 += E1000_READ_REG(hw, E1000_PRC1522);
5278	stats->gprc += E1000_READ_REG(hw, E1000_GPRC);
5279	stats->bprc += E1000_READ_REG(hw, E1000_BPRC);
5280	stats->mprc += E1000_READ_REG(hw, E1000_MPRC);
5281	stats->gptc += E1000_READ_REG(hw, E1000_GPTC);
5282
5283	/* For the 64-bit byte counters the low dword must be read first. */
5284	/* Both registers clear on the read of the high dword */
5285
5286	stats->gorc += E1000_READ_REG(hw, E1000_GORCL) +
5287	    ((u64)E1000_READ_REG(hw, E1000_GORCH) << 32);
5288	stats->gotc += E1000_READ_REG(hw, E1000_GOTCL) +
5289	    ((u64)E1000_READ_REG(hw, E1000_GOTCH) << 32);
5290
5291	stats->rnbc += E1000_READ_REG(hw, E1000_RNBC);
5292	stats->ruc += E1000_READ_REG(hw, E1000_RUC);
5293	stats->rfc += E1000_READ_REG(hw, E1000_RFC);
5294	stats->roc += E1000_READ_REG(hw, E1000_ROC);
5295	stats->rjc += E1000_READ_REG(hw, E1000_RJC);
5296
5297	stats->tor += E1000_READ_REG(hw, E1000_TORH);
5298	stats->tot += E1000_READ_REG(hw, E1000_TOTH);
5299
5300	stats->tpr += E1000_READ_REG(hw, E1000_TPR);
5301	stats->tpt += E1000_READ_REG(hw, E1000_TPT);
5302	stats->ptc64 += E1000_READ_REG(hw, E1000_PTC64);
5303	stats->ptc127 += E1000_READ_REG(hw, E1000_PTC127);
5304	stats->ptc255 += E1000_READ_REG(hw, E1000_PTC255);
5305	stats->ptc511 += E1000_READ_REG(hw, E1000_PTC511);
5306	stats->ptc1023 += E1000_READ_REG(hw, E1000_PTC1023);
5307	stats->ptc1522 += E1000_READ_REG(hw, E1000_PTC1522);
5308	stats->mptc += E1000_READ_REG(hw, E1000_MPTC);
5309	stats->bptc += E1000_READ_REG(hw, E1000_BPTC);
5310
5311	/* Interrupt Counts */
5312
5313	stats->iac += E1000_READ_REG(hw, E1000_IAC);
5314	stats->icrxptc += E1000_READ_REG(hw, E1000_ICRXPTC);
5315	stats->icrxatc += E1000_READ_REG(hw, E1000_ICRXATC);
5316	stats->ictxptc += E1000_READ_REG(hw, E1000_ICTXPTC);
5317	stats->ictxatc += E1000_READ_REG(hw, E1000_ICTXATC);
5318	stats->ictxqec += E1000_READ_REG(hw, E1000_ICTXQEC);
5319	stats->ictxqmtc += E1000_READ_REG(hw, E1000_ICTXQMTC);
5320	stats->icrxdmtc += E1000_READ_REG(hw, E1000_ICRXDMTC);
5321	stats->icrxoc += E1000_READ_REG(hw, E1000_ICRXOC);
5322
5323	/* Host to Card Statistics */
5324
5325	stats->cbtmpc += E1000_READ_REG(hw, E1000_CBTMPC);
5326	stats->htdpmc += E1000_READ_REG(hw, E1000_HTDPMC);
5327	stats->cbrdpc += E1000_READ_REG(hw, E1000_CBRDPC);
5328	stats->cbrmpc += E1000_READ_REG(hw, E1000_CBRMPC);
5329	stats->rpthc += E1000_READ_REG(hw, E1000_RPTHC);
5330	stats->hgptc += E1000_READ_REG(hw, E1000_HGPTC);
5331	stats->htcbdpc += E1000_READ_REG(hw, E1000_HTCBDPC);
5332	stats->hgorc += (E1000_READ_REG(hw, E1000_HGORCL) +
5333	    ((u64)E1000_READ_REG(hw, E1000_HGORCH) << 32));
5334	stats->hgotc += (E1000_READ_REG(hw, E1000_HGOTCL) +
5335	    ((u64)E1000_READ_REG(hw, E1000_HGOTCH) << 32));
5336	stats->lenerrs += E1000_READ_REG(hw, E1000_LENERRS);
5337	stats->scvpc += E1000_READ_REG(hw, E1000_SCVPC);
5338	stats->hrmpc += E1000_READ_REG(hw, E1000_HRMPC);
5339
5340	stats->algnerrc += E1000_READ_REG(hw, E1000_ALGNERRC);
5341	stats->rxerrc += E1000_READ_REG(hw, E1000_RXERRC);
5342	stats->tncrs += E1000_READ_REG(hw, E1000_TNCRS);
5343	stats->cexterr += E1000_READ_REG(hw, E1000_CEXTERR);
5344	stats->tsctc += E1000_READ_REG(hw, E1000_TSCTC);
5345	stats->tsctfc += E1000_READ_REG(hw, E1000_TSCTFC);
5346
5347	ifp = adapter->ifp;
5348	ifp->if_collisions = stats->colc;
5349
5350	/* Rx Errors */
5351	ifp->if_ierrors = adapter->dropped_pkts + stats->rxerrc +
5352	    stats->crcerrs + stats->algnerrc +
5353	    stats->ruc + stats->roc + stats->mpc + stats->cexterr;
5354
5355	/* Tx Errors */
5356	ifp->if_oerrors = stats->ecol +
5357	    stats->latecol + adapter->watchdog_events;
5358
5359	/* Driver specific counters */
5360	adapter->device_control = E1000_READ_REG(hw, E1000_CTRL);
5361	adapter->rx_control = E1000_READ_REG(hw, E1000_RCTL);
5362	adapter->int_mask = E1000_READ_REG(hw, E1000_IMS);
5363	adapter->eint_mask = E1000_READ_REG(hw, E1000_EIMS);
5364	adapter->packet_buf_alloc_tx =
5365	    ((E1000_READ_REG(hw, E1000_PBA) & 0xffff0000) >> 16);
5366	adapter->packet_buf_alloc_rx =
5367	    (E1000_READ_REG(hw, E1000_PBA) & 0xffff);
5368}
5369
5370
5371/**********************************************************************
5372 *
5373 *  Initialize the VF board statistics counters.
5374 *
5375 **********************************************************************/
5376static void
5377igb_vf_init_stats(struct adapter *adapter)
5378{
5379	struct e1000_hw		*hw = &adapter->hw;
5380	struct e1000_vf_stats	*stats;
5381
5382	stats = (struct e1000_vf_stats *)adapter->stats;
5383	if (stats == NULL)
5384		return;
5385	stats->last_gprc = E1000_READ_REG(hw, E1000_VFGPRC);
5386	stats->last_gorc = E1000_READ_REG(hw, E1000_VFGORC);
5387	stats->last_gptc = E1000_READ_REG(hw, E1000_VFGPTC);
5388	stats->last_gotc = E1000_READ_REG(hw, E1000_VFGOTC);
5389	stats->last_mprc = E1000_READ_REG(hw, E1000_VFMPRC);
5390}
5391
5392/**********************************************************************
5393 *
5394 *  Update the VF board statistics counters.
5395 *
5396 **********************************************************************/
5397static void
5398igb_update_vf_stats_counters(struct adapter *adapter)
5399{
5400	struct e1000_hw *hw = &adapter->hw;
5401	struct e1000_vf_stats	*stats;
5402
5403	if (adapter->link_speed == 0)
5404		return;
5405
5406	stats = (struct e1000_vf_stats *)adapter->stats;
5407
5408	UPDATE_VF_REG(E1000_VFGPRC,
5409	    stats->last_gprc, stats->gprc);
5410	UPDATE_VF_REG(E1000_VFGORC,
5411	    stats->last_gorc, stats->gorc);
5412	UPDATE_VF_REG(E1000_VFGPTC,
5413	    stats->last_gptc, stats->gptc);
5414	UPDATE_VF_REG(E1000_VFGOTC,
5415	    stats->last_gotc, stats->gotc);
5416	UPDATE_VF_REG(E1000_VFMPRC,
5417	    stats->last_mprc, stats->mprc);
5418}
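/*
** Note: UPDATE_VF_REG() above is a macro from if_igb.h; judging from its
** use here and from the last_* snapshot taken in igb_vf_init_stats(), it
** presumably folds each 32-bit VF counter register into the running
** 64-bit total while handling counter wrap.
*/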
5419
5420/* Export a single 32-bit register via a read-only sysctl. */
5421static int
5422igb_sysctl_reg_handler(SYSCTL_HANDLER_ARGS)
5423{
5424	struct adapter *adapter;
5425	u_int val;
5426
5427	adapter = oidp->oid_arg1;
5428	val = E1000_READ_REG(&adapter->hw, oidp->oid_arg2);
5429	return (sysctl_handle_int(oidp, &val, 0, req));
5430}
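/*
** igb_add_hw_stats() below registers this handler via SYSCTL_ADD_PROC(),
** passing the adapter as arg1 and a register offset such as E1000_TDH()
** or E1000_RDT() as arg2, so one handler serves every exported
** descriptor head/tail register.
*/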
5431
5432/*
5433**  Tuneable interrupt rate handler
5434*/
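/*
** The handler extracts the EITR interval field and reports it as a rate:
** treating the field as microseconds, an interval of 500 usec reads back
** as roughly 1000000 / 500 = 2000 interrupts per second.
*/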
5435static int
5436igb_sysctl_interrupt_rate_handler(SYSCTL_HANDLER_ARGS)
5437{
5438	struct igb_queue	*que = ((struct igb_queue *)oidp->oid_arg1);
5439	int			error;
5440	u32			reg, usec, rate;
5441
5442	reg = E1000_READ_REG(&que->adapter->hw, E1000_EITR(que->msix));
5443	usec = ((reg & 0x7FFC) >> 2);
5444	if (usec > 0)
5445		rate = 1000000 / usec;
5446	else
5447		rate = 0;
5448	error = sysctl_handle_int(oidp, &rate, 0, req);
5449	if (error || !req->newptr)
5450		return (error);
5451	return (0);
5452}
5453
5454/*
5455 * Add sysctl variables, one per statistic, to the system.
5456 */
5457static void
5458igb_add_hw_stats(struct adapter *adapter)
5459{
5460	device_t dev = adapter->dev;
5461
5462	struct tx_ring *txr = adapter->tx_rings;
5463	struct rx_ring *rxr = adapter->rx_rings;
5464
5465	struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev);
5466	struct sysctl_oid *tree = device_get_sysctl_tree(dev);
5467	struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree);
5468	struct e1000_hw_stats *stats = adapter->stats;
5469
5470	struct sysctl_oid *stat_node, *queue_node, *int_node, *host_node;
5471	struct sysctl_oid_list *stat_list, *queue_list, *int_list, *host_list;
5472
5473#define QUEUE_NAME_LEN 32
5474	char namebuf[QUEUE_NAME_LEN];
5475
5476	/* Driver Statistics */
5477	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "link_irq",
5478			CTLFLAG_RD, &adapter->link_irq, 0,
5479			"Link MSIX IRQ Handled");
5480	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "dropped",
5481			CTLFLAG_RD, &adapter->dropped_pkts,
5482			"Driver dropped packets");
5483	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_dma_fail",
5484			CTLFLAG_RD, &adapter->no_tx_dma_setup,
5485			"Driver tx dma failure in xmit");
5486	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_overruns",
5487			CTLFLAG_RD, &adapter->rx_overruns,
5488			"RX overruns");
5489	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "watchdog_timeouts",
5490			CTLFLAG_RD, &adapter->watchdog_events,
5491			"Watchdog timeouts");
5492
5493	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "device_control",
5494			CTLFLAG_RD, &adapter->device_control,
5495			"Device Control Register");
5496	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_control",
5497			CTLFLAG_RD, &adapter->rx_control,
5498			"Receiver Control Register");
5499	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "interrupt_mask",
5500			CTLFLAG_RD, &adapter->int_mask,
5501			"Interrupt Mask");
5502	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "extended_int_mask",
5503			CTLFLAG_RD, &adapter->eint_mask,
5504			"Extended Interrupt Mask");
5505	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_buf_alloc",
5506			CTLFLAG_RD, &adapter->packet_buf_alloc_tx,
5507			"Transmit Buffer Packet Allocation");
5508	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_buf_alloc",
5509			CTLFLAG_RD, &adapter->packet_buf_alloc_rx,
5510			"Receive Buffer Packet Allocation");
5511	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_high_water",
5512			CTLFLAG_RD, &adapter->hw.fc.high_water, 0,
5513			"Flow Control High Watermark");
5514	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_low_water",
5515			CTLFLAG_RD, &adapter->hw.fc.low_water, 0,
5516			"Flow Control Low Watermark");
5517
5518	for (int i = 0; i < adapter->num_queues; i++, rxr++, txr++) {
5519		struct lro_ctrl *lro = &rxr->lro;
5520
5521		snprintf(namebuf, QUEUE_NAME_LEN, "queue%d", i);
5522		queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
5523					    CTLFLAG_RD, NULL, "Queue Name");
5524		queue_list = SYSCTL_CHILDREN(queue_node);
5525
5526		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "interrupt_rate",
5527				CTLFLAG_RD, &adapter->queues[i],
5528				sizeof(&adapter->queues[i]),
5529				igb_sysctl_interrupt_rate_handler,
5530				"IU", "Interrupt Rate");
5531
5532		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_head",
5533				CTLFLAG_RD, adapter, E1000_TDH(txr->me),
5534				igb_sysctl_reg_handler, "IU",
5535				"Transmit Descriptor Head");
5536		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_tail",
5537				CTLFLAG_RD, adapter, E1000_TDT(txr->me),
5538				igb_sysctl_reg_handler, "IU",
5539				"Transmit Descriptor Tail");
5540		SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "no_desc_avail",
5541				CTLFLAG_RD, &txr->no_desc_avail,
5542				"Queue No Descriptor Available");
5543		SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "tx_packets",
5544				CTLFLAG_RD, &txr->tx_packets,
5545				"Queue Packets Transmitted");
5546
5547		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_head",
5548				CTLFLAG_RD, adapter, E1000_RDH(rxr->me),
5549				igb_sysctl_reg_handler, "IU",
5550				"Receive Descriptor Head");
5551		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_tail",
5552				CTLFLAG_RD, adapter, E1000_RDT(rxr->me),
5553				igb_sysctl_reg_handler, "IU",
5554				"Receive Descriptor Tail");
5555		SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "rx_packets",
5556				CTLFLAG_RD, &rxr->rx_packets,
5557				"Queue Packets Received");
5558		SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "rx_bytes",
5559				CTLFLAG_RD, &rxr->rx_bytes,
5560				"Queue Bytes Received");
5561		SYSCTL_ADD_UINT(ctx, queue_list, OID_AUTO, "lro_queued",
5562				CTLFLAG_RD, &lro->lro_queued, 0,
5563				"LRO Queued");
5564		SYSCTL_ADD_UINT(ctx, queue_list, OID_AUTO, "lro_flushed",
5565				CTLFLAG_RD, &lro->lro_flushed, 0,
5566				"LRO Flushed");
5567	}
5568
5569	/* MAC stats get their own sub node */
5570
5571	stat_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "mac_stats",
5572				    CTLFLAG_RD, NULL, "MAC Statistics");
5573	stat_list = SYSCTL_CHILDREN(stat_node);
5574
5575	/*
5576	** The VF adapter has a very limited set of stats
5577	** since it is not managing the metal, so to speak.
5578	*/
5579	if (adapter->vf_ifp) {
5580		SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd",
5581				CTLFLAG_RD, &stats->gprc,
5582				"Good Packets Received");
5583		SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
5584				CTLFLAG_RD, &stats->gptc,
5585				"Good Packets Transmitted");
5586		SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd",
5587				CTLFLAG_RD, &stats->gorc,
5588				"Good Octets Received");
5589		SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_txd",
5590				CTLFLAG_RD, &stats->gotc,
5591				"Good Octets Transmitted");
5592		SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd",
5593				CTLFLAG_RD, &stats->mprc,
5594				"Multicast Packets Received");
5595		return;
5596	}
5597
5598	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "excess_coll",
5599			CTLFLAG_RD, &stats->ecol,
5600			"Excessive collisions");
5601	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "single_coll",
5602			CTLFLAG_RD, &stats->scc,
5603			"Single collisions");
5604	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "multiple_coll",
5605			CTLFLAG_RD, &stats->mcc,
5606			"Multiple collisions");
5607	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "late_coll",
5608			CTLFLAG_RD, &stats->latecol,
5609			"Late collisions");
5610	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "collision_count",
5611			CTLFLAG_RD, &stats->colc,
5612			"Collision Count");
5613	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "symbol_errors",
5614			CTLFLAG_RD, &stats->symerrs,
5615			"Symbol Errors");
5616	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "sequence_errors",
5617			CTLFLAG_RD, &stats->sec,
5618			"Sequence Errors");
5619	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "defer_count",
5620			CTLFLAG_RD, &stats->dc,
5621			"Defer Count");
5622	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "missed_packets",
5623			CTLFLAG_RD, &stats->mpc,
5624			"Missed Packets");
5625	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_no_buff",
5626			CTLFLAG_RD, &stats->rnbc,
5627			"Receive No Buffers");
5628	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_undersize",
5629			CTLFLAG_RD, &stats->ruc,
5630			"Receive Undersize");
5631	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_fragmented",
5632			CTLFLAG_RD, &stats->rfc,
5633			"Fragmented Packets Received");
5634	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_oversize",
5635			CTLFLAG_RD, &stats->roc,
5636			"Oversized Packets Received");
5637	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_jabber",
5638			CTLFLAG_RD, &stats->rjc,
5639			"Received Jabber");
5640	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_errs",
5641			CTLFLAG_RD, &stats->rxerrc,
5642			"Receive Errors");
5643	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "crc_errs",
5644			CTLFLAG_RD, &stats->crcerrs,
5645			"CRC errors");
5646	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "alignment_errs",
5647			CTLFLAG_RD, &stats->algnerrc,
5648			"Alignment Errors");
5649	/* On 82575 these are collision counts */
5650	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "coll_ext_errs",
5651			CTLFLAG_RD, &stats->cexterr,
5652			"Collision/Carrier extension errors");
5653	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xon_recvd",
5654			CTLFLAG_RD, &stats->xonrxc,
5655			"XON Received");
5656	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xon_txd",
5657			CTLFLAG_RD, &stats->xontxc,
5658			"XON Transmitted");
5659	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xoff_recvd",
5660			CTLFLAG_RD, &stats->xoffrxc,
5661			"XOFF Received");
5662	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xoff_txd",
5663			CTLFLAG_RD, &stats->xofftxc,
5664			"XOFF Transmitted");
5665	/* Packet Reception Stats */
5666	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "total_pkts_recvd",
5667			CTLFLAG_RD, &stats->tpr,
5668			"Total Packets Received");
5669	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd",
5670			CTLFLAG_RD, &stats->gprc,
5671			"Good Packets Received");
5672	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_recvd",
5673			CTLFLAG_RD, &stats->bprc,
5674			"Broadcast Packets Received");
5675	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd",
5676			CTLFLAG_RD, &stats->mprc,
5677			"Multicast Packets Received");
5678	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_64",
5679			CTLFLAG_RD, &stats->prc64,
5680			"64 byte frames received");
5681	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_65_127",
5682			CTLFLAG_RD, &stats->prc127,
5683			"65-127 byte frames received");
5684	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_128_255",
5685			CTLFLAG_RD, &stats->prc255,
5686			"128-255 byte frames received");
5687	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_256_511",
5688			CTLFLAG_RD, &stats->prc511,
5689			"256-511 byte frames received");
5690	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_512_1023",
5691			CTLFLAG_RD, &stats->prc1023,
5692			"512-1023 byte frames received");
5693	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_1024_1522",
5694			CTLFLAG_RD, &stats->prc1522,
5695			"1024-1522 byte frames received");
5696	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd",
5697			CTLFLAG_RD, &stats->gorc,
5698			"Good Octets Received");
5699
5700	/* Packet Transmission Stats */
5701	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_txd",
5702			CTLFLAG_RD, &stats->gotc,
5703			"Good Octets Transmitted");
5704	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "total_pkts_txd",
5705			CTLFLAG_RD, &stats->tpt,
5706			"Total Packets Transmitted");
5707	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
5708			CTLFLAG_RD, &stats->gptc,
5709			"Good Packets Transmitted");
5710	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_txd",
5711			CTLFLAG_RD, &stats->bptc,
5712			"Broadcast Packets Transmitted");
5713	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_txd",
5714			CTLFLAG_RD, &stats->mptc,
5715			"Multicast Packets Transmitted");
5716	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_64",
5717			CTLFLAG_RD, &stats->ptc64,
5718			"64 byte frames transmitted");
5719	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_65_127",
5720			CTLFLAG_RD, &stats->ptc127,
5721			"65-127 byte frames transmitted");
5722	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_128_255",
5723			CTLFLAG_RD, &stats->ptc255,
5724			"128-255 byte frames transmitted");
5725	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_256_511",
5726			CTLFLAG_RD, &stats->ptc511,
5727			"256-511 byte frames transmitted");
5728	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_512_1023",
5729			CTLFLAG_RD, &stats->ptc1023,
5730			"512-1023 byte frames transmitted");
5731	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_1024_1522",
5732			CTLFLAG_RD, &stats->ptc1522,
5733			"1024-1522 byte frames transmitted");
5734	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tso_txd",
5735			CTLFLAG_RD, &stats->tsctc,
5736			"TSO Contexts Transmitted");
5737	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tso_ctx_fail",
5738			CTLFLAG_RD, &stats->tsctfc,
5739			"TSO Contexts Failed");
5740
5741
5742	/* Interrupt Stats */
5743
5744	int_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "interrupts",
5745				    CTLFLAG_RD, NULL, "Interrupt Statistics");
5746	int_list = SYSCTL_CHILDREN(int_node);
5747
5748	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "asserts",
5749			CTLFLAG_RD, &stats->iac,
5750			"Interrupt Assertion Count");
5751
5752	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_pkt_timer",
5753			CTLFLAG_RD, &stats->icrxptc,
5754			"Interrupt Cause Rx Pkt Timer Expire Count");
5755
5756	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_abs_timer",
5757			CTLFLAG_RD, &stats->icrxatc,
5758			"Interrupt Cause Rx Abs Timer Expire Count");
5759
5760	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_pkt_timer",
5761			CTLFLAG_RD, &stats->ictxptc,
5762			"Interrupt Cause Tx Pkt Timer Expire Count");
5763
5764	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_abs_timer",
5765			CTLFLAG_RD, &stats->ictxatc,
5766			"Interrupt Cause Tx Abs Timer Expire Count");
5767
5768	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_queue_empty",
5769			CTLFLAG_RD, &stats->ictxqec,
5770			"Interrupt Cause Tx Queue Empty Count");
5771
5772	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_queue_min_thresh",
5773			CTLFLAG_RD, &stats->ictxqmtc,
5774			"Interrupt Cause Tx Queue Min Thresh Count");
5775
5776	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_desc_min_thresh",
5777			CTLFLAG_RD, &stats->icrxdmtc,
5778			"Interrupt Cause Rx Desc Min Thresh Count");
5779
5780	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_overrun",
5781			CTLFLAG_RD, &stats->icrxoc,
5782			"Interrupt Cause Receiver Overrun Count");
5783
5784	/* Host to Card Stats */
5785
5786	host_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "host",
5787				    CTLFLAG_RD, NULL,
5788				    "Host to Card Statistics");
5789
5790	host_list = SYSCTL_CHILDREN(host_node);
5791
5792	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_tx_pkt",
5793			CTLFLAG_RD, &stats->cbtmpc,
5794			"Circuit Breaker Tx Packet Count");
5795
5796	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "host_tx_pkt_discard",
5797			CTLFLAG_RD, &stats->htdpmc,
5798			"Host Transmit Discarded Packets");
5799
5800	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "rx_pkt",
5801			CTLFLAG_RD, &stats->rpthc,
5802			"Rx Packets To Host");
5803
5804	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_rx_pkts",
5805			CTLFLAG_RD, &stats->cbrmpc,
5806			"Circuit Breaker Rx Packet Count");
5807
5808	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_rx_pkt_drop",
5809			CTLFLAG_RD, &stats->cbrdpc,
5810			"Circuit Breaker Rx Dropped Count");
5811
5812	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "tx_good_pkt",
5813			CTLFLAG_RD, &stats->hgptc,
5814			"Host Good Packets Tx Count");
5815
5816	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_tx_pkt_drop",
5817			CTLFLAG_RD, &stats->htcbdpc,
5818			"Host Tx Circuit Breaker Dropped Count");
5819
5820	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "rx_good_bytes",
5821			CTLFLAG_RD, &stats->hgorc,
5822			"Host Good Octets Received Count");
5823
5824	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "tx_good_bytes",
5825			CTLFLAG_RD, &stats->hgotc,
5826			"Host Good Octets Transmit Count");
5827
5828	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "length_errors",
5829			CTLFLAG_RD, &stats->lenerrs,
5830			"Length Errors");
5831
5832	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "serdes_violation_pkt",
5833			CTLFLAG_RD, &stats->scvpc,
5834			"SerDes/SGMII Code Violation Pkt Count");
5835
5836	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "header_redir_missed",
5837			CTLFLAG_RD, &stats->hrmpc,
5838			"Header Redirection Missed Packet Count");
5839}
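/*
** With the nodes above attached under the device sysctl tree, the
** counters surface as entries such as (assuming unit 0, hence the
** usual dev.igb.0 prefix):
**	dev.igb.0.queue0.rx_packets
**	dev.igb.0.mac_stats.good_pkts_recvd
**	dev.igb.0.interrupts.asserts
*/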
5840
5841
5842/**********************************************************************
5843 *
5844 *  This routine provides a way to dump out the adapter eeprom,
5845 *  often a useful debug/service tool. It dumps only the first
5846 *  32 words; the data that matters lies within that range.
5847 *
5848 **********************************************************************/
5849static int
5850igb_sysctl_nvm_info(SYSCTL_HANDLER_ARGS)
5851{
5852	struct adapter *adapter;
5853	int error;
5854	int result;
5855
5856	result = -1;
5857	error = sysctl_handle_int(oidp, &result, 0, req);
5858
5859	if (error || !req->newptr)
5860		return (error);
5861
5862	/*
5863	 * This value will cause a hex dump of the
5864	 * first 32 16-bit words of the EEPROM to
5865	 * the screen.
5866	 */
5867	if (result == 1) {
5868		adapter = (struct adapter *)arg1;
5869		igb_print_nvm_info(adapter);
5870	}
5871
5872	return (error);
5873}
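/*
** Example usage, assuming this handler is attached as an "nvm" node
** under the device sysctl tree:
**	sysctl dev.igb.0.nvm=1
** which triggers the hex dump performed by igb_print_nvm_info() below.
*/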
5874
5875static void
5876igb_print_nvm_info(struct adapter *adapter)
5877{
5878	u16	eeprom_data;
5879	int	i, j, row = 0;
5880
5881	/* It's a bit crude, but it gets the job done */
5882	printf("\nInterface EEPROM Dump:\n");
5883	printf("Offset\n0x0000  ");
5884	for (i = 0, j = 0; i < 32; i++, j++) {
5885		if (j == 8) { /* Start a new row with its offset */
5886			j = 0; ++row;
5887			printf("\n0x00%x0  ", row);
5888		}
5889		e1000_read_nvm(&adapter->hw, i, 1, &eeprom_data);
5890		printf("%04x ", eeprom_data);
5891	}
5892	printf("\n");
5893}
5894
5895static void
5896igb_set_sysctl_value(struct adapter *adapter, const char *name,
5897	const char *description, int *limit, int value)
5898{
5899	*limit = value;
5900	SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
5901	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
5902	    OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW, limit, value, description);
5903}
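/*
** Illustrative call (the sysctl name and adapter field here are
** assumptions for the example only), showing how the helper pairs a
** driver limit with a read/write sysctl of the same value:
**
**	igb_set_sysctl_value(adapter, "rx_processing_limit",
**	    "max rx packets to process per interrupt",
**	    &adapter->rx_process_limit, 100);
*/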
5904
5905/*
5906** Set flow control using sysctl:
5907** Flow control values:
5908** 	0 - off
5909**	1 - rx pause
5910**	2 - tx pause
5911**	3 - full
5912*/
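/*
** For example, assuming this handler is attached as an "fc" node under
** the device sysctl tree, full flow control could be requested with:
**	sysctl dev.igb.0.fc=3
*/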
5913static int
5914igb_set_flowcntl(SYSCTL_HANDLER_ARGS)
5915{
5916	int		error;
5917	static int	input = 3; /* default is full */
5918	struct adapter	*adapter = (struct adapter *) arg1;
5919
5920	error = sysctl_handle_int(oidp, &input, 0, req);
5921
5922	if ((error) || (req->newptr == NULL))
5923		return (error);
5924
5925	switch (input) {
5926		case e1000_fc_rx_pause:
5927		case e1000_fc_tx_pause:
5928		case e1000_fc_full:
5929		case e1000_fc_none:
5930			adapter->hw.fc.requested_mode = input;
5931			adapter->fc = input;
5932			break;
5933		default:
5934			/* Do nothing */
5935			return (error);
5936	}
5937
5938	adapter->hw.fc.current_mode = adapter->hw.fc.requested_mode;
5939	e1000_force_mac_fc(&adapter->hw);
5940	return (error);
5941}
5942
5943/*
5944** Manage DMA Coalesce:
5945** Control values:
5946** 	0/1 - off/on
5947**	Legal timer values are:
5948**	250, 500, and 1000-10000 in steps of 1000
5949*/
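/*
** For example, assuming this handler is attached as a "dmac" node under
** the device sysctl tree, DMA coalescing could be enabled with its
** default timer via:
**	sysctl dev.igb.0.dmac=1
** and disabled again with a value of 0; the interface is reinitialized
** on each accepted change.
*/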
5950static int
5951igb_sysctl_dmac(SYSCTL_HANDLER_ARGS)
5952{
5953	struct adapter *adapter = (struct adapter *) arg1;
5954	int		error;
5955
5956	error = sysctl_handle_int(oidp, &adapter->dmac, 0, req);
5957
5958	if ((error) || (req->newptr == NULL))
5959		return (error);
5960
5961	switch (adapter->dmac) {
5962		case 0:
5963			/* Disabling */
5964			break;
5965		case 1: /* Just enable and use default */
5966			adapter->dmac = 1000;
5967			break;
5968		case 250:
5969		case 500:
5970		case 1000:
5971		case 2000:
5972		case 3000:
5973		case 4000:
5974		case 5000:
5975		case 6000:
5976		case 7000:
5977		case 8000:
5978		case 9000:
5979		case 10000:
5980			/* Legal values - allow */
5981			break;
5982		default:
5983			/* Do nothing, illegal value */
5984			adapter->dmac = 0;
5985			return (error);
5986	}
5987	/* Reinit the interface */
5988	igb_init(adapter);
5989	return (error);
5990}
5991