if_igb.c revision 241856
1/******************************************************************************
2
3  Copyright (c) 2001-2012, Intel Corporation
4  All rights reserved.
5
6  Redistribution and use in source and binary forms, with or without
7  modification, are permitted provided that the following conditions are met:
8
9   1. Redistributions of source code must retain the above copyright notice,
10      this list of conditions and the following disclaimer.
11
12   2. Redistributions in binary form must reproduce the above copyright
13      notice, this list of conditions and the following disclaimer in the
14      documentation and/or other materials provided with the distribution.
15
16   3. Neither the name of the Intel Corporation nor the names of its
17      contributors may be used to endorse or promote products derived from
18      this software without specific prior written permission.
19
20  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
24  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30  POSSIBILITY OF SUCH DAMAGE.
31
32******************************************************************************/
33/*$FreeBSD: head/sys/dev/e1000/if_igb.c 241856 2012-10-22 03:41:14Z eadler $*/
34
35
36#ifdef HAVE_KERNEL_OPTION_HEADERS
37#include "opt_device_polling.h"
38#include "opt_inet.h"
39#include "opt_inet6.h"
40#include "opt_altq.h"
41#endif
42
43#include <sys/param.h>
44#include <sys/systm.h>
45#if __FreeBSD_version >= 800000
46#include <sys/buf_ring.h>
47#endif
48#include <sys/bus.h>
49#include <sys/endian.h>
50#include <sys/kernel.h>
51#include <sys/kthread.h>
52#include <sys/malloc.h>
53#include <sys/mbuf.h>
54#include <sys/module.h>
55#include <sys/rman.h>
56#include <sys/socket.h>
57#include <sys/sockio.h>
58#include <sys/sysctl.h>
59#include <sys/taskqueue.h>
60#include <sys/eventhandler.h>
61#include <sys/pcpu.h>
62#include <sys/smp.h>
63#include <machine/smp.h>
64#include <machine/bus.h>
65#include <machine/resource.h>
66
67#include <net/bpf.h>
68#include <net/ethernet.h>
69#include <net/if.h>
70#include <net/if_arp.h>
71#include <net/if_dl.h>
72#include <net/if_media.h>
73
74#include <net/if_types.h>
75#include <net/if_vlan_var.h>
76
77#include <netinet/in_systm.h>
78#include <netinet/in.h>
79#include <netinet/if_ether.h>
80#include <netinet/ip.h>
81#include <netinet/ip6.h>
82#include <netinet/tcp.h>
83#include <netinet/tcp_lro.h>
84#include <netinet/udp.h>
85
86#include <machine/in_cksum.h>
87#include <dev/led/led.h>
88#include <dev/pci/pcivar.h>
89#include <dev/pci/pcireg.h>
90
91#include "e1000_api.h"
92#include "e1000_82575.h"
93#include "if_igb.h"
94
95/*********************************************************************
96 *  Set this to one to display debug statistics
97 *********************************************************************/
98int	igb_display_debug_stats = 0;
99
100/*********************************************************************
101 *  Driver version:
102 *********************************************************************/
103char igb_driver_version[] = "version - 2.3.5";
104
105
106/*********************************************************************
107 *  PCI Device ID Table
108 *
109 *  Used by probe to select devices to load on
110 *  Last field stores an index into e1000_strings
111 *  Last entry must be all 0s
112 *
113 *  { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
114 *********************************************************************/
115
116static igb_vendor_info_t igb_vendor_info_array[] =
117{
118	{ 0x8086, E1000_DEV_ID_82575EB_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
119	{ 0x8086, E1000_DEV_ID_82575EB_FIBER_SERDES,
120						PCI_ANY_ID, PCI_ANY_ID, 0},
121	{ 0x8086, E1000_DEV_ID_82575GB_QUAD_COPPER,
122						PCI_ANY_ID, PCI_ANY_ID, 0},
123	{ 0x8086, E1000_DEV_ID_82576,		PCI_ANY_ID, PCI_ANY_ID, 0},
124	{ 0x8086, E1000_DEV_ID_82576_NS,	PCI_ANY_ID, PCI_ANY_ID, 0},
125	{ 0x8086, E1000_DEV_ID_82576_NS_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
126	{ 0x8086, E1000_DEV_ID_82576_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
127	{ 0x8086, E1000_DEV_ID_82576_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
128	{ 0x8086, E1000_DEV_ID_82576_SERDES_QUAD,
129						PCI_ANY_ID, PCI_ANY_ID, 0},
130	{ 0x8086, E1000_DEV_ID_82576_QUAD_COPPER,
131						PCI_ANY_ID, PCI_ANY_ID, 0},
132	{ 0x8086, E1000_DEV_ID_82576_QUAD_COPPER_ET2,
133						PCI_ANY_ID, PCI_ANY_ID, 0},
134	{ 0x8086, E1000_DEV_ID_82576_VF,	PCI_ANY_ID, PCI_ANY_ID, 0},
135	{ 0x8086, E1000_DEV_ID_82580_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
136	{ 0x8086, E1000_DEV_ID_82580_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
137	{ 0x8086, E1000_DEV_ID_82580_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
138	{ 0x8086, E1000_DEV_ID_82580_SGMII,	PCI_ANY_ID, PCI_ANY_ID, 0},
139	{ 0x8086, E1000_DEV_ID_82580_COPPER_DUAL,
140						PCI_ANY_ID, PCI_ANY_ID, 0},
141	{ 0x8086, E1000_DEV_ID_82580_QUAD_FIBER,
142						PCI_ANY_ID, PCI_ANY_ID, 0},
143	{ 0x8086, E1000_DEV_ID_DH89XXCC_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
144	{ 0x8086, E1000_DEV_ID_DH89XXCC_SGMII,	PCI_ANY_ID, PCI_ANY_ID, 0},
145	{ 0x8086, E1000_DEV_ID_DH89XXCC_SFP,	PCI_ANY_ID, PCI_ANY_ID, 0},
146	{ 0x8086, E1000_DEV_ID_DH89XXCC_BACKPLANE,
147						PCI_ANY_ID, PCI_ANY_ID, 0},
148	{ 0x8086, E1000_DEV_ID_I350_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
149	{ 0x8086, E1000_DEV_ID_I350_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
150	{ 0x8086, E1000_DEV_ID_I350_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
151	{ 0x8086, E1000_DEV_ID_I350_SGMII,	PCI_ANY_ID, PCI_ANY_ID, 0},
152	{ 0x8086, E1000_DEV_ID_I350_VF,		PCI_ANY_ID, PCI_ANY_ID, 0},
153	{ 0x8086, E1000_DEV_ID_I210_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
154	{ 0x8086, E1000_DEV_ID_I210_COPPER_IT,	PCI_ANY_ID, PCI_ANY_ID, 0},
155	{ 0x8086, E1000_DEV_ID_I210_COPPER_OEM1,
156						PCI_ANY_ID, PCI_ANY_ID, 0},
157	{ 0x8086, E1000_DEV_ID_I210_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
158	{ 0x8086, E1000_DEV_ID_I210_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
159	{ 0x8086, E1000_DEV_ID_I210_SGMII,	PCI_ANY_ID, PCI_ANY_ID, 0},
160	{ 0x8086, E1000_DEV_ID_I211_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
161	/* required last entry */
162	{ 0, 0, 0, 0, 0}
163};
164
165/*********************************************************************
166 *  Table of branding strings for all supported NICs.
167 *********************************************************************/
168
169static char *igb_strings[] = {
170	"Intel(R) PRO/1000 Network Connection"
171};
172
173/*********************************************************************
174 *  Function prototypes
175 *********************************************************************/
176static int	igb_probe(device_t);
177static int	igb_attach(device_t);
178static int	igb_detach(device_t);
179static int	igb_shutdown(device_t);
180static int	igb_suspend(device_t);
181static int	igb_resume(device_t);
182#if __FreeBSD_version >= 800000
183static int	igb_mq_start(struct ifnet *, struct mbuf *);
184static int	igb_mq_start_locked(struct ifnet *,
185		    struct tx_ring *, struct mbuf *);
186static void	igb_qflush(struct ifnet *);
187static void	igb_deferred_mq_start(void *, int);
188#else
189static void	igb_start(struct ifnet *);
190static void	igb_start_locked(struct tx_ring *, struct ifnet *ifp);
191#endif
192static int	igb_ioctl(struct ifnet *, u_long, caddr_t);
193static void	igb_init(void *);
194static void	igb_init_locked(struct adapter *);
195static void	igb_stop(void *);
196static void	igb_media_status(struct ifnet *, struct ifmediareq *);
197static int	igb_media_change(struct ifnet *);
198static void	igb_identify_hardware(struct adapter *);
199static int	igb_allocate_pci_resources(struct adapter *);
200static int	igb_allocate_msix(struct adapter *);
201static int	igb_allocate_legacy(struct adapter *);
202static int	igb_setup_msix(struct adapter *);
203static void	igb_free_pci_resources(struct adapter *);
204static void	igb_local_timer(void *);
205static void	igb_reset(struct adapter *);
206static int	igb_setup_interface(device_t, struct adapter *);
207static int	igb_allocate_queues(struct adapter *);
208static void	igb_configure_queues(struct adapter *);
209
210static int	igb_allocate_transmit_buffers(struct tx_ring *);
211static void	igb_setup_transmit_structures(struct adapter *);
212static void	igb_setup_transmit_ring(struct tx_ring *);
213static void	igb_initialize_transmit_units(struct adapter *);
214static void	igb_free_transmit_structures(struct adapter *);
215static void	igb_free_transmit_buffers(struct tx_ring *);
216
217static int	igb_allocate_receive_buffers(struct rx_ring *);
218static int	igb_setup_receive_structures(struct adapter *);
219static int	igb_setup_receive_ring(struct rx_ring *);
220static void	igb_initialize_receive_units(struct adapter *);
221static void	igb_free_receive_structures(struct adapter *);
222static void	igb_free_receive_buffers(struct rx_ring *);
223static void	igb_free_receive_ring(struct rx_ring *);
224
225static void	igb_enable_intr(struct adapter *);
226static void	igb_disable_intr(struct adapter *);
227static void	igb_update_stats_counters(struct adapter *);
228static bool	igb_txeof(struct tx_ring *);
229
230static __inline	void igb_rx_discard(struct rx_ring *, int);
231static __inline void igb_rx_input(struct rx_ring *,
232		    struct ifnet *, struct mbuf *, u32);
233
234static bool	igb_rxeof(struct igb_queue *, int, int *);
235static void	igb_rx_checksum(u32, struct mbuf *, u32);
236static bool	igb_tx_ctx_setup(struct tx_ring *, struct mbuf *);
237static bool	igb_tso_setup(struct tx_ring *, struct mbuf *, int,
238		    struct ip *, struct tcphdr *);
239static void	igb_set_promisc(struct adapter *);
240static void	igb_disable_promisc(struct adapter *);
241static void	igb_set_multi(struct adapter *);
242static void	igb_update_link_status(struct adapter *);
243static void	igb_refresh_mbufs(struct rx_ring *, int);
244
245static void	igb_register_vlan(void *, struct ifnet *, u16);
246static void	igb_unregister_vlan(void *, struct ifnet *, u16);
247static void	igb_setup_vlan_hw_support(struct adapter *);
248
249static int	igb_xmit(struct tx_ring *, struct mbuf **);
250static int	igb_dma_malloc(struct adapter *, bus_size_t,
251		    struct igb_dma_alloc *, int);
252static void	igb_dma_free(struct adapter *, struct igb_dma_alloc *);
253static int	igb_sysctl_nvm_info(SYSCTL_HANDLER_ARGS);
254static void	igb_print_nvm_info(struct adapter *);
255static int 	igb_is_valid_ether_addr(u8 *);
256static void     igb_add_hw_stats(struct adapter *);
257
258static void	igb_vf_init_stats(struct adapter *);
259static void	igb_update_vf_stats_counters(struct adapter *);
260
261/* Management and WOL Support */
262static void	igb_init_manageability(struct adapter *);
263static void	igb_release_manageability(struct adapter *);
264static void     igb_get_hw_control(struct adapter *);
265static void     igb_release_hw_control(struct adapter *);
266static void     igb_enable_wakeup(device_t);
267static void     igb_led_func(void *, int);
268
269static int	igb_irq_fast(void *);
270static void	igb_msix_que(void *);
271static void	igb_msix_link(void *);
272static void	igb_handle_que(void *context, int pending);
273static void	igb_handle_link(void *context, int pending);
274static void	igb_handle_link_locked(struct adapter *);
275
276static void	igb_set_sysctl_value(struct adapter *, const char *,
277		    const char *, int *, int);
278static int	igb_set_flowcntl(SYSCTL_HANDLER_ARGS);
279static int	igb_sysctl_dmac(SYSCTL_HANDLER_ARGS);
280static int	igb_sysctl_eee(SYSCTL_HANDLER_ARGS);
281
282#ifdef DEVICE_POLLING
283static poll_handler_t igb_poll;
284#endif /* POLLING */
285
286/*********************************************************************
287 *  FreeBSD Device Interface Entry Points
288 *********************************************************************/
289
290static device_method_t igb_methods[] = {
291	/* Device interface */
292	DEVMETHOD(device_probe, igb_probe),
293	DEVMETHOD(device_attach, igb_attach),
294	DEVMETHOD(device_detach, igb_detach),
295	DEVMETHOD(device_shutdown, igb_shutdown),
296	DEVMETHOD(device_suspend, igb_suspend),
297	DEVMETHOD(device_resume, igb_resume),
298	{0, 0}
299};
300
301static driver_t igb_driver = {
302	"igb", igb_methods, sizeof(struct adapter),
303};
304
305static devclass_t igb_devclass;
306DRIVER_MODULE(igb, pci, igb_driver, igb_devclass, 0, 0);
307MODULE_DEPEND(igb, pci, 1, 1, 1);
308MODULE_DEPEND(igb, ether, 1, 1, 1);
309
310/*********************************************************************
311 *  Tunable default values.
312 *********************************************************************/
313
314static SYSCTL_NODE(_hw, OID_AUTO, igb, CTLFLAG_RD, 0, "IGB driver parameters");
315
316/* Descriptor defaults */
317static int igb_rxd = IGB_DEFAULT_RXD;
318static int igb_txd = IGB_DEFAULT_TXD;
319TUNABLE_INT("hw.igb.rxd", &igb_rxd);
320TUNABLE_INT("hw.igb.txd", &igb_txd);
321SYSCTL_INT(_hw_igb, OID_AUTO, rxd, CTLFLAG_RDTUN, &igb_rxd, 0,
322    "Number of receive descriptors per queue");
323SYSCTL_INT(_hw_igb, OID_AUTO, txd, CTLFLAG_RDTUN, &igb_txd, 0,
324    "Number of transmit descriptors per queue");
325
326/*
327** AIM: Adaptive Interrupt Moderation
328** which means that the interrupt rate
329** is varied over time based on the
330** traffic for that interrupt vector
331*/
332static int igb_enable_aim = TRUE;
333TUNABLE_INT("hw.igb.enable_aim", &igb_enable_aim);
334SYSCTL_INT(_hw_igb, OID_AUTO, enable_aim, CTLFLAG_RW, &igb_enable_aim, 0,
335    "Enable adaptive interrupt moderation");
336
337/*
338 * MSIX should be the default for best performance,
339 * but this allows it to be forced off for testing.
340 */
341static int igb_enable_msix = 1;
342TUNABLE_INT("hw.igb.enable_msix", &igb_enable_msix);
343SYSCTL_INT(_hw_igb, OID_AUTO, enable_msix, CTLFLAG_RDTUN, &igb_enable_msix, 0,
344    "Enable MSI-X interrupts");
345
346/*
347** Tuneable Interrupt rate
348*/
349static int igb_max_interrupt_rate = 8000;
350TUNABLE_INT("hw.igb.max_interrupt_rate", &igb_max_interrupt_rate);
351SYSCTL_INT(_hw_igb, OID_AUTO, max_interrupt_rate, CTLFLAG_RDTUN,
352    &igb_max_interrupt_rate, 0, "Maximum interrupts per second");
353
354/*
355** Header split causes the packet header to
356** be dma'd to a seperate mbuf from the payload.
357** this can have memory alignment benefits. But
358** another plus is that small packets often fit
359** into the header and thus use no cluster. Its
360** a very workload dependent type feature.
361*/
362static int igb_header_split = FALSE;
363TUNABLE_INT("hw.igb.hdr_split", &igb_header_split);
364SYSCTL_INT(_hw_igb, OID_AUTO, header_split, CTLFLAG_RDTUN, &igb_header_split, 0,
365    "Enable receive mbuf header split");
366
367/*
368** This will autoconfigure based on
369** the number of CPUs if left at 0.
370*/
371static int igb_num_queues = 0;
372TUNABLE_INT("hw.igb.num_queues", &igb_num_queues);
373SYSCTL_INT(_hw_igb, OID_AUTO, num_queues, CTLFLAG_RDTUN, &igb_num_queues, 0,
374    "Number of queues to configure, 0 indicates autoconfigure");
375
376/*
377** Global variable to store last used CPU when binding queues
378** to CPUs in igb_allocate_msix.  Starts at CPU_FIRST and increments when a
379** queue is bound to a cpu.
380*/
381static int igb_last_bind_cpu = -1;
382
383/* How many packets rxeof tries to clean at a time */
384static int igb_rx_process_limit = 100;
385TUNABLE_INT("hw.igb.rx_process_limit", &igb_rx_process_limit);
386SYSCTL_INT(_hw_igb, OID_AUTO, rx_process_limit, CTLFLAG_RDTUN,
387    &igb_rx_process_limit, 0,
388    "Maximum number of received packets to process at a time, -1 means unlimited");
389
390#ifdef DEV_NETMAP	/* see ixgbe.c for details */
391#include <dev/netmap/if_igb_netmap.h>
392#endif /* DEV_NETMAP */
393/*********************************************************************
394 *  Device identification routine
395 *
396 *  igb_probe determines if the driver should be loaded on
397 *  adapter based on PCI vendor/device id of the adapter.
398 *
399 *  return BUS_PROBE_DEFAULT on success, positive on failure
400 *********************************************************************/
401
402static int
403igb_probe(device_t dev)
404{
405	char		adapter_name[60];
406	uint16_t	pci_vendor_id = 0;
407	uint16_t	pci_device_id = 0;
408	uint16_t	pci_subvendor_id = 0;
409	uint16_t	pci_subdevice_id = 0;
410	igb_vendor_info_t *ent;
411
412	INIT_DEBUGOUT("igb_probe: begin");
413
414	pci_vendor_id = pci_get_vendor(dev);
415	if (pci_vendor_id != IGB_VENDOR_ID)
416		return (ENXIO);
417
418	pci_device_id = pci_get_device(dev);
419	pci_subvendor_id = pci_get_subvendor(dev);
420	pci_subdevice_id = pci_get_subdevice(dev);
421
422	ent = igb_vendor_info_array;
423	while (ent->vendor_id != 0) {
424		if ((pci_vendor_id == ent->vendor_id) &&
425		    (pci_device_id == ent->device_id) &&
426
427		    ((pci_subvendor_id == ent->subvendor_id) ||
428		    (ent->subvendor_id == PCI_ANY_ID)) &&
429
430		    ((pci_subdevice_id == ent->subdevice_id) ||
431		    (ent->subdevice_id == PCI_ANY_ID))) {
432			sprintf(adapter_name, "%s %s",
433				igb_strings[ent->index],
434				igb_driver_version);
435			device_set_desc_copy(dev, adapter_name);
436			return (BUS_PROBE_DEFAULT);
437		}
438		ent++;
439	}
440
441	return (ENXIO);
442}
443
444/*********************************************************************
445 *  Device initialization routine
446 *
447 *  The attach entry point is called when the driver is being loaded.
448 *  This routine identifies the type of hardware, allocates all resources
449 *  and initializes the hardware.
450 *
451 *  return 0 on success, positive on failure
452 *********************************************************************/
453
454static int
455igb_attach(device_t dev)
456{
457	struct adapter	*adapter;
458	int		error = 0;
459	u16		eeprom_data;
460
461	INIT_DEBUGOUT("igb_attach: begin");
462
463	adapter = device_get_softc(dev);
464	adapter->dev = adapter->osdep.dev = dev;
465	IGB_CORE_LOCK_INIT(adapter, device_get_nameunit(dev));
466
467	/* SYSCTL stuff */
468	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
469	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
470	    OID_AUTO, "nvm", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
471	    igb_sysctl_nvm_info, "I", "NVM Information");
472
473	igb_set_sysctl_value(adapter, "enable_aim",
474	    "Interrupt Moderation", &adapter->enable_aim,
475	    igb_enable_aim);
476
477	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
478	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
479	    OID_AUTO, "fc", CTLTYPE_INT|CTLFLAG_RW,
480	    adapter, 0, igb_set_flowcntl, "I", "Flow Control");
481
482	callout_init_mtx(&adapter->timer, &adapter->core_mtx, 0);
483
484	/* Determine hardware and mac info */
485	igb_identify_hardware(adapter);
486
487	/* Setup PCI resources */
488	if (igb_allocate_pci_resources(adapter)) {
489		device_printf(dev, "Allocation of PCI resources failed\n");
490		error = ENXIO;
491		goto err_pci;
492	}
493
494	/* Do Shared Code initialization */
495	if (e1000_setup_init_funcs(&adapter->hw, TRUE)) {
496		device_printf(dev, "Setup of Shared code failed\n");
497		error = ENXIO;
498		goto err_pci;
499	}
500
501	e1000_get_bus_info(&adapter->hw);
502
503	/* Sysctl for limiting the amount of work done in the taskqueue */
504	igb_set_sysctl_value(adapter, "rx_processing_limit",
505	    "max number of rx packets to process",
506	    &adapter->rx_process_limit, igb_rx_process_limit);
507
508	/*
509	 * Validate number of transmit and receive descriptors. It
510	 * must not exceed hardware maximum, and must be multiple
511	 * of E1000_DBA_ALIGN.
512	 */
513	if (((igb_txd * sizeof(struct e1000_tx_desc)) % IGB_DBA_ALIGN) != 0 ||
514	    (igb_txd > IGB_MAX_TXD) || (igb_txd < IGB_MIN_TXD)) {
515		device_printf(dev, "Using %d TX descriptors instead of %d!\n",
516		    IGB_DEFAULT_TXD, igb_txd);
517		adapter->num_tx_desc = IGB_DEFAULT_TXD;
518	} else
519		adapter->num_tx_desc = igb_txd;
520	if (((igb_rxd * sizeof(struct e1000_rx_desc)) % IGB_DBA_ALIGN) != 0 ||
521	    (igb_rxd > IGB_MAX_RXD) || (igb_rxd < IGB_MIN_RXD)) {
522		device_printf(dev, "Using %d RX descriptors instead of %d!\n",
523		    IGB_DEFAULT_RXD, igb_rxd);
524		adapter->num_rx_desc = IGB_DEFAULT_RXD;
525	} else
526		adapter->num_rx_desc = igb_rxd;
527
528	adapter->hw.mac.autoneg = DO_AUTO_NEG;
529	adapter->hw.phy.autoneg_wait_to_complete = FALSE;
530	adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
531
532	/* Copper options */
533	if (adapter->hw.phy.media_type == e1000_media_type_copper) {
534		adapter->hw.phy.mdix = AUTO_ALL_MODES;
535		adapter->hw.phy.disable_polarity_correction = FALSE;
536		adapter->hw.phy.ms_type = IGB_MASTER_SLAVE;
537	}
538
539	/*
540	 * Set the frame limits assuming
541	 * standard ethernet sized frames.
542	 */
543	adapter->max_frame_size = ETHERMTU + ETHER_HDR_LEN + ETHERNET_FCS_SIZE;
544	adapter->min_frame_size = ETH_ZLEN + ETHERNET_FCS_SIZE;
545
546	/*
547	** Allocate and Setup Queues
548	*/
549	if (igb_allocate_queues(adapter)) {
550		error = ENOMEM;
551		goto err_pci;
552	}
553
554	/* Allocate the appropriate stats memory */
555	if (adapter->vf_ifp) {
556		adapter->stats =
557		    (struct e1000_vf_stats *)malloc(sizeof \
558		    (struct e1000_vf_stats), M_DEVBUF, M_NOWAIT | M_ZERO);
559		igb_vf_init_stats(adapter);
560	} else
561		adapter->stats =
562		    (struct e1000_hw_stats *)malloc(sizeof \
563		    (struct e1000_hw_stats), M_DEVBUF, M_NOWAIT | M_ZERO);
564	if (adapter->stats == NULL) {
565		device_printf(dev, "Can not allocate stats memory\n");
566		error = ENOMEM;
567		goto err_late;
568	}
569
570	/* Allocate multicast array memory. */
571	adapter->mta = malloc(sizeof(u8) * ETH_ADDR_LEN *
572	    MAX_NUM_MULTICAST_ADDRESSES, M_DEVBUF, M_NOWAIT);
573	if (adapter->mta == NULL) {
574		device_printf(dev, "Can not allocate multicast setup array\n");
575		error = ENOMEM;
576		goto err_late;
577	}
578
579	/* Some adapter-specific advanced features */
580	if (adapter->hw.mac.type >= e1000_i350) {
581		SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
582		    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
583		    OID_AUTO, "dmac", CTLTYPE_INT|CTLFLAG_RW,
584		    adapter, 0, igb_sysctl_dmac, "I", "DMA Coalesce");
585		SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
586		    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
587		    OID_AUTO, "eee_disabled", CTLTYPE_INT|CTLFLAG_RW,
588		    adapter, 0, igb_sysctl_eee, "I",
589		    "Disable Energy Efficient Ethernet");
590		if (adapter->hw.phy.media_type == e1000_media_type_copper)
591			e1000_set_eee_i350(&adapter->hw);
592	}
593
594	/*
595	** Start from a known state, this is
596	** important in reading the nvm and
597	** mac from that.
598	*/
599	e1000_reset_hw(&adapter->hw);
600
601	/* Make sure we have a good EEPROM before we read from it */
602	if (((adapter->hw.mac.type != e1000_i210) &&
603	    (adapter->hw.mac.type != e1000_i211)) &&
604	    (e1000_validate_nvm_checksum(&adapter->hw) < 0)) {
605		/*
606		** Some PCI-E parts fail the first check due to
607		** the link being in sleep state, call it again,
608		** if it fails a second time its a real issue.
609		*/
610		if (e1000_validate_nvm_checksum(&adapter->hw) < 0) {
611			device_printf(dev,
612			    "The EEPROM Checksum Is Not Valid\n");
613			error = EIO;
614			goto err_late;
615		}
616	}
617
618	/*
619	** Copy the permanent MAC address out of the EEPROM
620	*/
621	if (e1000_read_mac_addr(&adapter->hw) < 0) {
622		device_printf(dev, "EEPROM read error while reading MAC"
623		    " address\n");
624		error = EIO;
625		goto err_late;
626	}
627	/* Check its sanity */
628	if (!igb_is_valid_ether_addr(adapter->hw.mac.addr)) {
629		device_printf(dev, "Invalid MAC address\n");
630		error = EIO;
631		goto err_late;
632	}
633
634	/* Setup OS specific network interface */
635	if (igb_setup_interface(dev, adapter) != 0)
636		goto err_late;
637
638	/* Now get a good starting state */
639	igb_reset(adapter);
640
641	/* Initialize statistics */
642	igb_update_stats_counters(adapter);
643
644	adapter->hw.mac.get_link_status = 1;
645	igb_update_link_status(adapter);
646
647	/* Indicate SOL/IDER usage */
648	if (e1000_check_reset_block(&adapter->hw))
649		device_printf(dev,
650		    "PHY reset is blocked due to SOL/IDER session.\n");
651
652	/* Determine if we have to control management hardware */
653	adapter->has_manage = e1000_enable_mng_pass_thru(&adapter->hw);
654
655	/*
656	 * Setup Wake-on-Lan
657	 */
658	/* APME bit in EEPROM is mapped to WUC.APME */
659	eeprom_data = E1000_READ_REG(&adapter->hw, E1000_WUC) & E1000_WUC_APME;
660	if (eeprom_data)
661		adapter->wol = E1000_WUFC_MAG;
662
663	/* Register for VLAN events */
664	adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
665	     igb_register_vlan, adapter, EVENTHANDLER_PRI_FIRST);
666	adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
667	     igb_unregister_vlan, adapter, EVENTHANDLER_PRI_FIRST);
668
669	igb_add_hw_stats(adapter);
670
671	/* Tell the stack that the interface is not active */
672	adapter->ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
673	adapter->ifp->if_drv_flags |=  IFF_DRV_OACTIVE;
674
675	adapter->led_dev = led_create(igb_led_func, adapter,
676	    device_get_nameunit(dev));
677
678	/*
679	** Configure Interrupts
680	*/
681	if ((adapter->msix > 1) && (igb_enable_msix))
682		error = igb_allocate_msix(adapter);
683	else /* MSI or Legacy */
684		error = igb_allocate_legacy(adapter);
685	if (error)
686		goto err_late;
687
688#ifdef DEV_NETMAP
689	igb_netmap_attach(adapter);
690#endif /* DEV_NETMAP */
691	INIT_DEBUGOUT("igb_attach: end");
692
693	return (0);
694
695err_late:
696	igb_detach(dev);
697	igb_free_transmit_structures(adapter);
698	igb_free_receive_structures(adapter);
699	igb_release_hw_control(adapter);
700err_pci:
701	igb_free_pci_resources(adapter);
702	if (adapter->ifp != NULL)
703		if_free(adapter->ifp);
704	free(adapter->mta, M_DEVBUF);
705	IGB_CORE_LOCK_DESTROY(adapter);
706
707	return (error);
708}
709
710/*********************************************************************
711 *  Device removal routine
712 *
713 *  The detach entry point is called when the driver is being removed.
714 *  This routine stops the adapter and deallocates all the resources
715 *  that were allocated for driver operation.
716 *
717 *  return 0 on success, positive on failure
718 *********************************************************************/
719
720static int
721igb_detach(device_t dev)
722{
723	struct adapter	*adapter = device_get_softc(dev);
724	struct ifnet	*ifp = adapter->ifp;
725
726	INIT_DEBUGOUT("igb_detach: begin");
727
728	/* Make sure VLANS are not using driver */
729	if (adapter->ifp->if_vlantrunk != NULL) {
730		device_printf(dev,"Vlan in use, detach first\n");
731		return (EBUSY);
732	}
733
734	ether_ifdetach(adapter->ifp);
735
736	if (adapter->led_dev != NULL)
737		led_destroy(adapter->led_dev);
738
739#ifdef DEVICE_POLLING
740	if (ifp->if_capenable & IFCAP_POLLING)
741		ether_poll_deregister(ifp);
742#endif
743
744	IGB_CORE_LOCK(adapter);
745	adapter->in_detach = 1;
746	igb_stop(adapter);
747	IGB_CORE_UNLOCK(adapter);
748
749	e1000_phy_hw_reset(&adapter->hw);
750
751	/* Give control back to firmware */
752	igb_release_manageability(adapter);
753	igb_release_hw_control(adapter);
754
755	if (adapter->wol) {
756		E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
757		E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
758		igb_enable_wakeup(dev);
759	}
760
761	/* Unregister VLAN events */
762	if (adapter->vlan_attach != NULL)
763		EVENTHANDLER_DEREGISTER(vlan_config, adapter->vlan_attach);
764	if (adapter->vlan_detach != NULL)
765		EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach);
766
767	callout_drain(&adapter->timer);
768
769#ifdef DEV_NETMAP
770	netmap_detach(adapter->ifp);
771#endif /* DEV_NETMAP */
772	igb_free_pci_resources(adapter);
773	bus_generic_detach(dev);
774	if_free(ifp);
775
776	igb_free_transmit_structures(adapter);
777	igb_free_receive_structures(adapter);
778	if (adapter->mta != NULL)
779		free(adapter->mta, M_DEVBUF);
780
781	IGB_CORE_LOCK_DESTROY(adapter);
782
783	return (0);
784}
785
786/*********************************************************************
787 *
788 *  Shutdown entry point
789 *
790 **********************************************************************/
791
792static int
793igb_shutdown(device_t dev)
794{
795	return igb_suspend(dev);
796}
797
798/*
799 * Suspend/resume device methods.
800 */
801static int
802igb_suspend(device_t dev)
803{
804	struct adapter *adapter = device_get_softc(dev);
805
806	IGB_CORE_LOCK(adapter);
807
808	igb_stop(adapter);
809
810        igb_release_manageability(adapter);
811	igb_release_hw_control(adapter);
812
813        if (adapter->wol) {
814                E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
815                E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
816                igb_enable_wakeup(dev);
817        }
818
819	IGB_CORE_UNLOCK(adapter);
820
821	return bus_generic_suspend(dev);
822}
823
824static int
825igb_resume(device_t dev)
826{
827	struct adapter *adapter = device_get_softc(dev);
828	struct tx_ring	*txr = adapter->tx_rings;
829	struct ifnet *ifp = adapter->ifp;
830
831	IGB_CORE_LOCK(adapter);
832	igb_init_locked(adapter);
833	igb_init_manageability(adapter);
834
835	if ((ifp->if_flags & IFF_UP) &&
836	    (ifp->if_drv_flags & IFF_DRV_RUNNING) && adapter->link_active) {
837		for (int i = 0; i < adapter->num_queues; i++, txr++) {
838			IGB_TX_LOCK(txr);
839#if __FreeBSD_version >= 800000
840			/* Process the stack queue only if not depleted */
841			if (((txr->queue_status & IGB_QUEUE_DEPLETED) == 0) &&
842			    !drbr_empty(ifp, txr->br))
843				igb_mq_start_locked(ifp, txr, NULL);
844#else
845			if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
846				igb_start_locked(txr, ifp);
847#endif
848			IGB_TX_UNLOCK(txr);
849		}
850	}
851	IGB_CORE_UNLOCK(adapter);
852
853	return bus_generic_resume(dev);
854}
855
856
857#if __FreeBSD_version < 800000
858
859/*********************************************************************
860 *  Transmit entry point
861 *
862 *  igb_start is called by the stack to initiate a transmit.
863 *  The driver will remain in this routine as long as there are
864 *  packets to transmit and transmit resources are available.
865 *  In case resources are not available stack is notified and
866 *  the packet is requeued.
867 **********************************************************************/
868
869static void
870igb_start_locked(struct tx_ring *txr, struct ifnet *ifp)
871{
872	struct adapter	*adapter = ifp->if_softc;
873	struct mbuf	*m_head;
874
875	IGB_TX_LOCK_ASSERT(txr);
876
877	if ((ifp->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
878	    IFF_DRV_RUNNING)
879		return;
880	if (!adapter->link_active)
881		return;
882
883	/* Call cleanup if number of TX descriptors low */
884	if (txr->tx_avail <= IGB_TX_CLEANUP_THRESHOLD)
885		igb_txeof(txr);
886
887	while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
888		if (txr->tx_avail <= IGB_MAX_SCATTER) {
889			txr->queue_status |= IGB_QUEUE_DEPLETED;
890			break;
891		}
892		IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
893		if (m_head == NULL)
894			break;
895		/*
896		 *  Encapsulation can modify our pointer, and or make it
897		 *  NULL on failure.  In that event, we can't requeue.
898		 */
899		if (igb_xmit(txr, &m_head)) {
900			if (m_head != NULL)
901				IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
902			if (txr->tx_avail <= IGB_MAX_SCATTER)
903				txr->queue_status |= IGB_QUEUE_DEPLETED;
904			break;
905		}
906
907		/* Send a copy of the frame to the BPF listener */
908		ETHER_BPF_MTAP(ifp, m_head);
909
910		/* Set watchdog on */
911		txr->watchdog_time = ticks;
912		txr->queue_status |= IGB_QUEUE_WORKING;
913	}
914}
915
916/*
917 * Legacy TX driver routine, called from the
918 * stack, always uses tx[0], and spins for it.
919 * Should not be used with multiqueue tx
920 */
921static void
922igb_start(struct ifnet *ifp)
923{
924	struct adapter	*adapter = ifp->if_softc;
925	struct tx_ring	*txr = adapter->tx_rings;
926
927	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
928		IGB_TX_LOCK(txr);
929		igb_start_locked(txr, ifp);
930		IGB_TX_UNLOCK(txr);
931	}
932	return;
933}
934
935#else /* __FreeBSD_version >= 800000 */
936
937/*
938** Multiqueue Transmit driver
939**
940*/
941static int
942igb_mq_start(struct ifnet *ifp, struct mbuf *m)
943{
944	struct adapter		*adapter = ifp->if_softc;
945	struct igb_queue	*que;
946	struct tx_ring		*txr;
947	int 			i, err = 0;
948
949	/* Which queue to use */
950	if ((m->m_flags & M_FLOWID) != 0)
951		i = m->m_pkthdr.flowid % adapter->num_queues;
952	else
953		i = curcpu % adapter->num_queues;
954
955	txr = &adapter->tx_rings[i];
956	que = &adapter->queues[i];
957	if (((txr->queue_status & IGB_QUEUE_DEPLETED) == 0) &&
958	    IGB_TX_TRYLOCK(txr)) {
959		struct mbuf *pm = NULL;
960		/*
961		** Try to queue first to avoid
962		** out-of-order delivery, but
963		** settle for it if that fails
964		*/
965		if (m && drbr_enqueue(ifp, txr->br, m))
966			pm = m;
967		err = igb_mq_start_locked(ifp, txr, pm);
968		IGB_TX_UNLOCK(txr);
969	} else {
970		err = drbr_enqueue(ifp, txr->br, m);
971		taskqueue_enqueue(que->tq, &txr->txq_task);
972	}
973
974	return (err);
975}
976
977static int
978igb_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr, struct mbuf *m)
979{
980	struct adapter  *adapter = txr->adapter;
981        struct mbuf     *next;
982        int             err = 0, enq;
983
984	IGB_TX_LOCK_ASSERT(txr);
985
986	if (((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) ||
987	    (txr->queue_status & IGB_QUEUE_DEPLETED) ||
988	    adapter->link_active == 0) {
989		if (m != NULL)
990			err = drbr_enqueue(ifp, txr->br, m);
991		return (err);
992	}
993
994	enq = 0;
995	if (m == NULL) {
996		next = drbr_dequeue(ifp, txr->br);
997	} else if (drbr_needs_enqueue(ifp, txr->br)) {
998		if ((err = drbr_enqueue(ifp, txr->br, m)) != 0)
999			return (err);
1000		next = drbr_dequeue(ifp, txr->br);
1001	} else
1002		next = m;
1003
1004	/* Process the queue */
1005	while (next != NULL) {
1006		if ((err = igb_xmit(txr, &next)) != 0) {
1007			if (next != NULL)
1008				err = drbr_enqueue(ifp, txr->br, next);
1009			break;
1010		}
1011		enq++;
1012		ifp->if_obytes += next->m_pkthdr.len;
1013		if (next->m_flags & M_MCAST)
1014			ifp->if_omcasts++;
1015		ETHER_BPF_MTAP(ifp, next);
1016		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
1017			break;
1018		next = drbr_dequeue(ifp, txr->br);
1019	}
1020	if (enq > 0) {
1021		/* Set the watchdog */
1022		txr->queue_status |= IGB_QUEUE_WORKING;
1023		txr->watchdog_time = ticks;
1024	}
1025	if (txr->tx_avail <= IGB_TX_CLEANUP_THRESHOLD)
1026		igb_txeof(txr);
1027	if (txr->tx_avail <= IGB_MAX_SCATTER)
1028		txr->queue_status |= IGB_QUEUE_DEPLETED;
1029	return (err);
1030}
1031
1032/*
1033 * Called from a taskqueue to drain queued transmit packets.
1034 */
1035static void
1036igb_deferred_mq_start(void *arg, int pending)
1037{
1038	struct tx_ring *txr = arg;
1039	struct adapter *adapter = txr->adapter;
1040	struct ifnet *ifp = adapter->ifp;
1041
1042	IGB_TX_LOCK(txr);
1043	if (!drbr_empty(ifp, txr->br))
1044		igb_mq_start_locked(ifp, txr, NULL);
1045	IGB_TX_UNLOCK(txr);
1046}
1047
1048/*
1049** Flush all ring buffers
1050*/
1051static void
1052igb_qflush(struct ifnet *ifp)
1053{
1054	struct adapter	*adapter = ifp->if_softc;
1055	struct tx_ring	*txr = adapter->tx_rings;
1056	struct mbuf	*m;
1057
1058	for (int i = 0; i < adapter->num_queues; i++, txr++) {
1059		IGB_TX_LOCK(txr);
1060		while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
1061			m_freem(m);
1062		IGB_TX_UNLOCK(txr);
1063	}
1064	if_qflush(ifp);
1065}
1066#endif /* __FreeBSD_version >= 800000 */
1067
1068/*********************************************************************
1069 *  Ioctl entry point
1070 *
1071 *  igb_ioctl is called when the user wants to configure the
1072 *  interface.
1073 *
1074 *  return 0 on success, positive on failure
1075 **********************************************************************/
1076
1077static int
1078igb_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
1079{
1080	struct adapter	*adapter = ifp->if_softc;
1081	struct ifreq	*ifr = (struct ifreq *)data;
1082#if defined(INET) || defined(INET6)
1083	struct ifaddr	*ifa = (struct ifaddr *)data;
1084#endif
1085	bool		avoid_reset = FALSE;
1086	int		error = 0;
1087
1088	if (adapter->in_detach)
1089		return (error);
1090
1091	switch (command) {
1092	case SIOCSIFADDR:
1093#ifdef INET
1094		if (ifa->ifa_addr->sa_family == AF_INET)
1095			avoid_reset = TRUE;
1096#endif
1097#ifdef INET6
1098		if (ifa->ifa_addr->sa_family == AF_INET6)
1099			avoid_reset = TRUE;
1100#endif
1101		/*
1102		** Calling init results in link renegotiation,
1103		** so we avoid doing it when possible.
1104		*/
1105		if (avoid_reset) {
1106			ifp->if_flags |= IFF_UP;
1107			if (!(ifp->if_drv_flags & IFF_DRV_RUNNING))
1108				igb_init(adapter);
1109#ifdef INET
1110			if (!(ifp->if_flags & IFF_NOARP))
1111				arp_ifinit(ifp, ifa);
1112#endif
1113		} else
1114			error = ether_ioctl(ifp, command, data);
1115		break;
1116	case SIOCSIFMTU:
1117	    {
1118		int max_frame_size;
1119
1120		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)");
1121
1122		IGB_CORE_LOCK(adapter);
1123		max_frame_size = 9234;
1124		if (ifr->ifr_mtu > max_frame_size - ETHER_HDR_LEN -
1125		    ETHER_CRC_LEN) {
1126			IGB_CORE_UNLOCK(adapter);
1127			error = EINVAL;
1128			break;
1129		}
1130
1131		ifp->if_mtu = ifr->ifr_mtu;
1132		adapter->max_frame_size =
1133		    ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
1134		igb_init_locked(adapter);
1135		IGB_CORE_UNLOCK(adapter);
1136		break;
1137	    }
1138	case SIOCSIFFLAGS:
1139		IOCTL_DEBUGOUT("ioctl rcv'd:\
1140		    SIOCSIFFLAGS (Set Interface Flags)");
1141		IGB_CORE_LOCK(adapter);
1142		if (ifp->if_flags & IFF_UP) {
1143			if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) {
1144				if ((ifp->if_flags ^ adapter->if_flags) &
1145				    (IFF_PROMISC | IFF_ALLMULTI)) {
1146					igb_disable_promisc(adapter);
1147					igb_set_promisc(adapter);
1148				}
1149			} else
1150				igb_init_locked(adapter);
1151		} else
1152			if (ifp->if_drv_flags & IFF_DRV_RUNNING)
1153				igb_stop(adapter);
1154		adapter->if_flags = ifp->if_flags;
1155		IGB_CORE_UNLOCK(adapter);
1156		break;
1157	case SIOCADDMULTI:
1158	case SIOCDELMULTI:
1159		IOCTL_DEBUGOUT("ioctl rcv'd: SIOC(ADD|DEL)MULTI");
1160		if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1161			IGB_CORE_LOCK(adapter);
1162			igb_disable_intr(adapter);
1163			igb_set_multi(adapter);
1164#ifdef DEVICE_POLLING
1165			if (!(ifp->if_capenable & IFCAP_POLLING))
1166#endif
1167				igb_enable_intr(adapter);
1168			IGB_CORE_UNLOCK(adapter);
1169		}
1170		break;
1171	case SIOCSIFMEDIA:
1172		/* Check SOL/IDER usage */
1173		IGB_CORE_LOCK(adapter);
1174		if (e1000_check_reset_block(&adapter->hw)) {
1175			IGB_CORE_UNLOCK(adapter);
1176			device_printf(adapter->dev, "Media change is"
1177			    " blocked due to SOL/IDER session.\n");
1178			break;
1179		}
1180		IGB_CORE_UNLOCK(adapter);
1181	case SIOCGIFMEDIA:
1182		IOCTL_DEBUGOUT("ioctl rcv'd: \
1183		    SIOCxIFMEDIA (Get/Set Interface Media)");
1184		error = ifmedia_ioctl(ifp, ifr, &adapter->media, command);
1185		break;
1186	case SIOCSIFCAP:
1187	    {
1188		int mask, reinit;
1189
1190		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFCAP (Set Capabilities)");
1191		reinit = 0;
1192		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
1193#ifdef DEVICE_POLLING
1194		if (mask & IFCAP_POLLING) {
1195			if (ifr->ifr_reqcap & IFCAP_POLLING) {
1196				error = ether_poll_register(igb_poll, ifp);
1197				if (error)
1198					return (error);
1199				IGB_CORE_LOCK(adapter);
1200				igb_disable_intr(adapter);
1201				ifp->if_capenable |= IFCAP_POLLING;
1202				IGB_CORE_UNLOCK(adapter);
1203			} else {
1204				error = ether_poll_deregister(ifp);
1205				/* Enable interrupt even in error case */
1206				IGB_CORE_LOCK(adapter);
1207				igb_enable_intr(adapter);
1208				ifp->if_capenable &= ~IFCAP_POLLING;
1209				IGB_CORE_UNLOCK(adapter);
1210			}
1211		}
1212#endif
1213		if (mask & IFCAP_HWCSUM) {
1214			ifp->if_capenable ^= IFCAP_HWCSUM;
1215			reinit = 1;
1216		}
1217		if (mask & IFCAP_TSO4) {
1218			ifp->if_capenable ^= IFCAP_TSO4;
1219			reinit = 1;
1220		}
1221		if (mask & IFCAP_VLAN_HWTAGGING) {
1222			ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
1223			reinit = 1;
1224		}
1225		if (mask & IFCAP_VLAN_HWFILTER) {
1226			ifp->if_capenable ^= IFCAP_VLAN_HWFILTER;
1227			reinit = 1;
1228		}
1229		if (mask & IFCAP_VLAN_HWTSO) {
1230			ifp->if_capenable ^= IFCAP_VLAN_HWTSO;
1231			reinit = 1;
1232		}
1233		if (mask & IFCAP_LRO) {
1234			ifp->if_capenable ^= IFCAP_LRO;
1235			reinit = 1;
1236		}
1237		if (reinit && (ifp->if_drv_flags & IFF_DRV_RUNNING))
1238			igb_init(adapter);
1239		VLAN_CAPABILITIES(ifp);
1240		break;
1241	    }
1242
1243	default:
1244		error = ether_ioctl(ifp, command, data);
1245		break;
1246	}
1247
1248	return (error);
1249}
1250
1251
1252/*********************************************************************
1253 *  Init entry point
1254 *
1255 *  This routine is used in two ways. It is used by the stack as
1256 *  init entry point in network interface structure. It is also used
1257 *  by the driver as a hw/sw initialization routine to get to a
1258 *  consistent state.
1259 *
1260 *  return 0 on success, positive on failure
1261 **********************************************************************/
1262
1263static void
1264igb_init_locked(struct adapter *adapter)
1265{
1266	struct ifnet	*ifp = adapter->ifp;
1267	device_t	dev = adapter->dev;
1268
1269	INIT_DEBUGOUT("igb_init: begin");
1270
1271	IGB_CORE_LOCK_ASSERT(adapter);
1272
1273	igb_disable_intr(adapter);
1274	callout_stop(&adapter->timer);
1275
1276	/* Get the latest mac address, User can use a LAA */
1277        bcopy(IF_LLADDR(adapter->ifp), adapter->hw.mac.addr,
1278              ETHER_ADDR_LEN);
1279
1280	/* Put the address into the Receive Address Array */
1281	e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);
1282
1283	igb_reset(adapter);
1284	igb_update_link_status(adapter);
1285
1286	E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);
1287
1288	/* Set hardware offload abilities */
1289	ifp->if_hwassist = 0;
1290	if (ifp->if_capenable & IFCAP_TXCSUM) {
1291		ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
1292#if __FreeBSD_version >= 800000
1293		if (adapter->hw.mac.type == e1000_82576)
1294			ifp->if_hwassist |= CSUM_SCTP;
1295#endif
1296	}
1297
1298	if (ifp->if_capenable & IFCAP_TSO4)
1299		ifp->if_hwassist |= CSUM_TSO;
1300
1301	/* Configure for OS presence */
1302	igb_init_manageability(adapter);
1303
1304	/* Prepare transmit descriptors and buffers */
1305	igb_setup_transmit_structures(adapter);
1306	igb_initialize_transmit_units(adapter);
1307
1308	/* Setup Multicast table */
1309	igb_set_multi(adapter);
1310
1311	/*
1312	** Figure out the desired mbuf pool
1313	** for doing jumbo/packetsplit
1314	*/
1315	if (adapter->max_frame_size <= 2048)
1316		adapter->rx_mbuf_sz = MCLBYTES;
1317	else if (adapter->max_frame_size <= 4096)
1318		adapter->rx_mbuf_sz = MJUMPAGESIZE;
1319	else
1320		adapter->rx_mbuf_sz = MJUM9BYTES;
1321
1322	/* Prepare receive descriptors and buffers */
1323	if (igb_setup_receive_structures(adapter)) {
1324		device_printf(dev, "Could not setup receive structures\n");
1325		return;
1326	}
1327	igb_initialize_receive_units(adapter);
1328
1329        /* Enable VLAN support */
1330	if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING)
1331		igb_setup_vlan_hw_support(adapter);
1332
1333	/* Don't lose promiscuous settings */
1334	igb_set_promisc(adapter);
1335
1336	ifp->if_drv_flags |= IFF_DRV_RUNNING;
1337	ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
1338
1339	callout_reset(&adapter->timer, hz, igb_local_timer, adapter);
1340	e1000_clear_hw_cntrs_base_generic(&adapter->hw);
1341
1342	if (adapter->msix > 1) /* Set up queue routing */
1343		igb_configure_queues(adapter);
1344
1345	/* this clears any pending interrupts */
1346	E1000_READ_REG(&adapter->hw, E1000_ICR);
1347#ifdef DEVICE_POLLING
1348	/*
1349	 * Only enable interrupts if we are not polling, make sure
1350	 * they are off otherwise.
1351	 */
1352	if (ifp->if_capenable & IFCAP_POLLING)
1353		igb_disable_intr(adapter);
1354	else
1355#endif /* DEVICE_POLLING */
1356	{
1357		igb_enable_intr(adapter);
1358		E1000_WRITE_REG(&adapter->hw, E1000_ICS, E1000_ICS_LSC);
1359	}
1360
1361	/* Set Energy Efficient Ethernet */
1362	if (adapter->hw.phy.media_type == e1000_media_type_copper)
1363		e1000_set_eee_i350(&adapter->hw);
1364}
1365
1366static void
1367igb_init(void *arg)
1368{
1369	struct adapter *adapter = arg;
1370
1371	IGB_CORE_LOCK(adapter);
1372	igb_init_locked(adapter);
1373	IGB_CORE_UNLOCK(adapter);
1374}
1375
1376
1377static void
1378igb_handle_que(void *context, int pending)
1379{
1380	struct igb_queue *que = context;
1381	struct adapter *adapter = que->adapter;
1382	struct tx_ring *txr = que->txr;
1383	struct ifnet	*ifp = adapter->ifp;
1384
1385	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1386		bool	more;
1387
1388		more = igb_rxeof(que, adapter->rx_process_limit, NULL);
1389
1390		IGB_TX_LOCK(txr);
1391		igb_txeof(txr);
1392#if __FreeBSD_version >= 800000
1393		/* Process the stack queue only if not depleted */
1394		if (((txr->queue_status & IGB_QUEUE_DEPLETED) == 0) &&
1395		    !drbr_empty(ifp, txr->br))
1396			igb_mq_start_locked(ifp, txr, NULL);
1397#else
1398		if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1399			igb_start_locked(txr, ifp);
1400#endif
1401		IGB_TX_UNLOCK(txr);
1402		/* Do we need another? */
1403		if (more) {
1404			taskqueue_enqueue(que->tq, &que->que_task);
1405			return;
1406		}
1407	}
1408
1409#ifdef DEVICE_POLLING
1410	if (ifp->if_capenable & IFCAP_POLLING)
1411		return;
1412#endif
1413	/* Reenable this interrupt */
1414	if (que->eims)
1415		E1000_WRITE_REG(&adapter->hw, E1000_EIMS, que->eims);
1416	else
1417		igb_enable_intr(adapter);
1418}
1419
1420/* Deal with link in a sleepable context */
1421static void
1422igb_handle_link(void *context, int pending)
1423{
1424	struct adapter *adapter = context;
1425
1426	IGB_CORE_LOCK(adapter);
1427	igb_handle_link_locked(adapter);
1428	IGB_CORE_UNLOCK(adapter);
1429}
1430
1431static void
1432igb_handle_link_locked(struct adapter *adapter)
1433{
1434	struct tx_ring	*txr = adapter->tx_rings;
1435	struct ifnet *ifp = adapter->ifp;
1436
1437	IGB_CORE_LOCK_ASSERT(adapter);
1438	adapter->hw.mac.get_link_status = 1;
1439	igb_update_link_status(adapter);
1440	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) && adapter->link_active) {
1441		for (int i = 0; i < adapter->num_queues; i++, txr++) {
1442			IGB_TX_LOCK(txr);
1443#if __FreeBSD_version >= 800000
1444			/* Process the stack queue only if not depleted */
1445			if (((txr->queue_status & IGB_QUEUE_DEPLETED) == 0) &&
1446			    !drbr_empty(ifp, txr->br))
1447				igb_mq_start_locked(ifp, txr, NULL);
1448#else
1449			if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1450				igb_start_locked(txr, ifp);
1451#endif
1452			IGB_TX_UNLOCK(txr);
1453		}
1454	}
1455}
1456
1457/*********************************************************************
1458 *
1459 *  MSI/Legacy Deferred
1460 *  Interrupt Service routine
1461 *
1462 *********************************************************************/
1463static int
1464igb_irq_fast(void *arg)
1465{
1466	struct adapter		*adapter = arg;
1467	struct igb_queue	*que = adapter->queues;
1468	u32			reg_icr;
1469
1470
1471	reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1472
1473	/* Hot eject?  */
1474	if (reg_icr == 0xffffffff)
1475		return FILTER_STRAY;
1476
1477	/* Definitely not our interrupt.  */
1478	if (reg_icr == 0x0)
1479		return FILTER_STRAY;
1480
1481	if ((reg_icr & E1000_ICR_INT_ASSERTED) == 0)
1482		return FILTER_STRAY;
1483
1484	/*
1485	 * Mask interrupts until the taskqueue is finished running.  This is
1486	 * cheap, just assume that it is needed.  This also works around the
1487	 * MSI message reordering errata on certain systems.
1488	 */
1489	igb_disable_intr(adapter);
1490	taskqueue_enqueue(que->tq, &que->que_task);
1491
1492	/* Link status change */
1493	if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC))
1494		taskqueue_enqueue(que->tq, &adapter->link_task);
1495
1496	if (reg_icr & E1000_ICR_RXO)
1497		adapter->rx_overruns++;
1498	return FILTER_HANDLED;
1499}
1500
1501#ifdef DEVICE_POLLING
1502#if __FreeBSD_version >= 800000
1503#define POLL_RETURN_COUNT(a) (a)
1504static int
1505#else
1506#define POLL_RETURN_COUNT(a)
1507static void
1508#endif
1509igb_poll(struct ifnet *ifp, enum poll_cmd cmd, int count)
1510{
1511	struct adapter		*adapter = ifp->if_softc;
1512	struct igb_queue	*que;
1513	struct tx_ring		*txr;
1514	u32			reg_icr, rx_done = 0;
1515	u32			loop = IGB_MAX_LOOP;
1516	bool			more;
1517
1518	IGB_CORE_LOCK(adapter);
1519	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
1520		IGB_CORE_UNLOCK(adapter);
1521		return POLL_RETURN_COUNT(rx_done);
1522	}
1523
1524	if (cmd == POLL_AND_CHECK_STATUS) {
1525		reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1526		/* Link status change */
1527		if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC))
1528			igb_handle_link_locked(adapter);
1529
1530		if (reg_icr & E1000_ICR_RXO)
1531			adapter->rx_overruns++;
1532	}
1533	IGB_CORE_UNLOCK(adapter);
1534
1535	for (int i = 0; i < adapter->num_queues; i++) {
1536		que = &adapter->queues[i];
1537		txr = que->txr;
1538
1539		igb_rxeof(que, count, &rx_done);
1540
1541		IGB_TX_LOCK(txr);
1542		do {
1543			more = igb_txeof(txr);
1544		} while (loop-- && more);
1545#if __FreeBSD_version >= 800000
1546		if (!drbr_empty(ifp, txr->br))
1547			igb_mq_start_locked(ifp, txr, NULL);
1548#else
1549		if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1550			igb_start_locked(txr, ifp);
1551#endif
1552		IGB_TX_UNLOCK(txr);
1553	}
1554
1555	return POLL_RETURN_COUNT(rx_done);
1556}
1557#endif /* DEVICE_POLLING */
1558
1559/*********************************************************************
1560 *
1561 *  MSIX Que Interrupt Service routine
1562 *
1563 **********************************************************************/
1564static void
1565igb_msix_que(void *arg)
1566{
1567	struct igb_queue *que = arg;
1568	struct adapter *adapter = que->adapter;
1569	struct ifnet   *ifp = adapter->ifp;
1570	struct tx_ring *txr = que->txr;
1571	struct rx_ring *rxr = que->rxr;
1572	u32		newitr = 0;
1573	bool		more_rx;
1574
1575	E1000_WRITE_REG(&adapter->hw, E1000_EIMC, que->eims);
1576	++que->irqs;
1577
1578	IGB_TX_LOCK(txr);
1579	igb_txeof(txr);
1580#if __FreeBSD_version >= 800000
1581	/* Process the stack queue only if not depleted */
1582	if (((txr->queue_status & IGB_QUEUE_DEPLETED) == 0) &&
1583	    !drbr_empty(ifp, txr->br))
1584		igb_mq_start_locked(ifp, txr, NULL);
1585#else
1586	if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1587		igb_start_locked(txr, ifp);
1588#endif
1589	IGB_TX_UNLOCK(txr);
1590
1591	more_rx = igb_rxeof(que, adapter->rx_process_limit, NULL);
1592
1593	if (adapter->enable_aim == FALSE)
1594		goto no_calc;
1595	/*
1596	** Do Adaptive Interrupt Moderation:
1597        **  - Write out last calculated setting
1598	**  - Calculate based on average size over
1599	**    the last interval.
1600	*/
1601        if (que->eitr_setting)
1602                E1000_WRITE_REG(&adapter->hw,
1603                    E1000_EITR(que->msix), que->eitr_setting);
1604
1605        que->eitr_setting = 0;
1606
1607        /* Idle, do nothing */
1608        if ((txr->bytes == 0) && (rxr->bytes == 0))
1609                goto no_calc;
1610
1611        /* Used half Default if sub-gig */
1612        if (adapter->link_speed != 1000)
1613                newitr = IGB_DEFAULT_ITR / 2;
1614        else {
1615		if ((txr->bytes) && (txr->packets))
1616                	newitr = txr->bytes/txr->packets;
1617		if ((rxr->bytes) && (rxr->packets))
1618			newitr = max(newitr,
1619			    (rxr->bytes / rxr->packets));
1620                newitr += 24; /* account for hardware frame, crc */
1621		/* set an upper boundary */
1622		newitr = min(newitr, 3000);
1623		/* Be nice to the mid range */
1624                if ((newitr > 300) && (newitr < 1200))
1625                        newitr = (newitr / 3);
1626                else
1627                        newitr = (newitr / 2);
1628        }
1629        newitr &= 0x7FFC;  /* Mask invalid bits */
1630        if (adapter->hw.mac.type == e1000_82575)
1631                newitr |= newitr << 16;
1632        else
1633                newitr |= E1000_EITR_CNT_IGNR;
1634
1635        /* save for next interrupt */
1636        que->eitr_setting = newitr;
1637
1638        /* Reset state */
1639        txr->bytes = 0;
1640        txr->packets = 0;
1641        rxr->bytes = 0;
1642        rxr->packets = 0;
1643
1644no_calc:
1645	/* Schedule a clean task if needed*/
1646	if (more_rx)
1647		taskqueue_enqueue(que->tq, &que->que_task);
1648	else
1649		/* Reenable this interrupt */
1650		E1000_WRITE_REG(&adapter->hw, E1000_EIMS, que->eims);
1651	return;
1652}
1653
1654
1655/*********************************************************************
1656 *
1657 *  MSIX Link Interrupt Service routine
1658 *
1659 **********************************************************************/
1660
1661static void
1662igb_msix_link(void *arg)
1663{
1664	struct adapter	*adapter = arg;
1665	u32       	icr;
1666
1667	++adapter->link_irq;
1668	icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1669	if (!(icr & E1000_ICR_LSC))
1670		goto spurious;
1671	igb_handle_link(adapter, 0);
1672
1673spurious:
1674	/* Rearm */
1675	E1000_WRITE_REG(&adapter->hw, E1000_IMS, E1000_IMS_LSC);
1676	E1000_WRITE_REG(&adapter->hw, E1000_EIMS, adapter->link_mask);
1677	return;
1678}
1679
1680
1681/*********************************************************************
1682 *
1683 *  Media Ioctl callback
1684 *
1685 *  This routine is called whenever the user queries the status of
1686 *  the interface using ifconfig.
1687 *
1688 **********************************************************************/
1689static void
1690igb_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
1691{
1692	struct adapter *adapter = ifp->if_softc;
1693	u_char fiber_type = IFM_1000_SX;
1694
1695	INIT_DEBUGOUT("igb_media_status: begin");
1696
1697	IGB_CORE_LOCK(adapter);
1698	igb_update_link_status(adapter);
1699
1700	ifmr->ifm_status = IFM_AVALID;
1701	ifmr->ifm_active = IFM_ETHER;
1702
1703	if (!adapter->link_active) {
1704		IGB_CORE_UNLOCK(adapter);
1705		return;
1706	}
1707
1708	ifmr->ifm_status |= IFM_ACTIVE;
1709
1710	if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
1711	    (adapter->hw.phy.media_type == e1000_media_type_internal_serdes))
1712		ifmr->ifm_active |= fiber_type | IFM_FDX;
1713	else {
1714		switch (adapter->link_speed) {
1715		case 10:
1716			ifmr->ifm_active |= IFM_10_T;
1717			break;
1718		case 100:
1719			ifmr->ifm_active |= IFM_100_TX;
1720			break;
1721		case 1000:
1722			ifmr->ifm_active |= IFM_1000_T;
1723			break;
1724		}
1725		if (adapter->link_duplex == FULL_DUPLEX)
1726			ifmr->ifm_active |= IFM_FDX;
1727		else
1728			ifmr->ifm_active |= IFM_HDX;
1729	}
1730	IGB_CORE_UNLOCK(adapter);
1731}
1732
1733/*********************************************************************
1734 *
1735 *  Media Ioctl callback
1736 *
1737 *  This routine is called when the user changes speed/duplex using
1738 *  media/mediopt option with ifconfig.
1739 *
1740 **********************************************************************/
1741static int
1742igb_media_change(struct ifnet *ifp)
1743{
1744	struct adapter *adapter = ifp->if_softc;
1745	struct ifmedia  *ifm = &adapter->media;
1746
1747	INIT_DEBUGOUT("igb_media_change: begin");
1748
1749	if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
1750		return (EINVAL);
1751
1752	IGB_CORE_LOCK(adapter);
1753	switch (IFM_SUBTYPE(ifm->ifm_media)) {
1754	case IFM_AUTO:
1755		adapter->hw.mac.autoneg = DO_AUTO_NEG;
1756		adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
1757		break;
1758	case IFM_1000_LX:
1759	case IFM_1000_SX:
1760	case IFM_1000_T:
1761		adapter->hw.mac.autoneg = DO_AUTO_NEG;
1762		adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
1763		break;
1764	case IFM_100_TX:
1765		adapter->hw.mac.autoneg = FALSE;
1766		adapter->hw.phy.autoneg_advertised = 0;
1767		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1768			adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_FULL;
1769		else
1770			adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_HALF;
1771		break;
1772	case IFM_10_T:
1773		adapter->hw.mac.autoneg = FALSE;
1774		adapter->hw.phy.autoneg_advertised = 0;
1775		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1776			adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_FULL;
1777		else
1778			adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_HALF;
1779		break;
1780	default:
1781		device_printf(adapter->dev, "Unsupported media type\n");
1782	}
1783
1784	igb_init_locked(adapter);
1785	IGB_CORE_UNLOCK(adapter);
1786
1787	return (0);
1788}
1789
1790
1791/*********************************************************************
1792 *
1793 *  This routine maps the mbufs to Advanced TX descriptors.
1794 *
1795 **********************************************************************/
1796static int
1797igb_xmit(struct tx_ring *txr, struct mbuf **m_headp)
1798{
1799	struct adapter		*adapter = txr->adapter;
1800	bus_dma_segment_t	segs[IGB_MAX_SCATTER];
1801	bus_dmamap_t		map;
1802	struct igb_tx_buffer	*tx_buffer, *tx_buffer_mapped;
1803	union e1000_adv_tx_desc	*txd = NULL;
1804	struct mbuf		*m_head = *m_headp;
1805	struct ether_vlan_header *eh = NULL;
1806	struct ip		*ip = NULL;
1807	struct tcphdr		*th = NULL;
1808	u32			hdrlen, cmd_type_len, olinfo_status = 0;
1809	int			ehdrlen, poff;
1810	int			nsegs, i, first, last = 0;
1811	int			error, do_tso, remap = 1;
1812
1813	/* Set basic descriptor constants */
1814	cmd_type_len = E1000_ADVTXD_DTYP_DATA;
1815	cmd_type_len |= E1000_ADVTXD_DCMD_IFCS | E1000_ADVTXD_DCMD_DEXT;
1816	if (m_head->m_flags & M_VLANTAG)
1817		cmd_type_len |= E1000_ADVTXD_DCMD_VLE;
1818
1819retry:
1820	m_head = *m_headp;
1821	do_tso = ((m_head->m_pkthdr.csum_flags & CSUM_TSO) != 0);
1822	hdrlen = ehdrlen = poff = 0;
1823
1824	/*
1825	 * Intel recommends entire IP/TCP header length reside in a single
1826	 * buffer. If multiple descriptors are used to describe the IP and
1827	 * TCP header, each descriptor should describe one or more
1828	 * complete headers; descriptors referencing only parts of headers
1829	 * are not supported. If all layer headers are not coalesced into
1830	 * a single buffer, each buffer should not cross a 4KB boundary,
1831	 * or be larger than the maximum read request size.
1832	 * Controller also requires modifing IP/TCP header to make TSO work
1833	 * so we firstly get a writable mbuf chain then coalesce ethernet/
1834	 * IP/TCP header into a single buffer to meet the requirement of
1835	 * controller. This also simplifies IP/TCP/UDP checksum offloading
1836	 * which also has similiar restrictions.
1837	 */
1838	if (do_tso || m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD) {
1839		if (do_tso || (m_head->m_next != NULL &&
1840		    m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD)) {
1841			if (M_WRITABLE(*m_headp) == 0) {
1842				m_head = m_dup(*m_headp, M_DONTWAIT);
1843				m_freem(*m_headp);
1844				if (m_head == NULL) {
1845					*m_headp = NULL;
1846					return (ENOBUFS);
1847				}
1848				*m_headp = m_head;
1849			}
1850		}
1851		/*
1852		 * Assume IPv4, we don't have TSO/checksum offload support
1853		 * for IPv6 yet.
1854		 */
1855		ehdrlen = sizeof(struct ether_header);
1856		m_head = m_pullup(m_head, ehdrlen);
1857		if (m_head == NULL) {
1858			*m_headp = NULL;
1859			return (ENOBUFS);
1860		}
1861		eh = mtod(m_head, struct ether_vlan_header *);
1862		if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
1863			ehdrlen = sizeof(struct ether_vlan_header);
1864			m_head = m_pullup(m_head, ehdrlen);
1865			if (m_head == NULL) {
1866				*m_headp = NULL;
1867				return (ENOBUFS);
1868			}
1869		}
1870		m_head = m_pullup(m_head, ehdrlen + sizeof(struct ip));
1871		if (m_head == NULL) {
1872			*m_headp = NULL;
1873			return (ENOBUFS);
1874		}
1875		ip = (struct ip *)(mtod(m_head, char *) + ehdrlen);
1876		poff = ehdrlen + (ip->ip_hl << 2);
1877		if (do_tso) {
1878			m_head = m_pullup(m_head, poff + sizeof(struct tcphdr));
1879			if (m_head == NULL) {
1880				*m_headp = NULL;
1881				return (ENOBUFS);
1882			}
1883			/*
1884			 * The pseudo TCP checksum does not include TCP payload
1885			 * length so driver should recompute the checksum here
1886			 * what hardware expect to see. This is adherence of
1887			 * Microsoft's Large Send specification.
1888			 */
1889			th = (struct tcphdr *)(mtod(m_head, char *) + poff);
1890			th->th_sum = in_pseudo(ip->ip_src.s_addr,
1891			    ip->ip_dst.s_addr, htons(IPPROTO_TCP));
1892			/* Keep track of the full header length */
1893			hdrlen = poff + (th->th_off << 2);
1894		} else if (m_head->m_pkthdr.csum_flags & CSUM_TCP) {
1895			m_head = m_pullup(m_head, poff + sizeof(struct tcphdr));
1896			if (m_head == NULL) {
1897				*m_headp = NULL;
1898				return (ENOBUFS);
1899			}
1900			th = (struct tcphdr *)(mtod(m_head, char *) + poff);
1901			m_head = m_pullup(m_head, poff + (th->th_off << 2));
1902			if (m_head == NULL) {
1903				*m_headp = NULL;
1904				return (ENOBUFS);
1905			}
1906			ip = (struct ip *)(mtod(m_head, char *) + ehdrlen);
1907			th = (struct tcphdr *)(mtod(m_head, char *) + poff);
1908		} else if (m_head->m_pkthdr.csum_flags & CSUM_UDP) {
1909			m_head = m_pullup(m_head, poff + sizeof(struct udphdr));
1910			if (m_head == NULL) {
1911				*m_headp = NULL;
1912				return (ENOBUFS);
1913			}
1914			ip = (struct ip *)(mtod(m_head, char *) + ehdrlen);
1915		}
1916		*m_headp = m_head;
1917	}
1918
1919	/*
1920	 * Map the packet for DMA
1921	 *
1922	 * Capture the first descriptor index,
1923	 * this descriptor will have the index
1924	 * of the EOP which is the only one that
1925	 * now gets a DONE bit writeback.
1926	 */
1927	first = txr->next_avail_desc;
1928	tx_buffer = &txr->tx_buffers[first];
1929	tx_buffer_mapped = tx_buffer;
1930	map = tx_buffer->map;
1931
1932	error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
1933	    *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
1934
1935	/*
1936	 * There are two types of errors we can (try) to handle:
1937	 * - EFBIG means the mbuf chain was too long and bus_dma ran
1938	 *   out of segments.  Defragment the mbuf chain and try again.
1939	 * - ENOMEM means bus_dma could not obtain enough bounce buffers
1940	 *   at this point in time.  Defer sending and try again later.
1941	 * All other errors, in particular EINVAL, are fatal and prevent the
1942	 * mbuf chain from ever going through.  Drop it and report error.
1943	 */
1944	if (error == EFBIG && remap) {
1945		struct mbuf *m;
1946
1947		m = m_defrag(*m_headp, M_DONTWAIT);
1948		if (m == NULL) {
1949			adapter->mbuf_defrag_failed++;
1950			m_freem(*m_headp);
1951			*m_headp = NULL;
1952			return (ENOBUFS);
1953		}
1954		*m_headp = m;
1955
1956		/* Try it again, but only once */
1957		remap = 0;
1958		goto retry;
1959	} else if (error == ENOMEM) {
1960		adapter->no_tx_dma_setup++;
1961		return (error);
1962	} else if (error != 0) {
1963		adapter->no_tx_dma_setup++;
1964		m_freem(*m_headp);
1965		*m_headp = NULL;
1966		return (error);
1967	}
1968
1969	/*
1970	** Make sure we don't overrun the ring,
1971	** we need nsegs descriptors and one for
1972	** the context descriptor used for the
1973	** offloads.
1974	*/
1975        if ((nsegs + 1) > (txr->tx_avail - 2)) {
1976                txr->no_desc_avail++;
1977		bus_dmamap_unload(txr->txtag, map);
1978		return (ENOBUFS);
1979        }
1980	m_head = *m_headp;
1981
1982	/* Do hardware assists:
1983         * Set up the context descriptor, used
1984         * when any hardware offload is done.
1985         * This includes CSUM, VLAN, and TSO.
1986         * It will use the first descriptor.
1987         */
1988
1989	if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
1990		if (igb_tso_setup(txr, m_head, ehdrlen, ip, th)) {
1991			cmd_type_len |= E1000_ADVTXD_DCMD_TSE;
1992			olinfo_status |= E1000_TXD_POPTS_IXSM << 8;
1993			olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
1994		} else
1995			return (ENXIO);
1996	} else if (igb_tx_ctx_setup(txr, m_head))
1997			olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
1998
1999	/* Calculate payload length */
2000	olinfo_status |= ((m_head->m_pkthdr.len - hdrlen)
2001	    << E1000_ADVTXD_PAYLEN_SHIFT);
2002
2003	/* 82575 needs the queue index added */
2004	if (adapter->hw.mac.type == e1000_82575)
2005		olinfo_status |= txr->me << 4;
2006
2007	/* Set up our transmit descriptors */
2008	i = txr->next_avail_desc;
2009	for (int j = 0; j < nsegs; j++) {
2010		bus_size_t seg_len;
2011		bus_addr_t seg_addr;
2012
2013		tx_buffer = &txr->tx_buffers[i];
2014		txd = (union e1000_adv_tx_desc *)&txr->tx_base[i];
2015		seg_addr = segs[j].ds_addr;
2016		seg_len  = segs[j].ds_len;
2017
2018		txd->read.buffer_addr = htole64(seg_addr);
2019		txd->read.cmd_type_len = htole32(cmd_type_len | seg_len);
2020		txd->read.olinfo_status = htole32(olinfo_status);
2021		last = i;
2022		if (++i == adapter->num_tx_desc)
2023			i = 0;
2024		tx_buffer->m_head = NULL;
2025		tx_buffer->next_eop = -1;
2026	}
2027
2028	txr->next_avail_desc = i;
2029	txr->tx_avail -= nsegs;
2030        tx_buffer->m_head = m_head;
2031
2032	/*
2033	** Here we swap the map so the last descriptor,
2034	** which gets the completion interrupt has the
2035	** real map, and the first descriptor gets the
2036	** unused map from this descriptor.
2037	*/
2038	tx_buffer_mapped->map = tx_buffer->map;
2039	tx_buffer->map = map;
2040        bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);
2041
2042        /*
2043         * Last Descriptor of Packet
2044	 * needs End Of Packet (EOP)
2045	 * and Report Status (RS)
2046         */
2047        txd->read.cmd_type_len |=
2048	    htole32(E1000_ADVTXD_DCMD_EOP | E1000_ADVTXD_DCMD_RS);
2049	/*
2050	 * Keep track in the first buffer which
2051	 * descriptor will be written back
2052	 */
2053	tx_buffer = &txr->tx_buffers[first];
2054	tx_buffer->next_eop = last;
2055	/* Update the watchdog time early and often */
2056	txr->watchdog_time = ticks;
2057
2058	/*
2059	 * Advance the Transmit Descriptor Tail (TDT), this tells the E1000
2060	 * that this frame is available to transmit.
2061	 */
2062	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
2063	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
2064	E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), i);
2065	++txr->tx_packets;
2066
2067	return (0);
2068}
2069static void
2070igb_set_promisc(struct adapter *adapter)
2071{
2072	struct ifnet	*ifp = adapter->ifp;
2073	struct e1000_hw *hw = &adapter->hw;
2074	u32		reg;
2075
2076	if (adapter->vf_ifp) {
2077		e1000_promisc_set_vf(hw, e1000_promisc_enabled);
2078		return;
2079	}
2080
2081	reg = E1000_READ_REG(hw, E1000_RCTL);
2082	if (ifp->if_flags & IFF_PROMISC) {
2083		reg |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
2084		E1000_WRITE_REG(hw, E1000_RCTL, reg);
2085	} else if (ifp->if_flags & IFF_ALLMULTI) {
2086		reg |= E1000_RCTL_MPE;
2087		reg &= ~E1000_RCTL_UPE;
2088		E1000_WRITE_REG(hw, E1000_RCTL, reg);
2089	}
2090}
2091
2092static void
2093igb_disable_promisc(struct adapter *adapter)
2094{
2095	struct e1000_hw *hw = &adapter->hw;
2096	u32		reg;
2097
2098	if (adapter->vf_ifp) {
2099		e1000_promisc_set_vf(hw, e1000_promisc_disabled);
2100		return;
2101	}
2102	reg = E1000_READ_REG(hw, E1000_RCTL);
2103	reg &=  (~E1000_RCTL_UPE);
2104	reg &=  (~E1000_RCTL_MPE);
2105	E1000_WRITE_REG(hw, E1000_RCTL, reg);
2106}
2107
2108
2109/*********************************************************************
2110 *  Multicast Update
2111 *
2112 *  This routine is called whenever multicast address list is updated.
2113 *
2114 **********************************************************************/
2115
2116static void
2117igb_set_multi(struct adapter *adapter)
2118{
2119	struct ifnet	*ifp = adapter->ifp;
2120	struct ifmultiaddr *ifma;
2121	u32 reg_rctl = 0;
2122	u8  *mta;
2123
2124	int mcnt = 0;
2125
2126	IOCTL_DEBUGOUT("igb_set_multi: begin");
2127
2128	mta = adapter->mta;
2129	bzero(mta, sizeof(uint8_t) * ETH_ADDR_LEN *
2130	    MAX_NUM_MULTICAST_ADDRESSES);
2131
2132#if __FreeBSD_version < 800000
2133	IF_ADDR_LOCK(ifp);
2134#else
2135	if_maddr_rlock(ifp);
2136#endif
2137	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
2138		if (ifma->ifma_addr->sa_family != AF_LINK)
2139			continue;
2140
2141		if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
2142			break;
2143
2144		bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
2145		    &mta[mcnt * ETH_ADDR_LEN], ETH_ADDR_LEN);
2146		mcnt++;
2147	}
2148#if __FreeBSD_version < 800000
2149	IF_ADDR_UNLOCK(ifp);
2150#else
2151	if_maddr_runlock(ifp);
2152#endif
2153
2154	if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) {
2155		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2156		reg_rctl |= E1000_RCTL_MPE;
2157		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2158	} else
2159		e1000_update_mc_addr_list(&adapter->hw, mta, mcnt);
2160}
2161
2162
2163/*********************************************************************
2164 *  Timer routine:
2165 *  	This routine checks for link status,
2166 *	updates statistics, and does the watchdog.
2167 *
2168 **********************************************************************/
2169
2170static void
2171igb_local_timer(void *arg)
2172{
2173	struct adapter		*adapter = arg;
2174	device_t		dev = adapter->dev;
2175	struct ifnet		*ifp = adapter->ifp;
2176	struct tx_ring		*txr = adapter->tx_rings;
2177	struct igb_queue	*que = adapter->queues;
2178	int			hung = 0, busy = 0;
2179
2180
2181	IGB_CORE_LOCK_ASSERT(adapter);
2182
2183	igb_update_link_status(adapter);
2184	igb_update_stats_counters(adapter);
2185
2186        /*
2187        ** Check the TX queues status
2188	**	- central locked handling of OACTIVE
2189	**	- watchdog only if all queues show hung
2190        */
2191	for (int i = 0; i < adapter->num_queues; i++, que++, txr++) {
2192		if ((txr->queue_status & IGB_QUEUE_HUNG) &&
2193		    (adapter->pause_frames == 0))
2194			++hung;
2195		if (txr->queue_status & IGB_QUEUE_DEPLETED)
2196			++busy;
2197		if ((txr->queue_status & IGB_QUEUE_IDLE) == 0)
2198			taskqueue_enqueue(que->tq, &que->que_task);
2199	}
2200	if (hung == adapter->num_queues)
2201		goto timeout;
2202	if (busy == adapter->num_queues)
2203		ifp->if_drv_flags |= IFF_DRV_OACTIVE;
2204	else if ((ifp->if_drv_flags & IFF_DRV_OACTIVE) &&
2205	    (busy < adapter->num_queues))
2206		ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
2207
2208	adapter->pause_frames = 0;
2209	callout_reset(&adapter->timer, hz, igb_local_timer, adapter);
2210#ifndef DEVICE_POLLING
2211	/* Schedule all queue interrupts - deadlock protection */
2212	E1000_WRITE_REG(&adapter->hw, E1000_EICS, adapter->que_mask);
2213#endif
2214	return;
2215
2216timeout:
2217	device_printf(adapter->dev, "Watchdog timeout -- resetting\n");
2218	device_printf(dev,"Queue(%d) tdh = %d, hw tdt = %d\n", txr->me,
2219            E1000_READ_REG(&adapter->hw, E1000_TDH(txr->me)),
2220            E1000_READ_REG(&adapter->hw, E1000_TDT(txr->me)));
2221	device_printf(dev,"TX(%d) desc avail = %d,"
2222            "Next TX to Clean = %d\n",
2223            txr->me, txr->tx_avail, txr->next_to_clean);
2224	adapter->ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2225	adapter->watchdog_events++;
2226	igb_init_locked(adapter);
2227}
2228
2229static void
2230igb_update_link_status(struct adapter *adapter)
2231{
2232	struct e1000_hw *hw = &adapter->hw;
2233	struct ifnet *ifp = adapter->ifp;
2234	device_t dev = adapter->dev;
2235	struct tx_ring *txr = adapter->tx_rings;
2236	u32 link_check, thstat, ctrl;
2237
2238	link_check = thstat = ctrl = 0;
2239
2240	/* Get the cached link value or read for real */
2241        switch (hw->phy.media_type) {
2242        case e1000_media_type_copper:
2243                if (hw->mac.get_link_status) {
2244			/* Do the work to read phy */
2245                        e1000_check_for_link(hw);
2246                        link_check = !hw->mac.get_link_status;
2247                } else
2248                        link_check = TRUE;
2249                break;
2250        case e1000_media_type_fiber:
2251                e1000_check_for_link(hw);
2252                link_check = (E1000_READ_REG(hw, E1000_STATUS) &
2253                                 E1000_STATUS_LU);
2254                break;
2255        case e1000_media_type_internal_serdes:
2256                e1000_check_for_link(hw);
2257                link_check = adapter->hw.mac.serdes_has_link;
2258                break;
2259	/* VF device is type_unknown */
2260        case e1000_media_type_unknown:
2261                e1000_check_for_link(hw);
2262		link_check = !hw->mac.get_link_status;
2263		/* Fall thru */
2264        default:
2265                break;
2266        }
2267
2268	/* Check for thermal downshift or shutdown */
2269	if (hw->mac.type == e1000_i350) {
2270		thstat = E1000_READ_REG(hw, E1000_THSTAT);
2271		ctrl = E1000_READ_REG(hw, E1000_CTRL_EXT);
2272	}
2273
2274	/* Now we check if a transition has happened */
2275	if (link_check && (adapter->link_active == 0)) {
2276		e1000_get_speed_and_duplex(&adapter->hw,
2277		    &adapter->link_speed, &adapter->link_duplex);
2278		if (bootverbose)
2279			device_printf(dev, "Link is up %d Mbps %s\n",
2280			    adapter->link_speed,
2281			    ((adapter->link_duplex == FULL_DUPLEX) ?
2282			    "Full Duplex" : "Half Duplex"));
2283		adapter->link_active = 1;
2284		ifp->if_baudrate = adapter->link_speed * 1000000;
2285		if ((ctrl & E1000_CTRL_EXT_LINK_MODE_GMII) &&
2286		    (thstat & E1000_THSTAT_LINK_THROTTLE))
2287			device_printf(dev, "Link: thermal downshift\n");
2288		/* This can sleep */
2289		if_link_state_change(ifp, LINK_STATE_UP);
2290	} else if (!link_check && (adapter->link_active == 1)) {
2291		ifp->if_baudrate = adapter->link_speed = 0;
2292		adapter->link_duplex = 0;
2293		if (bootverbose)
2294			device_printf(dev, "Link is Down\n");
2295		if ((ctrl & E1000_CTRL_EXT_LINK_MODE_GMII) &&
2296		    (thstat & E1000_THSTAT_PWR_DOWN))
2297			device_printf(dev, "Link: thermal shutdown\n");
2298		adapter->link_active = 0;
2299		/* This can sleep */
2300		if_link_state_change(ifp, LINK_STATE_DOWN);
2301		/* Reset queue state */
2302		for (int i = 0; i < adapter->num_queues; i++, txr++)
2303			txr->queue_status = IGB_QUEUE_IDLE;
2304	}
2305}
2306
2307/*********************************************************************
2308 *
2309 *  This routine disables all traffic on the adapter by issuing a
2310 *  global reset on the MAC and deallocates TX/RX buffers.
2311 *
2312 **********************************************************************/
2313
2314static void
2315igb_stop(void *arg)
2316{
2317	struct adapter	*adapter = arg;
2318	struct ifnet	*ifp = adapter->ifp;
2319	struct tx_ring *txr = adapter->tx_rings;
2320
2321	IGB_CORE_LOCK_ASSERT(adapter);
2322
2323	INIT_DEBUGOUT("igb_stop: begin");
2324
2325	igb_disable_intr(adapter);
2326
2327	callout_stop(&adapter->timer);
2328
2329	/* Tell the stack that the interface is no longer active */
2330	ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2331	ifp->if_drv_flags |= IFF_DRV_OACTIVE;
2332
2333	/* Disarm watchdog timer. */
2334	for (int i = 0; i < adapter->num_queues; i++, txr++) {
2335		IGB_TX_LOCK(txr);
2336		txr->queue_status = IGB_QUEUE_IDLE;
2337		IGB_TX_UNLOCK(txr);
2338	}
2339
2340	e1000_reset_hw(&adapter->hw);
2341	E1000_WRITE_REG(&adapter->hw, E1000_WUC, 0);
2342
2343	e1000_led_off(&adapter->hw);
2344	e1000_cleanup_led(&adapter->hw);
2345}
2346
2347
2348/*********************************************************************
2349 *
2350 *  Determine hardware revision.
2351 *
2352 **********************************************************************/
2353static void
2354igb_identify_hardware(struct adapter *adapter)
2355{
2356	device_t dev = adapter->dev;
2357
2358	/* Make sure our PCI config space has the necessary stuff set */
2359	adapter->hw.bus.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2);
2360	if (!((adapter->hw.bus.pci_cmd_word & PCIM_CMD_BUSMASTEREN) &&
2361	    (adapter->hw.bus.pci_cmd_word & PCIM_CMD_MEMEN))) {
2362		INIT_DEBUGOUT("Memory Access and/or Bus Master "
2363		    "bits were not set!\n");
2364		adapter->hw.bus.pci_cmd_word |=
2365		(PCIM_CMD_BUSMASTEREN | PCIM_CMD_MEMEN);
2366		pci_write_config(dev, PCIR_COMMAND,
2367		    adapter->hw.bus.pci_cmd_word, 2);
2368	}
2369
2370	/* Save off the information about this board */
2371	adapter->hw.vendor_id = pci_get_vendor(dev);
2372	adapter->hw.device_id = pci_get_device(dev);
2373	adapter->hw.revision_id = pci_read_config(dev, PCIR_REVID, 1);
2374	adapter->hw.subsystem_vendor_id =
2375	    pci_read_config(dev, PCIR_SUBVEND_0, 2);
2376	adapter->hw.subsystem_device_id =
2377	    pci_read_config(dev, PCIR_SUBDEV_0, 2);
2378
2379	/* Set MAC type early for PCI setup */
2380	e1000_set_mac_type(&adapter->hw);
2381
2382	/* Are we a VF device? */
2383	if ((adapter->hw.mac.type == e1000_vfadapt) ||
2384	    (adapter->hw.mac.type == e1000_vfadapt_i350))
2385		adapter->vf_ifp = 1;
2386	else
2387		adapter->vf_ifp = 0;
2388}
2389
2390static int
2391igb_allocate_pci_resources(struct adapter *adapter)
2392{
2393	device_t	dev = adapter->dev;
2394	int		rid;
2395
2396	rid = PCIR_BAR(0);
2397	adapter->pci_mem = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
2398	    &rid, RF_ACTIVE);
2399	if (adapter->pci_mem == NULL) {
2400		device_printf(dev, "Unable to allocate bus resource: memory\n");
2401		return (ENXIO);
2402	}
2403	adapter->osdep.mem_bus_space_tag =
2404	    rman_get_bustag(adapter->pci_mem);
2405	adapter->osdep.mem_bus_space_handle =
2406	    rman_get_bushandle(adapter->pci_mem);
2407	adapter->hw.hw_addr = (u8 *)&adapter->osdep.mem_bus_space_handle;
2408
2409	adapter->num_queues = 1; /* Defaults for Legacy or MSI */
2410
2411	/* This will setup either MSI/X or MSI */
2412	adapter->msix = igb_setup_msix(adapter);
2413	adapter->hw.back = &adapter->osdep;
2414
2415	return (0);
2416}
2417
2418/*********************************************************************
2419 *
2420 *  Setup the Legacy or MSI Interrupt handler
2421 *
2422 **********************************************************************/
2423static int
2424igb_allocate_legacy(struct adapter *adapter)
2425{
2426	device_t		dev = adapter->dev;
2427	struct igb_queue	*que = adapter->queues;
2428	struct tx_ring		*txr = adapter->tx_rings;
2429	int			error, rid = 0;
2430
2431	/* Turn off all interrupts */
2432	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2433
2434	/* MSI RID is 1 */
2435	if (adapter->msix == 1)
2436		rid = 1;
2437
2438	/* We allocate a single interrupt resource */
2439	adapter->res = bus_alloc_resource_any(dev,
2440	    SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2441	if (adapter->res == NULL) {
2442		device_printf(dev, "Unable to allocate bus resource: "
2443		    "interrupt\n");
2444		return (ENXIO);
2445	}
2446
2447#if __FreeBSD_version >= 800000
2448	TASK_INIT(&txr->txq_task, 0, igb_deferred_mq_start, txr);
2449#endif
2450
2451	/*
2452	 * Try allocating a fast interrupt and the associated deferred
2453	 * processing contexts.
2454	 */
2455	TASK_INIT(&que->que_task, 0, igb_handle_que, que);
2456	/* Make tasklet for deferred link handling */
2457	TASK_INIT(&adapter->link_task, 0, igb_handle_link, adapter);
2458	que->tq = taskqueue_create_fast("igb_taskq", M_NOWAIT,
2459	    taskqueue_thread_enqueue, &que->tq);
2460	taskqueue_start_threads(&que->tq, 1, PI_NET, "%s taskq",
2461	    device_get_nameunit(adapter->dev));
2462	if ((error = bus_setup_intr(dev, adapter->res,
2463	    INTR_TYPE_NET | INTR_MPSAFE, igb_irq_fast, NULL,
2464	    adapter, &adapter->tag)) != 0) {
2465		device_printf(dev, "Failed to register fast interrupt "
2466			    "handler: %d\n", error);
2467		taskqueue_free(que->tq);
2468		que->tq = NULL;
2469		return (error);
2470	}
2471
2472	return (0);
2473}
2474
2475
2476/*********************************************************************
2477 *
2478 *  Setup the MSIX Queue Interrupt handlers:
2479 *
2480 **********************************************************************/
2481static int
2482igb_allocate_msix(struct adapter *adapter)
2483{
2484	device_t		dev = adapter->dev;
2485	struct igb_queue	*que = adapter->queues;
2486	int			error, rid, vector = 0;
2487
2488	/* Be sure to start with all interrupts disabled */
2489	E1000_WRITE_REG(&adapter->hw, E1000_IMC, ~0);
2490	E1000_WRITE_FLUSH(&adapter->hw);
2491
2492	for (int i = 0; i < adapter->num_queues; i++, vector++, que++) {
2493		rid = vector +1;
2494		que->res = bus_alloc_resource_any(dev,
2495		    SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2496		if (que->res == NULL) {
2497			device_printf(dev,
2498			    "Unable to allocate bus resource: "
2499			    "MSIX Queue Interrupt\n");
2500			return (ENXIO);
2501		}
2502		error = bus_setup_intr(dev, que->res,
2503	    	    INTR_TYPE_NET | INTR_MPSAFE, NULL,
2504		    igb_msix_que, que, &que->tag);
2505		if (error) {
2506			que->res = NULL;
2507			device_printf(dev, "Failed to register Queue handler");
2508			return (error);
2509		}
2510#if __FreeBSD_version >= 800504
2511		bus_describe_intr(dev, que->res, que->tag, "que %d", i);
2512#endif
2513		que->msix = vector;
2514		if (adapter->hw.mac.type == e1000_82575)
2515			que->eims = E1000_EICR_TX_QUEUE0 << i;
2516		else
2517			que->eims = 1 << vector;
2518		/*
2519		** Bind the msix vector, and thus the
2520		** rings to the corresponding cpu.
2521		*/
2522		if (adapter->num_queues > 1) {
2523			if (igb_last_bind_cpu < 0)
2524				igb_last_bind_cpu = CPU_FIRST();
2525			bus_bind_intr(dev, que->res, igb_last_bind_cpu);
2526			device_printf(dev,
2527				"Bound queue %d to cpu %d\n",
2528				i,igb_last_bind_cpu);
2529			igb_last_bind_cpu = CPU_NEXT(igb_last_bind_cpu);
2530		}
2531#if __FreeBSD_version >= 800000
2532		TASK_INIT(&que->txr->txq_task, 0, igb_deferred_mq_start,
2533		    que->txr);
2534#endif
2535		/* Make tasklet for deferred handling */
2536		TASK_INIT(&que->que_task, 0, igb_handle_que, que);
2537		que->tq = taskqueue_create("igb_que", M_NOWAIT,
2538		    taskqueue_thread_enqueue, &que->tq);
2539		taskqueue_start_threads(&que->tq, 1, PI_NET, "%s que",
2540		    device_get_nameunit(adapter->dev));
2541	}
2542
2543	/* And Link */
2544	rid = vector + 1;
2545	adapter->res = bus_alloc_resource_any(dev,
2546	    SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2547	if (adapter->res == NULL) {
2548		device_printf(dev,
2549		    "Unable to allocate bus resource: "
2550		    "MSIX Link Interrupt\n");
2551		return (ENXIO);
2552	}
2553	if ((error = bus_setup_intr(dev, adapter->res,
2554	    INTR_TYPE_NET | INTR_MPSAFE, NULL,
2555	    igb_msix_link, adapter, &adapter->tag)) != 0) {
2556		device_printf(dev, "Failed to register Link handler");
2557		return (error);
2558	}
2559#if __FreeBSD_version >= 800504
2560	bus_describe_intr(dev, adapter->res, adapter->tag, "link");
2561#endif
2562	adapter->linkvec = vector;
2563
2564	return (0);
2565}
2566
2567
2568static void
2569igb_configure_queues(struct adapter *adapter)
2570{
2571	struct	e1000_hw	*hw = &adapter->hw;
2572	struct	igb_queue	*que;
2573	u32			tmp, ivar = 0, newitr = 0;
2574
2575	/* First turn on RSS capability */
2576	if (adapter->hw.mac.type != e1000_82575)
2577		E1000_WRITE_REG(hw, E1000_GPIE,
2578		    E1000_GPIE_MSIX_MODE | E1000_GPIE_EIAME |
2579		    E1000_GPIE_PBA | E1000_GPIE_NSICR);
2580
2581	/* Turn on MSIX */
2582	switch (adapter->hw.mac.type) {
2583	case e1000_82580:
2584	case e1000_i350:
2585	case e1000_i210:
2586	case e1000_i211:
2587	case e1000_vfadapt:
2588	case e1000_vfadapt_i350:
2589		/* RX entries */
2590		for (int i = 0; i < adapter->num_queues; i++) {
2591			u32 index = i >> 1;
2592			ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2593			que = &adapter->queues[i];
2594			if (i & 1) {
2595				ivar &= 0xFF00FFFF;
2596				ivar |= (que->msix | E1000_IVAR_VALID) << 16;
2597			} else {
2598				ivar &= 0xFFFFFF00;
2599				ivar |= que->msix | E1000_IVAR_VALID;
2600			}
2601			E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2602		}
2603		/* TX entries */
2604		for (int i = 0; i < adapter->num_queues; i++) {
2605			u32 index = i >> 1;
2606			ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2607			que = &adapter->queues[i];
2608			if (i & 1) {
2609				ivar &= 0x00FFFFFF;
2610				ivar |= (que->msix | E1000_IVAR_VALID) << 24;
2611			} else {
2612				ivar &= 0xFFFF00FF;
2613				ivar |= (que->msix | E1000_IVAR_VALID) << 8;
2614			}
2615			E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2616			adapter->que_mask |= que->eims;
2617		}
2618
2619		/* And for the link interrupt */
2620		ivar = (adapter->linkvec | E1000_IVAR_VALID) << 8;
2621		adapter->link_mask = 1 << adapter->linkvec;
2622		E1000_WRITE_REG(hw, E1000_IVAR_MISC, ivar);
2623		break;
2624	case e1000_82576:
2625		/* RX entries */
2626		for (int i = 0; i < adapter->num_queues; i++) {
2627			u32 index = i & 0x7; /* Each IVAR has two entries */
2628			ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2629			que = &adapter->queues[i];
2630			if (i < 8) {
2631				ivar &= 0xFFFFFF00;
2632				ivar |= que->msix | E1000_IVAR_VALID;
2633			} else {
2634				ivar &= 0xFF00FFFF;
2635				ivar |= (que->msix | E1000_IVAR_VALID) << 16;
2636			}
2637			E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2638			adapter->que_mask |= que->eims;
2639		}
2640		/* TX entries */
2641		for (int i = 0; i < adapter->num_queues; i++) {
2642			u32 index = i & 0x7; /* Each IVAR has two entries */
2643			ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2644			que = &adapter->queues[i];
2645			if (i < 8) {
2646				ivar &= 0xFFFF00FF;
2647				ivar |= (que->msix | E1000_IVAR_VALID) << 8;
2648			} else {
2649				ivar &= 0x00FFFFFF;
2650				ivar |= (que->msix | E1000_IVAR_VALID) << 24;
2651			}
2652			E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2653			adapter->que_mask |= que->eims;
2654		}
2655
2656		/* And for the link interrupt */
2657		ivar = (adapter->linkvec | E1000_IVAR_VALID) << 8;
2658		adapter->link_mask = 1 << adapter->linkvec;
2659		E1000_WRITE_REG(hw, E1000_IVAR_MISC, ivar);
2660		break;
2661
2662	case e1000_82575:
2663                /* enable MSI-X support*/
2664		tmp = E1000_READ_REG(hw, E1000_CTRL_EXT);
2665                tmp |= E1000_CTRL_EXT_PBA_CLR;
2666                /* Auto-Mask interrupts upon ICR read. */
2667                tmp |= E1000_CTRL_EXT_EIAME;
2668                tmp |= E1000_CTRL_EXT_IRCA;
2669                E1000_WRITE_REG(hw, E1000_CTRL_EXT, tmp);
2670
2671		/* Queues */
2672		for (int i = 0; i < adapter->num_queues; i++) {
2673			que = &adapter->queues[i];
2674			tmp = E1000_EICR_RX_QUEUE0 << i;
2675			tmp |= E1000_EICR_TX_QUEUE0 << i;
2676			que->eims = tmp;
2677			E1000_WRITE_REG_ARRAY(hw, E1000_MSIXBM(0),
2678			    i, que->eims);
2679			adapter->que_mask |= que->eims;
2680		}
2681
2682		/* Link */
2683		E1000_WRITE_REG(hw, E1000_MSIXBM(adapter->linkvec),
2684		    E1000_EIMS_OTHER);
2685		adapter->link_mask |= E1000_EIMS_OTHER;
2686	default:
2687		break;
2688	}
2689
2690	/* Set the starting interrupt rate */
2691	if (igb_max_interrupt_rate > 0)
2692		newitr = (4000000 / igb_max_interrupt_rate) & 0x7FFC;
2693
2694        if (hw->mac.type == e1000_82575)
2695                newitr |= newitr << 16;
2696        else
2697                newitr |= E1000_EITR_CNT_IGNR;
2698
2699	for (int i = 0; i < adapter->num_queues; i++) {
2700		que = &adapter->queues[i];
2701		E1000_WRITE_REG(hw, E1000_EITR(que->msix), newitr);
2702	}
2703
2704	return;
2705}
2706
2707
2708static void
2709igb_free_pci_resources(struct adapter *adapter)
2710{
2711	struct		igb_queue *que = adapter->queues;
2712	device_t	dev = adapter->dev;
2713	int		rid;
2714
2715	/*
2716	** There is a slight possibility of a failure mode
2717	** in attach that will result in entering this function
2718	** before interrupt resources have been initialized, and
2719	** in that case we do not want to execute the loops below
2720	** We can detect this reliably by the state of the adapter
2721	** res pointer.
2722	*/
2723	if (adapter->res == NULL)
2724		goto mem;
2725
2726	/*
2727	 * First release all the interrupt resources:
2728	 */
2729	for (int i = 0; i < adapter->num_queues; i++, que++) {
2730		rid = que->msix + 1;
2731		if (que->tag != NULL) {
2732			bus_teardown_intr(dev, que->res, que->tag);
2733			que->tag = NULL;
2734		}
2735		if (que->res != NULL)
2736			bus_release_resource(dev,
2737			    SYS_RES_IRQ, rid, que->res);
2738	}
2739
2740	/* Clean the Legacy or Link interrupt last */
2741	if (adapter->linkvec) /* we are doing MSIX */
2742		rid = adapter->linkvec + 1;
2743	else
2744		(adapter->msix != 0) ? (rid = 1):(rid = 0);
2745
2746	que = adapter->queues;
2747	if (adapter->tag != NULL) {
2748		taskqueue_drain(que->tq, &adapter->link_task);
2749		bus_teardown_intr(dev, adapter->res, adapter->tag);
2750		adapter->tag = NULL;
2751	}
2752	if (adapter->res != NULL)
2753		bus_release_resource(dev, SYS_RES_IRQ, rid, adapter->res);
2754
2755	for (int i = 0; i < adapter->num_queues; i++, que++) {
2756		if (que->tq != NULL) {
2757#if __FreeBSD_version >= 800000
2758			taskqueue_drain(que->tq, &que->txr->txq_task);
2759#endif
2760			taskqueue_drain(que->tq, &que->que_task);
2761			taskqueue_free(que->tq);
2762		}
2763	}
2764mem:
2765	if (adapter->msix)
2766		pci_release_msi(dev);
2767
2768	if (adapter->msix_mem != NULL)
2769		bus_release_resource(dev, SYS_RES_MEMORY,
2770		    PCIR_BAR(IGB_MSIX_BAR), adapter->msix_mem);
2771
2772	if (adapter->pci_mem != NULL)
2773		bus_release_resource(dev, SYS_RES_MEMORY,
2774		    PCIR_BAR(0), adapter->pci_mem);
2775
2776}
2777
2778/*
2779 * Setup Either MSI/X or MSI
2780 */
2781static int
2782igb_setup_msix(struct adapter *adapter)
2783{
2784	device_t dev = adapter->dev;
2785	int rid, want, queues, msgs, maxqueues;
2786
2787	/* tuneable override */
2788	if (igb_enable_msix == 0)
2789		goto msi;
2790
2791	/* First try MSI/X */
2792	rid = PCIR_BAR(IGB_MSIX_BAR);
2793	adapter->msix_mem = bus_alloc_resource_any(dev,
2794	    SYS_RES_MEMORY, &rid, RF_ACTIVE);
2795       	if (!adapter->msix_mem) {
2796		/* May not be enabled */
2797		device_printf(adapter->dev,
2798		    "Unable to map MSIX table \n");
2799		goto msi;
2800	}
2801
2802	msgs = pci_msix_count(dev);
2803	if (msgs == 0) { /* system has msix disabled */
2804		bus_release_resource(dev, SYS_RES_MEMORY,
2805		    PCIR_BAR(IGB_MSIX_BAR), adapter->msix_mem);
2806		adapter->msix_mem = NULL;
2807		goto msi;
2808	}
2809
2810	/* Figure out a reasonable auto config value */
2811	queues = (mp_ncpus > (msgs-1)) ? (msgs-1) : mp_ncpus;
2812
2813	/* Manual override */
2814	if (igb_num_queues != 0)
2815		queues = igb_num_queues;
2816
2817	/* Sanity check based on HW */
2818	switch (adapter->hw.mac.type) {
2819		case e1000_82575:
2820			maxqueues = 4;
2821			break;
2822		case e1000_82576:
2823		case e1000_82580:
2824		case e1000_i350:
2825			maxqueues = 8;
2826			break;
2827		case e1000_i210:
2828			maxqueues = 4;
2829			break;
2830		case e1000_i211:
2831			maxqueues = 2;
2832			break;
2833		default:  /* VF interfaces */
2834			maxqueues = 1;
2835			break;
2836	}
2837	if (queues > maxqueues)
2838		queues = maxqueues;
2839
2840	/*
2841	** One vector (RX/TX pair) per queue
2842	** plus an additional for Link interrupt
2843	*/
2844	want = queues + 1;
2845	if (msgs >= want)
2846		msgs = want;
2847	else {
2848               	device_printf(adapter->dev,
2849		    "MSIX Configuration Problem, "
2850		    "%d vectors configured, but %d queues wanted!\n",
2851		    msgs, want);
2852		return (0);
2853	}
2854	if ((msgs) && pci_alloc_msix(dev, &msgs) == 0) {
2855               	device_printf(adapter->dev,
2856		    "Using MSIX interrupts with %d vectors\n", msgs);
2857		adapter->num_queues = queues;
2858		return (msgs);
2859	}
2860msi:
2861       	msgs = pci_msi_count(dev);
2862	if (msgs == 1 && pci_alloc_msi(dev, &msgs) == 0) {
2863		device_printf(adapter->dev," Using MSI interrupt\n");
2864		return (msgs);
2865	}
2866	return (0);
2867}
2868
2869/*********************************************************************
2870 *
2871 *  Set up an fresh starting state
2872 *
2873 **********************************************************************/
2874static void
2875igb_reset(struct adapter *adapter)
2876{
2877	device_t	dev = adapter->dev;
2878	struct e1000_hw *hw = &adapter->hw;
2879	struct e1000_fc_info *fc = &hw->fc;
2880	struct ifnet	*ifp = adapter->ifp;
2881	u32		pba = 0;
2882	u16		hwm;
2883
2884	INIT_DEBUGOUT("igb_reset: begin");
2885
2886	/* Let the firmware know the OS is in control */
2887	igb_get_hw_control(adapter);
2888
2889	/*
2890	 * Packet Buffer Allocation (PBA)
2891	 * Writing PBA sets the receive portion of the buffer
2892	 * the remainder is used for the transmit buffer.
2893	 */
2894	switch (hw->mac.type) {
2895	case e1000_82575:
2896		pba = E1000_PBA_32K;
2897		break;
2898	case e1000_82576:
2899	case e1000_vfadapt:
2900		pba = E1000_READ_REG(hw, E1000_RXPBS);
2901		pba &= E1000_RXPBS_SIZE_MASK_82576;
2902		break;
2903	case e1000_82580:
2904	case e1000_i350:
2905	case e1000_vfadapt_i350:
2906		pba = E1000_READ_REG(hw, E1000_RXPBS);
2907		pba = e1000_rxpbs_adjust_82580(pba);
2908		break;
2909	case e1000_i210:
2910	case e1000_i211:
2911		pba = E1000_PBA_34K;
2912	default:
2913		break;
2914	}
2915
2916	/* Special needs in case of Jumbo frames */
2917	if ((hw->mac.type == e1000_82575) && (ifp->if_mtu > ETHERMTU)) {
2918		u32 tx_space, min_tx, min_rx;
2919		pba = E1000_READ_REG(hw, E1000_PBA);
2920		tx_space = pba >> 16;
2921		pba &= 0xffff;
2922		min_tx = (adapter->max_frame_size +
2923		    sizeof(struct e1000_tx_desc) - ETHERNET_FCS_SIZE) * 2;
2924		min_tx = roundup2(min_tx, 1024);
2925		min_tx >>= 10;
2926                min_rx = adapter->max_frame_size;
2927                min_rx = roundup2(min_rx, 1024);
2928                min_rx >>= 10;
2929		if (tx_space < min_tx &&
2930		    ((min_tx - tx_space) < pba)) {
2931			pba = pba - (min_tx - tx_space);
2932			/*
2933                         * if short on rx space, rx wins
2934                         * and must trump tx adjustment
2935			 */
2936                        if (pba < min_rx)
2937                                pba = min_rx;
2938		}
2939		E1000_WRITE_REG(hw, E1000_PBA, pba);
2940	}
2941
2942	INIT_DEBUGOUT1("igb_init: pba=%dK",pba);
2943
2944	/*
2945	 * These parameters control the automatic generation (Tx) and
2946	 * response (Rx) to Ethernet PAUSE frames.
2947	 * - High water mark should allow for at least two frames to be
2948	 *   received after sending an XOFF.
2949	 * - Low water mark works best when it is very near the high water mark.
2950	 *   This allows the receiver to restart by sending XON when it has
2951	 *   drained a bit.
2952	 */
2953	hwm = min(((pba << 10) * 9 / 10),
2954	    ((pba << 10) - 2 * adapter->max_frame_size));
2955
2956	if (hw->mac.type < e1000_82576) {
2957		fc->high_water = hwm & 0xFFF8;  /* 8-byte granularity */
2958		fc->low_water = fc->high_water - 8;
2959	} else {
2960		fc->high_water = hwm & 0xFFF0;  /* 16-byte granularity */
2961		fc->low_water = fc->high_water - 16;
2962	}
2963
2964	fc->pause_time = IGB_FC_PAUSE_TIME;
2965	fc->send_xon = TRUE;
2966	if (adapter->fc)
2967		fc->requested_mode = adapter->fc;
2968	else
2969		fc->requested_mode = e1000_fc_default;
2970
2971	/* Issue a global reset */
2972	e1000_reset_hw(hw);
2973	E1000_WRITE_REG(hw, E1000_WUC, 0);
2974
2975	if (e1000_init_hw(hw) < 0)
2976		device_printf(dev, "Hardware Initialization Failed\n");
2977
2978	/* Setup DMA Coalescing */
2979	if ((hw->mac.type > e1000_82580) &&
2980	    (hw->mac.type != e1000_i211)) {
2981		u32 dmac;
2982		u32 reg = ~E1000_DMACR_DMAC_EN;
2983
2984		if (adapter->dmac == 0) { /* Disabling it */
2985			E1000_WRITE_REG(hw, E1000_DMACR, reg);
2986			goto reset_out;
2987		}
2988
2989		/* Set starting thresholds */
2990		E1000_WRITE_REG(hw, E1000_DMCTXTH, 0);
2991		E1000_WRITE_REG(hw, E1000_DMCRTRH, 0);
2992
2993		hwm = 64 * pba - adapter->max_frame_size / 16;
2994		if (hwm < 64 * (pba - 6))
2995			hwm = 64 * (pba - 6);
2996		reg = E1000_READ_REG(hw, E1000_FCRTC);
2997		reg &= ~E1000_FCRTC_RTH_COAL_MASK;
2998		reg |= ((hwm << E1000_FCRTC_RTH_COAL_SHIFT)
2999		    & E1000_FCRTC_RTH_COAL_MASK);
3000		E1000_WRITE_REG(hw, E1000_FCRTC, reg);
3001
3002
3003		dmac = pba - adapter->max_frame_size / 512;
3004		if (dmac < pba - 10)
3005			dmac = pba - 10;
3006		reg = E1000_READ_REG(hw, E1000_DMACR);
3007		reg &= ~E1000_DMACR_DMACTHR_MASK;
3008		reg = ((dmac << E1000_DMACR_DMACTHR_SHIFT)
3009		    & E1000_DMACR_DMACTHR_MASK);
3010		/* transition to L0x or L1 if available..*/
3011		reg |= (E1000_DMACR_DMAC_EN | E1000_DMACR_DMAC_LX_MASK);
3012		/* timer = value in adapter->dmac in 32usec intervals */
3013		reg |= (adapter->dmac >> 5);
3014		E1000_WRITE_REG(hw, E1000_DMACR, reg);
3015
3016		/* Set the interval before transition */
3017		reg = E1000_READ_REG(hw, E1000_DMCTLX);
3018		reg |= 0x80000004;
3019		E1000_WRITE_REG(hw, E1000_DMCTLX, reg);
3020
3021		/* free space in tx packet buffer to wake from DMA coal */
3022		E1000_WRITE_REG(hw, E1000_DMCTXTH,
3023		    (20480 - (2 * adapter->max_frame_size)) >> 6);
3024
3025		/* make low power state decision controlled by DMA coal */
3026		reg = E1000_READ_REG(hw, E1000_PCIEMISC);
3027		reg &= ~E1000_PCIEMISC_LX_DECISION;
3028		E1000_WRITE_REG(hw, E1000_PCIEMISC, reg);
3029		device_printf(dev, "DMA Coalescing enabled\n");
3030
3031	} else if (hw->mac.type == e1000_82580) {
3032		u32 reg = E1000_READ_REG(hw, E1000_PCIEMISC);
3033		E1000_WRITE_REG(hw, E1000_DMACR, 0);
3034		E1000_WRITE_REG(hw, E1000_PCIEMISC,
3035		    reg & ~E1000_PCIEMISC_LX_DECISION);
3036	}
3037
3038reset_out:
3039	E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);
3040	e1000_get_phy_info(hw);
3041	e1000_check_for_link(hw);
3042	return;
3043}
3044
3045/*********************************************************************
3046 *
3047 *  Setup networking device structure and register an interface.
3048 *
3049 **********************************************************************/
3050static int
3051igb_setup_interface(device_t dev, struct adapter *adapter)
3052{
3053	struct ifnet   *ifp;
3054
3055	INIT_DEBUGOUT("igb_setup_interface: begin");
3056
3057	ifp = adapter->ifp = if_alloc(IFT_ETHER);
3058	if (ifp == NULL) {
3059		device_printf(dev, "can not allocate ifnet structure\n");
3060		return (-1);
3061	}
3062	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
3063	ifp->if_init =  igb_init;
3064	ifp->if_softc = adapter;
3065	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
3066	ifp->if_ioctl = igb_ioctl;
3067#if __FreeBSD_version >= 800000
3068	ifp->if_transmit = igb_mq_start;
3069	ifp->if_qflush = igb_qflush;
3070#else
3071	ifp->if_start = igb_start;
3072	IFQ_SET_MAXLEN(&ifp->if_snd, adapter->num_tx_desc - 1);
3073	ifp->if_snd.ifq_drv_maxlen = adapter->num_tx_desc - 1;
3074	IFQ_SET_READY(&ifp->if_snd);
3075#endif
3076
3077	ether_ifattach(ifp, adapter->hw.mac.addr);
3078
3079	ifp->if_capabilities = ifp->if_capenable = 0;
3080
3081	ifp->if_capabilities = IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM;
3082	ifp->if_capabilities |= IFCAP_TSO4;
3083	ifp->if_capabilities |= IFCAP_JUMBO_MTU;
3084	ifp->if_capenable = ifp->if_capabilities;
3085
3086	/* Don't enable LRO by default */
3087	ifp->if_capabilities |= IFCAP_LRO;
3088
3089#ifdef DEVICE_POLLING
3090	ifp->if_capabilities |= IFCAP_POLLING;
3091#endif
3092
3093	/*
3094	 * Tell the upper layer(s) we
3095	 * support full VLAN capability.
3096	 */
3097	ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);
3098	ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING
3099			     |  IFCAP_VLAN_HWTSO
3100			     |  IFCAP_VLAN_MTU;
3101	ifp->if_capenable |= IFCAP_VLAN_HWTAGGING
3102			  |  IFCAP_VLAN_HWTSO
3103			  |  IFCAP_VLAN_MTU;
3104
3105	/*
3106	** Don't turn this on by default, if vlans are
3107	** created on another pseudo device (eg. lagg)
3108	** then vlan events are not passed thru, breaking
3109	** operation, but with HW FILTER off it works. If
3110	** using vlans directly on the igb driver you can
3111	** enable this and get full hardware tag filtering.
3112	*/
3113	ifp->if_capabilities |= IFCAP_VLAN_HWFILTER;
3114
3115	/*
3116	 * Specify the media types supported by this adapter and register
3117	 * callbacks to update media and link information
3118	 */
3119	ifmedia_init(&adapter->media, IFM_IMASK,
3120	    igb_media_change, igb_media_status);
3121	if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
3122	    (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
3123		ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_SX | IFM_FDX,
3124			    0, NULL);
3125		ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_SX, 0, NULL);
3126	} else {
3127		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T, 0, NULL);
3128		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T | IFM_FDX,
3129			    0, NULL);
3130		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX,
3131			    0, NULL);
3132		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
3133			    0, NULL);
3134		if (adapter->hw.phy.type != e1000_phy_ife) {
3135			ifmedia_add(&adapter->media,
3136				IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
3137			ifmedia_add(&adapter->media,
3138				IFM_ETHER | IFM_1000_T, 0, NULL);
3139		}
3140	}
3141	ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
3142	ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
3143	return (0);
3144}
3145
3146
3147/*
3148 * Manage DMA'able memory.
3149 */
3150static void
3151igb_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
3152{
3153	if (error)
3154		return;
3155	*(bus_addr_t *) arg = segs[0].ds_addr;
3156}
3157
3158static int
3159igb_dma_malloc(struct adapter *adapter, bus_size_t size,
3160        struct igb_dma_alloc *dma, int mapflags)
3161{
3162	int error;
3163
3164	error = bus_dma_tag_create(bus_get_dma_tag(adapter->dev), /* parent */
3165				IGB_DBA_ALIGN, 0,	/* alignment, bounds */
3166				BUS_SPACE_MAXADDR,	/* lowaddr */
3167				BUS_SPACE_MAXADDR,	/* highaddr */
3168				NULL, NULL,		/* filter, filterarg */
3169				size,			/* maxsize */
3170				1,			/* nsegments */
3171				size,			/* maxsegsize */
3172				0,			/* flags */
3173				NULL,			/* lockfunc */
3174				NULL,			/* lockarg */
3175				&dma->dma_tag);
3176	if (error) {
3177		device_printf(adapter->dev,
3178		    "%s: bus_dma_tag_create failed: %d\n",
3179		    __func__, error);
3180		goto fail_0;
3181	}
3182
3183	error = bus_dmamem_alloc(dma->dma_tag, (void**) &dma->dma_vaddr,
3184	    BUS_DMA_NOWAIT | BUS_DMA_COHERENT, &dma->dma_map);
3185	if (error) {
3186		device_printf(adapter->dev,
3187		    "%s: bus_dmamem_alloc(%ju) failed: %d\n",
3188		    __func__, (uintmax_t)size, error);
3189		goto fail_2;
3190	}
3191
3192	dma->dma_paddr = 0;
3193	error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
3194	    size, igb_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT);
3195	if (error || dma->dma_paddr == 0) {
3196		device_printf(adapter->dev,
3197		    "%s: bus_dmamap_load failed: %d\n",
3198		    __func__, error);
3199		goto fail_3;
3200	}
3201
3202	return (0);
3203
3204fail_3:
3205	bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3206fail_2:
3207	bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
3208	bus_dma_tag_destroy(dma->dma_tag);
3209fail_0:
3210	dma->dma_map = NULL;
3211	dma->dma_tag = NULL;
3212
3213	return (error);
3214}
3215
3216static void
3217igb_dma_free(struct adapter *adapter, struct igb_dma_alloc *dma)
3218{
3219	if (dma->dma_tag == NULL)
3220		return;
3221	if (dma->dma_map != NULL) {
3222		bus_dmamap_sync(dma->dma_tag, dma->dma_map,
3223		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
3224		bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3225		bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
3226		dma->dma_map = NULL;
3227	}
3228	bus_dma_tag_destroy(dma->dma_tag);
3229	dma->dma_tag = NULL;
3230}
3231
3232
3233/*********************************************************************
3234 *
3235 *  Allocate memory for the transmit and receive rings, and then
3236 *  the descriptors associated with each, called only once at attach.
3237 *
3238 **********************************************************************/
3239static int
3240igb_allocate_queues(struct adapter *adapter)
3241{
3242	device_t dev = adapter->dev;
3243	struct igb_queue	*que = NULL;
3244	struct tx_ring		*txr = NULL;
3245	struct rx_ring		*rxr = NULL;
3246	int rsize, tsize, error = E1000_SUCCESS;
3247	int txconf = 0, rxconf = 0;
3248
3249	/* First allocate the top level queue structs */
3250	if (!(adapter->queues =
3251	    (struct igb_queue *) malloc(sizeof(struct igb_queue) *
3252	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3253		device_printf(dev, "Unable to allocate queue memory\n");
3254		error = ENOMEM;
3255		goto fail;
3256	}
3257
3258	/* Next allocate the TX ring struct memory */
3259	if (!(adapter->tx_rings =
3260	    (struct tx_ring *) malloc(sizeof(struct tx_ring) *
3261	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3262		device_printf(dev, "Unable to allocate TX ring memory\n");
3263		error = ENOMEM;
3264		goto tx_fail;
3265	}
3266
3267	/* Now allocate the RX */
3268	if (!(adapter->rx_rings =
3269	    (struct rx_ring *) malloc(sizeof(struct rx_ring) *
3270	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3271		device_printf(dev, "Unable to allocate RX ring memory\n");
3272		error = ENOMEM;
3273		goto rx_fail;
3274	}
3275
3276	tsize = roundup2(adapter->num_tx_desc *
3277	    sizeof(union e1000_adv_tx_desc), IGB_DBA_ALIGN);
3278	/*
3279	 * Now set up the TX queues, txconf is needed to handle the
3280	 * possibility that things fail midcourse and we need to
3281	 * undo memory gracefully
3282	 */
3283	for (int i = 0; i < adapter->num_queues; i++, txconf++) {
3284		/* Set up some basics */
3285		txr = &adapter->tx_rings[i];
3286		txr->adapter = adapter;
3287		txr->me = i;
3288
3289		/* Initialize the TX lock */
3290		snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
3291		    device_get_nameunit(dev), txr->me);
3292		mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF);
3293
3294		if (igb_dma_malloc(adapter, tsize,
3295			&txr->txdma, BUS_DMA_NOWAIT)) {
3296			device_printf(dev,
3297			    "Unable to allocate TX Descriptor memory\n");
3298			error = ENOMEM;
3299			goto err_tx_desc;
3300		}
3301		txr->tx_base = (struct e1000_tx_desc *)txr->txdma.dma_vaddr;
3302		bzero((void *)txr->tx_base, tsize);
3303
3304        	/* Now allocate transmit buffers for the ring */
3305        	if (igb_allocate_transmit_buffers(txr)) {
3306			device_printf(dev,
3307			    "Critical Failure setting up transmit buffers\n");
3308			error = ENOMEM;
3309			goto err_tx_desc;
3310        	}
3311#if __FreeBSD_version >= 800000
3312		/* Allocate a buf ring */
3313		txr->br = buf_ring_alloc(IGB_BR_SIZE, M_DEVBUF,
3314		    M_WAITOK, &txr->tx_mtx);
3315#endif
3316	}
3317
3318	/*
3319	 * Next the RX queues...
3320	 */
3321	rsize = roundup2(adapter->num_rx_desc *
3322	    sizeof(union e1000_adv_rx_desc), IGB_DBA_ALIGN);
3323	for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
3324		rxr = &adapter->rx_rings[i];
3325		rxr->adapter = adapter;
3326		rxr->me = i;
3327
3328		/* Initialize the RX lock */
3329		snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
3330		    device_get_nameunit(dev), txr->me);
3331		mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF);
3332
3333		if (igb_dma_malloc(adapter, rsize,
3334			&rxr->rxdma, BUS_DMA_NOWAIT)) {
3335			device_printf(dev,
3336			    "Unable to allocate RxDescriptor memory\n");
3337			error = ENOMEM;
3338			goto err_rx_desc;
3339		}
3340		rxr->rx_base = (union e1000_adv_rx_desc *)rxr->rxdma.dma_vaddr;
3341		bzero((void *)rxr->rx_base, rsize);
3342
3343        	/* Allocate receive buffers for the ring*/
3344		if (igb_allocate_receive_buffers(rxr)) {
3345			device_printf(dev,
3346			    "Critical Failure setting up receive buffers\n");
3347			error = ENOMEM;
3348			goto err_rx_desc;
3349		}
3350	}
3351
3352	/*
3353	** Finally set up the queue holding structs
3354	*/
3355	for (int i = 0; i < adapter->num_queues; i++) {
3356		que = &adapter->queues[i];
3357		que->adapter = adapter;
3358		que->txr = &adapter->tx_rings[i];
3359		que->rxr = &adapter->rx_rings[i];
3360	}
3361
3362	return (0);
3363
3364err_rx_desc:
3365	for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
3366		igb_dma_free(adapter, &rxr->rxdma);
3367err_tx_desc:
3368	for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
3369		igb_dma_free(adapter, &txr->txdma);
3370	free(adapter->rx_rings, M_DEVBUF);
3371rx_fail:
3372#if __FreeBSD_version >= 800000
3373	buf_ring_free(txr->br, M_DEVBUF);
3374#endif
3375	free(adapter->tx_rings, M_DEVBUF);
3376tx_fail:
3377	free(adapter->queues, M_DEVBUF);
3378fail:
3379	return (error);
3380}
3381
3382/*********************************************************************
3383 *
3384 *  Allocate memory for tx_buffer structures. The tx_buffer stores all
3385 *  the information needed to transmit a packet on the wire. This is
3386 *  called only once at attach, setup is done every reset.
3387 *
3388 **********************************************************************/
3389static int
3390igb_allocate_transmit_buffers(struct tx_ring *txr)
3391{
3392	struct adapter *adapter = txr->adapter;
3393	device_t dev = adapter->dev;
3394	struct igb_tx_buffer *txbuf;
3395	int error, i;
3396
3397	/*
3398	 * Setup DMA descriptor areas.
3399	 */
3400	if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
3401			       1, 0,			/* alignment, bounds */
3402			       BUS_SPACE_MAXADDR,	/* lowaddr */
3403			       BUS_SPACE_MAXADDR,	/* highaddr */
3404			       NULL, NULL,		/* filter, filterarg */
3405			       IGB_TSO_SIZE,		/* maxsize */
3406			       IGB_MAX_SCATTER,		/* nsegments */
3407			       PAGE_SIZE,		/* maxsegsize */
3408			       0,			/* flags */
3409			       NULL,			/* lockfunc */
3410			       NULL,			/* lockfuncarg */
3411			       &txr->txtag))) {
3412		device_printf(dev,"Unable to allocate TX DMA tag\n");
3413		goto fail;
3414	}
3415
3416	if (!(txr->tx_buffers =
3417	    (struct igb_tx_buffer *) malloc(sizeof(struct igb_tx_buffer) *
3418	    adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3419		device_printf(dev, "Unable to allocate tx_buffer memory\n");
3420		error = ENOMEM;
3421		goto fail;
3422	}
3423
3424        /* Create the descriptor buffer dma maps */
3425	txbuf = txr->tx_buffers;
3426	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3427		error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
3428		if (error != 0) {
3429			device_printf(dev, "Unable to create TX DMA map\n");
3430			goto fail;
3431		}
3432	}
3433
3434	return 0;
3435fail:
3436	/* We free all, it handles case where we are in the middle */
3437	igb_free_transmit_structures(adapter);
3438	return (error);
3439}
3440
3441/*********************************************************************
3442 *
3443 *  Initialize a transmit ring.
3444 *
3445 **********************************************************************/
3446static void
3447igb_setup_transmit_ring(struct tx_ring *txr)
3448{
3449	struct adapter *adapter = txr->adapter;
3450	struct igb_tx_buffer *txbuf;
3451	int i;
3452#ifdef DEV_NETMAP
3453	struct netmap_adapter *na = NA(adapter->ifp);
3454	struct netmap_slot *slot;
3455#endif /* DEV_NETMAP */
3456
3457	/* Clear the old descriptor contents */
3458	IGB_TX_LOCK(txr);
3459#ifdef DEV_NETMAP
3460	slot = netmap_reset(na, NR_TX, txr->me, 0);
3461#endif /* DEV_NETMAP */
3462	bzero((void *)txr->tx_base,
3463	      (sizeof(union e1000_adv_tx_desc)) * adapter->num_tx_desc);
3464	/* Reset indices */
3465	txr->next_avail_desc = 0;
3466	txr->next_to_clean = 0;
3467
3468	/* Free any existing tx buffers. */
3469        txbuf = txr->tx_buffers;
3470	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3471		if (txbuf->m_head != NULL) {
3472			bus_dmamap_sync(txr->txtag, txbuf->map,
3473			    BUS_DMASYNC_POSTWRITE);
3474			bus_dmamap_unload(txr->txtag, txbuf->map);
3475			m_freem(txbuf->m_head);
3476			txbuf->m_head = NULL;
3477		}
3478#ifdef DEV_NETMAP
3479		if (slot) {
3480			int si = netmap_idx_n2k(&na->tx_rings[txr->me], i);
3481			/* no need to set the address */
3482			netmap_load_map(txr->txtag, txbuf->map, NMB(slot + si));
3483		}
3484#endif /* DEV_NETMAP */
3485		/* clear the watch index */
3486		txbuf->next_eop = -1;
3487        }
3488
3489	/* Set number of descriptors available */
3490	txr->tx_avail = adapter->num_tx_desc;
3491
3492	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3493	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3494	IGB_TX_UNLOCK(txr);
3495}
3496
3497/*********************************************************************
3498 *
3499 *  Initialize all transmit rings.
3500 *
3501 **********************************************************************/
3502static void
3503igb_setup_transmit_structures(struct adapter *adapter)
3504{
3505	struct tx_ring *txr = adapter->tx_rings;
3506
3507	for (int i = 0; i < adapter->num_queues; i++, txr++)
3508		igb_setup_transmit_ring(txr);
3509
3510	return;
3511}
3512
3513/*********************************************************************
3514 *
3515 *  Enable transmit unit.
3516 *
3517 **********************************************************************/
3518static void
3519igb_initialize_transmit_units(struct adapter *adapter)
3520{
3521	struct tx_ring	*txr = adapter->tx_rings;
3522	struct e1000_hw *hw = &adapter->hw;
3523	u32		tctl, txdctl;
3524
3525	INIT_DEBUGOUT("igb_initialize_transmit_units: begin");
3526	tctl = txdctl = 0;
3527
3528	/* Setup the Tx Descriptor Rings */
3529	for (int i = 0; i < adapter->num_queues; i++, txr++) {
3530		u64 bus_addr = txr->txdma.dma_paddr;
3531
3532		E1000_WRITE_REG(hw, E1000_TDLEN(i),
3533		    adapter->num_tx_desc * sizeof(struct e1000_tx_desc));
3534		E1000_WRITE_REG(hw, E1000_TDBAH(i),
3535		    (uint32_t)(bus_addr >> 32));
3536		E1000_WRITE_REG(hw, E1000_TDBAL(i),
3537		    (uint32_t)bus_addr);
3538
3539		/* Setup the HW Tx Head and Tail descriptor pointers */
3540		E1000_WRITE_REG(hw, E1000_TDT(i), 0);
3541		E1000_WRITE_REG(hw, E1000_TDH(i), 0);
3542
3543		HW_DEBUGOUT2("Base = %x, Length = %x\n",
3544		    E1000_READ_REG(hw, E1000_TDBAL(i)),
3545		    E1000_READ_REG(hw, E1000_TDLEN(i)));
3546
3547		txr->queue_status = IGB_QUEUE_IDLE;
3548
3549		txdctl |= IGB_TX_PTHRESH;
3550		txdctl |= IGB_TX_HTHRESH << 8;
3551		txdctl |= IGB_TX_WTHRESH << 16;
3552		txdctl |= E1000_TXDCTL_QUEUE_ENABLE;
3553		E1000_WRITE_REG(hw, E1000_TXDCTL(i), txdctl);
3554	}
3555
3556	if (adapter->vf_ifp)
3557		return;
3558
3559	e1000_config_collision_dist(hw);
3560
3561	/* Program the Transmit Control Register */
3562	tctl = E1000_READ_REG(hw, E1000_TCTL);
3563	tctl &= ~E1000_TCTL_CT;
3564	tctl |= (E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN |
3565		   (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT));
3566
3567	/* This write will effectively turn on the transmit unit. */
3568	E1000_WRITE_REG(hw, E1000_TCTL, tctl);
3569}
3570
3571/*********************************************************************
3572 *
3573 *  Free all transmit rings.
3574 *
3575 **********************************************************************/
3576static void
3577igb_free_transmit_structures(struct adapter *adapter)
3578{
3579	struct tx_ring *txr = adapter->tx_rings;
3580
3581	for (int i = 0; i < adapter->num_queues; i++, txr++) {
3582		IGB_TX_LOCK(txr);
3583		igb_free_transmit_buffers(txr);
3584		igb_dma_free(adapter, &txr->txdma);
3585		IGB_TX_UNLOCK(txr);
3586		IGB_TX_LOCK_DESTROY(txr);
3587	}
3588	free(adapter->tx_rings, M_DEVBUF);
3589}
3590
3591/*********************************************************************
3592 *
3593 *  Free transmit ring related data structures.
3594 *
3595 **********************************************************************/
3596static void
3597igb_free_transmit_buffers(struct tx_ring *txr)
3598{
3599	struct adapter *adapter = txr->adapter;
3600	struct igb_tx_buffer *tx_buffer;
3601	int             i;
3602
3603	INIT_DEBUGOUT("free_transmit_ring: begin");
3604
3605	if (txr->tx_buffers == NULL)
3606		return;
3607
3608	tx_buffer = txr->tx_buffers;
3609	for (i = 0; i < adapter->num_tx_desc; i++, tx_buffer++) {
3610		if (tx_buffer->m_head != NULL) {
3611			bus_dmamap_sync(txr->txtag, tx_buffer->map,
3612			    BUS_DMASYNC_POSTWRITE);
3613			bus_dmamap_unload(txr->txtag,
3614			    tx_buffer->map);
3615			m_freem(tx_buffer->m_head);
3616			tx_buffer->m_head = NULL;
3617			if (tx_buffer->map != NULL) {
3618				bus_dmamap_destroy(txr->txtag,
3619				    tx_buffer->map);
3620				tx_buffer->map = NULL;
3621			}
3622		} else if (tx_buffer->map != NULL) {
3623			bus_dmamap_unload(txr->txtag,
3624			    tx_buffer->map);
3625			bus_dmamap_destroy(txr->txtag,
3626			    tx_buffer->map);
3627			tx_buffer->map = NULL;
3628		}
3629	}
3630#if __FreeBSD_version >= 800000
3631	if (txr->br != NULL)
3632		buf_ring_free(txr->br, M_DEVBUF);
3633#endif
3634	if (txr->tx_buffers != NULL) {
3635		free(txr->tx_buffers, M_DEVBUF);
3636		txr->tx_buffers = NULL;
3637	}
3638	if (txr->txtag != NULL) {
3639		bus_dma_tag_destroy(txr->txtag);
3640		txr->txtag = NULL;
3641	}
3642	return;
3643}
3644
3645/**********************************************************************
3646 *
3647 *  Setup work for hardware segmentation offload (TSO)
3648 *
3649 **********************************************************************/
3650static bool
3651igb_tso_setup(struct tx_ring *txr, struct mbuf *mp, int ehdrlen,
3652	struct ip *ip, struct tcphdr *th)
3653{
3654	struct adapter *adapter = txr->adapter;
3655	struct e1000_adv_tx_context_desc *TXD;
3656	struct igb_tx_buffer        *tx_buffer;
3657	u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0;
3658	u32 mss_l4len_idx = 0;
3659	u16 vtag = 0;
3660	int ctxd, ip_hlen, tcp_hlen;
3661
3662	ctxd = txr->next_avail_desc;
3663	tx_buffer = &txr->tx_buffers[ctxd];
3664	TXD = (struct e1000_adv_tx_context_desc *) &txr->tx_base[ctxd];
3665
3666	ip->ip_sum = 0;
3667	ip_hlen = ip->ip_hl << 2;
3668	tcp_hlen = th->th_off << 2;
3669
3670	/* VLAN MACLEN IPLEN */
3671	if (mp->m_flags & M_VLANTAG) {
3672		vtag = htole16(mp->m_pkthdr.ether_vtag);
3673		vlan_macip_lens |= (vtag << E1000_ADVTXD_VLAN_SHIFT);
3674	}
3675
3676	vlan_macip_lens |= (ehdrlen << E1000_ADVTXD_MACLEN_SHIFT);
3677	vlan_macip_lens |= ip_hlen;
3678	TXD->vlan_macip_lens |= htole32(vlan_macip_lens);
3679
3680	/* ADV DTYPE TUCMD */
3681	type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
3682	type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP;
3683	type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4;
3684	TXD->type_tucmd_mlhl |= htole32(type_tucmd_mlhl);
3685
3686	/* MSS L4LEN IDX */
3687	mss_l4len_idx |= (mp->m_pkthdr.tso_segsz << E1000_ADVTXD_MSS_SHIFT);
3688	mss_l4len_idx |= (tcp_hlen << E1000_ADVTXD_L4LEN_SHIFT);
3689	/* 82575 needs the queue index added */
3690	if (adapter->hw.mac.type == e1000_82575)
3691		mss_l4len_idx |= txr->me << 4;
3692	TXD->mss_l4len_idx = htole32(mss_l4len_idx);
3693
3694	TXD->seqnum_seed = htole32(0);
3695	tx_buffer->m_head = NULL;
3696	tx_buffer->next_eop = -1;
3697
3698	if (++ctxd == adapter->num_tx_desc)
3699		ctxd = 0;
3700
3701	txr->tx_avail--;
3702	txr->next_avail_desc = ctxd;
3703	return TRUE;
3704}
3705
3706
3707/*********************************************************************
3708 *
3709 *  Context Descriptor setup for VLAN or CSUM
3710 *
3711 **********************************************************************/
3712
3713static bool
3714igb_tx_ctx_setup(struct tx_ring *txr, struct mbuf *mp)
3715{
3716	struct adapter *adapter = txr->adapter;
3717	struct e1000_adv_tx_context_desc *TXD;
3718	struct igb_tx_buffer        *tx_buffer;
3719	u32 vlan_macip_lens, type_tucmd_mlhl, mss_l4len_idx;
3720	struct ether_vlan_header *eh;
3721	struct ip *ip = NULL;
3722	struct ip6_hdr *ip6;
3723	int  ehdrlen, ctxd, ip_hlen = 0;
3724	u16	etype, vtag = 0;
3725	u8	ipproto = 0;
3726	bool	offload = TRUE;
3727
3728	if ((mp->m_pkthdr.csum_flags & CSUM_OFFLOAD) == 0)
3729		offload = FALSE;
3730
3731	vlan_macip_lens = type_tucmd_mlhl = mss_l4len_idx = 0;
3732	ctxd = txr->next_avail_desc;
3733	tx_buffer = &txr->tx_buffers[ctxd];
3734	TXD = (struct e1000_adv_tx_context_desc *) &txr->tx_base[ctxd];
3735
3736	/*
3737	** In advanced descriptors the vlan tag must
3738	** be placed into the context descriptor, thus
3739	** we need to be here just for that setup.
3740	*/
3741	if (mp->m_flags & M_VLANTAG) {
3742		vtag = htole16(mp->m_pkthdr.ether_vtag);
3743		vlan_macip_lens |= (vtag << E1000_ADVTXD_VLAN_SHIFT);
3744	} else if (offload == FALSE)
3745		return FALSE;
3746
3747	/*
3748	 * Determine where frame payload starts.
3749	 * Jump over vlan headers if already present,
3750	 * helpful for QinQ too.
3751	 */
3752	eh = mtod(mp, struct ether_vlan_header *);
3753	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
3754		etype = ntohs(eh->evl_proto);
3755		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3756	} else {
3757		etype = ntohs(eh->evl_encap_proto);
3758		ehdrlen = ETHER_HDR_LEN;
3759	}
3760
3761	/* Set the ether header length */
3762	vlan_macip_lens |= ehdrlen << E1000_ADVTXD_MACLEN_SHIFT;
3763
3764	switch (etype) {
3765		case ETHERTYPE_IP:
3766			ip = (struct ip *)(mp->m_data + ehdrlen);
3767			ip_hlen = ip->ip_hl << 2;
3768			if (mp->m_len < ehdrlen + ip_hlen) {
3769				offload = FALSE;
3770				break;
3771			}
3772			ipproto = ip->ip_p;
3773			type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4;
3774			break;
3775		case ETHERTYPE_IPV6:
3776			ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
3777			ip_hlen = sizeof(struct ip6_hdr);
3778			ipproto = ip6->ip6_nxt;
3779			type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV6;
3780			break;
3781		default:
3782			offload = FALSE;
3783			break;
3784	}
3785
3786	vlan_macip_lens |= ip_hlen;
3787	type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
3788
3789	switch (ipproto) {
3790		case IPPROTO_TCP:
3791			if (mp->m_pkthdr.csum_flags & CSUM_TCP)
3792				type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP;
3793			break;
3794		case IPPROTO_UDP:
3795			if (mp->m_pkthdr.csum_flags & CSUM_UDP)
3796				type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_UDP;
3797			break;
3798#if __FreeBSD_version >= 800000
3799		case IPPROTO_SCTP:
3800			if (mp->m_pkthdr.csum_flags & CSUM_SCTP)
3801				type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_SCTP;
3802			break;
3803#endif
3804		default:
3805			offload = FALSE;
3806			break;
3807	}
3808
3809	/* 82575 needs the queue index added */
3810	if (adapter->hw.mac.type == e1000_82575)
3811		mss_l4len_idx = txr->me << 4;
3812
3813	/* Now copy bits into descriptor */
3814	TXD->vlan_macip_lens |= htole32(vlan_macip_lens);
3815	TXD->type_tucmd_mlhl |= htole32(type_tucmd_mlhl);
3816	TXD->seqnum_seed = htole32(0);
3817	TXD->mss_l4len_idx = htole32(mss_l4len_idx);
3818
3819	tx_buffer->m_head = NULL;
3820	tx_buffer->next_eop = -1;
3821
3822	/* We've consumed the first desc, adjust counters */
3823	if (++ctxd == adapter->num_tx_desc)
3824		ctxd = 0;
3825	txr->next_avail_desc = ctxd;
3826	--txr->tx_avail;
3827
3828        return (offload);
3829}
3830
3831
3832/**********************************************************************
3833 *
3834 *  Examine each tx_buffer in the used queue. If the hardware is done
3835 *  processing the packet then free associated resources. The
3836 *  tx_buffer is put back on the free queue.
3837 *
3838 *  TRUE return means there's work in the ring to clean, FALSE its empty.
3839 **********************************************************************/
3840static bool
3841igb_txeof(struct tx_ring *txr)
3842{
3843	struct adapter	*adapter = txr->adapter;
3844        int first, last, done, processed;
3845        struct igb_tx_buffer *tx_buffer;
3846        struct e1000_tx_desc   *tx_desc, *eop_desc;
3847	struct ifnet   *ifp = adapter->ifp;
3848
3849	IGB_TX_LOCK_ASSERT(txr);
3850
3851#ifdef DEV_NETMAP
3852	if (ifp->if_capenable & IFCAP_NETMAP) {
3853		struct netmap_adapter *na = NA(ifp);
3854
3855		selwakeuppri(&na->tx_rings[txr->me].si, PI_NET);
3856		IGB_TX_UNLOCK(txr);
3857		IGB_CORE_LOCK(adapter);
3858		selwakeuppri(&na->tx_si, PI_NET);
3859		IGB_CORE_UNLOCK(adapter);
3860		IGB_TX_LOCK(txr);
3861		return FALSE;
3862	}
3863#endif /* DEV_NETMAP */
3864        if (txr->tx_avail == adapter->num_tx_desc) {
3865		txr->queue_status = IGB_QUEUE_IDLE;
3866                return FALSE;
3867	}
3868
3869	processed = 0;
3870        first = txr->next_to_clean;
3871        tx_desc = &txr->tx_base[first];
3872        tx_buffer = &txr->tx_buffers[first];
3873	last = tx_buffer->next_eop;
3874        eop_desc = &txr->tx_base[last];
3875
3876	/*
3877	 * What this does is get the index of the
3878	 * first descriptor AFTER the EOP of the
3879	 * first packet, that way we can do the
3880	 * simple comparison on the inner while loop.
3881	 */
3882	if (++last == adapter->num_tx_desc)
3883 		last = 0;
3884	done = last;
3885
3886        bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3887            BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
3888
3889        while (eop_desc->upper.fields.status & E1000_TXD_STAT_DD) {
3890		/* We clean the range of the packet */
3891		while (first != done) {
3892                	tx_desc->upper.data = 0;
3893                	tx_desc->lower.data = 0;
3894                	tx_desc->buffer_addr = 0;
3895                	++txr->tx_avail;
3896			++processed;
3897
3898			if (tx_buffer->m_head) {
3899				txr->bytes +=
3900				    tx_buffer->m_head->m_pkthdr.len;
3901				bus_dmamap_sync(txr->txtag,
3902				    tx_buffer->map,
3903				    BUS_DMASYNC_POSTWRITE);
3904				bus_dmamap_unload(txr->txtag,
3905				    tx_buffer->map);
3906
3907                        	m_freem(tx_buffer->m_head);
3908                        	tx_buffer->m_head = NULL;
3909                	}
3910			tx_buffer->next_eop = -1;
3911			txr->watchdog_time = ticks;
3912
3913	                if (++first == adapter->num_tx_desc)
3914				first = 0;
3915
3916	                tx_buffer = &txr->tx_buffers[first];
3917			tx_desc = &txr->tx_base[first];
3918		}
3919		++txr->packets;
3920		++ifp->if_opackets;
3921		/* See if we can continue to the next packet */
3922		last = tx_buffer->next_eop;
3923		if (last != -1) {
3924        		eop_desc = &txr->tx_base[last];
3925			/* Get new done point */
3926			if (++last == adapter->num_tx_desc) last = 0;
3927			done = last;
3928		} else
3929			break;
3930        }
3931        bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3932            BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3933
3934        txr->next_to_clean = first;
3935
3936	/*
3937	** Watchdog calculation, we know there's
3938	** work outstanding or the first return
3939	** would have been taken, so none processed
3940	** for too long indicates a hang.
3941	*/
3942	if ((!processed) && ((ticks - txr->watchdog_time) > IGB_WATCHDOG))
3943		txr->queue_status |= IGB_QUEUE_HUNG;
3944        /*
3945         * If we have a minimum free,
3946         * clear depleted state bit
3947         */
3948        if (txr->tx_avail >= IGB_QUEUE_THRESHOLD)
3949                txr->queue_status &= ~IGB_QUEUE_DEPLETED;
3950
3951	/* All clean, turn off the watchdog */
3952	if (txr->tx_avail == adapter->num_tx_desc) {
3953		txr->queue_status = IGB_QUEUE_IDLE;
3954		return (FALSE);
3955        }
3956
3957	return (TRUE);
3958}
3959
3960/*********************************************************************
3961 *
3962 *  Refresh mbuf buffers for RX descriptor rings
3963 *   - now keeps its own state so discards due to resource
3964 *     exhaustion are unnecessary, if an mbuf cannot be obtained
3965 *     it just returns, keeping its placeholder, thus it can simply
3966 *     be recalled to try again.
3967 *
3968 **********************************************************************/
3969static void
3970igb_refresh_mbufs(struct rx_ring *rxr, int limit)
3971{
3972	struct adapter		*adapter = rxr->adapter;
3973	bus_dma_segment_t	hseg[1];
3974	bus_dma_segment_t	pseg[1];
3975	struct igb_rx_buf	*rxbuf;
3976	struct mbuf		*mh, *mp;
3977	int			i, j, nsegs, error;
3978	bool			refreshed = FALSE;
3979
3980	i = j = rxr->next_to_refresh;
3981	/*
3982	** Get one descriptor beyond
3983	** our work mark to control
3984	** the loop.
3985        */
3986	if (++j == adapter->num_rx_desc)
3987		j = 0;
3988
3989	while (j != limit) {
3990		rxbuf = &rxr->rx_buffers[i];
3991		/* No hdr mbuf used with header split off */
3992		if (rxr->hdr_split == FALSE)
3993			goto no_split;
3994		if (rxbuf->m_head == NULL) {
3995			mh = m_gethdr(M_DONTWAIT, MT_DATA);
3996			if (mh == NULL)
3997				goto update;
3998		} else
3999			mh = rxbuf->m_head;
4000
4001		mh->m_pkthdr.len = mh->m_len = MHLEN;
4002		mh->m_len = MHLEN;
4003		mh->m_flags |= M_PKTHDR;
4004		/* Get the memory mapping */
4005		error = bus_dmamap_load_mbuf_sg(rxr->htag,
4006		    rxbuf->hmap, mh, hseg, &nsegs, BUS_DMA_NOWAIT);
4007		if (error != 0) {
4008			printf("Refresh mbufs: hdr dmamap load"
4009			    " failure - %d\n", error);
4010			m_free(mh);
4011			rxbuf->m_head = NULL;
4012			goto update;
4013		}
4014		rxbuf->m_head = mh;
4015		bus_dmamap_sync(rxr->htag, rxbuf->hmap,
4016		    BUS_DMASYNC_PREREAD);
4017		rxr->rx_base[i].read.hdr_addr =
4018		    htole64(hseg[0].ds_addr);
4019no_split:
4020		if (rxbuf->m_pack == NULL) {
4021			mp = m_getjcl(M_DONTWAIT, MT_DATA,
4022			    M_PKTHDR, adapter->rx_mbuf_sz);
4023			if (mp == NULL)
4024				goto update;
4025		} else
4026			mp = rxbuf->m_pack;
4027
4028		mp->m_pkthdr.len = mp->m_len = adapter->rx_mbuf_sz;
4029		/* Get the memory mapping */
4030		error = bus_dmamap_load_mbuf_sg(rxr->ptag,
4031		    rxbuf->pmap, mp, pseg, &nsegs, BUS_DMA_NOWAIT);
4032		if (error != 0) {
4033			printf("Refresh mbufs: payload dmamap load"
4034			    " failure - %d\n", error);
4035			m_free(mp);
4036			rxbuf->m_pack = NULL;
4037			goto update;
4038		}
4039		rxbuf->m_pack = mp;
4040		bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
4041		    BUS_DMASYNC_PREREAD);
4042		rxr->rx_base[i].read.pkt_addr =
4043		    htole64(pseg[0].ds_addr);
4044		refreshed = TRUE; /* I feel wefreshed :) */
4045
4046		i = j; /* our next is precalculated */
4047		rxr->next_to_refresh = i;
4048		if (++j == adapter->num_rx_desc)
4049			j = 0;
4050	}
4051update:
4052	if (refreshed) /* update tail */
4053		E1000_WRITE_REG(&adapter->hw,
4054		    E1000_RDT(rxr->me), rxr->next_to_refresh);
4055	return;
4056}
4057
4058
4059/*********************************************************************
4060 *
4061 *  Allocate memory for rx_buffer structures. Since we use one
4062 *  rx_buffer per received packet, the maximum number of rx_buffer's
4063 *  that we'll need is equal to the number of receive descriptors
4064 *  that we've allocated.
4065 *
4066 **********************************************************************/
4067static int
4068igb_allocate_receive_buffers(struct rx_ring *rxr)
4069{
4070	struct	adapter 	*adapter = rxr->adapter;
4071	device_t 		dev = adapter->dev;
4072	struct igb_rx_buf	*rxbuf;
4073	int             	i, bsize, error;
4074
4075	bsize = sizeof(struct igb_rx_buf) * adapter->num_rx_desc;
4076	if (!(rxr->rx_buffers =
4077	    (struct igb_rx_buf *) malloc(bsize,
4078	    M_DEVBUF, M_NOWAIT | M_ZERO))) {
4079		device_printf(dev, "Unable to allocate rx_buffer memory\n");
4080		error = ENOMEM;
4081		goto fail;
4082	}
4083
4084	if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
4085				   1, 0,		/* alignment, bounds */
4086				   BUS_SPACE_MAXADDR,	/* lowaddr */
4087				   BUS_SPACE_MAXADDR,	/* highaddr */
4088				   NULL, NULL,		/* filter, filterarg */
4089				   MSIZE,		/* maxsize */
4090				   1,			/* nsegments */
4091				   MSIZE,		/* maxsegsize */
4092				   0,			/* flags */
4093				   NULL,		/* lockfunc */
4094				   NULL,		/* lockfuncarg */
4095				   &rxr->htag))) {
4096		device_printf(dev, "Unable to create RX DMA tag\n");
4097		goto fail;
4098	}
4099
4100	if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
4101				   1, 0,		/* alignment, bounds */
4102				   BUS_SPACE_MAXADDR,	/* lowaddr */
4103				   BUS_SPACE_MAXADDR,	/* highaddr */
4104				   NULL, NULL,		/* filter, filterarg */
4105				   MJUM9BYTES,		/* maxsize */
4106				   1,			/* nsegments */
4107				   MJUM9BYTES,		/* maxsegsize */
4108				   0,			/* flags */
4109				   NULL,		/* lockfunc */
4110				   NULL,		/* lockfuncarg */
4111				   &rxr->ptag))) {
4112		device_printf(dev, "Unable to create RX payload DMA tag\n");
4113		goto fail;
4114	}
4115
4116	for (i = 0; i < adapter->num_rx_desc; i++) {
4117		rxbuf = &rxr->rx_buffers[i];
4118		error = bus_dmamap_create(rxr->htag,
4119		    BUS_DMA_NOWAIT, &rxbuf->hmap);
4120		if (error) {
4121			device_printf(dev,
4122			    "Unable to create RX head DMA maps\n");
4123			goto fail;
4124		}
4125		error = bus_dmamap_create(rxr->ptag,
4126		    BUS_DMA_NOWAIT, &rxbuf->pmap);
4127		if (error) {
4128			device_printf(dev,
4129			    "Unable to create RX packet DMA maps\n");
4130			goto fail;
4131		}
4132	}
4133
4134	return (0);
4135
4136fail:
4137	/* Frees all, but can handle partial completion */
4138	igb_free_receive_structures(adapter);
4139	return (error);
4140}
4141
4142
4143static void
4144igb_free_receive_ring(struct rx_ring *rxr)
4145{
4146	struct	adapter		*adapter = rxr->adapter;
4147	struct igb_rx_buf	*rxbuf;
4148
4149
4150	for (int i = 0; i < adapter->num_rx_desc; i++) {
4151		rxbuf = &rxr->rx_buffers[i];
4152		if (rxbuf->m_head != NULL) {
4153			bus_dmamap_sync(rxr->htag, rxbuf->hmap,
4154			    BUS_DMASYNC_POSTREAD);
4155			bus_dmamap_unload(rxr->htag, rxbuf->hmap);
4156			rxbuf->m_head->m_flags |= M_PKTHDR;
4157			m_freem(rxbuf->m_head);
4158		}
4159		if (rxbuf->m_pack != NULL) {
4160			bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
4161			    BUS_DMASYNC_POSTREAD);
4162			bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
4163			rxbuf->m_pack->m_flags |= M_PKTHDR;
4164			m_freem(rxbuf->m_pack);
4165		}
4166		rxbuf->m_head = NULL;
4167		rxbuf->m_pack = NULL;
4168	}
4169}
4170
4171
4172/*********************************************************************
4173 *
4174 *  Initialize a receive ring and its buffers.
4175 *
4176 **********************************************************************/
4177static int
4178igb_setup_receive_ring(struct rx_ring *rxr)
4179{
4180	struct	adapter		*adapter;
4181	struct  ifnet		*ifp;
4182	device_t		dev;
4183	struct igb_rx_buf	*rxbuf;
4184	bus_dma_segment_t	pseg[1], hseg[1];
4185	struct lro_ctrl		*lro = &rxr->lro;
4186	int			rsize, nsegs, error = 0;
4187#ifdef DEV_NETMAP
4188	struct netmap_adapter *na = NA(rxr->adapter->ifp);
4189	struct netmap_slot *slot;
4190#endif /* DEV_NETMAP */
4191
4192	adapter = rxr->adapter;
4193	dev = adapter->dev;
4194	ifp = adapter->ifp;
4195
4196	/* Clear the ring contents */
4197	IGB_RX_LOCK(rxr);
4198#ifdef DEV_NETMAP
4199	slot = netmap_reset(na, NR_RX, rxr->me, 0);
4200#endif /* DEV_NETMAP */
4201	rsize = roundup2(adapter->num_rx_desc *
4202	    sizeof(union e1000_adv_rx_desc), IGB_DBA_ALIGN);
4203	bzero((void *)rxr->rx_base, rsize);
4204
4205	/*
4206	** Free current RX buffer structures and their mbufs
4207	*/
4208	igb_free_receive_ring(rxr);
4209
4210	/* Configure for header split? */
4211	if (igb_header_split)
4212		rxr->hdr_split = TRUE;
4213
4214        /* Now replenish the ring mbufs */
4215	for (int j = 0; j < adapter->num_rx_desc; ++j) {
4216		struct mbuf	*mh, *mp;
4217
4218		rxbuf = &rxr->rx_buffers[j];
4219#ifdef DEV_NETMAP
4220		if (slot) {
4221			/* slot sj is mapped to the i-th NIC-ring entry */
4222			int sj = netmap_idx_n2k(&na->rx_rings[rxr->me], j);
4223			uint64_t paddr;
4224			void *addr;
4225
4226			addr = PNMB(slot + sj, &paddr);
4227			netmap_load_map(rxr->ptag, rxbuf->pmap, addr);
4228			/* Update descriptor */
4229			rxr->rx_base[j].read.pkt_addr = htole64(paddr);
4230			continue;
4231		}
4232#endif /* DEV_NETMAP */
4233		if (rxr->hdr_split == FALSE)
4234			goto skip_head;
4235
4236		/* First the header */
4237		rxbuf->m_head = m_gethdr(M_DONTWAIT, MT_DATA);
4238		if (rxbuf->m_head == NULL) {
4239			error = ENOBUFS;
4240                        goto fail;
4241		}
4242		m_adj(rxbuf->m_head, ETHER_ALIGN);
4243		mh = rxbuf->m_head;
4244		mh->m_len = mh->m_pkthdr.len = MHLEN;
4245		mh->m_flags |= M_PKTHDR;
4246		/* Get the memory mapping */
4247		error = bus_dmamap_load_mbuf_sg(rxr->htag,
4248		    rxbuf->hmap, rxbuf->m_head, hseg,
4249		    &nsegs, BUS_DMA_NOWAIT);
4250		if (error != 0) /* Nothing elegant to do here */
4251                        goto fail;
4252		bus_dmamap_sync(rxr->htag,
4253		    rxbuf->hmap, BUS_DMASYNC_PREREAD);
4254		/* Update descriptor */
4255		rxr->rx_base[j].read.hdr_addr = htole64(hseg[0].ds_addr);
4256
4257skip_head:
4258		/* Now the payload cluster */
4259		rxbuf->m_pack = m_getjcl(M_DONTWAIT, MT_DATA,
4260		    M_PKTHDR, adapter->rx_mbuf_sz);
4261		if (rxbuf->m_pack == NULL) {
4262			error = ENOBUFS;
4263                        goto fail;
4264		}
4265		mp = rxbuf->m_pack;
4266		mp->m_pkthdr.len = mp->m_len = adapter->rx_mbuf_sz;
4267		/* Get the memory mapping */
4268		error = bus_dmamap_load_mbuf_sg(rxr->ptag,
4269		    rxbuf->pmap, mp, pseg,
4270		    &nsegs, BUS_DMA_NOWAIT);
4271		if (error != 0)
4272                        goto fail;
4273		bus_dmamap_sync(rxr->ptag,
4274		    rxbuf->pmap, BUS_DMASYNC_PREREAD);
4275		/* Update descriptor */
4276		rxr->rx_base[j].read.pkt_addr = htole64(pseg[0].ds_addr);
4277        }
4278
4279	/* Setup our descriptor indices */
4280	rxr->next_to_check = 0;
4281	rxr->next_to_refresh = adapter->num_rx_desc - 1;
4282	rxr->lro_enabled = FALSE;
4283	rxr->rx_split_packets = 0;
4284	rxr->rx_bytes = 0;
4285
4286	rxr->fmp = NULL;
4287	rxr->lmp = NULL;
4288	rxr->discard = FALSE;
4289
4290	bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4291	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4292
4293	/*
4294	** Now set up the LRO interface, we
4295	** also only do head split when LRO
4296	** is enabled, since so often they
4297	** are undesireable in similar setups.
4298	*/
4299	if (ifp->if_capenable & IFCAP_LRO) {
4300		error = tcp_lro_init(lro);
4301		if (error) {
4302			device_printf(dev, "LRO Initialization failed!\n");
4303			goto fail;
4304		}
4305		INIT_DEBUGOUT("RX LRO Initialized\n");
4306		rxr->lro_enabled = TRUE;
4307		lro->ifp = adapter->ifp;
4308	}
4309
4310	IGB_RX_UNLOCK(rxr);
4311	return (0);
4312
4313fail:
4314	igb_free_receive_ring(rxr);
4315	IGB_RX_UNLOCK(rxr);
4316	return (error);
4317}
4318
4319
4320/*********************************************************************
4321 *
4322 *  Initialize all receive rings.
4323 *
4324 **********************************************************************/
4325static int
4326igb_setup_receive_structures(struct adapter *adapter)
4327{
4328	struct rx_ring *rxr = adapter->rx_rings;
4329	int i;
4330
4331	for (i = 0; i < adapter->num_queues; i++, rxr++)
4332		if (igb_setup_receive_ring(rxr))
4333			goto fail;
4334
4335	return (0);
4336fail:
4337	/*
4338	 * Free RX buffers allocated so far, we will only handle
4339	 * the rings that completed, the failing case will have
4340	 * cleaned up for itself. 'i' is the endpoint.
4341	 */
4342	for (int j = 0; j > i; ++j) {
4343		rxr = &adapter->rx_rings[i];
4344		IGB_RX_LOCK(rxr);
4345		igb_free_receive_ring(rxr);
4346		IGB_RX_UNLOCK(rxr);
4347	}
4348
4349	return (ENOBUFS);
4350}
4351
4352/*********************************************************************
4353 *
4354 *  Enable receive unit.
4355 *
4356 **********************************************************************/
4357static void
4358igb_initialize_receive_units(struct adapter *adapter)
4359{
4360	struct rx_ring	*rxr = adapter->rx_rings;
4361	struct ifnet	*ifp = adapter->ifp;
4362	struct e1000_hw *hw = &adapter->hw;
4363	u32		rctl, rxcsum, psize, srrctl = 0;
4364
4365	INIT_DEBUGOUT("igb_initialize_receive_unit: begin");
4366
4367	/*
4368	 * Make sure receives are disabled while setting
4369	 * up the descriptor ring
4370	 */
4371	rctl = E1000_READ_REG(hw, E1000_RCTL);
4372	E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
4373
4374	/*
4375	** Set up for header split
4376	*/
4377	if (igb_header_split) {
4378		/* Use a standard mbuf for the header */
4379		srrctl |= IGB_HDR_BUF << E1000_SRRCTL_BSIZEHDRSIZE_SHIFT;
4380		srrctl |= E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
4381	} else
4382		srrctl |= E1000_SRRCTL_DESCTYPE_ADV_ONEBUF;
4383
4384	/*
4385	** Set up for jumbo frames
4386	*/
4387	if (ifp->if_mtu > ETHERMTU) {
4388		rctl |= E1000_RCTL_LPE;
4389		if (adapter->rx_mbuf_sz == MJUMPAGESIZE) {
4390			srrctl |= 4096 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
4391			rctl |= E1000_RCTL_SZ_4096 | E1000_RCTL_BSEX;
4392		} else if (adapter->rx_mbuf_sz > MJUMPAGESIZE) {
4393			srrctl |= 8192 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
4394			rctl |= E1000_RCTL_SZ_8192 | E1000_RCTL_BSEX;
4395		}
4396		/* Set maximum packet len */
4397		psize = adapter->max_frame_size;
4398		/* are we on a vlan? */
4399		if (adapter->ifp->if_vlantrunk != NULL)
4400			psize += VLAN_TAG_SIZE;
4401		E1000_WRITE_REG(&adapter->hw, E1000_RLPML, psize);
4402	} else {
4403		rctl &= ~E1000_RCTL_LPE;
4404		srrctl |= 2048 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
4405		rctl |= E1000_RCTL_SZ_2048;
4406	}
4407
4408	/* Setup the Base and Length of the Rx Descriptor Rings */
4409	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4410		u64 bus_addr = rxr->rxdma.dma_paddr;
4411		u32 rxdctl;
4412
4413		E1000_WRITE_REG(hw, E1000_RDLEN(i),
4414		    adapter->num_rx_desc * sizeof(struct e1000_rx_desc));
4415		E1000_WRITE_REG(hw, E1000_RDBAH(i),
4416		    (uint32_t)(bus_addr >> 32));
4417		E1000_WRITE_REG(hw, E1000_RDBAL(i),
4418		    (uint32_t)bus_addr);
4419		E1000_WRITE_REG(hw, E1000_SRRCTL(i), srrctl);
4420		/* Enable this Queue */
4421		rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(i));
4422		rxdctl |= E1000_RXDCTL_QUEUE_ENABLE;
4423		rxdctl &= 0xFFF00000;
4424		rxdctl |= IGB_RX_PTHRESH;
4425		rxdctl |= IGB_RX_HTHRESH << 8;
4426		rxdctl |= IGB_RX_WTHRESH << 16;
4427		E1000_WRITE_REG(hw, E1000_RXDCTL(i), rxdctl);
4428	}
4429
4430	/*
4431	** Setup for RX MultiQueue
4432	*/
4433	rxcsum = E1000_READ_REG(hw, E1000_RXCSUM);
4434	if (adapter->num_queues >1) {
4435		u32 random[10], mrqc, shift = 0;
4436		union igb_reta {
4437			u32 dword;
4438			u8  bytes[4];
4439		} reta;
4440
4441		arc4rand(&random, sizeof(random), 0);
4442		if (adapter->hw.mac.type == e1000_82575)
4443			shift = 6;
4444		/* Warning FM follows */
4445		for (int i = 0; i < 128; i++) {
4446			reta.bytes[i & 3] =
4447			    (i % adapter->num_queues) << shift;
4448			if ((i & 3) == 3)
4449				E1000_WRITE_REG(hw,
4450				    E1000_RETA(i >> 2), reta.dword);
4451		}
4452		/* Now fill in hash table */
4453		mrqc = E1000_MRQC_ENABLE_RSS_4Q;
4454		for (int i = 0; i < 10; i++)
4455			E1000_WRITE_REG_ARRAY(hw,
4456			    E1000_RSSRK(0), i, random[i]);
4457
4458		mrqc |= (E1000_MRQC_RSS_FIELD_IPV4 |
4459		    E1000_MRQC_RSS_FIELD_IPV4_TCP);
4460		mrqc |= (E1000_MRQC_RSS_FIELD_IPV6 |
4461		    E1000_MRQC_RSS_FIELD_IPV6_TCP);
4462		mrqc |=( E1000_MRQC_RSS_FIELD_IPV4_UDP |
4463		    E1000_MRQC_RSS_FIELD_IPV6_UDP);
4464		mrqc |=( E1000_MRQC_RSS_FIELD_IPV6_UDP_EX |
4465		    E1000_MRQC_RSS_FIELD_IPV6_TCP_EX);
4466
4467		E1000_WRITE_REG(hw, E1000_MRQC, mrqc);
4468
4469		/*
4470		** NOTE: Receive Full-Packet Checksum Offload
4471		** is mutually exclusive with Multiqueue. However
4472		** this is not the same as TCP/IP checksums which
4473		** still work.
4474		*/
4475		rxcsum |= E1000_RXCSUM_PCSD;
4476#if __FreeBSD_version >= 800000
4477		/* For SCTP Offload */
4478		if ((hw->mac.type == e1000_82576)
4479		    && (ifp->if_capenable & IFCAP_RXCSUM))
4480			rxcsum |= E1000_RXCSUM_CRCOFL;
4481#endif
4482	} else {
4483		/* Non RSS setup */
4484		if (ifp->if_capenable & IFCAP_RXCSUM) {
4485			rxcsum |= E1000_RXCSUM_IPPCSE;
4486#if __FreeBSD_version >= 800000
4487			if (adapter->hw.mac.type == e1000_82576)
4488				rxcsum |= E1000_RXCSUM_CRCOFL;
4489#endif
4490		} else
4491			rxcsum &= ~E1000_RXCSUM_TUOFL;
4492	}
4493	E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum);
4494
4495	/* Setup the Receive Control Register */
4496	rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
4497	rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_LBM_NO |
4498		   E1000_RCTL_RDMTS_HALF |
4499		   (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
4500	/* Strip CRC bytes. */
4501	rctl |= E1000_RCTL_SECRC;
4502	/* Make sure VLAN Filters are off */
4503	rctl &= ~E1000_RCTL_VFE;
4504	/* Don't store bad packets */
4505	rctl &= ~E1000_RCTL_SBP;
4506
4507	/* Enable Receives */
4508	E1000_WRITE_REG(hw, E1000_RCTL, rctl);
4509
4510	/*
4511	 * Setup the HW Rx Head and Tail Descriptor Pointers
4512	 *   - needs to be after enable
4513	 */
4514	for (int i = 0; i < adapter->num_queues; i++) {
4515		rxr = &adapter->rx_rings[i];
4516		E1000_WRITE_REG(hw, E1000_RDH(i), rxr->next_to_check);
4517#ifdef DEV_NETMAP
4518		/*
4519		 * an init() while a netmap client is active must
4520		 * preserve the rx buffers passed to userspace.
4521		 * In this driver it means we adjust RDT to
4522		 * somthing different from next_to_refresh
4523		 * (which is not used in netmap mode).
4524		 */
4525		if (ifp->if_capenable & IFCAP_NETMAP) {
4526			struct netmap_adapter *na = NA(adapter->ifp);
4527			struct netmap_kring *kring = &na->rx_rings[i];
4528			int t = rxr->next_to_refresh - kring->nr_hwavail;
4529
4530			if (t >= adapter->num_rx_desc)
4531				t -= adapter->num_rx_desc;
4532			else if (t < 0)
4533				t += adapter->num_rx_desc;
4534			E1000_WRITE_REG(hw, E1000_RDT(i), t);
4535		} else
4536#endif /* DEV_NETMAP */
4537		E1000_WRITE_REG(hw, E1000_RDT(i), rxr->next_to_refresh);
4538	}
4539	return;
4540}
4541
4542/*********************************************************************
4543 *
4544 *  Free receive rings.
4545 *
4546 **********************************************************************/
4547static void
4548igb_free_receive_structures(struct adapter *adapter)
4549{
4550	struct rx_ring *rxr = adapter->rx_rings;
4551
4552	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4553		struct lro_ctrl	*lro = &rxr->lro;
4554		igb_free_receive_buffers(rxr);
4555		tcp_lro_free(lro);
4556		igb_dma_free(adapter, &rxr->rxdma);
4557	}
4558
4559	free(adapter->rx_rings, M_DEVBUF);
4560}
4561
4562/*********************************************************************
4563 *
4564 *  Free receive ring data structures.
4565 *
4566 **********************************************************************/
4567static void
4568igb_free_receive_buffers(struct rx_ring *rxr)
4569{
4570	struct adapter		*adapter = rxr->adapter;
4571	struct igb_rx_buf	*rxbuf;
4572	int i;
4573
4574	INIT_DEBUGOUT("free_receive_structures: begin");
4575
4576	/* Cleanup any existing buffers */
4577	if (rxr->rx_buffers != NULL) {
4578		for (i = 0; i < adapter->num_rx_desc; i++) {
4579			rxbuf = &rxr->rx_buffers[i];
4580			if (rxbuf->m_head != NULL) {
4581				bus_dmamap_sync(rxr->htag, rxbuf->hmap,
4582				    BUS_DMASYNC_POSTREAD);
4583				bus_dmamap_unload(rxr->htag, rxbuf->hmap);
4584				rxbuf->m_head->m_flags |= M_PKTHDR;
4585				m_freem(rxbuf->m_head);
4586			}
4587			if (rxbuf->m_pack != NULL) {
4588				bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
4589				    BUS_DMASYNC_POSTREAD);
4590				bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
4591				rxbuf->m_pack->m_flags |= M_PKTHDR;
4592				m_freem(rxbuf->m_pack);
4593			}
4594			rxbuf->m_head = NULL;
4595			rxbuf->m_pack = NULL;
4596			if (rxbuf->hmap != NULL) {
4597				bus_dmamap_destroy(rxr->htag, rxbuf->hmap);
4598				rxbuf->hmap = NULL;
4599			}
4600			if (rxbuf->pmap != NULL) {
4601				bus_dmamap_destroy(rxr->ptag, rxbuf->pmap);
4602				rxbuf->pmap = NULL;
4603			}
4604		}
4605		if (rxr->rx_buffers != NULL) {
4606			free(rxr->rx_buffers, M_DEVBUF);
4607			rxr->rx_buffers = NULL;
4608		}
4609	}
4610
4611	if (rxr->htag != NULL) {
4612		bus_dma_tag_destroy(rxr->htag);
4613		rxr->htag = NULL;
4614	}
4615	if (rxr->ptag != NULL) {
4616		bus_dma_tag_destroy(rxr->ptag);
4617		rxr->ptag = NULL;
4618	}
4619}
4620
4621static __inline void
4622igb_rx_discard(struct rx_ring *rxr, int i)
4623{
4624	struct igb_rx_buf	*rbuf;
4625
4626	rbuf = &rxr->rx_buffers[i];
4627
4628	/* Partially received? Free the chain */
4629	if (rxr->fmp != NULL) {
4630		rxr->fmp->m_flags |= M_PKTHDR;
4631		m_freem(rxr->fmp);
4632		rxr->fmp = NULL;
4633		rxr->lmp = NULL;
4634	}
4635
4636	/*
4637	** With advanced descriptors the writeback
4638	** clobbers the buffer addrs, so its easier
4639	** to just free the existing mbufs and take
4640	** the normal refresh path to get new buffers
4641	** and mapping.
4642	*/
4643	if (rbuf->m_head) {
4644		m_free(rbuf->m_head);
4645		rbuf->m_head = NULL;
4646	}
4647
4648	if (rbuf->m_pack) {
4649		m_free(rbuf->m_pack);
4650		rbuf->m_pack = NULL;
4651	}
4652
4653	return;
4654}
4655
4656static __inline void
4657igb_rx_input(struct rx_ring *rxr, struct ifnet *ifp, struct mbuf *m, u32 ptype)
4658{
4659
4660	/*
4661	 * ATM LRO is only for IPv4/TCP packets and TCP checksum of the packet
4662	 * should be computed by hardware. Also it should not have VLAN tag in
4663	 * ethernet header.
4664	 */
4665	if (rxr->lro_enabled &&
4666	    (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 &&
4667	    (ptype & E1000_RXDADV_PKTTYPE_ETQF) == 0 &&
4668	    (ptype & (E1000_RXDADV_PKTTYPE_IPV4 | E1000_RXDADV_PKTTYPE_TCP)) ==
4669	    (E1000_RXDADV_PKTTYPE_IPV4 | E1000_RXDADV_PKTTYPE_TCP) &&
4670	    (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) ==
4671	    (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) {
4672		/*
4673		 * Send to the stack if:
4674		 **  - LRO not enabled, or
4675		 **  - no LRO resources, or
4676		 **  - lro enqueue fails
4677		 */
4678		if (rxr->lro.lro_cnt != 0)
4679			if (tcp_lro_rx(&rxr->lro, m, 0) == 0)
4680				return;
4681	}
4682	IGB_RX_UNLOCK(rxr);
4683	(*ifp->if_input)(ifp, m);
4684	IGB_RX_LOCK(rxr);
4685}
4686
4687/*********************************************************************
4688 *
4689 *  This routine executes in interrupt context. It replenishes
4690 *  the mbufs in the descriptor and sends data which has been
4691 *  dma'ed into host memory to upper layer.
4692 *
4693 *  We loop at most count times if count is > 0, or until done if
4694 *  count < 0.
4695 *
4696 *  Return TRUE if more to clean, FALSE otherwise
4697 *********************************************************************/
4698static bool
4699igb_rxeof(struct igb_queue *que, int count, int *done)
4700{
4701	struct adapter		*adapter = que->adapter;
4702	struct rx_ring		*rxr = que->rxr;
4703	struct ifnet		*ifp = adapter->ifp;
4704	struct lro_ctrl		*lro = &rxr->lro;
4705	struct lro_entry	*queued;
4706	int			i, processed = 0, rxdone = 0;
4707	u32			ptype, staterr = 0;
4708	union e1000_adv_rx_desc	*cur;
4709
4710	IGB_RX_LOCK(rxr);
4711	/* Sync the ring. */
4712	bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4713	    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
4714
4715#ifdef DEV_NETMAP
4716	if (ifp->if_capenable & IFCAP_NETMAP) {
4717		struct netmap_adapter *na = NA(ifp);
4718
4719		na->rx_rings[rxr->me].nr_kflags |= NKR_PENDINTR;
4720		selwakeuppri(&na->rx_rings[rxr->me].si, PI_NET);
4721		IGB_RX_UNLOCK(rxr);
4722		IGB_CORE_LOCK(adapter);
4723		selwakeuppri(&na->rx_si, PI_NET);
4724		IGB_CORE_UNLOCK(adapter);
4725		return (0);
4726	}
4727#endif /* DEV_NETMAP */
4728
4729	/* Main clean loop */
4730	for (i = rxr->next_to_check; count != 0;) {
4731		struct mbuf		*sendmp, *mh, *mp;
4732		struct igb_rx_buf	*rxbuf;
4733		u16			hlen, plen, hdr, vtag;
4734		bool			eop = FALSE;
4735
4736		cur = &rxr->rx_base[i];
4737		staterr = le32toh(cur->wb.upper.status_error);
4738		if ((staterr & E1000_RXD_STAT_DD) == 0)
4739			break;
4740		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
4741			break;
4742		count--;
4743		sendmp = mh = mp = NULL;
4744		cur->wb.upper.status_error = 0;
4745		rxbuf = &rxr->rx_buffers[i];
4746		plen = le16toh(cur->wb.upper.length);
4747		ptype = le32toh(cur->wb.lower.lo_dword.data) & IGB_PKTTYPE_MASK;
4748		if ((adapter->hw.mac.type == e1000_i350) &&
4749		    (staterr & E1000_RXDEXT_STATERR_LB))
4750			vtag = be16toh(cur->wb.upper.vlan);
4751		else
4752			vtag = le16toh(cur->wb.upper.vlan);
4753		hdr = le16toh(cur->wb.lower.lo_dword.hs_rss.hdr_info);
4754		eop = ((staterr & E1000_RXD_STAT_EOP) == E1000_RXD_STAT_EOP);
4755
4756		/* Make sure all segments of a bad packet are discarded */
4757		if (((staterr & E1000_RXDEXT_ERR_FRAME_ERR_MASK) != 0) ||
4758		    (rxr->discard)) {
4759			adapter->dropped_pkts++;
4760			++rxr->rx_discarded;
4761			if (!eop) /* Catch subsequent segs */
4762				rxr->discard = TRUE;
4763			else
4764				rxr->discard = FALSE;
4765			igb_rx_discard(rxr, i);
4766			goto next_desc;
4767		}
4768
4769		/*
4770		** The way the hardware is configured to
4771		** split, it will ONLY use the header buffer
4772		** when header split is enabled, otherwise we
4773		** get normal behavior, ie, both header and
4774		** payload are DMA'd into the payload buffer.
4775		**
4776		** The fmp test is to catch the case where a
4777		** packet spans multiple descriptors, in that
4778		** case only the first header is valid.
4779		*/
4780		if (rxr->hdr_split && rxr->fmp == NULL) {
4781			hlen = (hdr & E1000_RXDADV_HDRBUFLEN_MASK) >>
4782			    E1000_RXDADV_HDRBUFLEN_SHIFT;
4783			if (hlen > IGB_HDR_BUF)
4784				hlen = IGB_HDR_BUF;
4785			mh = rxr->rx_buffers[i].m_head;
4786			mh->m_len = hlen;
4787			/* clear buf pointer for refresh */
4788			rxbuf->m_head = NULL;
4789			/*
4790			** Get the payload length, this
4791			** could be zero if its a small
4792			** packet.
4793			*/
4794			if (plen > 0) {
4795				mp = rxr->rx_buffers[i].m_pack;
4796				mp->m_len = plen;
4797				mh->m_next = mp;
4798				/* clear buf pointer */
4799				rxbuf->m_pack = NULL;
4800				rxr->rx_split_packets++;
4801			}
4802		} else {
4803			/*
4804			** Either no header split, or a
4805			** secondary piece of a fragmented
4806			** split packet.
4807			*/
4808			mh = rxr->rx_buffers[i].m_pack;
4809			mh->m_len = plen;
4810			/* clear buf info for refresh */
4811			rxbuf->m_pack = NULL;
4812		}
4813
4814		++processed; /* So we know when to refresh */
4815
4816		/* Initial frame - setup */
4817		if (rxr->fmp == NULL) {
4818			mh->m_pkthdr.len = mh->m_len;
4819			/* Save the head of the chain */
4820			rxr->fmp = mh;
4821			rxr->lmp = mh;
4822			if (mp != NULL) {
4823				/* Add payload if split */
4824				mh->m_pkthdr.len += mp->m_len;
4825				rxr->lmp = mh->m_next;
4826			}
4827		} else {
4828			/* Chain mbuf's together */
4829			rxr->lmp->m_next = mh;
4830			rxr->lmp = rxr->lmp->m_next;
4831			rxr->fmp->m_pkthdr.len += mh->m_len;
4832		}
4833
4834		if (eop) {
4835			rxr->fmp->m_pkthdr.rcvif = ifp;
4836			ifp->if_ipackets++;
4837			rxr->rx_packets++;
4838			/* capture data for AIM */
4839			rxr->packets++;
4840			rxr->bytes += rxr->fmp->m_pkthdr.len;
4841			rxr->rx_bytes += rxr->fmp->m_pkthdr.len;
4842
4843			if ((ifp->if_capenable & IFCAP_RXCSUM) != 0)
4844				igb_rx_checksum(staterr, rxr->fmp, ptype);
4845
4846			if ((ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 &&
4847			    (staterr & E1000_RXD_STAT_VP) != 0) {
4848				rxr->fmp->m_pkthdr.ether_vtag = vtag;
4849				rxr->fmp->m_flags |= M_VLANTAG;
4850			}
4851#if __FreeBSD_version >= 800000
4852			rxr->fmp->m_pkthdr.flowid = que->msix;
4853			rxr->fmp->m_flags |= M_FLOWID;
4854#endif
4855			sendmp = rxr->fmp;
4856			/* Make sure to set M_PKTHDR. */
4857			sendmp->m_flags |= M_PKTHDR;
4858			rxr->fmp = NULL;
4859			rxr->lmp = NULL;
4860		}
4861
4862next_desc:
4863		bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4864		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4865
4866		/* Advance our pointers to the next descriptor. */
4867		if (++i == adapter->num_rx_desc)
4868			i = 0;
4869		/*
4870		** Send to the stack or LRO
4871		*/
4872		if (sendmp != NULL) {
4873			rxr->next_to_check = i;
4874			igb_rx_input(rxr, ifp, sendmp, ptype);
4875			i = rxr->next_to_check;
4876			rxdone++;
4877		}
4878
4879		/* Every 8 descriptors we go to refresh mbufs */
4880		if (processed == 8) {
4881                        igb_refresh_mbufs(rxr, i);
4882                        processed = 0;
4883		}
4884	}
4885
4886	/* Catch any remainders */
4887	if (igb_rx_unrefreshed(rxr))
4888		igb_refresh_mbufs(rxr, i);
4889
4890	rxr->next_to_check = i;
4891
4892	/*
4893	 * Flush any outstanding LRO work
4894	 */
4895	while ((queued = SLIST_FIRST(&lro->lro_active)) != NULL) {
4896		SLIST_REMOVE_HEAD(&lro->lro_active, next);
4897		tcp_lro_flush(lro, queued);
4898	}
4899
4900	if (done != NULL)
4901		*done += rxdone;
4902
4903	IGB_RX_UNLOCK(rxr);
4904	return ((staterr & E1000_RXD_STAT_DD) ? TRUE : FALSE);
4905}
4906
4907/*********************************************************************
4908 *
4909 *  Verify that the hardware indicated that the checksum is valid.
4910 *  Inform the stack about the status of checksum so that stack
4911 *  doesn't spend time verifying the checksum.
4912 *
4913 *********************************************************************/
4914static void
4915igb_rx_checksum(u32 staterr, struct mbuf *mp, u32 ptype)
4916{
4917	u16 status = (u16)staterr;
4918	u8  errors = (u8) (staterr >> 24);
4919	int sctp;
4920
4921	/* Ignore Checksum bit is set */
4922	if (status & E1000_RXD_STAT_IXSM) {
4923		mp->m_pkthdr.csum_flags = 0;
4924		return;
4925	}
4926
4927	if ((ptype & E1000_RXDADV_PKTTYPE_ETQF) == 0 &&
4928	    (ptype & E1000_RXDADV_PKTTYPE_SCTP) != 0)
4929		sctp = 1;
4930	else
4931		sctp = 0;
4932	if (status & E1000_RXD_STAT_IPCS) {
4933		/* Did it pass? */
4934		if (!(errors & E1000_RXD_ERR_IPE)) {
4935			/* IP Checksum Good */
4936			mp->m_pkthdr.csum_flags = CSUM_IP_CHECKED;
4937			mp->m_pkthdr.csum_flags |= CSUM_IP_VALID;
4938		} else
4939			mp->m_pkthdr.csum_flags = 0;
4940	}
4941
4942	if (status & (E1000_RXD_STAT_TCPCS | E1000_RXD_STAT_UDPCS)) {
4943		u16 type = (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
4944#if __FreeBSD_version >= 800000
4945		if (sctp) /* reassign */
4946			type = CSUM_SCTP_VALID;
4947#endif
4948		/* Did it pass? */
4949		if (!(errors & E1000_RXD_ERR_TCPE)) {
4950			mp->m_pkthdr.csum_flags |= type;
4951			if (sctp == 0)
4952				mp->m_pkthdr.csum_data = htons(0xffff);
4953		}
4954	}
4955	return;
4956}
4957
4958/*
4959 * This routine is run via an vlan
4960 * config EVENT
4961 */
4962static void
4963igb_register_vlan(void *arg, struct ifnet *ifp, u16 vtag)
4964{
4965	struct adapter	*adapter = ifp->if_softc;
4966	u32		index, bit;
4967
4968	if (ifp->if_softc !=  arg)   /* Not our event */
4969		return;
4970
4971	if ((vtag == 0) || (vtag > 4095))       /* Invalid */
4972                return;
4973
4974	IGB_CORE_LOCK(adapter);
4975	index = (vtag >> 5) & 0x7F;
4976	bit = vtag & 0x1F;
4977	adapter->shadow_vfta[index] |= (1 << bit);
4978	++adapter->num_vlans;
4979	/* Change hw filter setting */
4980	if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
4981		igb_setup_vlan_hw_support(adapter);
4982	IGB_CORE_UNLOCK(adapter);
4983}
4984
4985/*
4986 * This routine is run via an vlan
4987 * unconfig EVENT
4988 */
4989static void
4990igb_unregister_vlan(void *arg, struct ifnet *ifp, u16 vtag)
4991{
4992	struct adapter	*adapter = ifp->if_softc;
4993	u32		index, bit;
4994
4995	if (ifp->if_softc !=  arg)
4996		return;
4997
4998	if ((vtag == 0) || (vtag > 4095))       /* Invalid */
4999                return;
5000
5001	IGB_CORE_LOCK(adapter);
5002	index = (vtag >> 5) & 0x7F;
5003	bit = vtag & 0x1F;
5004	adapter->shadow_vfta[index] &= ~(1 << bit);
5005	--adapter->num_vlans;
5006	/* Change hw filter setting */
5007	if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
5008		igb_setup_vlan_hw_support(adapter);
5009	IGB_CORE_UNLOCK(adapter);
5010}
5011
5012static void
5013igb_setup_vlan_hw_support(struct adapter *adapter)
5014{
5015	struct e1000_hw *hw = &adapter->hw;
5016	struct ifnet	*ifp = adapter->ifp;
5017	u32             reg;
5018
5019	if (adapter->vf_ifp) {
5020		e1000_rlpml_set_vf(hw,
5021		    adapter->max_frame_size + VLAN_TAG_SIZE);
5022		return;
5023	}
5024
5025	reg = E1000_READ_REG(hw, E1000_CTRL);
5026	reg |= E1000_CTRL_VME;
5027	E1000_WRITE_REG(hw, E1000_CTRL, reg);
5028
5029	/* Enable the Filter Table */
5030	if (ifp->if_capenable & IFCAP_VLAN_HWFILTER) {
5031		reg = E1000_READ_REG(hw, E1000_RCTL);
5032		reg &= ~E1000_RCTL_CFIEN;
5033		reg |= E1000_RCTL_VFE;
5034		E1000_WRITE_REG(hw, E1000_RCTL, reg);
5035	}
5036
5037	/* Update the frame size */
5038	E1000_WRITE_REG(&adapter->hw, E1000_RLPML,
5039	    adapter->max_frame_size + VLAN_TAG_SIZE);
5040
5041	/* Don't bother with table if no vlans */
5042	if ((adapter->num_vlans == 0) ||
5043	    ((ifp->if_capenable & IFCAP_VLAN_HWFILTER) == 0))
5044                return;
5045	/*
5046	** A soft reset zero's out the VFTA, so
5047	** we need to repopulate it now.
5048	*/
5049	for (int i = 0; i < IGB_VFTA_SIZE; i++)
5050                if (adapter->shadow_vfta[i] != 0) {
5051			if (adapter->vf_ifp)
5052				e1000_vfta_set_vf(hw,
5053				    adapter->shadow_vfta[i], TRUE);
5054			else
5055				e1000_write_vfta(hw,
5056				    i, adapter->shadow_vfta[i]);
5057		}
5058}
5059
5060static void
5061igb_enable_intr(struct adapter *adapter)
5062{
5063	/* With RSS set up what to auto clear */
5064	if (adapter->msix_mem) {
5065		u32 mask = (adapter->que_mask | adapter->link_mask);
5066		E1000_WRITE_REG(&adapter->hw, E1000_EIAC, mask);
5067		E1000_WRITE_REG(&adapter->hw, E1000_EIAM, mask);
5068		E1000_WRITE_REG(&adapter->hw, E1000_EIMS, mask);
5069		E1000_WRITE_REG(&adapter->hw, E1000_IMS,
5070		    E1000_IMS_LSC);
5071	} else {
5072		E1000_WRITE_REG(&adapter->hw, E1000_IMS,
5073		    IMS_ENABLE_MASK);
5074	}
5075	E1000_WRITE_FLUSH(&adapter->hw);
5076
5077	return;
5078}
5079
5080static void
5081igb_disable_intr(struct adapter *adapter)
5082{
5083	if (adapter->msix_mem) {
5084		E1000_WRITE_REG(&adapter->hw, E1000_EIMC, ~0);
5085		E1000_WRITE_REG(&adapter->hw, E1000_EIAC, 0);
5086	}
5087	E1000_WRITE_REG(&adapter->hw, E1000_IMC, ~0);
5088	E1000_WRITE_FLUSH(&adapter->hw);
5089	return;
5090}
5091
5092/*
5093 * Bit of a misnomer, what this really means is
5094 * to enable OS management of the system... aka
5095 * to disable special hardware management features
5096 */
5097static void
5098igb_init_manageability(struct adapter *adapter)
5099{
5100	if (adapter->has_manage) {
5101		int manc2h = E1000_READ_REG(&adapter->hw, E1000_MANC2H);
5102		int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
5103
5104		/* disable hardware interception of ARP */
5105		manc &= ~(E1000_MANC_ARP_EN);
5106
5107                /* enable receiving management packets to the host */
5108		manc |= E1000_MANC_EN_MNG2HOST;
5109		manc2h |= 1 << 5;  /* Mng Port 623 */
5110		manc2h |= 1 << 6;  /* Mng Port 664 */
5111		E1000_WRITE_REG(&adapter->hw, E1000_MANC2H, manc2h);
5112		E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
5113	}
5114}
5115
5116/*
5117 * Give control back to hardware management
5118 * controller if there is one.
5119 */
5120static void
5121igb_release_manageability(struct adapter *adapter)
5122{
5123	if (adapter->has_manage) {
5124		int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
5125
5126		/* re-enable hardware interception of ARP */
5127		manc |= E1000_MANC_ARP_EN;
5128		manc &= ~E1000_MANC_EN_MNG2HOST;
5129
5130		E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
5131	}
5132}
5133
5134/*
5135 * igb_get_hw_control sets CTRL_EXT:DRV_LOAD bit.
5136 * For ASF and Pass Through versions of f/w this means that
5137 * the driver is loaded.
5138 *
5139 */
5140static void
5141igb_get_hw_control(struct adapter *adapter)
5142{
5143	u32 ctrl_ext;
5144
5145	if (adapter->vf_ifp)
5146		return;
5147
5148	/* Let firmware know the driver has taken over */
5149	ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
5150	E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
5151	    ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
5152}
5153
5154/*
5155 * igb_release_hw_control resets CTRL_EXT:DRV_LOAD bit.
5156 * For ASF and Pass Through versions of f/w this means that the
5157 * driver is no longer loaded.
5158 *
5159 */
5160static void
5161igb_release_hw_control(struct adapter *adapter)
5162{
5163	u32 ctrl_ext;
5164
5165	if (adapter->vf_ifp)
5166		return;
5167
5168	/* Let firmware taken over control of h/w */
5169	ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
5170	E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
5171	    ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
5172}
5173
5174static int
5175igb_is_valid_ether_addr(uint8_t *addr)
5176{
5177	char zero_addr[6] = { 0, 0, 0, 0, 0, 0 };
5178
5179	if ((addr[0] & 1) || (!bcmp(addr, zero_addr, ETHER_ADDR_LEN))) {
5180		return (FALSE);
5181	}
5182
5183	return (TRUE);
5184}
5185
5186
5187/*
5188 * Enable PCI Wake On Lan capability
5189 */
5190static void
5191igb_enable_wakeup(device_t dev)
5192{
5193	u16     cap, status;
5194	u8      id;
5195
5196	/* First find the capabilities pointer*/
5197	cap = pci_read_config(dev, PCIR_CAP_PTR, 2);
5198	/* Read the PM Capabilities */
5199	id = pci_read_config(dev, cap, 1);
5200	if (id != PCIY_PMG)     /* Something wrong */
5201		return;
5202	/* OK, we have the power capabilities, so
5203	   now get the status register */
5204	cap += PCIR_POWER_STATUS;
5205	status = pci_read_config(dev, cap, 2);
5206	status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE;
5207	pci_write_config(dev, cap, status, 2);
5208	return;
5209}
5210
5211static void
5212igb_led_func(void *arg, int onoff)
5213{
5214	struct adapter	*adapter = arg;
5215
5216	IGB_CORE_LOCK(adapter);
5217	if (onoff) {
5218		e1000_setup_led(&adapter->hw);
5219		e1000_led_on(&adapter->hw);
5220	} else {
5221		e1000_led_off(&adapter->hw);
5222		e1000_cleanup_led(&adapter->hw);
5223	}
5224	IGB_CORE_UNLOCK(adapter);
5225}
5226
5227/**********************************************************************
5228 *
5229 *  Update the board statistics counters.
5230 *
5231 **********************************************************************/
5232static void
5233igb_update_stats_counters(struct adapter *adapter)
5234{
5235	struct ifnet		*ifp;
5236        struct e1000_hw		*hw = &adapter->hw;
5237	struct e1000_hw_stats	*stats;
5238
5239	/*
5240	** The virtual function adapter has only a
5241	** small controlled set of stats, do only
5242	** those and return.
5243	*/
5244	if (adapter->vf_ifp) {
5245		igb_update_vf_stats_counters(adapter);
5246		return;
5247	}
5248
5249	stats = (struct e1000_hw_stats	*)adapter->stats;
5250
5251	if(adapter->hw.phy.media_type == e1000_media_type_copper ||
5252	   (E1000_READ_REG(hw, E1000_STATUS) & E1000_STATUS_LU)) {
5253		stats->symerrs +=
5254		    E1000_READ_REG(hw,E1000_SYMERRS);
5255		stats->sec += E1000_READ_REG(hw, E1000_SEC);
5256	}
5257
5258	stats->crcerrs += E1000_READ_REG(hw, E1000_CRCERRS);
5259	stats->mpc += E1000_READ_REG(hw, E1000_MPC);
5260	stats->scc += E1000_READ_REG(hw, E1000_SCC);
5261	stats->ecol += E1000_READ_REG(hw, E1000_ECOL);
5262
5263	stats->mcc += E1000_READ_REG(hw, E1000_MCC);
5264	stats->latecol += E1000_READ_REG(hw, E1000_LATECOL);
5265	stats->colc += E1000_READ_REG(hw, E1000_COLC);
5266	stats->dc += E1000_READ_REG(hw, E1000_DC);
5267	stats->rlec += E1000_READ_REG(hw, E1000_RLEC);
5268	stats->xonrxc += E1000_READ_REG(hw, E1000_XONRXC);
5269	stats->xontxc += E1000_READ_REG(hw, E1000_XONTXC);
5270	/*
5271	** For watchdog management we need to know if we have been
5272	** paused during the last interval, so capture that here.
5273	*/
5274        adapter->pause_frames = E1000_READ_REG(&adapter->hw, E1000_XOFFRXC);
5275        stats->xoffrxc += adapter->pause_frames;
5276	stats->xofftxc += E1000_READ_REG(hw, E1000_XOFFTXC);
5277	stats->fcruc += E1000_READ_REG(hw, E1000_FCRUC);
5278	stats->prc64 += E1000_READ_REG(hw, E1000_PRC64);
5279	stats->prc127 += E1000_READ_REG(hw, E1000_PRC127);
5280	stats->prc255 += E1000_READ_REG(hw, E1000_PRC255);
5281	stats->prc511 += E1000_READ_REG(hw, E1000_PRC511);
5282	stats->prc1023 += E1000_READ_REG(hw, E1000_PRC1023);
5283	stats->prc1522 += E1000_READ_REG(hw, E1000_PRC1522);
5284	stats->gprc += E1000_READ_REG(hw, E1000_GPRC);
5285	stats->bprc += E1000_READ_REG(hw, E1000_BPRC);
5286	stats->mprc += E1000_READ_REG(hw, E1000_MPRC);
5287	stats->gptc += E1000_READ_REG(hw, E1000_GPTC);
5288
5289	/* For the 64-bit byte counters the low dword must be read first. */
5290	/* Both registers clear on the read of the high dword */
5291
5292	stats->gorc += E1000_READ_REG(hw, E1000_GORCL) +
5293	    ((u64)E1000_READ_REG(hw, E1000_GORCH) << 32);
5294	stats->gotc += E1000_READ_REG(hw, E1000_GOTCL) +
5295	    ((u64)E1000_READ_REG(hw, E1000_GOTCH) << 32);
5296
5297	stats->rnbc += E1000_READ_REG(hw, E1000_RNBC);
5298	stats->ruc += E1000_READ_REG(hw, E1000_RUC);
5299	stats->rfc += E1000_READ_REG(hw, E1000_RFC);
5300	stats->roc += E1000_READ_REG(hw, E1000_ROC);
5301	stats->rjc += E1000_READ_REG(hw, E1000_RJC);
5302
5303	stats->tor += E1000_READ_REG(hw, E1000_TORH);
5304	stats->tot += E1000_READ_REG(hw, E1000_TOTH);
5305
5306	stats->tpr += E1000_READ_REG(hw, E1000_TPR);
5307	stats->tpt += E1000_READ_REG(hw, E1000_TPT);
5308	stats->ptc64 += E1000_READ_REG(hw, E1000_PTC64);
5309	stats->ptc127 += E1000_READ_REG(hw, E1000_PTC127);
5310	stats->ptc255 += E1000_READ_REG(hw, E1000_PTC255);
5311	stats->ptc511 += E1000_READ_REG(hw, E1000_PTC511);
5312	stats->ptc1023 += E1000_READ_REG(hw, E1000_PTC1023);
5313	stats->ptc1522 += E1000_READ_REG(hw, E1000_PTC1522);
5314	stats->mptc += E1000_READ_REG(hw, E1000_MPTC);
5315	stats->bptc += E1000_READ_REG(hw, E1000_BPTC);
5316
5317	/* Interrupt Counts */
5318
5319	stats->iac += E1000_READ_REG(hw, E1000_IAC);
5320	stats->icrxptc += E1000_READ_REG(hw, E1000_ICRXPTC);
5321	stats->icrxatc += E1000_READ_REG(hw, E1000_ICRXATC);
5322	stats->ictxptc += E1000_READ_REG(hw, E1000_ICTXPTC);
5323	stats->ictxatc += E1000_READ_REG(hw, E1000_ICTXATC);
5324	stats->ictxqec += E1000_READ_REG(hw, E1000_ICTXQEC);
5325	stats->ictxqmtc += E1000_READ_REG(hw, E1000_ICTXQMTC);
5326	stats->icrxdmtc += E1000_READ_REG(hw, E1000_ICRXDMTC);
5327	stats->icrxoc += E1000_READ_REG(hw, E1000_ICRXOC);
5328
5329	/* Host to Card Statistics */
5330
5331	stats->cbtmpc += E1000_READ_REG(hw, E1000_CBTMPC);
5332	stats->htdpmc += E1000_READ_REG(hw, E1000_HTDPMC);
5333	stats->cbrdpc += E1000_READ_REG(hw, E1000_CBRDPC);
5334	stats->cbrmpc += E1000_READ_REG(hw, E1000_CBRMPC);
5335	stats->rpthc += E1000_READ_REG(hw, E1000_RPTHC);
5336	stats->hgptc += E1000_READ_REG(hw, E1000_HGPTC);
5337	stats->htcbdpc += E1000_READ_REG(hw, E1000_HTCBDPC);
5338	stats->hgorc += (E1000_READ_REG(hw, E1000_HGORCL) +
5339	    ((u64)E1000_READ_REG(hw, E1000_HGORCH) << 32));
5340	stats->hgotc += (E1000_READ_REG(hw, E1000_HGOTCL) +
5341	    ((u64)E1000_READ_REG(hw, E1000_HGOTCH) << 32));
5342	stats->lenerrs += E1000_READ_REG(hw, E1000_LENERRS);
5343	stats->scvpc += E1000_READ_REG(hw, E1000_SCVPC);
5344	stats->hrmpc += E1000_READ_REG(hw, E1000_HRMPC);
5345
5346	stats->algnerrc += E1000_READ_REG(hw, E1000_ALGNERRC);
5347	stats->rxerrc += E1000_READ_REG(hw, E1000_RXERRC);
5348	stats->tncrs += E1000_READ_REG(hw, E1000_TNCRS);
5349	stats->cexterr += E1000_READ_REG(hw, E1000_CEXTERR);
5350	stats->tsctc += E1000_READ_REG(hw, E1000_TSCTC);
5351	stats->tsctfc += E1000_READ_REG(hw, E1000_TSCTFC);
5352
5353	ifp = adapter->ifp;
5354	ifp->if_collisions = stats->colc;
5355
5356	/* Rx Errors */
5357	ifp->if_ierrors = adapter->dropped_pkts + stats->rxerrc +
5358	    stats->crcerrs + stats->algnerrc +
5359	    stats->ruc + stats->roc + stats->mpc + stats->cexterr;
5360
5361	/* Tx Errors */
5362	ifp->if_oerrors = stats->ecol +
5363	    stats->latecol + adapter->watchdog_events;
5364
5365	/* Driver specific counters */
5366	adapter->device_control = E1000_READ_REG(hw, E1000_CTRL);
5367	adapter->rx_control = E1000_READ_REG(hw, E1000_RCTL);
5368	adapter->int_mask = E1000_READ_REG(hw, E1000_IMS);
5369	adapter->eint_mask = E1000_READ_REG(hw, E1000_EIMS);
5370	adapter->packet_buf_alloc_tx =
5371	    ((E1000_READ_REG(hw, E1000_PBA) & 0xffff0000) >> 16);
5372	adapter->packet_buf_alloc_rx =
5373	    (E1000_READ_REG(hw, E1000_PBA) & 0xffff);
5374}
5375
5376
5377/**********************************************************************
5378 *
5379 *  Initialize the VF board statistics counters.
5380 *
5381 **********************************************************************/
5382static void
5383igb_vf_init_stats(struct adapter *adapter)
5384{
5385        struct e1000_hw *hw = &adapter->hw;
5386	struct e1000_vf_stats	*stats;
5387
5388	stats = (struct e1000_vf_stats	*)adapter->stats;
5389	if (stats == NULL)
5390		return;
5391        stats->last_gprc = E1000_READ_REG(hw, E1000_VFGPRC);
5392        stats->last_gorc = E1000_READ_REG(hw, E1000_VFGORC);
5393        stats->last_gptc = E1000_READ_REG(hw, E1000_VFGPTC);
5394        stats->last_gotc = E1000_READ_REG(hw, E1000_VFGOTC);
5395        stats->last_mprc = E1000_READ_REG(hw, E1000_VFMPRC);
5396}
5397
5398/**********************************************************************
5399 *
5400 *  Update the VF board statistics counters.
5401 *
5402 **********************************************************************/
5403static void
5404igb_update_vf_stats_counters(struct adapter *adapter)
5405{
5406	struct e1000_hw *hw = &adapter->hw;
5407	struct e1000_vf_stats	*stats;
5408
5409	if (adapter->link_speed == 0)
5410		return;
5411
5412	stats = (struct e1000_vf_stats	*)adapter->stats;
5413
5414	UPDATE_VF_REG(E1000_VFGPRC,
5415	    stats->last_gprc, stats->gprc);
5416	UPDATE_VF_REG(E1000_VFGORC,
5417	    stats->last_gorc, stats->gorc);
5418	UPDATE_VF_REG(E1000_VFGPTC,
5419	    stats->last_gptc, stats->gptc);
5420	UPDATE_VF_REG(E1000_VFGOTC,
5421	    stats->last_gotc, stats->gotc);
5422	UPDATE_VF_REG(E1000_VFMPRC,
5423	    stats->last_mprc, stats->mprc);
5424}
5425
5426/* Export a single 32-bit register via a read-only sysctl. */
5427static int
5428igb_sysctl_reg_handler(SYSCTL_HANDLER_ARGS)
5429{
5430	struct adapter *adapter;
5431	u_int val;
5432
5433	adapter = oidp->oid_arg1;
5434	val = E1000_READ_REG(&adapter->hw, oidp->oid_arg2);
5435	return (sysctl_handle_int(oidp, &val, 0, req));
5436}
5437
5438/*
5439**  Tuneable interrupt rate handler
5440*/
5441static int
5442igb_sysctl_interrupt_rate_handler(SYSCTL_HANDLER_ARGS)
5443{
5444	struct igb_queue	*que = ((struct igb_queue *)oidp->oid_arg1);
5445	int			error;
5446	u32			reg, usec, rate;
5447
5448	reg = E1000_READ_REG(&que->adapter->hw, E1000_EITR(que->msix));
5449	usec = ((reg & 0x7FFC) >> 2);
5450	if (usec > 0)
5451		rate = 1000000 / usec;
5452	else
5453		rate = 0;
5454	error = sysctl_handle_int(oidp, &rate, 0, req);
5455	if (error || !req->newptr)
5456		return error;
5457	return 0;
5458}
5459
5460/*
5461 * Add sysctl variables, one per statistic, to the system.
5462 */
5463static void
5464igb_add_hw_stats(struct adapter *adapter)
5465{
5466	device_t dev = adapter->dev;
5467
5468	struct tx_ring *txr = adapter->tx_rings;
5469	struct rx_ring *rxr = adapter->rx_rings;
5470
5471	struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev);
5472	struct sysctl_oid *tree = device_get_sysctl_tree(dev);
5473	struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree);
5474	struct e1000_hw_stats *stats = adapter->stats;
5475
5476	struct sysctl_oid *stat_node, *queue_node, *int_node, *host_node;
5477	struct sysctl_oid_list *stat_list, *queue_list, *int_list, *host_list;
5478
5479#define QUEUE_NAME_LEN 32
5480	char namebuf[QUEUE_NAME_LEN];
5481
5482	/* Driver Statistics */
5483	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "link_irq",
5484			CTLFLAG_RD, &adapter->link_irq, 0,
5485			"Link MSIX IRQ Handled");
5486	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "dropped",
5487			CTLFLAG_RD, &adapter->dropped_pkts,
5488			"Driver dropped packets");
5489	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_dma_fail",
5490			CTLFLAG_RD, &adapter->no_tx_dma_setup,
5491			"Driver tx dma failure in xmit");
5492	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_overruns",
5493			CTLFLAG_RD, &adapter->rx_overruns,
5494			"RX overruns");
5495	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "watchdog_timeouts",
5496			CTLFLAG_RD, &adapter->watchdog_events,
5497			"Watchdog timeouts");
5498
5499	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "device_control",
5500			CTLFLAG_RD, &adapter->device_control,
5501			"Device Control Register");
5502	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_control",
5503			CTLFLAG_RD, &adapter->rx_control,
5504			"Receiver Control Register");
5505	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "interrupt_mask",
5506			CTLFLAG_RD, &adapter->int_mask,
5507			"Interrupt Mask");
5508	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "extended_int_mask",
5509			CTLFLAG_RD, &adapter->eint_mask,
5510			"Extended Interrupt Mask");
5511	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_buf_alloc",
5512			CTLFLAG_RD, &adapter->packet_buf_alloc_tx,
5513			"Transmit Buffer Packet Allocation");
5514	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_buf_alloc",
5515			CTLFLAG_RD, &adapter->packet_buf_alloc_rx,
5516			"Receive Buffer Packet Allocation");
5517	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_high_water",
5518			CTLFLAG_RD, &adapter->hw.fc.high_water, 0,
5519			"Flow Control High Watermark");
5520	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_low_water",
5521			CTLFLAG_RD, &adapter->hw.fc.low_water, 0,
5522			"Flow Control Low Watermark");
5523
5524	for (int i = 0; i < adapter->num_queues; i++, rxr++, txr++) {
5525		struct lro_ctrl *lro = &rxr->lro;
5526
5527		snprintf(namebuf, QUEUE_NAME_LEN, "queue%d", i);
5528		queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
5529					    CTLFLAG_RD, NULL, "Queue Name");
5530		queue_list = SYSCTL_CHILDREN(queue_node);
5531
5532		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "interrupt_rate",
5533				CTLFLAG_RD, &adapter->queues[i],
5534				sizeof(&adapter->queues[i]),
5535				igb_sysctl_interrupt_rate_handler,
5536				"IU", "Interrupt Rate");
5537
5538		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_head",
5539				CTLFLAG_RD, adapter, E1000_TDH(txr->me),
5540				igb_sysctl_reg_handler, "IU",
5541 				"Transmit Descriptor Head");
5542		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_tail",
5543				CTLFLAG_RD, adapter, E1000_TDT(txr->me),
5544				igb_sysctl_reg_handler, "IU",
5545 				"Transmit Descriptor Tail");
5546		SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "no_desc_avail",
5547				CTLFLAG_RD, &txr->no_desc_avail,
5548				"Queue No Descriptor Available");
5549		SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "tx_packets",
5550				CTLFLAG_RD, &txr->tx_packets,
5551				"Queue Packets Transmitted");
5552
5553		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_head",
5554				CTLFLAG_RD, adapter, E1000_RDH(rxr->me),
5555				igb_sysctl_reg_handler, "IU",
5556				"Receive Descriptor Head");
5557		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_tail",
5558				CTLFLAG_RD, adapter, E1000_RDT(rxr->me),
5559				igb_sysctl_reg_handler, "IU",
5560				"Receive Descriptor Tail");
5561		SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "rx_packets",
5562				CTLFLAG_RD, &rxr->rx_packets,
5563				"Queue Packets Received");
5564		SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "rx_bytes",
5565				CTLFLAG_RD, &rxr->rx_bytes,
5566				"Queue Bytes Received");
5567		SYSCTL_ADD_UINT(ctx, queue_list, OID_AUTO, "lro_queued",
5568				CTLFLAG_RD, &lro->lro_queued, 0,
5569				"LRO Queued");
5570		SYSCTL_ADD_UINT(ctx, queue_list, OID_AUTO, "lro_flushed",
5571				CTLFLAG_RD, &lro->lro_flushed, 0,
5572				"LRO Flushed");
5573	}
5574
5575	/* MAC stats get their own sub node */
5576
5577	stat_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "mac_stats",
5578				    CTLFLAG_RD, NULL, "MAC Statistics");
5579	stat_list = SYSCTL_CHILDREN(stat_node);
5580
5581	/*
5582	** VF adapter has a very limited set of stats
5583	** since its not managing the metal, so to speak.
5584	*/
5585	if (adapter->vf_ifp) {
5586	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd",
5587			CTLFLAG_RD, &stats->gprc,
5588			"Good Packets Received");
5589	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
5590			CTLFLAG_RD, &stats->gptc,
5591			"Good Packets Transmitted");
5592 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd",
5593 			CTLFLAG_RD, &stats->gorc,
5594 			"Good Octets Received");
5595 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_txd",
5596 			CTLFLAG_RD, &stats->gotc,
5597 			"Good Octets Transmitted");
5598	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd",
5599			CTLFLAG_RD, &stats->mprc,
5600			"Multicast Packets Received");
5601		return;
5602	}
5603
5604	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "excess_coll",
5605			CTLFLAG_RD, &stats->ecol,
5606			"Excessive collisions");
5607	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "single_coll",
5608			CTLFLAG_RD, &stats->scc,
5609			"Single collisions");
5610	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "multiple_coll",
5611			CTLFLAG_RD, &stats->mcc,
5612			"Multiple collisions");
5613	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "late_coll",
5614			CTLFLAG_RD, &stats->latecol,
5615			"Late collisions");
5616	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "collision_count",
5617			CTLFLAG_RD, &stats->colc,
5618			"Collision Count");
5619	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "symbol_errors",
5620			CTLFLAG_RD, &stats->symerrs,
5621			"Symbol Errors");
5622	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "sequence_errors",
5623			CTLFLAG_RD, &stats->sec,
5624			"Sequence Errors");
5625	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "defer_count",
5626			CTLFLAG_RD, &stats->dc,
5627			"Defer Count");
5628	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "missed_packets",
5629			CTLFLAG_RD, &stats->mpc,
5630			"Missed Packets");
5631	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_no_buff",
5632			CTLFLAG_RD, &stats->rnbc,
5633			"Receive No Buffers");
5634	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_undersize",
5635			CTLFLAG_RD, &stats->ruc,
5636			"Receive Undersize");
5637	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_fragmented",
5638			CTLFLAG_RD, &stats->rfc,
5639			"Fragmented Packets Received ");
5640	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_oversize",
5641			CTLFLAG_RD, &stats->roc,
5642			"Oversized Packets Received");
5643	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_jabber",
5644			CTLFLAG_RD, &stats->rjc,
5645			"Recevied Jabber");
5646	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_errs",
5647			CTLFLAG_RD, &stats->rxerrc,
5648			"Receive Errors");
5649	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "crc_errs",
5650			CTLFLAG_RD, &stats->crcerrs,
5651			"CRC errors");
5652	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "alignment_errs",
5653			CTLFLAG_RD, &stats->algnerrc,
5654			"Alignment Errors");
5655	/* On 82575 these are collision counts */
5656	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "coll_ext_errs",
5657			CTLFLAG_RD, &stats->cexterr,
5658			"Collision/Carrier extension errors");
5659	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xon_recvd",
5660			CTLFLAG_RD, &stats->xonrxc,
5661			"XON Received");
5662	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xon_txd",
5663			CTLFLAG_RD, &stats->xontxc,
5664			"XON Transmitted");
5665	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xoff_recvd",
5666			CTLFLAG_RD, &stats->xoffrxc,
5667			"XOFF Received");
5668	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xoff_txd",
5669			CTLFLAG_RD, &stats->xofftxc,
5670			"XOFF Transmitted");
5671	/* Packet Reception Stats */
5672	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "total_pkts_recvd",
5673			CTLFLAG_RD, &stats->tpr,
5674			"Total Packets Received ");
5675	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd",
5676			CTLFLAG_RD, &stats->gprc,
5677			"Good Packets Received");
5678	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_recvd",
5679			CTLFLAG_RD, &stats->bprc,
5680			"Broadcast Packets Received");
5681	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd",
5682			CTLFLAG_RD, &stats->mprc,
5683			"Multicast Packets Received");
5684	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_64",
5685			CTLFLAG_RD, &stats->prc64,
5686			"64 byte frames received ");
5687	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_65_127",
5688			CTLFLAG_RD, &stats->prc127,
5689			"65-127 byte frames received");
5690	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_128_255",
5691			CTLFLAG_RD, &stats->prc255,
5692			"128-255 byte frames received");
5693	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_256_511",
5694			CTLFLAG_RD, &stats->prc511,
5695			"256-511 byte frames received");
5696	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_512_1023",
5697			CTLFLAG_RD, &stats->prc1023,
5698			"512-1023 byte frames received");
5699	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_1024_1522",
5700			CTLFLAG_RD, &stats->prc1522,
5701			"1023-1522 byte frames received");
5702 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd",
5703 			CTLFLAG_RD, &stats->gorc,
5704 			"Good Octets Received");
5705
5706	/* Packet Transmission Stats */
5707 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_txd",
5708 			CTLFLAG_RD, &stats->gotc,
5709 			"Good Octets Transmitted");
5710	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "total_pkts_txd",
5711			CTLFLAG_RD, &stats->tpt,
5712			"Total Packets Transmitted");
5713	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
5714			CTLFLAG_RD, &stats->gptc,
5715			"Good Packets Transmitted");
5716	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_txd",
5717			CTLFLAG_RD, &stats->bptc,
5718			"Broadcast Packets Transmitted");
5719	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_txd",
5720			CTLFLAG_RD, &stats->mptc,
5721			"Multicast Packets Transmitted");
5722	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_64",
5723			CTLFLAG_RD, &stats->ptc64,
5724			"64 byte frames transmitted ");
5725	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_65_127",
5726			CTLFLAG_RD, &stats->ptc127,
5727			"65-127 byte frames transmitted");
5728	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_128_255",
5729			CTLFLAG_RD, &stats->ptc255,
5730			"128-255 byte frames transmitted");
5731	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_256_511",
5732			CTLFLAG_RD, &stats->ptc511,
5733			"256-511 byte frames transmitted");
5734	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_512_1023",
5735			CTLFLAG_RD, &stats->ptc1023,
5736			"512-1023 byte frames transmitted");
5737	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_1024_1522",
5738			CTLFLAG_RD, &stats->ptc1522,
5739			"1024-1522 byte frames transmitted");
5740	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tso_txd",
5741			CTLFLAG_RD, &stats->tsctc,
5742			"TSO Contexts Transmitted");
5743	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tso_ctx_fail",
5744			CTLFLAG_RD, &stats->tsctfc,
5745			"TSO Contexts Failed");
5746
5747
5748	/* Interrupt Stats */
5749
5750	int_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "interrupts",
5751				    CTLFLAG_RD, NULL, "Interrupt Statistics");
5752	int_list = SYSCTL_CHILDREN(int_node);
5753
5754	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "asserts",
5755			CTLFLAG_RD, &stats->iac,
5756			"Interrupt Assertion Count");
5757
5758	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_pkt_timer",
5759			CTLFLAG_RD, &stats->icrxptc,
5760			"Interrupt Cause Rx Pkt Timer Expire Count");
5761
5762	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_abs_timer",
5763			CTLFLAG_RD, &stats->icrxatc,
5764			"Interrupt Cause Rx Abs Timer Expire Count");
5765
5766	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_pkt_timer",
5767			CTLFLAG_RD, &stats->ictxptc,
5768			"Interrupt Cause Tx Pkt Timer Expire Count");
5769
5770	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_abs_timer",
5771			CTLFLAG_RD, &stats->ictxatc,
5772			"Interrupt Cause Tx Abs Timer Expire Count");
5773
5774	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_queue_empty",
5775			CTLFLAG_RD, &stats->ictxqec,
5776			"Interrupt Cause Tx Queue Empty Count");
5777
5778	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_queue_min_thresh",
5779			CTLFLAG_RD, &stats->ictxqmtc,
5780			"Interrupt Cause Tx Queue Min Thresh Count");
5781
5782	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_desc_min_thresh",
5783			CTLFLAG_RD, &stats->icrxdmtc,
5784			"Interrupt Cause Rx Desc Min Thresh Count");
5785
5786	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_overrun",
5787			CTLFLAG_RD, &stats->icrxoc,
5788			"Interrupt Cause Receiver Overrun Count");
5789
5790	/* Host to Card Stats */
5791
5792	host_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "host",
5793				    CTLFLAG_RD, NULL,
5794				    "Host to Card Statistics");
5795
5796	host_list = SYSCTL_CHILDREN(host_node);
5797
5798	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_tx_pkt",
5799			CTLFLAG_RD, &stats->cbtmpc,
5800			"Circuit Breaker Tx Packet Count");
5801
5802	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "host_tx_pkt_discard",
5803			CTLFLAG_RD, &stats->htdpmc,
5804			"Host Transmit Discarded Packets");
5805
5806	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "rx_pkt",
5807			CTLFLAG_RD, &stats->rpthc,
5808			"Rx Packets To Host");
5809
5810	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_rx_pkts",
5811			CTLFLAG_RD, &stats->cbrmpc,
5812			"Circuit Breaker Rx Packet Count");
5813
5814	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_rx_pkt_drop",
5815			CTLFLAG_RD, &stats->cbrdpc,
5816			"Circuit Breaker Rx Dropped Count");
5817
5818	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "tx_good_pkt",
5819			CTLFLAG_RD, &stats->hgptc,
5820			"Host Good Packets Tx Count");
5821
5822	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_tx_pkt_drop",
5823			CTLFLAG_RD, &stats->htcbdpc,
5824			"Host Tx Circuit Breaker Dropped Count");
5825
5826	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "rx_good_bytes",
5827			CTLFLAG_RD, &stats->hgorc,
5828			"Host Good Octets Received Count");
5829
5830	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "tx_good_bytes",
5831			CTLFLAG_RD, &stats->hgotc,
5832			"Host Good Octets Transmit Count");
5833
5834	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "length_errors",
5835			CTLFLAG_RD, &stats->lenerrs,
5836			"Length Errors");
5837
5838	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "serdes_violation_pkt",
5839			CTLFLAG_RD, &stats->scvpc,
5840			"SerDes/SGMII Code Violation Pkt Count");
5841
5842	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "header_redir_missed",
5843			CTLFLAG_RD, &stats->hrmpc,
5844			"Header Redirection Missed Packet Count");
5845}
5846
5847
5848/**********************************************************************
5849 *
5850 *  This routine provides a way to dump out the adapter eeprom,
5851 *  often a useful debug/service tool. This only dumps the first
5852 *  32 words, stuff that matters is in that extent.
5853 *
5854 **********************************************************************/
5855static int
5856igb_sysctl_nvm_info(SYSCTL_HANDLER_ARGS)
5857{
5858	struct adapter *adapter;
5859	int error;
5860	int result;
5861
5862	result = -1;
5863	error = sysctl_handle_int(oidp, &result, 0, req);
5864
5865	if (error || !req->newptr)
5866		return (error);
5867
5868	/*
5869	 * This value will cause a hex dump of the
5870	 * first 32 16-bit words of the EEPROM to
5871	 * the screen.
5872	 */
5873	if (result == 1) {
5874		adapter = (struct adapter *)arg1;
5875		igb_print_nvm_info(adapter);
5876        }
5877
5878	return (error);
5879}
5880
5881static void
5882igb_print_nvm_info(struct adapter *adapter)
5883{
5884	u16	eeprom_data;
5885	int	i, j, row = 0;
5886
5887	/* Its a bit crude, but it gets the job done */
5888	printf("\nInterface EEPROM Dump:\n");
5889	printf("Offset\n0x0000  ");
5890	for (i = 0, j = 0; i < 32; i++, j++) {
5891		if (j == 8) { /* Make the offset block */
5892			j = 0; ++row;
5893			printf("\n0x00%x0  ",row);
5894		}
5895		e1000_read_nvm(&adapter->hw, i, 1, &eeprom_data);
5896		printf("%04x ", eeprom_data);
5897	}
5898	printf("\n");
5899}
5900
5901static void
5902igb_set_sysctl_value(struct adapter *adapter, const char *name,
5903	const char *description, int *limit, int value)
5904{
5905	*limit = value;
5906	SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
5907	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
5908	    OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW, limit, value, description);
5909}
5910
5911/*
5912** Set flow control using sysctl:
5913** Flow control values:
5914** 	0 - off
5915**	1 - rx pause
5916**	2 - tx pause
5917**	3 - full
5918*/
5919static int
5920igb_set_flowcntl(SYSCTL_HANDLER_ARGS)
5921{
5922	int		error;
5923	static int	input = 3; /* default is full */
5924	struct adapter	*adapter = (struct adapter *) arg1;
5925
5926	error = sysctl_handle_int(oidp, &input, 0, req);
5927
5928	if ((error) || (req->newptr == NULL))
5929		return (error);
5930
5931	switch (input) {
5932		case e1000_fc_rx_pause:
5933		case e1000_fc_tx_pause:
5934		case e1000_fc_full:
5935		case e1000_fc_none:
5936			adapter->hw.fc.requested_mode = input;
5937			adapter->fc = input;
5938			break;
5939		default:
5940			/* Do nothing */
5941			return (error);
5942	}
5943
5944	adapter->hw.fc.current_mode = adapter->hw.fc.requested_mode;
5945	e1000_force_mac_fc(&adapter->hw);
5946	return (error);
5947}
5948
5949/*
5950** Manage DMA Coalesce:
5951** Control values:
5952** 	0/1 - off/on
5953**	Legal timer values are:
5954**	250,500,1000-10000 in thousands
5955*/
5956static int
5957igb_sysctl_dmac(SYSCTL_HANDLER_ARGS)
5958{
5959	struct adapter *adapter = (struct adapter *) arg1;
5960	int		error;
5961
5962	error = sysctl_handle_int(oidp, &adapter->dmac, 0, req);
5963
5964	if ((error) || (req->newptr == NULL))
5965		return (error);
5966
5967	switch (adapter->dmac) {
5968		case 0:
5969			/*Disabling */
5970			break;
5971		case 1: /* Just enable and use default */
5972			adapter->dmac = 1000;
5973			break;
5974		case 250:
5975		case 500:
5976		case 1000:
5977		case 2000:
5978		case 3000:
5979		case 4000:
5980		case 5000:
5981		case 6000:
5982		case 7000:
5983		case 8000:
5984		case 9000:
5985		case 10000:
5986			/* Legal values - allow */
5987			break;
5988		default:
5989			/* Do nothing, illegal value */
5990			adapter->dmac = 0;
5991			return (error);
5992	}
5993	/* Reinit the interface */
5994	igb_init(adapter);
5995	return (error);
5996}
5997
5998/*
5999** Manage Energy Efficient Ethernet:
6000** Control values:
6001**     0/1 - enabled/disabled
6002*/
6003static int
6004igb_sysctl_eee(SYSCTL_HANDLER_ARGS)
6005{
6006	struct adapter	*adapter = (struct adapter *) arg1;
6007	int		error, value;
6008
6009	value = adapter->hw.dev_spec._82575.eee_disable;
6010	error = sysctl_handle_int(oidp, &value, 0, req);
6011	if (error || req->newptr == NULL)
6012		return (error);
6013	IGB_CORE_LOCK(adapter);
6014	adapter->hw.dev_spec._82575.eee_disable = (value != 0);
6015	igb_init_locked(adapter);
6016	IGB_CORE_UNLOCK(adapter);
6017	return (0);
6018}
6019