1/******************************************************************************
2
3  Copyright (c) 2001-2012, Intel Corporation
4  All rights reserved.
5
6  Redistribution and use in source and binary forms, with or without
7  modification, are permitted provided that the following conditions are met:
8
9   1. Redistributions of source code must retain the above copyright notice,
10      this list of conditions and the following disclaimer.
11
12   2. Redistributions in binary form must reproduce the above copyright
13      notice, this list of conditions and the following disclaimer in the
14      documentation and/or other materials provided with the distribution.
15
16   3. Neither the name of the Intel Corporation nor the names of its
17      contributors may be used to endorse or promote products derived from
18      this software without specific prior written permission.
19
20  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
24  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30  POSSIBILITY OF SUCH DAMAGE.
31
32******************************************************************************/
33/*$FreeBSD$*/
34
35
36#ifdef HAVE_KERNEL_OPTION_HEADERS
37#include "opt_device_polling.h"
38#include "opt_inet.h"
39#include "opt_inet6.h"
40#include "opt_altq.h"
41#endif
42
43#include <sys/param.h>
44#include <sys/systm.h>
45#if __FreeBSD_version >= 800000
46#include <sys/buf_ring.h>
47#endif
48#include <sys/bus.h>
49#include <sys/endian.h>
50#include <sys/kernel.h>
51#include <sys/kthread.h>
52#include <sys/malloc.h>
53#include <sys/mbuf.h>
54#include <sys/module.h>
55#include <sys/rman.h>
56#include <sys/socket.h>
57#include <sys/sockio.h>
58#include <sys/sysctl.h>
59#include <sys/taskqueue.h>
60#include <sys/eventhandler.h>
61#include <sys/pcpu.h>
62#include <sys/smp.h>
63#include <machine/smp.h>
64#include <machine/bus.h>
65#include <machine/resource.h>
66
67#include <net/bpf.h>
68#include <net/ethernet.h>
69#include <net/if.h>
70#include <net/if_arp.h>
71#include <net/if_dl.h>
72#include <net/if_media.h>
73
74#include <net/if_types.h>
75#include <net/if_vlan_var.h>
76
77#include <netinet/in_systm.h>
78#include <netinet/in.h>
79#include <netinet/if_ether.h>
80#include <netinet/ip.h>
81#include <netinet/ip6.h>
82#include <netinet/tcp.h>
83#include <netinet/tcp_lro.h>
84#include <netinet/udp.h>
85
86#include <machine/in_cksum.h>
87#include <dev/led/led.h>
88#include <dev/pci/pcivar.h>
89#include <dev/pci/pcireg.h>
90
91#include "e1000_api.h"
92#include "e1000_82575.h"
93#include "if_igb.h"
94
95/*********************************************************************
96 *  Set this to one to display debug statistics
97 *********************************************************************/
98int	igb_display_debug_stats = 0;
99
100/*********************************************************************
101 *  Driver version:
102 *********************************************************************/
103char igb_driver_version[] = "version - 2.3.4";
104
105
106/*********************************************************************
107 *  PCI Device ID Table
108 *
109 *  Used by probe to select devices to load on
110 *  Last field stores an index into e1000_strings
111 *  Last entry must be all 0s
112 *
113 *  { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
114 *********************************************************************/
115
116static igb_vendor_info_t igb_vendor_info_array[] =
117{
118	{ 0x8086, E1000_DEV_ID_82575EB_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
119	{ 0x8086, E1000_DEV_ID_82575EB_FIBER_SERDES,
120						PCI_ANY_ID, PCI_ANY_ID, 0},
121	{ 0x8086, E1000_DEV_ID_82575GB_QUAD_COPPER,
122						PCI_ANY_ID, PCI_ANY_ID, 0},
123	{ 0x8086, E1000_DEV_ID_82576,		PCI_ANY_ID, PCI_ANY_ID, 0},
124	{ 0x8086, E1000_DEV_ID_82576_NS,	PCI_ANY_ID, PCI_ANY_ID, 0},
125	{ 0x8086, E1000_DEV_ID_82576_NS_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
126	{ 0x8086, E1000_DEV_ID_82576_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
127	{ 0x8086, E1000_DEV_ID_82576_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
128	{ 0x8086, E1000_DEV_ID_82576_SERDES_QUAD,
129						PCI_ANY_ID, PCI_ANY_ID, 0},
130	{ 0x8086, E1000_DEV_ID_82576_QUAD_COPPER,
131						PCI_ANY_ID, PCI_ANY_ID, 0},
132	{ 0x8086, E1000_DEV_ID_82576_QUAD_COPPER_ET2,
133						PCI_ANY_ID, PCI_ANY_ID, 0},
134	{ 0x8086, E1000_DEV_ID_82576_VF,	PCI_ANY_ID, PCI_ANY_ID, 0},
135	{ 0x8086, E1000_DEV_ID_82580_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
136	{ 0x8086, E1000_DEV_ID_82580_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
137	{ 0x8086, E1000_DEV_ID_82580_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
138	{ 0x8086, E1000_DEV_ID_82580_SGMII,	PCI_ANY_ID, PCI_ANY_ID, 0},
139	{ 0x8086, E1000_DEV_ID_82580_COPPER_DUAL,
140						PCI_ANY_ID, PCI_ANY_ID, 0},
141	{ 0x8086, E1000_DEV_ID_82580_QUAD_FIBER,
142						PCI_ANY_ID, PCI_ANY_ID, 0},
143	{ 0x8086, E1000_DEV_ID_DH89XXCC_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
144	{ 0x8086, E1000_DEV_ID_DH89XXCC_SGMII,	PCI_ANY_ID, PCI_ANY_ID, 0},
145	{ 0x8086, E1000_DEV_ID_DH89XXCC_SFP,	PCI_ANY_ID, PCI_ANY_ID, 0},
146	{ 0x8086, E1000_DEV_ID_DH89XXCC_BACKPLANE,
147						PCI_ANY_ID, PCI_ANY_ID, 0},
148	{ 0x8086, E1000_DEV_ID_I350_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
149	{ 0x8086, E1000_DEV_ID_I350_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
150	{ 0x8086, E1000_DEV_ID_I350_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
151	{ 0x8086, E1000_DEV_ID_I350_SGMII,	PCI_ANY_ID, PCI_ANY_ID, 0},
152	{ 0x8086, E1000_DEV_ID_I350_VF,		PCI_ANY_ID, PCI_ANY_ID, 0},
153	{ 0x8086, E1000_DEV_ID_I210_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
154	{ 0x8086, E1000_DEV_ID_I210_COPPER_IT,	PCI_ANY_ID, PCI_ANY_ID, 0},
155	{ 0x8086, E1000_DEV_ID_I210_COPPER_OEM1,
156						PCI_ANY_ID, PCI_ANY_ID, 0},
157	{ 0x8086, E1000_DEV_ID_I210_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
158	{ 0x8086, E1000_DEV_ID_I210_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
159	{ 0x8086, E1000_DEV_ID_I210_SGMII,	PCI_ANY_ID, PCI_ANY_ID, 0},
160	{ 0x8086, E1000_DEV_ID_I211_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
161	/* required last entry */
162	{ 0, 0, 0, 0, 0}
163};
164
165/*********************************************************************
166 *  Table of branding strings for all supported NICs.
167 *********************************************************************/
168
169static char *igb_strings[] = {
170	"Intel(R) PRO/1000 Network Connection"
171};
172
173/*********************************************************************
174 *  Function prototypes
175 *********************************************************************/
176static int	igb_probe(device_t);
177static int	igb_attach(device_t);
178static int	igb_detach(device_t);
179static int	igb_shutdown(device_t);
180static int	igb_suspend(device_t);
181static int	igb_resume(device_t);
182#if __FreeBSD_version >= 800000
183static int	igb_mq_start(struct ifnet *, struct mbuf *);
184static int	igb_mq_start_locked(struct ifnet *,
185		    struct tx_ring *, struct mbuf *);
186static void	igb_qflush(struct ifnet *);
187static void	igb_deferred_mq_start(void *, int);
188#else
189static void	igb_start(struct ifnet *);
190static void	igb_start_locked(struct tx_ring *, struct ifnet *ifp);
191#endif
192static int	igb_ioctl(struct ifnet *, u_long, caddr_t);
193static void	igb_init(void *);
194static void	igb_init_locked(struct adapter *);
195static void	igb_stop(void *);
196static void	igb_media_status(struct ifnet *, struct ifmediareq *);
197static int	igb_media_change(struct ifnet *);
198static void	igb_identify_hardware(struct adapter *);
199static int	igb_allocate_pci_resources(struct adapter *);
200static int	igb_allocate_msix(struct adapter *);
201static int	igb_allocate_legacy(struct adapter *);
202static int	igb_setup_msix(struct adapter *);
203static void	igb_free_pci_resources(struct adapter *);
204static void	igb_local_timer(void *);
205static void	igb_reset(struct adapter *);
206static int	igb_setup_interface(device_t, struct adapter *);
207static int	igb_allocate_queues(struct adapter *);
208static void	igb_configure_queues(struct adapter *);
209
210static int	igb_allocate_transmit_buffers(struct tx_ring *);
211static void	igb_setup_transmit_structures(struct adapter *);
212static void	igb_setup_transmit_ring(struct tx_ring *);
213static void	igb_initialize_transmit_units(struct adapter *);
214static void	igb_free_transmit_structures(struct adapter *);
215static void	igb_free_transmit_buffers(struct tx_ring *);
216
217static int	igb_allocate_receive_buffers(struct rx_ring *);
218static int	igb_setup_receive_structures(struct adapter *);
219static int	igb_setup_receive_ring(struct rx_ring *);
220static void	igb_initialize_receive_units(struct adapter *);
221static void	igb_free_receive_structures(struct adapter *);
222static void	igb_free_receive_buffers(struct rx_ring *);
223static void	igb_free_receive_ring(struct rx_ring *);
224
225static void	igb_enable_intr(struct adapter *);
226static void	igb_disable_intr(struct adapter *);
227static void	igb_update_stats_counters(struct adapter *);
228static bool	igb_txeof(struct tx_ring *);
229
230static __inline	void igb_rx_discard(struct rx_ring *, int);
231static __inline void igb_rx_input(struct rx_ring *,
232		    struct ifnet *, struct mbuf *, u32);
233
234static bool	igb_rxeof(struct igb_queue *, int, int *);
235static void	igb_rx_checksum(u32, struct mbuf *, u32);
236static bool	igb_tx_ctx_setup(struct tx_ring *, struct mbuf *);
237static bool	igb_tso_setup(struct tx_ring *, struct mbuf *, int,
238		    struct ip *, struct tcphdr *);
239static void	igb_set_promisc(struct adapter *);
240static void	igb_disable_promisc(struct adapter *);
241static void	igb_set_multi(struct adapter *);
242static void	igb_update_link_status(struct adapter *);
243static void	igb_refresh_mbufs(struct rx_ring *, int);
244
245static void	igb_register_vlan(void *, struct ifnet *, u16);
246static void	igb_unregister_vlan(void *, struct ifnet *, u16);
247static void	igb_setup_vlan_hw_support(struct adapter *);
248
249static int	igb_xmit(struct tx_ring *, struct mbuf **);
250static int	igb_dma_malloc(struct adapter *, bus_size_t,
251		    struct igb_dma_alloc *, int);
252static void	igb_dma_free(struct adapter *, struct igb_dma_alloc *);
253static int	igb_sysctl_nvm_info(SYSCTL_HANDLER_ARGS);
254static void	igb_print_nvm_info(struct adapter *);
255static int 	igb_is_valid_ether_addr(u8 *);
256static void     igb_add_hw_stats(struct adapter *);
257
258static void	igb_vf_init_stats(struct adapter *);
259static void	igb_update_vf_stats_counters(struct adapter *);
260
261/* Management and WOL Support */
262static void	igb_init_manageability(struct adapter *);
263static void	igb_release_manageability(struct adapter *);
264static void     igb_get_hw_control(struct adapter *);
265static void     igb_release_hw_control(struct adapter *);
266static void     igb_enable_wakeup(device_t);
267static void     igb_led_func(void *, int);
268
269static int	igb_irq_fast(void *);
270static void	igb_msix_que(void *);
271static void	igb_msix_link(void *);
272static void	igb_handle_que(void *context, int pending);
273static void	igb_handle_link(void *context, int pending);
274static void	igb_handle_link_locked(struct adapter *);
275
276static void	igb_set_sysctl_value(struct adapter *, const char *,
277		    const char *, int *, int);
278static int	igb_set_flowcntl(SYSCTL_HANDLER_ARGS);
279static int	igb_sysctl_dmac(SYSCTL_HANDLER_ARGS);
280static int	igb_sysctl_eee(SYSCTL_HANDLER_ARGS);
281
282#ifdef DEVICE_POLLING
283static poll_handler_t igb_poll;
284#endif /* POLLING */
285
286/*********************************************************************
287 *  FreeBSD Device Interface Entry Points
288 *********************************************************************/
289
290static device_method_t igb_methods[] = {
291	/* Device interface */
292	DEVMETHOD(device_probe, igb_probe),
293	DEVMETHOD(device_attach, igb_attach),
294	DEVMETHOD(device_detach, igb_detach),
295	DEVMETHOD(device_shutdown, igb_shutdown),
296	DEVMETHOD(device_suspend, igb_suspend),
297	DEVMETHOD(device_resume, igb_resume),
298	{0, 0}
299};
300
301static driver_t igb_driver = {
302	"igb", igb_methods, sizeof(struct adapter),
303};
304
305static devclass_t igb_devclass;
306DRIVER_MODULE(igb, pci, igb_driver, igb_devclass, 0, 0);
307MODULE_DEPEND(igb, pci, 1, 1, 1);
308MODULE_DEPEND(igb, ether, 1, 1, 1);
309
310/*********************************************************************
311 *  Tunable default values.
312 *********************************************************************/
313
314static SYSCTL_NODE(_hw, OID_AUTO, igb, CTLFLAG_RD, 0, "IGB driver parameters");
315
316/* Descriptor defaults */
317static int igb_rxd = IGB_DEFAULT_RXD;
318static int igb_txd = IGB_DEFAULT_TXD;
319TUNABLE_INT("hw.igb.rxd", &igb_rxd);
320TUNABLE_INT("hw.igb.txd", &igb_txd);
321SYSCTL_INT(_hw_igb, OID_AUTO, rxd, CTLFLAG_RDTUN, &igb_rxd, 0,
322    "Number of receive descriptors per queue");
323SYSCTL_INT(_hw_igb, OID_AUTO, txd, CTLFLAG_RDTUN, &igb_txd, 0,
324    "Number of transmit descriptors per queue");
325
326/*
327** AIM: Adaptive Interrupt Moderation
328** which means that the interrupt rate
329** is varied over time based on the
330** traffic for that interrupt vector
331*/
332static int igb_enable_aim = TRUE;
333TUNABLE_INT("hw.igb.enable_aim", &igb_enable_aim);
334SYSCTL_INT(_hw_igb, OID_AUTO, enable_aim, CTLFLAG_RW, &igb_enable_aim, 0,
335    "Enable adaptive interrupt moderation");
336
337/*
338 * MSIX should be the default for best performance,
339 * but this allows it to be forced off for testing.
340 */
341static int igb_enable_msix = 1;
342TUNABLE_INT("hw.igb.enable_msix", &igb_enable_msix);
343SYSCTL_INT(_hw_igb, OID_AUTO, enable_msix, CTLFLAG_RDTUN, &igb_enable_msix, 0,
344    "Enable MSI-X interrupts");
345
346/*
347** Tuneable Interrupt rate
348*/
349static int igb_max_interrupt_rate = 8000;
350TUNABLE_INT("hw.igb.max_interrupt_rate", &igb_max_interrupt_rate);
351SYSCTL_INT(_hw_igb, OID_AUTO, max_interrupt_rate, CTLFLAG_RDTUN,
352    &igb_max_interrupt_rate, 0, "Maximum interrupts per second");
353
354/*
355** Header split causes the packet header to
356** be dma'd to a seperate mbuf from the payload.
357** this can have memory alignment benefits. But
358** another plus is that small packets often fit
359** into the header and thus use no cluster. Its
360** a very workload dependent type feature.
361*/
362static int igb_header_split = FALSE;
363TUNABLE_INT("hw.igb.hdr_split", &igb_header_split);
364SYSCTL_INT(_hw_igb, OID_AUTO, header_split, CTLFLAG_RDTUN, &igb_header_split, 0,
365    "Enable receive mbuf header split");
366
367/*
368** This will autoconfigure based on
369** the number of CPUs if left at 0.
370*/
371static int igb_num_queues = 0;
372TUNABLE_INT("hw.igb.num_queues", &igb_num_queues);
373SYSCTL_INT(_hw_igb, OID_AUTO, num_queues, CTLFLAG_RDTUN, &igb_num_queues, 0,
374    "Number of queues to configure, 0 indicates autoconfigure");
375
376/*
377** Global variable to store last used CPU when binding queues
378** to CPUs in igb_allocate_msix.  Starts at CPU_FIRST and increments when a
379** queue is bound to a cpu.
380*/
381static int igb_last_bind_cpu = -1;
382
383/* How many packets rxeof tries to clean at a time */
384static int igb_rx_process_limit = 100;
385TUNABLE_INT("hw.igb.rx_process_limit", &igb_rx_process_limit);
386SYSCTL_INT(_hw_igb, OID_AUTO, rx_process_limit, CTLFLAG_RDTUN,
387    &igb_rx_process_limit, 0,
388    "Maximum number of received packets to process at a time, -1 means unlimited");
389
390#ifdef DEV_NETMAP	/* see ixgbe.c for details */
391#include <dev/netmap/if_igb_netmap.h>
392#endif /* DEV_NETMAP */
393/*********************************************************************
394 *  Device identification routine
395 *
396 *  igb_probe determines if the driver should be loaded on
397 *  adapter based on PCI vendor/device id of the adapter.
398 *
399 *  return BUS_PROBE_DEFAULT on success, positive on failure
400 *********************************************************************/
401
402static int
403igb_probe(device_t dev)
404{
405	char		adapter_name[60];
406	uint16_t	pci_vendor_id = 0;
407	uint16_t	pci_device_id = 0;
408	uint16_t	pci_subvendor_id = 0;
409	uint16_t	pci_subdevice_id = 0;
410	igb_vendor_info_t *ent;
411
412	INIT_DEBUGOUT("igb_probe: begin");
413
414	pci_vendor_id = pci_get_vendor(dev);
415	if (pci_vendor_id != IGB_VENDOR_ID)
416		return (ENXIO);
417
418	pci_device_id = pci_get_device(dev);
419	pci_subvendor_id = pci_get_subvendor(dev);
420	pci_subdevice_id = pci_get_subdevice(dev);
421
422	ent = igb_vendor_info_array;
423	while (ent->vendor_id != 0) {
424		if ((pci_vendor_id == ent->vendor_id) &&
425		    (pci_device_id == ent->device_id) &&
426
427		    ((pci_subvendor_id == ent->subvendor_id) ||
428		    (ent->subvendor_id == PCI_ANY_ID)) &&
429
430		    ((pci_subdevice_id == ent->subdevice_id) ||
431		    (ent->subdevice_id == PCI_ANY_ID))) {
432			sprintf(adapter_name, "%s %s",
433				igb_strings[ent->index],
434				igb_driver_version);
435			device_set_desc_copy(dev, adapter_name);
436			return (BUS_PROBE_DEFAULT);
437		}
438		ent++;
439	}
440
441	return (ENXIO);
442}
443
444/*********************************************************************
445 *  Device initialization routine
446 *
447 *  The attach entry point is called when the driver is being loaded.
448 *  This routine identifies the type of hardware, allocates all resources
449 *  and initializes the hardware.
450 *
451 *  return 0 on success, positive on failure
452 *********************************************************************/
453
454static int
455igb_attach(device_t dev)
456{
457	struct adapter	*adapter;
458	int		error = 0;
459	u16		eeprom_data;
460
461	INIT_DEBUGOUT("igb_attach: begin");
462
463	if (resource_disabled("igb", device_get_unit(dev))) {
464		device_printf(dev, "Disabled by device hint\n");
465		return (ENXIO);
466	}
467
468	adapter = device_get_softc(dev);
469	adapter->dev = adapter->osdep.dev = dev;
470	IGB_CORE_LOCK_INIT(adapter, device_get_nameunit(dev));
471
472	/* SYSCTL stuff */
473	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
474	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
475	    OID_AUTO, "nvm", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
476	    igb_sysctl_nvm_info, "I", "NVM Information");
477
478	igb_set_sysctl_value(adapter, "enable_aim",
479	    "Interrupt Moderation", &adapter->enable_aim,
480	    igb_enable_aim);
481
482	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
483	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
484	    OID_AUTO, "fc", CTLTYPE_INT|CTLFLAG_RW,
485	    adapter, 0, igb_set_flowcntl, "I", "Flow Control");
486
487	callout_init_mtx(&adapter->timer, &adapter->core_mtx, 0);
488
489	/* Determine hardware and mac info */
490	igb_identify_hardware(adapter);
491
492	/* Setup PCI resources */
493	if (igb_allocate_pci_resources(adapter)) {
494		device_printf(dev, "Allocation of PCI resources failed\n");
495		error = ENXIO;
496		goto err_pci;
497	}
498
499	/* Do Shared Code initialization */
500	if (e1000_setup_init_funcs(&adapter->hw, TRUE)) {
501		device_printf(dev, "Setup of Shared code failed\n");
502		error = ENXIO;
503		goto err_pci;
504	}
505
506	e1000_get_bus_info(&adapter->hw);
507
508	/* Sysctl for limiting the amount of work done in the taskqueue */
509	igb_set_sysctl_value(adapter, "rx_processing_limit",
510	    "max number of rx packets to process",
511	    &adapter->rx_process_limit, igb_rx_process_limit);
512
513	/*
514	 * Validate number of transmit and receive descriptors. It
515	 * must not exceed hardware maximum, and must be multiple
516	 * of E1000_DBA_ALIGN.
517	 */
518	if (((igb_txd * sizeof(struct e1000_tx_desc)) % IGB_DBA_ALIGN) != 0 ||
519	    (igb_txd > IGB_MAX_TXD) || (igb_txd < IGB_MIN_TXD)) {
520		device_printf(dev, "Using %d TX descriptors instead of %d!\n",
521		    IGB_DEFAULT_TXD, igb_txd);
522		adapter->num_tx_desc = IGB_DEFAULT_TXD;
523	} else
524		adapter->num_tx_desc = igb_txd;
525	if (((igb_rxd * sizeof(struct e1000_rx_desc)) % IGB_DBA_ALIGN) != 0 ||
526	    (igb_rxd > IGB_MAX_RXD) || (igb_rxd < IGB_MIN_RXD)) {
527		device_printf(dev, "Using %d RX descriptors instead of %d!\n",
528		    IGB_DEFAULT_RXD, igb_rxd);
529		adapter->num_rx_desc = IGB_DEFAULT_RXD;
530	} else
531		adapter->num_rx_desc = igb_rxd;
532
533	adapter->hw.mac.autoneg = DO_AUTO_NEG;
534	adapter->hw.phy.autoneg_wait_to_complete = FALSE;
535	adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
536
537	/* Copper options */
538	if (adapter->hw.phy.media_type == e1000_media_type_copper) {
539		adapter->hw.phy.mdix = AUTO_ALL_MODES;
540		adapter->hw.phy.disable_polarity_correction = FALSE;
541		adapter->hw.phy.ms_type = IGB_MASTER_SLAVE;
542	}
543
544	/*
545	 * Set the frame limits assuming
546	 * standard ethernet sized frames.
547	 */
548	adapter->max_frame_size = ETHERMTU + ETHER_HDR_LEN + ETHERNET_FCS_SIZE;
549	adapter->min_frame_size = ETH_ZLEN + ETHERNET_FCS_SIZE;
550
551	/*
552	** Allocate and Setup Queues
553	*/
554	if (igb_allocate_queues(adapter)) {
555		error = ENOMEM;
556		goto err_pci;
557	}
558
559	/* Allocate the appropriate stats memory */
560	if (adapter->vf_ifp) {
561		adapter->stats =
562		    (struct e1000_vf_stats *)malloc(sizeof \
563		    (struct e1000_vf_stats), M_DEVBUF, M_NOWAIT | M_ZERO);
564		igb_vf_init_stats(adapter);
565	} else
566		adapter->stats =
567		    (struct e1000_hw_stats *)malloc(sizeof \
568		    (struct e1000_hw_stats), M_DEVBUF, M_NOWAIT | M_ZERO);
569	if (adapter->stats == NULL) {
570		device_printf(dev, "Can not allocate stats memory\n");
571		error = ENOMEM;
572		goto err_late;
573	}
574
575	/* Allocate multicast array memory. */
576	adapter->mta = malloc(sizeof(u8) * ETH_ADDR_LEN *
577	    MAX_NUM_MULTICAST_ADDRESSES, M_DEVBUF, M_NOWAIT);
578	if (adapter->mta == NULL) {
579		device_printf(dev, "Can not allocate multicast setup array\n");
580		error = ENOMEM;
581		goto err_late;
582	}
583
584	/* Some adapter-specific advanced features */
585	if (adapter->hw.mac.type >= e1000_i350) {
586		SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
587		    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
588		    OID_AUTO, "dmac", CTLTYPE_INT|CTLFLAG_RW,
589		    adapter, 0, igb_sysctl_dmac, "I", "DMA Coalesce");
590		SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
591		    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
592		    OID_AUTO, "eee_disabled", CTLTYPE_INT|CTLFLAG_RW,
593		    adapter, 0, igb_sysctl_eee, "I",
594		    "Disable Energy Efficient Ethernet");
595		if (adapter->hw.phy.media_type == e1000_media_type_copper)
596			e1000_set_eee_i350(&adapter->hw);
597	}
598
599	/*
600	** Start from a known state, this is
601	** important in reading the nvm and
602	** mac from that.
603	*/
604	e1000_reset_hw(&adapter->hw);
605
606	/* Make sure we have a good EEPROM before we read from it */
607	if (((adapter->hw.mac.type != e1000_i210) &&
608	    (adapter->hw.mac.type != e1000_i211)) &&
609	    (e1000_validate_nvm_checksum(&adapter->hw) < 0)) {
610		/*
611		** Some PCI-E parts fail the first check due to
612		** the link being in sleep state, call it again,
613		** if it fails a second time its a real issue.
614		*/
615		if (e1000_validate_nvm_checksum(&adapter->hw) < 0) {
616			device_printf(dev,
617			    "The EEPROM Checksum Is Not Valid\n");
618			error = EIO;
619			goto err_late;
620		}
621	}
622
623	/*
624	** Copy the permanent MAC address out of the EEPROM
625	*/
626	if (e1000_read_mac_addr(&adapter->hw) < 0) {
627		device_printf(dev, "EEPROM read error while reading MAC"
628		    " address\n");
629		error = EIO;
630		goto err_late;
631	}
632	/* Check its sanity */
633	if (!igb_is_valid_ether_addr(adapter->hw.mac.addr)) {
634		device_printf(dev, "Invalid MAC address\n");
635		error = EIO;
636		goto err_late;
637	}
638
639	/* Setup OS specific network interface */
640	if (igb_setup_interface(dev, adapter) != 0)
641		goto err_late;
642
643	/* Now get a good starting state */
644	igb_reset(adapter);
645
646	/* Initialize statistics */
647	igb_update_stats_counters(adapter);
648
649	adapter->hw.mac.get_link_status = 1;
650	igb_update_link_status(adapter);
651
652	/* Indicate SOL/IDER usage */
653	if (e1000_check_reset_block(&adapter->hw))
654		device_printf(dev,
655		    "PHY reset is blocked due to SOL/IDER session.\n");
656
657	/* Determine if we have to control management hardware */
658	adapter->has_manage = e1000_enable_mng_pass_thru(&adapter->hw);
659
660	/*
661	 * Setup Wake-on-Lan
662	 */
663	/* APME bit in EEPROM is mapped to WUC.APME */
664	eeprom_data = E1000_READ_REG(&adapter->hw, E1000_WUC) & E1000_WUC_APME;
665	if (eeprom_data)
666		adapter->wol = E1000_WUFC_MAG;
667
668	/* Register for VLAN events */
669	adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
670	     igb_register_vlan, adapter, EVENTHANDLER_PRI_FIRST);
671	adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
672	     igb_unregister_vlan, adapter, EVENTHANDLER_PRI_FIRST);
673
674	igb_add_hw_stats(adapter);
675
676	/* Tell the stack that the interface is not active */
677	adapter->ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
678	adapter->ifp->if_drv_flags |=  IFF_DRV_OACTIVE;
679
680	adapter->led_dev = led_create(igb_led_func, adapter,
681	    device_get_nameunit(dev));
682
683	/*
684	** Configure Interrupts
685	*/
686	if ((adapter->msix > 1) && (igb_enable_msix))
687		error = igb_allocate_msix(adapter);
688	else /* MSI or Legacy */
689		error = igb_allocate_legacy(adapter);
690	if (error)
691		goto err_late;
692
693#ifdef DEV_NETMAP
694	igb_netmap_attach(adapter);
695#endif /* DEV_NETMAP */
696	INIT_DEBUGOUT("igb_attach: end");
697
698	return (0);
699
700err_late:
701	igb_detach(dev);
702	igb_free_transmit_structures(adapter);
703	igb_free_receive_structures(adapter);
704	igb_release_hw_control(adapter);
705err_pci:
706	igb_free_pci_resources(adapter);
707	if (adapter->ifp != NULL)
708		if_free(adapter->ifp);
709	free(adapter->mta, M_DEVBUF);
710	IGB_CORE_LOCK_DESTROY(adapter);
711
712	return (error);
713}
714
715/*********************************************************************
716 *  Device removal routine
717 *
718 *  The detach entry point is called when the driver is being removed.
719 *  This routine stops the adapter and deallocates all the resources
720 *  that were allocated for driver operation.
721 *
722 *  return 0 on success, positive on failure
723 *********************************************************************/
724
725static int
726igb_detach(device_t dev)
727{
728	struct adapter	*adapter = device_get_softc(dev);
729	struct ifnet	*ifp = adapter->ifp;
730
731	INIT_DEBUGOUT("igb_detach: begin");
732
733	/* Make sure VLANS are not using driver */
734	if (adapter->ifp->if_vlantrunk != NULL) {
735		device_printf(dev,"Vlan in use, detach first\n");
736		return (EBUSY);
737	}
738
739	ether_ifdetach(adapter->ifp);
740
741	if (adapter->led_dev != NULL)
742		led_destroy(adapter->led_dev);
743
744#ifdef DEVICE_POLLING
745	if (ifp->if_capenable & IFCAP_POLLING)
746		ether_poll_deregister(ifp);
747#endif
748
749	IGB_CORE_LOCK(adapter);
750	adapter->in_detach = 1;
751	igb_stop(adapter);
752	IGB_CORE_UNLOCK(adapter);
753
754	e1000_phy_hw_reset(&adapter->hw);
755
756	/* Give control back to firmware */
757	igb_release_manageability(adapter);
758	igb_release_hw_control(adapter);
759
760	if (adapter->wol) {
761		E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
762		E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
763		igb_enable_wakeup(dev);
764	}
765
766	/* Unregister VLAN events */
767	if (adapter->vlan_attach != NULL)
768		EVENTHANDLER_DEREGISTER(vlan_config, adapter->vlan_attach);
769	if (adapter->vlan_detach != NULL)
770		EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach);
771
772	callout_drain(&adapter->timer);
773
774#ifdef DEV_NETMAP
775	netmap_detach(adapter->ifp);
776#endif /* DEV_NETMAP */
777	igb_free_pci_resources(adapter);
778	bus_generic_detach(dev);
779	if_free(ifp);
780
781	igb_free_transmit_structures(adapter);
782	igb_free_receive_structures(adapter);
783	if (adapter->mta != NULL)
784		free(adapter->mta, M_DEVBUF);
785
786	IGB_CORE_LOCK_DESTROY(adapter);
787
788	return (0);
789}
790
791/*********************************************************************
792 *
793 *  Shutdown entry point
794 *
795 **********************************************************************/
796
797static int
798igb_shutdown(device_t dev)
799{
800	return igb_suspend(dev);
801}
802
803/*
804 * Suspend/resume device methods.
805 */
806static int
807igb_suspend(device_t dev)
808{
809	struct adapter *adapter = device_get_softc(dev);
810
811	IGB_CORE_LOCK(adapter);
812
813	igb_stop(adapter);
814
815        igb_release_manageability(adapter);
816	igb_release_hw_control(adapter);
817
818        if (adapter->wol) {
819                E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
820                E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
821                igb_enable_wakeup(dev);
822        }
823
824	IGB_CORE_UNLOCK(adapter);
825
826	return bus_generic_suspend(dev);
827}
828
829static int
830igb_resume(device_t dev)
831{
832	struct adapter *adapter = device_get_softc(dev);
833	struct tx_ring	*txr = adapter->tx_rings;
834	struct ifnet *ifp = adapter->ifp;
835
836	IGB_CORE_LOCK(adapter);
837	igb_init_locked(adapter);
838	igb_init_manageability(adapter);
839
840	if ((ifp->if_flags & IFF_UP) &&
841	    (ifp->if_drv_flags & IFF_DRV_RUNNING) && adapter->link_active) {
842		for (int i = 0; i < adapter->num_queues; i++, txr++) {
843			IGB_TX_LOCK(txr);
844#if __FreeBSD_version >= 800000
845			/* Process the stack queue only if not depleted */
846			if (((txr->queue_status & IGB_QUEUE_DEPLETED) == 0) &&
847			    !drbr_empty(ifp, txr->br))
848				igb_mq_start_locked(ifp, txr, NULL);
849#else
850			if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
851				igb_start_locked(txr, ifp);
852#endif
853			IGB_TX_UNLOCK(txr);
854		}
855	}
856	IGB_CORE_UNLOCK(adapter);
857
858	return bus_generic_resume(dev);
859}
860
861
862#if __FreeBSD_version < 800000
863
864/*********************************************************************
865 *  Transmit entry point
866 *
867 *  igb_start is called by the stack to initiate a transmit.
868 *  The driver will remain in this routine as long as there are
869 *  packets to transmit and transmit resources are available.
870 *  In case resources are not available stack is notified and
871 *  the packet is requeued.
872 **********************************************************************/
873
874static void
875igb_start_locked(struct tx_ring *txr, struct ifnet *ifp)
876{
877	struct adapter	*adapter = ifp->if_softc;
878	struct mbuf	*m_head;
879
880	IGB_TX_LOCK_ASSERT(txr);
881
882	if ((ifp->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
883	    IFF_DRV_RUNNING)
884		return;
885	if (!adapter->link_active)
886		return;
887
888	/* Call cleanup if number of TX descriptors low */
889	if (txr->tx_avail <= IGB_TX_CLEANUP_THRESHOLD)
890		igb_txeof(txr);
891
892	while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
893		if (txr->tx_avail <= IGB_MAX_SCATTER) {
894			txr->queue_status |= IGB_QUEUE_DEPLETED;
895			break;
896		}
897		IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
898		if (m_head == NULL)
899			break;
900		/*
901		 *  Encapsulation can modify our pointer, and or make it
902		 *  NULL on failure.  In that event, we can't requeue.
903		 */
904		if (igb_xmit(txr, &m_head)) {
905			if (m_head != NULL)
906				IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
907			if (txr->tx_avail <= IGB_MAX_SCATTER)
908				txr->queue_status |= IGB_QUEUE_DEPLETED;
909			break;
910		}
911
912		/* Send a copy of the frame to the BPF listener */
913		ETHER_BPF_MTAP(ifp, m_head);
914
915		/* Set watchdog on */
916		txr->watchdog_time = ticks;
917		txr->queue_status |= IGB_QUEUE_WORKING;
918	}
919}
920
921/*
922 * Legacy TX driver routine, called from the
923 * stack, always uses tx[0], and spins for it.
924 * Should not be used with multiqueue tx
925 */
926static void
927igb_start(struct ifnet *ifp)
928{
929	struct adapter	*adapter = ifp->if_softc;
930	struct tx_ring	*txr = adapter->tx_rings;
931
932	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
933		IGB_TX_LOCK(txr);
934		igb_start_locked(txr, ifp);
935		IGB_TX_UNLOCK(txr);
936	}
937	return;
938}
939
940#else /* __FreeBSD_version >= 800000 */
941
942/*
943** Multiqueue Transmit driver
944**
945*/
946static int
947igb_mq_start(struct ifnet *ifp, struct mbuf *m)
948{
949	struct adapter		*adapter = ifp->if_softc;
950	struct igb_queue	*que;
951	struct tx_ring		*txr;
952	int 			i, err = 0;
953
954	/* Which queue to use */
955	if ((m->m_flags & M_FLOWID) != 0)
956		i = m->m_pkthdr.flowid % adapter->num_queues;
957	else
958		i = curcpu % adapter->num_queues;
959
960	txr = &adapter->tx_rings[i];
961	que = &adapter->queues[i];
962	if (((txr->queue_status & IGB_QUEUE_DEPLETED) == 0) &&
963	    IGB_TX_TRYLOCK(txr)) {
964		err = igb_mq_start_locked(ifp, txr, m);
965		IGB_TX_UNLOCK(txr);
966	} else {
967		err = drbr_enqueue(ifp, txr->br, m);
968		taskqueue_enqueue(que->tq, &txr->txq_task);
969	}
970
971	return (err);
972}
973
974static int
975igb_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr, struct mbuf *m)
976{
977	struct adapter  *adapter = txr->adapter;
978        struct mbuf     *next;
979        int             err = 0, enq;
980
981	IGB_TX_LOCK_ASSERT(txr);
982
983	if (((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) ||
984	    (txr->queue_status == IGB_QUEUE_DEPLETED) ||
985	    adapter->link_active == 0) {
986		if (m != NULL)
987			err = drbr_enqueue(ifp, txr->br, m);
988		return (err);
989	}
990
991	enq = 0;
992	if (m == NULL) {
993		next = drbr_dequeue(ifp, txr->br);
994	} else if (drbr_needs_enqueue(ifp, txr->br)) {
995		if ((err = drbr_enqueue(ifp, txr->br, m)) != 0)
996			return (err);
997		next = drbr_dequeue(ifp, txr->br);
998	} else
999		next = m;
1000
1001	/* Process the queue */
1002	while (next != NULL) {
1003		if ((err = igb_xmit(txr, &next)) != 0) {
1004			if (next != NULL)
1005				err = drbr_enqueue(ifp, txr->br, next);
1006			break;
1007		}
1008		enq++;
1009		drbr_stats_update(ifp, next->m_pkthdr.len, next->m_flags);
1010		ETHER_BPF_MTAP(ifp, next);
1011		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
1012			break;
1013		next = drbr_dequeue(ifp, txr->br);
1014	}
1015	if (enq > 0) {
1016		/* Set the watchdog */
1017		txr->queue_status |= IGB_QUEUE_WORKING;
1018		txr->watchdog_time = ticks;
1019	}
1020	if (txr->tx_avail <= IGB_TX_CLEANUP_THRESHOLD)
1021		igb_txeof(txr);
1022	if (txr->tx_avail <= IGB_MAX_SCATTER)
1023		txr->queue_status |= IGB_QUEUE_DEPLETED;
1024	return (err);
1025}
1026
1027/*
1028 * Called from a taskqueue to drain queued transmit packets.
1029 */
1030static void
1031igb_deferred_mq_start(void *arg, int pending)
1032{
1033	struct tx_ring *txr = arg;
1034	struct adapter *adapter = txr->adapter;
1035	struct ifnet *ifp = adapter->ifp;
1036
1037	IGB_TX_LOCK(txr);
1038	if (!drbr_empty(ifp, txr->br))
1039		igb_mq_start_locked(ifp, txr, NULL);
1040	IGB_TX_UNLOCK(txr);
1041}
1042
1043/*
1044** Flush all ring buffers
1045*/
1046static void
1047igb_qflush(struct ifnet *ifp)
1048{
1049	struct adapter	*adapter = ifp->if_softc;
1050	struct tx_ring	*txr = adapter->tx_rings;
1051	struct mbuf	*m;
1052
1053	for (int i = 0; i < adapter->num_queues; i++, txr++) {
1054		IGB_TX_LOCK(txr);
1055		while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
1056			m_freem(m);
1057		IGB_TX_UNLOCK(txr);
1058	}
1059	if_qflush(ifp);
1060}
1061#endif /* __FreeBSD_version >= 800000 */
1062
1063/*********************************************************************
1064 *  Ioctl entry point
1065 *
1066 *  igb_ioctl is called when the user wants to configure the
1067 *  interface.
1068 *
1069 *  return 0 on success, positive on failure
1070 **********************************************************************/
1071
1072static int
1073igb_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
1074{
1075	struct adapter	*adapter = ifp->if_softc;
1076	struct ifreq	*ifr = (struct ifreq *)data;
1077#if defined(INET) || defined(INET6)
1078	struct ifaddr	*ifa = (struct ifaddr *)data;
1079#endif
1080	bool		avoid_reset = FALSE;
1081	int		error = 0;
1082
1083	if (adapter->in_detach)
1084		return (error);
1085
1086	switch (command) {
1087	case SIOCSIFADDR:
1088#ifdef INET
1089		if (ifa->ifa_addr->sa_family == AF_INET)
1090			avoid_reset = TRUE;
1091#endif
1092#ifdef INET6
1093		if (ifa->ifa_addr->sa_family == AF_INET6)
1094			avoid_reset = TRUE;
1095#endif
1096		/*
1097		** Calling init results in link renegotiation,
1098		** so we avoid doing it when possible.
1099		*/
1100		if (avoid_reset) {
1101			ifp->if_flags |= IFF_UP;
1102			if (!(ifp->if_drv_flags & IFF_DRV_RUNNING))
1103				igb_init(adapter);
1104#ifdef INET
1105			if (!(ifp->if_flags & IFF_NOARP))
1106				arp_ifinit(ifp, ifa);
1107#endif
1108		} else
1109			error = ether_ioctl(ifp, command, data);
1110		break;
1111	case SIOCSIFMTU:
1112	    {
1113		int max_frame_size;
1114
1115		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)");
1116
1117		IGB_CORE_LOCK(adapter);
1118		max_frame_size = 9234;
1119		if (ifr->ifr_mtu > max_frame_size - ETHER_HDR_LEN -
1120		    ETHER_CRC_LEN) {
1121			IGB_CORE_UNLOCK(adapter);
1122			error = EINVAL;
1123			break;
1124		}
1125
1126		ifp->if_mtu = ifr->ifr_mtu;
1127		adapter->max_frame_size =
1128		    ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
1129		igb_init_locked(adapter);
1130		IGB_CORE_UNLOCK(adapter);
1131		break;
1132	    }
1133	case SIOCSIFFLAGS:
1134		IOCTL_DEBUGOUT("ioctl rcv'd:\
1135		    SIOCSIFFLAGS (Set Interface Flags)");
1136		IGB_CORE_LOCK(adapter);
1137		if (ifp->if_flags & IFF_UP) {
1138			if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) {
1139				if ((ifp->if_flags ^ adapter->if_flags) &
1140				    (IFF_PROMISC | IFF_ALLMULTI)) {
1141					igb_disable_promisc(adapter);
1142					igb_set_promisc(adapter);
1143				}
1144			} else
1145				igb_init_locked(adapter);
1146		} else
1147			if (ifp->if_drv_flags & IFF_DRV_RUNNING)
1148				igb_stop(adapter);
1149		adapter->if_flags = ifp->if_flags;
1150		IGB_CORE_UNLOCK(adapter);
1151		break;
1152	case SIOCADDMULTI:
1153	case SIOCDELMULTI:
1154		IOCTL_DEBUGOUT("ioctl rcv'd: SIOC(ADD|DEL)MULTI");
1155		if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1156			IGB_CORE_LOCK(adapter);
1157			igb_disable_intr(adapter);
1158			igb_set_multi(adapter);
1159#ifdef DEVICE_POLLING
1160			if (!(ifp->if_capenable & IFCAP_POLLING))
1161#endif
1162				igb_enable_intr(adapter);
1163			IGB_CORE_UNLOCK(adapter);
1164		}
1165		break;
1166	case SIOCSIFMEDIA:
1167		/* Check SOL/IDER usage */
1168		IGB_CORE_LOCK(adapter);
1169		if (e1000_check_reset_block(&adapter->hw)) {
1170			IGB_CORE_UNLOCK(adapter);
1171			device_printf(adapter->dev, "Media change is"
1172			    " blocked due to SOL/IDER session.\n");
1173			break;
1174		}
1175		IGB_CORE_UNLOCK(adapter);
1176	case SIOCGIFMEDIA:
1177		IOCTL_DEBUGOUT("ioctl rcv'd: \
1178		    SIOCxIFMEDIA (Get/Set Interface Media)");
1179		error = ifmedia_ioctl(ifp, ifr, &adapter->media, command);
1180		break;
1181	case SIOCSIFCAP:
1182	    {
1183		int mask, reinit;
1184
1185		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFCAP (Set Capabilities)");
1186		reinit = 0;
1187		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
1188#ifdef DEVICE_POLLING
1189		if (mask & IFCAP_POLLING) {
1190			if (ifr->ifr_reqcap & IFCAP_POLLING) {
1191				error = ether_poll_register(igb_poll, ifp);
1192				if (error)
1193					return (error);
1194				IGB_CORE_LOCK(adapter);
1195				igb_disable_intr(adapter);
1196				ifp->if_capenable |= IFCAP_POLLING;
1197				IGB_CORE_UNLOCK(adapter);
1198			} else {
1199				error = ether_poll_deregister(ifp);
1200				/* Enable interrupt even in error case */
1201				IGB_CORE_LOCK(adapter);
1202				igb_enable_intr(adapter);
1203				ifp->if_capenable &= ~IFCAP_POLLING;
1204				IGB_CORE_UNLOCK(adapter);
1205			}
1206		}
1207#endif
1208		if (mask & IFCAP_HWCSUM) {
1209			ifp->if_capenable ^= IFCAP_HWCSUM;
1210			reinit = 1;
1211		}
1212		if (mask & IFCAP_TSO4) {
1213			ifp->if_capenable ^= IFCAP_TSO4;
1214			reinit = 1;
1215		}
1216		if (mask & IFCAP_VLAN_HWTAGGING) {
1217			ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
1218			reinit = 1;
1219		}
1220		if (mask & IFCAP_VLAN_HWFILTER) {
1221			ifp->if_capenable ^= IFCAP_VLAN_HWFILTER;
1222			reinit = 1;
1223		}
1224		if (mask & IFCAP_VLAN_HWTSO) {
1225			ifp->if_capenable ^= IFCAP_VLAN_HWTSO;
1226			reinit = 1;
1227		}
1228		if (mask & IFCAP_LRO) {
1229			ifp->if_capenable ^= IFCAP_LRO;
1230			reinit = 1;
1231		}
1232		if (reinit && (ifp->if_drv_flags & IFF_DRV_RUNNING))
1233			igb_init(adapter);
1234		VLAN_CAPABILITIES(ifp);
1235		break;
1236	    }
1237
1238	default:
1239		error = ether_ioctl(ifp, command, data);
1240		break;
1241	}
1242
1243	return (error);
1244}
1245
1246
1247/*********************************************************************
1248 *  Init entry point
1249 *
1250 *  This routine is used in two ways. It is used by the stack as
1251 *  init entry point in network interface structure. It is also used
1252 *  by the driver as a hw/sw initialization routine to get to a
1253 *  consistent state.
1254 *
1255 *  return 0 on success, positive on failure
1256 **********************************************************************/
1257
1258static void
1259igb_init_locked(struct adapter *adapter)
1260{
1261	struct ifnet	*ifp = adapter->ifp;
1262	device_t	dev = adapter->dev;
1263
1264	INIT_DEBUGOUT("igb_init: begin");
1265
1266	IGB_CORE_LOCK_ASSERT(adapter);
1267
1268	igb_disable_intr(adapter);
1269	callout_stop(&adapter->timer);
1270
1271	/* Get the latest mac address, User can use a LAA */
1272        bcopy(IF_LLADDR(adapter->ifp), adapter->hw.mac.addr,
1273              ETHER_ADDR_LEN);
1274
1275	/* Put the address into the Receive Address Array */
1276	e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);
1277
1278	igb_reset(adapter);
1279	igb_update_link_status(adapter);
1280
1281	E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);
1282
1283	/* Set hardware offload abilities */
1284	ifp->if_hwassist = 0;
1285	if (ifp->if_capenable & IFCAP_TXCSUM) {
1286		ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
1287#if __FreeBSD_version >= 800000
1288		if (adapter->hw.mac.type == e1000_82576)
1289			ifp->if_hwassist |= CSUM_SCTP;
1290#endif
1291	}
1292
1293	if (ifp->if_capenable & IFCAP_TSO4)
1294		ifp->if_hwassist |= CSUM_TSO;
1295
1296	/* Configure for OS presence */
1297	igb_init_manageability(adapter);
1298
1299	/* Prepare transmit descriptors and buffers */
1300	igb_setup_transmit_structures(adapter);
1301	igb_initialize_transmit_units(adapter);
1302
1303	/* Setup Multicast table */
1304	igb_set_multi(adapter);
1305
1306	/*
1307	** Figure out the desired mbuf pool
1308	** for doing jumbo/packetsplit
1309	*/
1310	if (adapter->max_frame_size <= 2048)
1311		adapter->rx_mbuf_sz = MCLBYTES;
1312	else if (adapter->max_frame_size <= 4096)
1313		adapter->rx_mbuf_sz = MJUMPAGESIZE;
1314	else
1315		adapter->rx_mbuf_sz = MJUM9BYTES;
1316
1317	/* Prepare receive descriptors and buffers */
1318	if (igb_setup_receive_structures(adapter)) {
1319		device_printf(dev, "Could not setup receive structures\n");
1320		return;
1321	}
1322	igb_initialize_receive_units(adapter);
1323
1324        /* Enable VLAN support */
1325	if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING)
1326		igb_setup_vlan_hw_support(adapter);
1327
1328	/* Don't lose promiscuous settings */
1329	igb_set_promisc(adapter);
1330
1331	ifp->if_drv_flags |= IFF_DRV_RUNNING;
1332	ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
1333
1334	callout_reset(&adapter->timer, hz, igb_local_timer, adapter);
1335	e1000_clear_hw_cntrs_base_generic(&adapter->hw);
1336
1337	if (adapter->msix > 1) /* Set up queue routing */
1338		igb_configure_queues(adapter);
1339
1340	/* this clears any pending interrupts */
1341	E1000_READ_REG(&adapter->hw, E1000_ICR);
1342#ifdef DEVICE_POLLING
1343	/*
1344	 * Only enable interrupts if we are not polling, make sure
1345	 * they are off otherwise.
1346	 */
1347	if (ifp->if_capenable & IFCAP_POLLING)
1348		igb_disable_intr(adapter);
1349	else
1350#endif /* DEVICE_POLLING */
1351	{
1352		igb_enable_intr(adapter);
1353		E1000_WRITE_REG(&adapter->hw, E1000_ICS, E1000_ICS_LSC);
1354	}
1355
1356	/* Set Energy Efficient Ethernet */
1357	if (adapter->hw.phy.media_type == e1000_media_type_copper)
1358		e1000_set_eee_i350(&adapter->hw);
1359}
1360
1361static void
1362igb_init(void *arg)
1363{
1364	struct adapter *adapter = arg;
1365
1366	IGB_CORE_LOCK(adapter);
1367	igb_init_locked(adapter);
1368	IGB_CORE_UNLOCK(adapter);
1369}
1370
1371
1372static void
1373igb_handle_que(void *context, int pending)
1374{
1375	struct igb_queue *que = context;
1376	struct adapter *adapter = que->adapter;
1377	struct tx_ring *txr = que->txr;
1378	struct ifnet	*ifp = adapter->ifp;
1379
1380	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1381		bool	more;
1382
1383		more = igb_rxeof(que, adapter->rx_process_limit, NULL);
1384
1385		IGB_TX_LOCK(txr);
1386		igb_txeof(txr);
1387#if __FreeBSD_version >= 800000
1388		/* Process the stack queue only if not depleted */
1389		if (((txr->queue_status & IGB_QUEUE_DEPLETED) == 0) &&
1390		    !drbr_empty(ifp, txr->br))
1391			igb_mq_start_locked(ifp, txr, NULL);
1392#else
1393		if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1394			igb_start_locked(txr, ifp);
1395#endif
1396		IGB_TX_UNLOCK(txr);
1397		/* Do we need another? */
1398		if (more) {
1399			taskqueue_enqueue(que->tq, &que->que_task);
1400			return;
1401		}
1402	}
1403
1404#ifdef DEVICE_POLLING
1405	if (ifp->if_capenable & IFCAP_POLLING)
1406		return;
1407#endif
1408	/* Reenable this interrupt */
1409	if (que->eims)
1410		E1000_WRITE_REG(&adapter->hw, E1000_EIMS, que->eims);
1411	else
1412		igb_enable_intr(adapter);
1413}
1414
1415/* Deal with link in a sleepable context */
1416static void
1417igb_handle_link(void *context, int pending)
1418{
1419	struct adapter *adapter = context;
1420
1421	IGB_CORE_LOCK(adapter);
1422	igb_handle_link_locked(adapter);
1423	IGB_CORE_UNLOCK(adapter);
1424}
1425
1426static void
1427igb_handle_link_locked(struct adapter *adapter)
1428{
1429	struct tx_ring	*txr = adapter->tx_rings;
1430	struct ifnet *ifp = adapter->ifp;
1431
1432	IGB_CORE_LOCK_ASSERT(adapter);
1433	adapter->hw.mac.get_link_status = 1;
1434	igb_update_link_status(adapter);
1435	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) && adapter->link_active) {
1436		for (int i = 0; i < adapter->num_queues; i++, txr++) {
1437			IGB_TX_LOCK(txr);
1438#if __FreeBSD_version >= 800000
1439			/* Process the stack queue only if not depleted */
1440			if (((txr->queue_status & IGB_QUEUE_DEPLETED) == 0) &&
1441			    !drbr_empty(ifp, txr->br))
1442				igb_mq_start_locked(ifp, txr, NULL);
1443#else
1444			if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1445				igb_start_locked(txr, ifp);
1446#endif
1447			IGB_TX_UNLOCK(txr);
1448		}
1449	}
1450}
1451
1452/*********************************************************************
1453 *
1454 *  MSI/Legacy Deferred
1455 *  Interrupt Service routine
1456 *
1457 *********************************************************************/
1458static int
1459igb_irq_fast(void *arg)
1460{
1461	struct adapter		*adapter = arg;
1462	struct igb_queue	*que = adapter->queues;
1463	u32			reg_icr;
1464
1465
1466	reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1467
1468	/* Hot eject?  */
1469	if (reg_icr == 0xffffffff)
1470		return FILTER_STRAY;
1471
1472	/* Definitely not our interrupt.  */
1473	if (reg_icr == 0x0)
1474		return FILTER_STRAY;
1475
1476	if ((reg_icr & E1000_ICR_INT_ASSERTED) == 0)
1477		return FILTER_STRAY;
1478
1479	/*
1480	 * Mask interrupts until the taskqueue is finished running.  This is
1481	 * cheap, just assume that it is needed.  This also works around the
1482	 * MSI message reordering errata on certain systems.
1483	 */
1484	igb_disable_intr(adapter);
1485	taskqueue_enqueue(que->tq, &que->que_task);
1486
1487	/* Link status change */
1488	if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC))
1489		taskqueue_enqueue(que->tq, &adapter->link_task);
1490
1491	if (reg_icr & E1000_ICR_RXO)
1492		adapter->rx_overruns++;
1493	return FILTER_HANDLED;
1494}
1495
1496#ifdef DEVICE_POLLING
1497/*********************************************************************
1498 *
1499 *  Legacy polling routine : if using this code you MUST be sure that
1500 *  multiqueue is not defined, ie, set igb_num_queues to 1.
1501 *
1502 *********************************************************************/
1503#if __FreeBSD_version >= 800000
1504#define POLL_RETURN_COUNT(a) (a)
1505static int
1506#else
1507#define POLL_RETURN_COUNT(a)
1508static void
1509#endif
1510igb_poll(struct ifnet *ifp, enum poll_cmd cmd, int count)
1511{
1512	struct adapter		*adapter = ifp->if_softc;
1513	struct igb_queue	*que = adapter->queues;
1514	struct tx_ring		*txr = adapter->tx_rings;
1515	u32			reg_icr, rx_done = 0;
1516	u32			loop = IGB_MAX_LOOP;
1517	bool			more;
1518
1519	IGB_CORE_LOCK(adapter);
1520	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
1521		IGB_CORE_UNLOCK(adapter);
1522		return POLL_RETURN_COUNT(rx_done);
1523	}
1524
1525	if (cmd == POLL_AND_CHECK_STATUS) {
1526		reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1527		/* Link status change */
1528		if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC))
1529			igb_handle_link_locked(adapter);
1530
1531		if (reg_icr & E1000_ICR_RXO)
1532			adapter->rx_overruns++;
1533	}
1534	IGB_CORE_UNLOCK(adapter);
1535
1536	igb_rxeof(que, count, &rx_done);
1537
1538	IGB_TX_LOCK(txr);
1539	do {
1540		more = igb_txeof(txr);
1541	} while (loop-- && more);
1542#if __FreeBSD_version >= 800000
1543	if (!drbr_empty(ifp, txr->br))
1544		igb_mq_start_locked(ifp, txr, NULL);
1545#else
1546	if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1547		igb_start_locked(txr, ifp);
1548#endif
1549	IGB_TX_UNLOCK(txr);
1550	return POLL_RETURN_COUNT(rx_done);
1551}
1552#endif /* DEVICE_POLLING */
1553
1554/*********************************************************************
1555 *
1556 *  MSIX Que Interrupt Service routine
1557 *
1558 **********************************************************************/
1559static void
1560igb_msix_que(void *arg)
1561{
1562	struct igb_queue *que = arg;
1563	struct adapter *adapter = que->adapter;
1564	struct ifnet   *ifp = adapter->ifp;
1565	struct tx_ring *txr = que->txr;
1566	struct rx_ring *rxr = que->rxr;
1567	u32		newitr = 0;
1568	bool		more_rx;
1569
1570	E1000_WRITE_REG(&adapter->hw, E1000_EIMC, que->eims);
1571	++que->irqs;
1572
1573	IGB_TX_LOCK(txr);
1574	igb_txeof(txr);
1575#if __FreeBSD_version >= 800000
1576	/* Process the stack queue only if not depleted */
1577	if (((txr->queue_status & IGB_QUEUE_DEPLETED) == 0) &&
1578	    !drbr_empty(ifp, txr->br))
1579		igb_mq_start_locked(ifp, txr, NULL);
1580#else
1581	if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1582		igb_start_locked(txr, ifp);
1583#endif
1584	IGB_TX_UNLOCK(txr);
1585
1586	more_rx = igb_rxeof(que, adapter->rx_process_limit, NULL);
1587
1588	if (adapter->enable_aim == FALSE)
1589		goto no_calc;
1590	/*
1591	** Do Adaptive Interrupt Moderation:
1592        **  - Write out last calculated setting
1593	**  - Calculate based on average size over
1594	**    the last interval.
1595	*/
1596        if (que->eitr_setting)
1597                E1000_WRITE_REG(&adapter->hw,
1598                    E1000_EITR(que->msix), que->eitr_setting);
1599
1600        que->eitr_setting = 0;
1601
1602        /* Idle, do nothing */
1603        if ((txr->bytes == 0) && (rxr->bytes == 0))
1604                goto no_calc;
1605
1606        /* Used half Default if sub-gig */
1607        if (adapter->link_speed != 1000)
1608                newitr = IGB_DEFAULT_ITR / 2;
1609        else {
1610		if ((txr->bytes) && (txr->packets))
1611                	newitr = txr->bytes/txr->packets;
1612		if ((rxr->bytes) && (rxr->packets))
1613			newitr = max(newitr,
1614			    (rxr->bytes / rxr->packets));
1615                newitr += 24; /* account for hardware frame, crc */
1616		/* set an upper boundary */
1617		newitr = min(newitr, 3000);
1618		/* Be nice to the mid range */
1619                if ((newitr > 300) && (newitr < 1200))
1620                        newitr = (newitr / 3);
1621                else
1622                        newitr = (newitr / 2);
1623        }
1624        newitr &= 0x7FFC;  /* Mask invalid bits */
1625        if (adapter->hw.mac.type == e1000_82575)
1626                newitr |= newitr << 16;
1627        else
1628                newitr |= E1000_EITR_CNT_IGNR;
1629
1630        /* save for next interrupt */
1631        que->eitr_setting = newitr;
1632
1633        /* Reset state */
1634        txr->bytes = 0;
1635        txr->packets = 0;
1636        rxr->bytes = 0;
1637        rxr->packets = 0;
1638
1639no_calc:
1640	/* Schedule a clean task if needed*/
1641	if (more_rx)
1642		taskqueue_enqueue(que->tq, &que->que_task);
1643	else
1644		/* Reenable this interrupt */
1645		E1000_WRITE_REG(&adapter->hw, E1000_EIMS, que->eims);
1646	return;
1647}
1648
1649
1650/*********************************************************************
1651 *
1652 *  MSIX Link Interrupt Service routine
1653 *
1654 **********************************************************************/
1655
1656static void
1657igb_msix_link(void *arg)
1658{
1659	struct adapter	*adapter = arg;
1660	u32       	icr;
1661
1662	++adapter->link_irq;
1663	icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1664	if (!(icr & E1000_ICR_LSC))
1665		goto spurious;
1666	igb_handle_link(adapter, 0);
1667
1668spurious:
1669	/* Rearm */
1670	E1000_WRITE_REG(&adapter->hw, E1000_IMS, E1000_IMS_LSC);
1671	E1000_WRITE_REG(&adapter->hw, E1000_EIMS, adapter->link_mask);
1672	return;
1673}
1674
1675
1676/*********************************************************************
1677 *
1678 *  Media Ioctl callback
1679 *
1680 *  This routine is called whenever the user queries the status of
1681 *  the interface using ifconfig.
1682 *
1683 **********************************************************************/
1684static void
1685igb_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
1686{
1687	struct adapter *adapter = ifp->if_softc;
1688	u_char fiber_type = IFM_1000_SX;
1689
1690	INIT_DEBUGOUT("igb_media_status: begin");
1691
1692	IGB_CORE_LOCK(adapter);
1693	igb_update_link_status(adapter);
1694
1695	ifmr->ifm_status = IFM_AVALID;
1696	ifmr->ifm_active = IFM_ETHER;
1697
1698	if (!adapter->link_active) {
1699		IGB_CORE_UNLOCK(adapter);
1700		return;
1701	}
1702
1703	ifmr->ifm_status |= IFM_ACTIVE;
1704
1705	if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
1706	    (adapter->hw.phy.media_type == e1000_media_type_internal_serdes))
1707		ifmr->ifm_active |= fiber_type | IFM_FDX;
1708	else {
1709		switch (adapter->link_speed) {
1710		case 10:
1711			ifmr->ifm_active |= IFM_10_T;
1712			break;
1713		case 100:
1714			ifmr->ifm_active |= IFM_100_TX;
1715			break;
1716		case 1000:
1717			ifmr->ifm_active |= IFM_1000_T;
1718			break;
1719		}
1720		if (adapter->link_duplex == FULL_DUPLEX)
1721			ifmr->ifm_active |= IFM_FDX;
1722		else
1723			ifmr->ifm_active |= IFM_HDX;
1724	}
1725	IGB_CORE_UNLOCK(adapter);
1726}
1727
1728/*********************************************************************
1729 *
1730 *  Media Ioctl callback
1731 *
1732 *  This routine is called when the user changes speed/duplex using
1733 *  media/mediopt option with ifconfig.
1734 *
1735 **********************************************************************/
1736static int
1737igb_media_change(struct ifnet *ifp)
1738{
1739	struct adapter *adapter = ifp->if_softc;
1740	struct ifmedia  *ifm = &adapter->media;
1741
1742	INIT_DEBUGOUT("igb_media_change: begin");
1743
1744	if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
1745		return (EINVAL);
1746
1747	IGB_CORE_LOCK(adapter);
1748	switch (IFM_SUBTYPE(ifm->ifm_media)) {
1749	case IFM_AUTO:
1750		adapter->hw.mac.autoneg = DO_AUTO_NEG;
1751		adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
1752		break;
1753	case IFM_1000_LX:
1754	case IFM_1000_SX:
1755	case IFM_1000_T:
1756		adapter->hw.mac.autoneg = DO_AUTO_NEG;
1757		adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
1758		break;
1759	case IFM_100_TX:
1760		adapter->hw.mac.autoneg = FALSE;
1761		adapter->hw.phy.autoneg_advertised = 0;
1762		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1763			adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_FULL;
1764		else
1765			adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_HALF;
1766		break;
1767	case IFM_10_T:
1768		adapter->hw.mac.autoneg = FALSE;
1769		adapter->hw.phy.autoneg_advertised = 0;
1770		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1771			adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_FULL;
1772		else
1773			adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_HALF;
1774		break;
1775	default:
1776		device_printf(adapter->dev, "Unsupported media type\n");
1777	}
1778
1779	igb_init_locked(adapter);
1780	IGB_CORE_UNLOCK(adapter);
1781
1782	return (0);
1783}
1784
1785
1786/*********************************************************************
1787 *
1788 *  This routine maps the mbufs to Advanced TX descriptors.
1789 *
1790 **********************************************************************/
1791static int
1792igb_xmit(struct tx_ring *txr, struct mbuf **m_headp)
1793{
1794	struct adapter		*adapter = txr->adapter;
1795	bus_dma_segment_t	segs[IGB_MAX_SCATTER];
1796	bus_dmamap_t		map;
1797	struct igb_tx_buffer	*tx_buffer, *tx_buffer_mapped;
1798	union e1000_adv_tx_desc	*txd = NULL;
1799	struct mbuf		*m_head = *m_headp;
1800	struct ether_vlan_header *eh = NULL;
1801	struct ip		*ip = NULL;
1802	struct tcphdr		*th = NULL;
1803	u32			hdrlen, cmd_type_len, olinfo_status = 0;
1804	int			ehdrlen, poff;
1805	int			nsegs, i, first, last = 0;
1806	int			error, do_tso, remap = 1;
1807
1808	/* Set basic descriptor constants */
1809	cmd_type_len = E1000_ADVTXD_DTYP_DATA;
1810	cmd_type_len |= E1000_ADVTXD_DCMD_IFCS | E1000_ADVTXD_DCMD_DEXT;
1811	if (m_head->m_flags & M_VLANTAG)
1812		cmd_type_len |= E1000_ADVTXD_DCMD_VLE;
1813
1814retry:
1815	m_head = *m_headp;
1816	do_tso = ((m_head->m_pkthdr.csum_flags & CSUM_TSO) != 0);
1817	hdrlen = ehdrlen = poff = 0;
1818
	/*
	 * Intel recommends that the entire IP/TCP header length reside in a
	 * single buffer. If multiple descriptors are used to describe the IP
	 * and TCP header, each descriptor should describe one or more
	 * complete headers; descriptors referencing only parts of headers
	 * are not supported. If all layer headers are not coalesced into
	 * a single buffer, each buffer should not cross a 4KB boundary,
	 * or be larger than the maximum read request size.
	 * The controller also requires modifying the IP/TCP headers for TSO
	 * to work, so we first obtain a writable mbuf chain and then coalesce
	 * the ethernet/IP/TCP headers into a single buffer to satisfy the
	 * controller. This also simplifies IP/TCP/UDP checksum offloading,
	 * which has similar restrictions.
	 */
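	/*
	 * Worked example (illustrative values): for an untagged TCP/IPv4
	 * frame, ehdrlen is 14 (Ethernet header), a minimal 20-byte IP
	 * header yields poff = 34, and a minimal 20-byte TCP header yields
	 * hdrlen = 54; the m_pullup() calls below ensure those 54 header
	 * bytes all reside in the first mbuf of the chain.
	 */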
1833	if (do_tso || m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD) {
1834		if (do_tso || (m_head->m_next != NULL &&
1835		    m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD)) {
1836			if (M_WRITABLE(*m_headp) == 0) {
1837				m_head = m_dup(*m_headp, M_DONTWAIT);
1838				m_freem(*m_headp);
1839				if (m_head == NULL) {
1840					*m_headp = NULL;
1841					return (ENOBUFS);
1842				}
1843				*m_headp = m_head;
1844			}
1845		}
1846		/*
		 * Assume IPv4; we don't have TSO/checksum offload support
1848		 * for IPv6 yet.
1849		 */
1850		ehdrlen = sizeof(struct ether_header);
1851		m_head = m_pullup(m_head, ehdrlen);
1852		if (m_head == NULL) {
1853			*m_headp = NULL;
1854			return (ENOBUFS);
1855		}
1856		eh = mtod(m_head, struct ether_vlan_header *);
1857		if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
1858			ehdrlen = sizeof(struct ether_vlan_header);
1859			m_head = m_pullup(m_head, ehdrlen);
1860			if (m_head == NULL) {
1861				*m_headp = NULL;
1862				return (ENOBUFS);
1863			}
1864		}
1865		m_head = m_pullup(m_head, ehdrlen + sizeof(struct ip));
1866		if (m_head == NULL) {
1867			*m_headp = NULL;
1868			return (ENOBUFS);
1869		}
1870		ip = (struct ip *)(mtod(m_head, char *) + ehdrlen);
1871		poff = ehdrlen + (ip->ip_hl << 2);
1872		if (do_tso) {
1873			m_head = m_pullup(m_head, poff + sizeof(struct tcphdr));
1874			if (m_head == NULL) {
1875				*m_headp = NULL;
1876				return (ENOBUFS);
1877			}
			/*
			 * The TSO pseudo-header checksum must not include the
			 * TCP payload length, so recompute the checksum here
			 * into the form the hardware expects, as required by
			 * Microsoft's Large Send Offload specification.
			 */
1884			th = (struct tcphdr *)(mtod(m_head, char *) + poff);
1885			th->th_sum = in_pseudo(ip->ip_src.s_addr,
1886			    ip->ip_dst.s_addr, htons(IPPROTO_TCP));
1887			/* Keep track of the full header length */
1888			hdrlen = poff + (th->th_off << 2);
1889		} else if (m_head->m_pkthdr.csum_flags & CSUM_TCP) {
1890			m_head = m_pullup(m_head, poff + sizeof(struct tcphdr));
1891			if (m_head == NULL) {
1892				*m_headp = NULL;
1893				return (ENOBUFS);
1894			}
1895			th = (struct tcphdr *)(mtod(m_head, char *) + poff);
1896			m_head = m_pullup(m_head, poff + (th->th_off << 2));
1897			if (m_head == NULL) {
1898				*m_headp = NULL;
1899				return (ENOBUFS);
1900			}
1901			ip = (struct ip *)(mtod(m_head, char *) + ehdrlen);
1902			th = (struct tcphdr *)(mtod(m_head, char *) + poff);
1903		} else if (m_head->m_pkthdr.csum_flags & CSUM_UDP) {
1904			m_head = m_pullup(m_head, poff + sizeof(struct udphdr));
1905			if (m_head == NULL) {
1906				*m_headp = NULL;
1907				return (ENOBUFS);
1908			}
1909			ip = (struct ip *)(mtod(m_head, char *) + ehdrlen);
1910		}
1911		*m_headp = m_head;
1912	}
1913
	/*
	 * Map the packet for DMA.
	 *
	 * Capture the first descriptor index; this descriptor will record
	 * the index of the EOP descriptor, which is the only one that
	 * gets a DONE bit writeback.
	 */
1922	first = txr->next_avail_desc;
1923	tx_buffer = &txr->tx_buffers[first];
1924	tx_buffer_mapped = tx_buffer;
1925	map = tx_buffer->map;
1926
1927	error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
1928	    *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
1929
1930	/*
1931	 * There are two types of errors we can (try) to handle:
1932	 * - EFBIG means the mbuf chain was too long and bus_dma ran
1933	 *   out of segments.  Defragment the mbuf chain and try again.
1934	 * - ENOMEM means bus_dma could not obtain enough bounce buffers
1935	 *   at this point in time.  Defer sending and try again later.
1936	 * All other errors, in particular EINVAL, are fatal and prevent the
1937	 * mbuf chain from ever going through.  Drop it and report error.
1938	 */
1939	if (error == EFBIG && remap) {
1940		struct mbuf *m;
1941
1942		m = m_defrag(*m_headp, M_DONTWAIT);
1943		if (m == NULL) {
1944			adapter->mbuf_defrag_failed++;
1945			m_freem(*m_headp);
1946			*m_headp = NULL;
1947			return (ENOBUFS);
1948		}
1949		*m_headp = m;
1950
1951		/* Try it again, but only once */
1952		remap = 0;
1953		goto retry;
1954	} else if (error == ENOMEM) {
1955		adapter->no_tx_dma_setup++;
1956		return (error);
1957	} else if (error != 0) {
1958		adapter->no_tx_dma_setup++;
1959		m_freem(*m_headp);
1960		*m_headp = NULL;
1961		return (error);
1962	}
1963
	/*
	** Make sure we don't overrun the ring; we need
	** nsegs descriptors plus one for the context
	** descriptor used for the offloads.
	*/
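	/*
	 * Illustrative arithmetic: a packet mapped into 3 DMA segments needs
	 * 3 data descriptors plus 1 context descriptor, so the check below
	 * only proceeds when tx_avail is at least nsegs + 3 (here, 6),
	 * leaving 2 descriptors in reserve.
	 */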
	if ((nsegs + 1) > (txr->tx_avail - 2)) {
		txr->no_desc_avail++;
		bus_dmamap_unload(txr->txtag, map);
		return (ENOBUFS);
	}
1975	m_head = *m_headp;
1976
	/* Do hardware assists:
	 * Set up the context descriptor, used
	 * when any hardware offload is done.
	 * This includes CSUM, VLAN, and TSO.
	 * It will use the first descriptor.
	 */
1983
1984	if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
1985		if (igb_tso_setup(txr, m_head, ehdrlen, ip, th)) {
1986			cmd_type_len |= E1000_ADVTXD_DCMD_TSE;
1987			olinfo_status |= E1000_TXD_POPTS_IXSM << 8;
1988			olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
1989		} else
1990			return (ENXIO);
1991	} else if (igb_tx_ctx_setup(txr, m_head))
1992			olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
1993
1994	/* Calculate payload length */
1995	olinfo_status |= ((m_head->m_pkthdr.len - hdrlen)
1996	    << E1000_ADVTXD_PAYLEN_SHIFT);
1997
1998	/* 82575 needs the queue index added */
1999	if (adapter->hw.mac.type == e1000_82575)
2000		olinfo_status |= txr->me << 4;
2001
2002	/* Set up our transmit descriptors */
2003	i = txr->next_avail_desc;
2004	for (int j = 0; j < nsegs; j++) {
2005		bus_size_t seg_len;
2006		bus_addr_t seg_addr;
2007
2008		tx_buffer = &txr->tx_buffers[i];
2009		txd = (union e1000_adv_tx_desc *)&txr->tx_base[i];
2010		seg_addr = segs[j].ds_addr;
2011		seg_len  = segs[j].ds_len;
2012
2013		txd->read.buffer_addr = htole64(seg_addr);
2014		txd->read.cmd_type_len = htole32(cmd_type_len | seg_len);
2015		txd->read.olinfo_status = htole32(olinfo_status);
2016		last = i;
2017		if (++i == adapter->num_tx_desc)
2018			i = 0;
2019		tx_buffer->m_head = NULL;
2020		tx_buffer->next_eop = -1;
2021	}
2022
2023	txr->next_avail_desc = i;
2024	txr->tx_avail -= nsegs;
	tx_buffer->m_head = m_head;
2026
	/*
	** Here we swap the map so the last descriptor,
	** which gets the completion interrupt, has the
	** real map, and the first descriptor gets the
	** unused map from this descriptor.
	*/
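	/*
	 * Illustrative example: if the packet occupies descriptors 5..9, the
	 * DMA map that was loaded above (taken from slot 5, "first") is moved
	 * to slot 9 ("last"), where the mbuf pointer is stored and where the
	 * completion writeback is processed, while slot 9's idle map is
	 * parked in slot 5 for later reuse.
	 */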
2033	tx_buffer_mapped->map = tx_buffer->map;
2034	tx_buffer->map = map;
	bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);
2036
	/*
	 * Last Descriptor of Packet
	 * needs End Of Packet (EOP)
	 * and Report Status (RS)
	 */
	txd->read.cmd_type_len |=
	    htole32(E1000_ADVTXD_DCMD_EOP | E1000_ADVTXD_DCMD_RS);
	/*
	 * Record in the first buffer which descriptor
	 * will be written back.
	 */
2048	tx_buffer = &txr->tx_buffers[first];
2049	tx_buffer->next_eop = last;
2050	/* Update the watchdog time early and often */
2051	txr->watchdog_time = ticks;
2052
2053	/*
	 * Advance the Transmit Descriptor Tail (TDT); this tells the E1000
2055	 * that this frame is available to transmit.
2056	 */
2057	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
2058	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
2059	E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), i);
2060	++txr->tx_packets;
2061
2062	return (0);
2063}
2064static void
2065igb_set_promisc(struct adapter *adapter)
2066{
2067	struct ifnet	*ifp = adapter->ifp;
2068	struct e1000_hw *hw = &adapter->hw;
2069	u32		reg;
2070
2071	if (adapter->vf_ifp) {
2072		e1000_promisc_set_vf(hw, e1000_promisc_enabled);
2073		return;
2074	}
2075
2076	reg = E1000_READ_REG(hw, E1000_RCTL);
2077	if (ifp->if_flags & IFF_PROMISC) {
2078		reg |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
2079		E1000_WRITE_REG(hw, E1000_RCTL, reg);
2080	} else if (ifp->if_flags & IFF_ALLMULTI) {
2081		reg |= E1000_RCTL_MPE;
2082		reg &= ~E1000_RCTL_UPE;
2083		E1000_WRITE_REG(hw, E1000_RCTL, reg);
2084	}
2085}
2086
2087static void
2088igb_disable_promisc(struct adapter *adapter)
2089{
2090	struct e1000_hw *hw = &adapter->hw;
2091	u32		reg;
2092
2093	if (adapter->vf_ifp) {
2094		e1000_promisc_set_vf(hw, e1000_promisc_disabled);
2095		return;
2096	}
2097	reg = E1000_READ_REG(hw, E1000_RCTL);
2098	reg &=  (~E1000_RCTL_UPE);
2099	reg &=  (~E1000_RCTL_MPE);
2100	E1000_WRITE_REG(hw, E1000_RCTL, reg);
2101}
2102
2103
2104/*********************************************************************
2105 *  Multicast Update
2106 *
 *  This routine is called whenever the multicast address list is updated.
2108 *
2109 **********************************************************************/
2110
2111static void
2112igb_set_multi(struct adapter *adapter)
2113{
2114	struct ifnet	*ifp = adapter->ifp;
2115	struct ifmultiaddr *ifma;
2116	u32 reg_rctl = 0;
2117	u8  *mta;
2118
2119	int mcnt = 0;
2120
2121	IOCTL_DEBUGOUT("igb_set_multi: begin");
2122
2123	mta = adapter->mta;
2124	bzero(mta, sizeof(uint8_t) * ETH_ADDR_LEN *
2125	    MAX_NUM_MULTICAST_ADDRESSES);
2126
2127#if __FreeBSD_version < 800000
2128	IF_ADDR_LOCK(ifp);
2129#else
2130	if_maddr_rlock(ifp);
2131#endif
2132	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
2133		if (ifma->ifma_addr->sa_family != AF_LINK)
2134			continue;
2135
2136		if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
2137			break;
2138
2139		bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
2140		    &mta[mcnt * ETH_ADDR_LEN], ETH_ADDR_LEN);
2141		mcnt++;
2142	}
2143#if __FreeBSD_version < 800000
2144	IF_ADDR_UNLOCK(ifp);
2145#else
2146	if_maddr_runlock(ifp);
2147#endif
2148
2149	if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) {
2150		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2151		reg_rctl |= E1000_RCTL_MPE;
2152		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2153	} else
2154		e1000_update_mc_addr_list(&adapter->hw, mta, mcnt);
2155}
2156
2157
2158/*********************************************************************
2159 *  Timer routine:
2160 *  	This routine checks for link status,
2161 *	updates statistics, and does the watchdog.
2162 *
2163 **********************************************************************/
2164
2165static void
2166igb_local_timer(void *arg)
2167{
2168	struct adapter		*adapter = arg;
2169	device_t		dev = adapter->dev;
2170	struct ifnet		*ifp = adapter->ifp;
2171	struct tx_ring		*txr = adapter->tx_rings;
2172	struct igb_queue	*que = adapter->queues;
2173	int			hung = 0, busy = 0;
2174
2175
2176	IGB_CORE_LOCK_ASSERT(adapter);
2177
2178	igb_update_link_status(adapter);
2179	igb_update_stats_counters(adapter);
2180
	/*
	** Check the status of the TX queues:
	**	- central locked handling of OACTIVE
	**	- watchdog only if all queues show hung
	*/
2186	for (int i = 0; i < adapter->num_queues; i++, que++, txr++) {
2187		if ((txr->queue_status & IGB_QUEUE_HUNG) &&
2188		    (adapter->pause_frames == 0))
2189			++hung;
2190		if (txr->queue_status & IGB_QUEUE_DEPLETED)
2191			++busy;
2192		if ((txr->queue_status & IGB_QUEUE_IDLE) == 0)
2193			taskqueue_enqueue(que->tq, &que->que_task);
2194	}
2195	if (hung == adapter->num_queues)
2196		goto timeout;
2197	if (busy == adapter->num_queues)
2198		ifp->if_drv_flags |= IFF_DRV_OACTIVE;
2199	else if ((ifp->if_drv_flags & IFF_DRV_OACTIVE) &&
2200	    (busy < adapter->num_queues))
2201		ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
2202
2203	adapter->pause_frames = 0;
2204	callout_reset(&adapter->timer, hz, igb_local_timer, adapter);
2205#ifndef DEVICE_POLLING
2206	/* Schedule all queue interrupts - deadlock protection */
2207	E1000_WRITE_REG(&adapter->hw, E1000_EICS, adapter->que_mask);
2208#endif
2209	return;
2210
2211timeout:
2212	device_printf(adapter->dev, "Watchdog timeout -- resetting\n");
	device_printf(dev, "Queue(%d) tdh = %d, hw tdt = %d\n", txr->me,
	    E1000_READ_REG(&adapter->hw, E1000_TDH(txr->me)),
	    E1000_READ_REG(&adapter->hw, E1000_TDT(txr->me)));
	device_printf(dev, "TX(%d) desc avail = %d, "
	    "Next TX to Clean = %d\n",
	    txr->me, txr->tx_avail, txr->next_to_clean);
2219	adapter->ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2220	adapter->watchdog_events++;
2221	igb_init_locked(adapter);
2222}
2223
2224static void
2225igb_update_link_status(struct adapter *adapter)
2226{
2227	struct e1000_hw *hw = &adapter->hw;
2228	struct ifnet *ifp = adapter->ifp;
2229	device_t dev = adapter->dev;
2230	struct tx_ring *txr = adapter->tx_rings;
2231	u32 link_check, thstat, ctrl;
2232
2233	link_check = thstat = ctrl = 0;
2234
2235	/* Get the cached link value or read for real */
	switch (hw->phy.media_type) {
	case e1000_media_type_copper:
		if (hw->mac.get_link_status) {
			/* Do the work to read phy */
			e1000_check_for_link(hw);
			link_check = !hw->mac.get_link_status;
		} else
			link_check = TRUE;
		break;
	case e1000_media_type_fiber:
		e1000_check_for_link(hw);
		link_check = (E1000_READ_REG(hw, E1000_STATUS) &
		    E1000_STATUS_LU);
		break;
	case e1000_media_type_internal_serdes:
		e1000_check_for_link(hw);
		link_check = adapter->hw.mac.serdes_has_link;
		break;
	/* VF device is type_unknown */
	case e1000_media_type_unknown:
		e1000_check_for_link(hw);
		link_check = !hw->mac.get_link_status;
		/* Fall thru */
	default:
		break;
	}
2262
2263	/* Check for thermal downshift or shutdown */
2264	if (hw->mac.type == e1000_i350) {
2265		thstat = E1000_READ_REG(hw, E1000_THSTAT);
2266		ctrl = E1000_READ_REG(hw, E1000_CTRL_EXT);
2267	}
2268
2269	/* Now we check if a transition has happened */
2270	if (link_check && (adapter->link_active == 0)) {
2271		e1000_get_speed_and_duplex(&adapter->hw,
2272		    &adapter->link_speed, &adapter->link_duplex);
2273		if (bootverbose)
2274			device_printf(dev, "Link is up %d Mbps %s\n",
2275			    adapter->link_speed,
2276			    ((adapter->link_duplex == FULL_DUPLEX) ?
2277			    "Full Duplex" : "Half Duplex"));
2278		adapter->link_active = 1;
2279		ifp->if_baudrate = adapter->link_speed * 1000000;
2280		if ((ctrl & E1000_CTRL_EXT_LINK_MODE_GMII) &&
2281		    (thstat & E1000_THSTAT_LINK_THROTTLE))
2282			device_printf(dev, "Link: thermal downshift\n");
2283		/* This can sleep */
2284		if_link_state_change(ifp, LINK_STATE_UP);
2285	} else if (!link_check && (adapter->link_active == 1)) {
2286		ifp->if_baudrate = adapter->link_speed = 0;
2287		adapter->link_duplex = 0;
2288		if (bootverbose)
2289			device_printf(dev, "Link is Down\n");
2290		if ((ctrl & E1000_CTRL_EXT_LINK_MODE_GMII) &&
2291		    (thstat & E1000_THSTAT_PWR_DOWN))
2292			device_printf(dev, "Link: thermal shutdown\n");
2293		adapter->link_active = 0;
2294		/* This can sleep */
2295		if_link_state_change(ifp, LINK_STATE_DOWN);
2296		/* Reset queue state */
2297		for (int i = 0; i < adapter->num_queues; i++, txr++)
2298			txr->queue_status = IGB_QUEUE_IDLE;
2299	}
2300}
2301
2302/*********************************************************************
2303 *
2304 *  This routine disables all traffic on the adapter by issuing a
2305 *  global reset on the MAC and deallocates TX/RX buffers.
2306 *
2307 **********************************************************************/
2308
2309static void
2310igb_stop(void *arg)
2311{
2312	struct adapter	*adapter = arg;
2313	struct ifnet	*ifp = adapter->ifp;
2314	struct tx_ring *txr = adapter->tx_rings;
2315
2316	IGB_CORE_LOCK_ASSERT(adapter);
2317
2318	INIT_DEBUGOUT("igb_stop: begin");
2319
2320	igb_disable_intr(adapter);
2321
2322	callout_stop(&adapter->timer);
2323
2324	/* Tell the stack that the interface is no longer active */
2325	ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2326	ifp->if_drv_flags |= IFF_DRV_OACTIVE;
2327
2328	/* Disarm watchdog timer. */
2329	for (int i = 0; i < adapter->num_queues; i++, txr++) {
2330		IGB_TX_LOCK(txr);
2331		txr->queue_status = IGB_QUEUE_IDLE;
2332		IGB_TX_UNLOCK(txr);
2333	}
2334
2335	e1000_reset_hw(&adapter->hw);
2336	E1000_WRITE_REG(&adapter->hw, E1000_WUC, 0);
2337
2338	e1000_led_off(&adapter->hw);
2339	e1000_cleanup_led(&adapter->hw);
2340}
2341
2342
2343/*********************************************************************
2344 *
2345 *  Determine hardware revision.
2346 *
2347 **********************************************************************/
2348static void
2349igb_identify_hardware(struct adapter *adapter)
2350{
2351	device_t dev = adapter->dev;
2352
2353	/* Make sure our PCI config space has the necessary stuff set */
2354	adapter->hw.bus.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2);
2355	if (!((adapter->hw.bus.pci_cmd_word & PCIM_CMD_BUSMASTEREN) &&
2356	    (adapter->hw.bus.pci_cmd_word & PCIM_CMD_MEMEN))) {
2357		INIT_DEBUGOUT("Memory Access and/or Bus Master "
2358		    "bits were not set!\n");
2359		adapter->hw.bus.pci_cmd_word |=
2360		(PCIM_CMD_BUSMASTEREN | PCIM_CMD_MEMEN);
2361		pci_write_config(dev, PCIR_COMMAND,
2362		    adapter->hw.bus.pci_cmd_word, 2);
2363	}
2364
2365	/* Save off the information about this board */
2366	adapter->hw.vendor_id = pci_get_vendor(dev);
2367	adapter->hw.device_id = pci_get_device(dev);
2368	adapter->hw.revision_id = pci_read_config(dev, PCIR_REVID, 1);
2369	adapter->hw.subsystem_vendor_id =
2370	    pci_read_config(dev, PCIR_SUBVEND_0, 2);
2371	adapter->hw.subsystem_device_id =
2372	    pci_read_config(dev, PCIR_SUBDEV_0, 2);
2373
2374	/* Set MAC type early for PCI setup */
2375	e1000_set_mac_type(&adapter->hw);
2376
2377	/* Are we a VF device? */
2378	if ((adapter->hw.mac.type == e1000_vfadapt) ||
2379	    (adapter->hw.mac.type == e1000_vfadapt_i350))
2380		adapter->vf_ifp = 1;
2381	else
2382		adapter->vf_ifp = 0;
2383}
2384
2385static int
2386igb_allocate_pci_resources(struct adapter *adapter)
2387{
2388	device_t	dev = adapter->dev;
2389	int		rid;
2390
2391	rid = PCIR_BAR(0);
2392	adapter->pci_mem = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
2393	    &rid, RF_ACTIVE);
2394	if (adapter->pci_mem == NULL) {
2395		device_printf(dev, "Unable to allocate bus resource: memory\n");
2396		return (ENXIO);
2397	}
2398	adapter->osdep.mem_bus_space_tag =
2399	    rman_get_bustag(adapter->pci_mem);
2400	adapter->osdep.mem_bus_space_handle =
2401	    rman_get_bushandle(adapter->pci_mem);
2402	adapter->hw.hw_addr = (u8 *)&adapter->osdep.mem_bus_space_handle;
2403
2404	adapter->num_queues = 1; /* Defaults for Legacy or MSI */
2405
	/* This will set up either MSI-X or MSI */
2407	adapter->msix = igb_setup_msix(adapter);
2408	adapter->hw.back = &adapter->osdep;
2409
2410	return (0);
2411}
2412
2413/*********************************************************************
2414 *
2415 *  Setup the Legacy or MSI Interrupt handler
2416 *
2417 **********************************************************************/
2418static int
2419igb_allocate_legacy(struct adapter *adapter)
2420{
2421	device_t		dev = adapter->dev;
2422	struct igb_queue	*que = adapter->queues;
2423	struct tx_ring		*txr = adapter->tx_rings;
2424	int			error, rid = 0;
2425
2426	/* Turn off all interrupts */
2427	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2428
2429	/* MSI RID is 1 */
2430	if (adapter->msix == 1)
2431		rid = 1;
2432
2433	/* We allocate a single interrupt resource */
2434	adapter->res = bus_alloc_resource_any(dev,
2435	    SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2436	if (adapter->res == NULL) {
2437		device_printf(dev, "Unable to allocate bus resource: "
2438		    "interrupt\n");
2439		return (ENXIO);
2440	}
2441
2442#if __FreeBSD_version >= 800000
2443	TASK_INIT(&txr->txq_task, 0, igb_deferred_mq_start, txr);
2444#endif
2445
2446	/*
2447	 * Try allocating a fast interrupt and the associated deferred
2448	 * processing contexts.
2449	 */
2450	TASK_INIT(&que->que_task, 0, igb_handle_que, que);
2451	/* Make tasklet for deferred link handling */
2452	TASK_INIT(&adapter->link_task, 0, igb_handle_link, adapter);
2453	que->tq = taskqueue_create_fast("igb_taskq", M_NOWAIT,
2454	    taskqueue_thread_enqueue, &que->tq);
2455	taskqueue_start_threads(&que->tq, 1, PI_NET, "%s taskq",
2456	    device_get_nameunit(adapter->dev));
2457	if ((error = bus_setup_intr(dev, adapter->res,
2458	    INTR_TYPE_NET | INTR_MPSAFE, igb_irq_fast, NULL,
2459	    adapter, &adapter->tag)) != 0) {
2460		device_printf(dev, "Failed to register fast interrupt "
2461			    "handler: %d\n", error);
2462		taskqueue_free(que->tq);
2463		que->tq = NULL;
2464		return (error);
2465	}
2466
2467	return (0);
2468}
2469
2470
2471/*********************************************************************
2472 *
2473 *  Setup the MSIX Queue Interrupt handlers:
2474 *
2475 **********************************************************************/
2476static int
2477igb_allocate_msix(struct adapter *adapter)
2478{
2479	device_t		dev = adapter->dev;
2480	struct igb_queue	*que = adapter->queues;
2481	int			error, rid, vector = 0;
2482
2483	/* Be sure to start with all interrupts disabled */
2484	E1000_WRITE_REG(&adapter->hw, E1000_IMC, ~0);
2485	E1000_WRITE_FLUSH(&adapter->hw);
2486
2487	for (int i = 0; i < adapter->num_queues; i++, vector++, que++) {
		rid = vector + 1;
2489		que->res = bus_alloc_resource_any(dev,
2490		    SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2491		if (que->res == NULL) {
2492			device_printf(dev,
2493			    "Unable to allocate bus resource: "
2494			    "MSIX Queue Interrupt\n");
2495			return (ENXIO);
2496		}
2497		error = bus_setup_intr(dev, que->res,
2498	    	    INTR_TYPE_NET | INTR_MPSAFE, NULL,
2499		    igb_msix_que, que, &que->tag);
2500		if (error) {
2501			que->res = NULL;
2502			device_printf(dev, "Failed to register Queue handler");
2503			return (error);
2504		}
2505#if __FreeBSD_version >= 800504
2506		bus_describe_intr(dev, que->res, que->tag, "que %d", i);
2507#endif
2508		que->msix = vector;
2509		if (adapter->hw.mac.type == e1000_82575)
2510			que->eims = E1000_EICR_TX_QUEUE0 << i;
2511		else
2512			que->eims = 1 << vector;
2513		/*
2514		** Bind the msix vector, and thus the
2515		** rings to the corresponding cpu.
2516		*/
2517		if (adapter->num_queues > 1) {
2518			if (igb_last_bind_cpu < 0)
2519				igb_last_bind_cpu = CPU_FIRST();
2520			bus_bind_intr(dev, que->res, igb_last_bind_cpu);
2521			device_printf(dev,
2522				"Bound queue %d to cpu %d\n",
				i, igb_last_bind_cpu);
2524			igb_last_bind_cpu = CPU_NEXT(igb_last_bind_cpu);
2525			igb_last_bind_cpu = igb_last_bind_cpu % mp_ncpus;
2526		}
2527#if __FreeBSD_version >= 800000
2528		TASK_INIT(&que->txr->txq_task, 0, igb_deferred_mq_start,
2529		    que->txr);
2530#endif
2531		/* Make tasklet for deferred handling */
2532		TASK_INIT(&que->que_task, 0, igb_handle_que, que);
2533		que->tq = taskqueue_create("igb_que", M_NOWAIT,
2534		    taskqueue_thread_enqueue, &que->tq);
2535		taskqueue_start_threads(&que->tq, 1, PI_NET, "%s que",
2536		    device_get_nameunit(adapter->dev));
2537	}
2538
2539	/* And Link */
2540	rid = vector + 1;
2541	adapter->res = bus_alloc_resource_any(dev,
2542	    SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2543	if (adapter->res == NULL) {
2544		device_printf(dev,
2545		    "Unable to allocate bus resource: "
2546		    "MSIX Link Interrupt\n");
2547		return (ENXIO);
2548	}
2549	if ((error = bus_setup_intr(dev, adapter->res,
2550	    INTR_TYPE_NET | INTR_MPSAFE, NULL,
2551	    igb_msix_link, adapter, &adapter->tag)) != 0) {
2552		device_printf(dev, "Failed to register Link handler");
2553		return (error);
2554	}
2555#if __FreeBSD_version >= 800504
2556	bus_describe_intr(dev, adapter->res, adapter->tag, "link");
2557#endif
2558	adapter->linkvec = vector;
2559
2560	return (0);
2561}
2562
2563
2564static void
2565igb_configure_queues(struct adapter *adapter)
2566{
2567	struct	e1000_hw	*hw = &adapter->hw;
2568	struct	igb_queue	*que;
2569	u32			tmp, ivar = 0, newitr = 0;
2570
2571	/* First turn on RSS capability */
2572	if (adapter->hw.mac.type != e1000_82575)
2573		E1000_WRITE_REG(hw, E1000_GPIE,
2574		    E1000_GPIE_MSIX_MODE | E1000_GPIE_EIAME |
2575		    E1000_GPIE_PBA | E1000_GPIE_NSICR);
2576
2577	/* Turn on MSIX */
2578	switch (adapter->hw.mac.type) {
2579	case e1000_82580:
2580	case e1000_i350:
2581	case e1000_i210:
2582	case e1000_i211:
2583	case e1000_vfadapt:
2584	case e1000_vfadapt_i350:
2585		/* RX entries */
2586		for (int i = 0; i < adapter->num_queues; i++) {
2587			u32 index = i >> 1;
2588			ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2589			que = &adapter->queues[i];
2590			if (i & 1) {
2591				ivar &= 0xFF00FFFF;
2592				ivar |= (que->msix | E1000_IVAR_VALID) << 16;
2593			} else {
2594				ivar &= 0xFFFFFF00;
2595				ivar |= que->msix | E1000_IVAR_VALID;
2596			}
2597			E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2598		}
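		/*
		 * Layout note (worked example): each IVAR register covers two
		 * queues, so with the shift logic above queue 0's RX vector
		 * lands in byte 0 of IVAR0 and queue 1's RX vector in byte 2,
		 * while the matching TX vectors, programmed in the loop below,
		 * occupy bytes 1 and 3.
		 */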
2599		/* TX entries */
2600		for (int i = 0; i < adapter->num_queues; i++) {
2601			u32 index = i >> 1;
2602			ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2603			que = &adapter->queues[i];
2604			if (i & 1) {
2605				ivar &= 0x00FFFFFF;
2606				ivar |= (que->msix | E1000_IVAR_VALID) << 24;
2607			} else {
2608				ivar &= 0xFFFF00FF;
2609				ivar |= (que->msix | E1000_IVAR_VALID) << 8;
2610			}
2611			E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2612			adapter->que_mask |= que->eims;
2613		}
2614
2615		/* And for the link interrupt */
2616		ivar = (adapter->linkvec | E1000_IVAR_VALID) << 8;
2617		adapter->link_mask = 1 << adapter->linkvec;
2618		E1000_WRITE_REG(hw, E1000_IVAR_MISC, ivar);
2619		break;
2620	case e1000_82576:
2621		/* RX entries */
2622		for (int i = 0; i < adapter->num_queues; i++) {
2623			u32 index = i & 0x7; /* Each IVAR has two entries */
2624			ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2625			que = &adapter->queues[i];
2626			if (i < 8) {
2627				ivar &= 0xFFFFFF00;
2628				ivar |= que->msix | E1000_IVAR_VALID;
2629			} else {
2630				ivar &= 0xFF00FFFF;
2631				ivar |= (que->msix | E1000_IVAR_VALID) << 16;
2632			}
2633			E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2634			adapter->que_mask |= que->eims;
2635		}
2636		/* TX entries */
2637		for (int i = 0; i < adapter->num_queues; i++) {
2638			u32 index = i & 0x7; /* Each IVAR has two entries */
2639			ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2640			que = &adapter->queues[i];
2641			if (i < 8) {
2642				ivar &= 0xFFFF00FF;
2643				ivar |= (que->msix | E1000_IVAR_VALID) << 8;
2644			} else {
2645				ivar &= 0x00FFFFFF;
2646				ivar |= (que->msix | E1000_IVAR_VALID) << 24;
2647			}
2648			E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2649			adapter->que_mask |= que->eims;
2650		}
2651
2652		/* And for the link interrupt */
2653		ivar = (adapter->linkvec | E1000_IVAR_VALID) << 8;
2654		adapter->link_mask = 1 << adapter->linkvec;
2655		E1000_WRITE_REG(hw, E1000_IVAR_MISC, ivar);
2656		break;
2657
2658	case e1000_82575:
		/* Enable MSI-X support */
		tmp = E1000_READ_REG(hw, E1000_CTRL_EXT);
		tmp |= E1000_CTRL_EXT_PBA_CLR;
		/* Auto-Mask interrupts upon ICR read. */
		tmp |= E1000_CTRL_EXT_EIAME;
		tmp |= E1000_CTRL_EXT_IRCA;
		E1000_WRITE_REG(hw, E1000_CTRL_EXT, tmp);
2666
2667		/* Queues */
2668		for (int i = 0; i < adapter->num_queues; i++) {
2669			que = &adapter->queues[i];
2670			tmp = E1000_EICR_RX_QUEUE0 << i;
2671			tmp |= E1000_EICR_TX_QUEUE0 << i;
2672			que->eims = tmp;
2673			E1000_WRITE_REG_ARRAY(hw, E1000_MSIXBM(0),
2674			    i, que->eims);
2675			adapter->que_mask |= que->eims;
2676		}
2677
2678		/* Link */
2679		E1000_WRITE_REG(hw, E1000_MSIXBM(adapter->linkvec),
2680		    E1000_EIMS_OTHER);
2681		adapter->link_mask |= E1000_EIMS_OTHER;
2682	default:
2683		break;
2684	}
2685
2686	/* Set the starting interrupt rate */
2687	if (igb_max_interrupt_rate > 0)
2688		newitr = (4000000 / igb_max_interrupt_rate) & 0x7FFC;
2689
	if (hw->mac.type == e1000_82575)
		newitr |= newitr << 16;
	else
		newitr |= E1000_EITR_CNT_IGNR;
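	/*
	 * Worked example (hypothetical tunable value): with
	 * igb_max_interrupt_rate = 8000, newitr = 4000000 / 8000 = 500,
	 * masked to a multiple of 4 by 0x7FFC; the 82575 mirrors the value
	 * into the upper 16 bits, while later MACs set the counter-ignore
	 * bit instead.
	 */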
2694
2695	for (int i = 0; i < adapter->num_queues; i++) {
2696		que = &adapter->queues[i];
2697		E1000_WRITE_REG(hw, E1000_EITR(que->msix), newitr);
2698	}
2699
2700	return;
2701}
2702
2703
2704static void
2705igb_free_pci_resources(struct adapter *adapter)
2706{
2707	struct		igb_queue *que = adapter->queues;
2708	device_t	dev = adapter->dev;
2709	int		rid;
2710
	/*
	** There is a slight possibility of a failure mode
	** in attach that will result in entering this function
	** before interrupt resources have been initialized, and
	** in that case we do not want to execute the loops below.
	** We can detect this reliably by checking the state of
	** the adapter's res pointer.
	*/
2719	if (adapter->res == NULL)
2720		goto mem;
2721
2722	/*
2723	 * First release all the interrupt resources:
2724	 */
2725	for (int i = 0; i < adapter->num_queues; i++, que++) {
2726		rid = que->msix + 1;
2727		if (que->tag != NULL) {
2728			bus_teardown_intr(dev, que->res, que->tag);
2729			que->tag = NULL;
2730		}
2731		if (que->res != NULL)
2732			bus_release_resource(dev,
2733			    SYS_RES_IRQ, rid, que->res);
2734	}
2735
2736	/* Clean the Legacy or Link interrupt last */
2737	if (adapter->linkvec) /* we are doing MSIX */
2738		rid = adapter->linkvec + 1;
2739	else
		rid = (adapter->msix != 0) ? 1 : 0;
2741
2742	que = adapter->queues;
2743	if (adapter->tag != NULL) {
2744		taskqueue_drain(que->tq, &adapter->link_task);
2745		bus_teardown_intr(dev, adapter->res, adapter->tag);
2746		adapter->tag = NULL;
2747	}
2748	if (adapter->res != NULL)
2749		bus_release_resource(dev, SYS_RES_IRQ, rid, adapter->res);
2750
2751	for (int i = 0; i < adapter->num_queues; i++, que++) {
2752		if (que->tq != NULL) {
2753#if __FreeBSD_version >= 800000
2754			taskqueue_drain(que->tq, &que->txr->txq_task);
2755#endif
2756			taskqueue_drain(que->tq, &que->que_task);
2757			taskqueue_free(que->tq);
2758		}
2759	}
2760mem:
2761	if (adapter->msix)
2762		pci_release_msi(dev);
2763
2764	if (adapter->msix_mem != NULL)
2765		bus_release_resource(dev, SYS_RES_MEMORY,
2766		    PCIR_BAR(IGB_MSIX_BAR), adapter->msix_mem);
2767
2768	if (adapter->pci_mem != NULL)
2769		bus_release_resource(dev, SYS_RES_MEMORY,
2770		    PCIR_BAR(0), adapter->pci_mem);
2771
2772}
2773
2774/*
 * Set up either MSI-X or MSI
2776 */
2777static int
2778igb_setup_msix(struct adapter *adapter)
2779{
2780	device_t dev = adapter->dev;
2781	int rid, want, queues, msgs, maxqueues;
2782
2783	/* tuneable override */
2784	if (igb_enable_msix == 0)
2785		goto msi;
2786
2787	/* First try MSI/X */
2788	rid = PCIR_BAR(IGB_MSIX_BAR);
2789	adapter->msix_mem = bus_alloc_resource_any(dev,
2790	    SYS_RES_MEMORY, &rid, RF_ACTIVE);
	if (!adapter->msix_mem) {
		/* May not be enabled */
		device_printf(adapter->dev,
		    "Unable to map MSIX table\n");
2795		goto msi;
2796	}
2797
2798	msgs = pci_msix_count(dev);
2799	if (msgs == 0) { /* system has msix disabled */
2800		bus_release_resource(dev, SYS_RES_MEMORY,
2801		    PCIR_BAR(IGB_MSIX_BAR), adapter->msix_mem);
2802		adapter->msix_mem = NULL;
2803		goto msi;
2804	}
2805
2806	/* Figure out a reasonable auto config value */
2807	queues = (mp_ncpus > (msgs-1)) ? (msgs-1) : mp_ncpus;
2808
2809	/* Manual override */
2810	if (igb_num_queues != 0)
2811		queues = igb_num_queues;
2812
2813	/* Sanity check based on HW */
2814	switch (adapter->hw.mac.type) {
2815		case e1000_82575:
2816			maxqueues = 4;
2817			break;
2818		case e1000_82576:
2819		case e1000_82580:
2820		case e1000_i350:
2821			maxqueues = 8;
2822			break;
2823		case e1000_i210:
2824			maxqueues = 4;
2825			break;
2826		case e1000_i211:
2827			maxqueues = 2;
2828			break;
2829		default:  /* VF interfaces */
2830			maxqueues = 1;
2831			break;
2832	}
2833	if (queues > maxqueues)
2834		queues = maxqueues;
2835
2836	/*
	** One vector (RX/TX pair) per queue,
	** plus one additional vector for the link interrupt
2839	*/
2840	want = queues + 1;
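	/*
	 * Example (hypothetical counts): with 4 queues this asks for 5
	 * vectors, one RX/TX pair per queue plus the link vector; if the
	 * device exposes fewer MSI-X messages than that, the code below
	 * reports the problem and gives up on MSI-X rather than running
	 * with a partial vector set.
	 */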
2841	if (msgs >= want)
2842		msgs = want;
2843	else {
		device_printf(adapter->dev,
		    "MSIX Configuration Problem, "
		    "%d vectors configured, but %d vectors wanted!\n",
		    msgs, want);
2848		return (0);
2849	}
	if (msgs && pci_alloc_msix(dev, &msgs) == 0) {
		device_printf(adapter->dev,
		    "Using MSIX interrupts with %d vectors\n", msgs);
2853		adapter->num_queues = queues;
2854		return (msgs);
2855	}
2856msi:
	msgs = pci_msi_count(dev);
	if (msgs == 1 && pci_alloc_msi(dev, &msgs) == 0) {
		device_printf(adapter->dev, "Using MSI interrupt\n");
2860		return (msgs);
2861	}
2862	return (0);
2863}
2864
2865/*********************************************************************
2866 *
 *  Set up a fresh starting state
2868 *
2869 **********************************************************************/
2870static void
2871igb_reset(struct adapter *adapter)
2872{
2873	device_t	dev = adapter->dev;
2874	struct e1000_hw *hw = &adapter->hw;
2875	struct e1000_fc_info *fc = &hw->fc;
2876	struct ifnet	*ifp = adapter->ifp;
2877	u32		pba = 0;
2878	u16		hwm;
2879
2880	INIT_DEBUGOUT("igb_reset: begin");
2881
2882	/* Let the firmware know the OS is in control */
2883	igb_get_hw_control(adapter);
2884
2885	/*
2886	 * Packet Buffer Allocation (PBA)
	 * Writing PBA sets the receive portion of the buffer;
	 * the remainder is used for the transmit buffer.
2889	 */
2890	switch (hw->mac.type) {
2891	case e1000_82575:
2892		pba = E1000_PBA_32K;
2893		break;
2894	case e1000_82576:
2895	case e1000_vfadapt:
2896		pba = E1000_READ_REG(hw, E1000_RXPBS);
2897		pba &= E1000_RXPBS_SIZE_MASK_82576;
2898		break;
2899	case e1000_82580:
2900	case e1000_i350:
2901	case e1000_vfadapt_i350:
2902		pba = E1000_READ_REG(hw, E1000_RXPBS);
2903		pba = e1000_rxpbs_adjust_82580(pba);
2904		break;
2905	case e1000_i210:
2906	case e1000_i211:
2907		pba = E1000_PBA_34K;
2908	default:
2909		break;
2910	}
2911
2912	/* Special needs in case of Jumbo frames */
2913	if ((hw->mac.type == e1000_82575) && (ifp->if_mtu > ETHERMTU)) {
2914		u32 tx_space, min_tx, min_rx;
2915		pba = E1000_READ_REG(hw, E1000_PBA);
2916		tx_space = pba >> 16;
2917		pba &= 0xffff;
2918		min_tx = (adapter->max_frame_size +
2919		    sizeof(struct e1000_tx_desc) - ETHERNET_FCS_SIZE) * 2;
2920		min_tx = roundup2(min_tx, 1024);
2921		min_tx >>= 10;
		min_rx = adapter->max_frame_size;
		min_rx = roundup2(min_rx, 1024);
		min_rx >>= 10;
2925		if (tx_space < min_tx &&
2926		    ((min_tx - tx_space) < pba)) {
2927			pba = pba - (min_tx - tx_space);
			/*
			 * if short on rx space, rx wins
			 * and must trump tx adjustment
			 */
			if (pba < min_rx)
				pba = min_rx;
2934		}
2935		E1000_WRITE_REG(hw, E1000_PBA, pba);
2936	}
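	/*
	 * Illustrative arithmetic (hypothetical jumbo setup): with a
	 * 9018-byte max frame, min_tx is 2 * (9018 + 16 - 4) rounded up to
	 * 1KB units, i.e. 18 KB, and min_rx rounds up to 9 KB; if the
	 * default TX allocation falls short, the difference is taken from
	 * the RX portion, but never below min_rx.
	 */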
2937
	INIT_DEBUGOUT1("igb_init: pba=%dK", pba);
2939
2940	/*
2941	 * These parameters control the automatic generation (Tx) and
2942	 * response (Rx) to Ethernet PAUSE frames.
2943	 * - High water mark should allow for at least two frames to be
2944	 *   received after sending an XOFF.
2945	 * - Low water mark works best when it is very near the high water mark.
2946	 *   This allows the receiver to restart by sending XON when it has
2947	 *   drained a bit.
2948	 */
2949	hwm = min(((pba << 10) * 9 / 10),
2950	    ((pba << 10) - 2 * adapter->max_frame_size));
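	/*
	 * Worked example (hypothetical sizes): with pba = 32 (KB) and a
	 * 1522-byte max frame, 90% of the buffer is 29491 bytes and
	 * (32768 - 2 * 1522) is 29724 bytes, so hwm = 29491; the low water
	 * mark then sits 8 or 16 bytes below the aligned high water mark.
	 */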
2951
2952	if (hw->mac.type < e1000_82576) {
2953		fc->high_water = hwm & 0xFFF8;  /* 8-byte granularity */
2954		fc->low_water = fc->high_water - 8;
2955	} else {
2956		fc->high_water = hwm & 0xFFF0;  /* 16-byte granularity */
2957		fc->low_water = fc->high_water - 16;
2958	}
2959
2960	fc->pause_time = IGB_FC_PAUSE_TIME;
2961	fc->send_xon = TRUE;
2962	if (adapter->fc)
2963		fc->requested_mode = adapter->fc;
2964	else
2965		fc->requested_mode = e1000_fc_default;
2966
2967	/* Issue a global reset */
2968	e1000_reset_hw(hw);
2969	E1000_WRITE_REG(hw, E1000_WUC, 0);
2970
2971	if (e1000_init_hw(hw) < 0)
2972		device_printf(dev, "Hardware Initialization Failed\n");
2973
2974	/* Setup DMA Coalescing */
2975	if ((hw->mac.type > e1000_82580) &&
2976	    (hw->mac.type != e1000_i211)) {
2977		u32 dmac;
2978		u32 reg = ~E1000_DMACR_DMAC_EN;
2979
2980		if (adapter->dmac == 0) { /* Disabling it */
2981			E1000_WRITE_REG(hw, E1000_DMACR, reg);
2982			goto reset_out;
2983		}
2984
2985		/* Set starting thresholds */
2986		E1000_WRITE_REG(hw, E1000_DMCTXTH, 0);
2987		E1000_WRITE_REG(hw, E1000_DMCRTRH, 0);
2988
2989		hwm = 64 * pba - adapter->max_frame_size / 16;
2990		if (hwm < 64 * (pba - 6))
2991			hwm = 64 * (pba - 6);
2992		reg = E1000_READ_REG(hw, E1000_FCRTC);
2993		reg &= ~E1000_FCRTC_RTH_COAL_MASK;
2994		reg |= ((hwm << E1000_FCRTC_RTH_COAL_SHIFT)
2995		    & E1000_FCRTC_RTH_COAL_MASK);
2996		E1000_WRITE_REG(hw, E1000_FCRTC, reg);
2997
2998
2999		dmac = pba - adapter->max_frame_size / 512;
3000		if (dmac < pba - 10)
3001			dmac = pba - 10;
3002		reg = E1000_READ_REG(hw, E1000_DMACR);
3003		reg &= ~E1000_DMACR_DMACTHR_MASK;
		reg |= ((dmac << E1000_DMACR_DMACTHR_SHIFT)
3005		    & E1000_DMACR_DMACTHR_MASK);
		/* transition to L0s or L1 if available */
3007		reg |= (E1000_DMACR_DMAC_EN | E1000_DMACR_DMAC_LX_MASK);
3008		/* timer = value in adapter->dmac in 32usec intervals */
3009		reg |= (adapter->dmac >> 5);
3010		E1000_WRITE_REG(hw, E1000_DMACR, reg);
3011
3012		/* Set the interval before transition */
3013		reg = E1000_READ_REG(hw, E1000_DMCTLX);
3014		reg |= 0x80000004;
3015		E1000_WRITE_REG(hw, E1000_DMCTLX, reg);
3016
3017		/* free space in tx packet buffer to wake from DMA coal */
3018		E1000_WRITE_REG(hw, E1000_DMCTXTH,
3019		    (20480 - (2 * adapter->max_frame_size)) >> 6);
3020
3021		/* make low power state decision controlled by DMA coal */
3022		reg = E1000_READ_REG(hw, E1000_PCIEMISC);
3023		reg &= ~E1000_PCIEMISC_LX_DECISION;
3024		E1000_WRITE_REG(hw, E1000_PCIEMISC, reg);
3025		device_printf(dev, "DMA Coalescing enabled\n");
3026
3027	} else if (hw->mac.type == e1000_82580) {
3028		u32 reg = E1000_READ_REG(hw, E1000_PCIEMISC);
3029		E1000_WRITE_REG(hw, E1000_DMACR, 0);
3030		E1000_WRITE_REG(hw, E1000_PCIEMISC,
3031		    reg & ~E1000_PCIEMISC_LX_DECISION);
3032	}
3033
3034reset_out:
3035	E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);
3036	e1000_get_phy_info(hw);
3037	e1000_check_for_link(hw);
3038	return;
3039}
3040
3041/*********************************************************************
3042 *
3043 *  Setup networking device structure and register an interface.
3044 *
3045 **********************************************************************/
3046static int
3047igb_setup_interface(device_t dev, struct adapter *adapter)
3048{
3049	struct ifnet   *ifp;
3050
3051	INIT_DEBUGOUT("igb_setup_interface: begin");
3052
3053	ifp = adapter->ifp = if_alloc(IFT_ETHER);
3054	if (ifp == NULL) {
		device_printf(dev, "cannot allocate ifnet structure\n");
3056		return (-1);
3057	}
3058	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
3059	ifp->if_init =  igb_init;
3060	ifp->if_softc = adapter;
3061	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
3062	ifp->if_ioctl = igb_ioctl;
3063#if __FreeBSD_version >= 800000
3064	ifp->if_transmit = igb_mq_start;
3065	ifp->if_qflush = igb_qflush;
3066#else
3067	ifp->if_start = igb_start;
3068	IFQ_SET_MAXLEN(&ifp->if_snd, adapter->num_tx_desc - 1);
3069	ifp->if_snd.ifq_drv_maxlen = adapter->num_tx_desc - 1;
3070	IFQ_SET_READY(&ifp->if_snd);
3071#endif
3072
3073	ether_ifattach(ifp, adapter->hw.mac.addr);
3074
3075	ifp->if_capabilities = ifp->if_capenable = 0;
3076
3077	ifp->if_capabilities = IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM;
3078	ifp->if_capabilities |= IFCAP_TSO4;
3079	ifp->if_capabilities |= IFCAP_JUMBO_MTU;
3080	ifp->if_capenable = ifp->if_capabilities;
3081
3082	/* Don't enable LRO by default */
3083	ifp->if_capabilities |= IFCAP_LRO;
3084
3085#ifdef DEVICE_POLLING
3086	ifp->if_capabilities |= IFCAP_POLLING;
3087#endif
3088
3089	/*
3090	 * Tell the upper layer(s) we
3091	 * support full VLAN capability.
3092	 */
3093	ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);
3094	ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING
3095			     |  IFCAP_VLAN_HWTSO
3096			     |  IFCAP_VLAN_MTU;
3097	ifp->if_capenable |= IFCAP_VLAN_HWTAGGING
3098			  |  IFCAP_VLAN_HWTSO
3099			  |  IFCAP_VLAN_MTU;
3100
	/*
	** Don't turn this on by default; if vlans are
	** created on another pseudo device (e.g. lagg)
	** then vlan events are not passed through, breaking
	** operation, but with HW FILTER off it works. If
	** you use vlans directly on the igb driver you can
	** enable this and get full hardware tag filtering.
	*/
3109	ifp->if_capabilities |= IFCAP_VLAN_HWFILTER;
3110
3111	/*
3112	 * Specify the media types supported by this adapter and register
3113	 * callbacks to update media and link information
3114	 */
3115	ifmedia_init(&adapter->media, IFM_IMASK,
3116	    igb_media_change, igb_media_status);
3117	if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
3118	    (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
3119		ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_SX | IFM_FDX,
3120			    0, NULL);
3121		ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_SX, 0, NULL);
3122	} else {
3123		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T, 0, NULL);
3124		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T | IFM_FDX,
3125			    0, NULL);
3126		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX,
3127			    0, NULL);
3128		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
3129			    0, NULL);
3130		if (adapter->hw.phy.type != e1000_phy_ife) {
3131			ifmedia_add(&adapter->media,
3132				IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
3133			ifmedia_add(&adapter->media,
3134				IFM_ETHER | IFM_1000_T, 0, NULL);
3135		}
3136	}
3137	ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
3138	ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
3139	return (0);
3140}
3141
3142
3143/*
3144 * Manage DMA'able memory.
3145 */
3146static void
3147igb_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
3148{
3149	if (error)
3150		return;
3151	*(bus_addr_t *) arg = segs[0].ds_addr;
3152}
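/*
 * Note on the callback above: it is used with bus_dmamap_load() in
 * igb_dma_malloc(); because each descriptor ring comes from a DMA tag
 * created with nsegments = 1, only segs[0] is meaningful and its bus
 * address is simply handed back through the opaque argument.
 */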
3153
3154static int
3155igb_dma_malloc(struct adapter *adapter, bus_size_t size,
3156        struct igb_dma_alloc *dma, int mapflags)
3157{
3158	int error;
3159
3160	error = bus_dma_tag_create(bus_get_dma_tag(adapter->dev), /* parent */
3161				IGB_DBA_ALIGN, 0,	/* alignment, bounds */
3162				BUS_SPACE_MAXADDR,	/* lowaddr */
3163				BUS_SPACE_MAXADDR,	/* highaddr */
3164				NULL, NULL,		/* filter, filterarg */
3165				size,			/* maxsize */
3166				1,			/* nsegments */
3167				size,			/* maxsegsize */
3168				0,			/* flags */
3169				NULL,			/* lockfunc */
3170				NULL,			/* lockarg */
3171				&dma->dma_tag);
3172	if (error) {
3173		device_printf(adapter->dev,
3174		    "%s: bus_dma_tag_create failed: %d\n",
3175		    __func__, error);
3176		goto fail_0;
3177	}
3178
3179	error = bus_dmamem_alloc(dma->dma_tag, (void**) &dma->dma_vaddr,
3180	    BUS_DMA_NOWAIT | BUS_DMA_COHERENT, &dma->dma_map);
3181	if (error) {
3182		device_printf(adapter->dev,
3183		    "%s: bus_dmamem_alloc(%ju) failed: %d\n",
3184		    __func__, (uintmax_t)size, error);
3185		goto fail_2;
3186	}
3187
3188	dma->dma_paddr = 0;
3189	error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
3190	    size, igb_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT);
3191	if (error || dma->dma_paddr == 0) {
3192		device_printf(adapter->dev,
3193		    "%s: bus_dmamap_load failed: %d\n",
3194		    __func__, error);
3195		goto fail_3;
3196	}
3197
3198	return (0);
3199
3200fail_3:
3201	bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3202fail_2:
3203	bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
3204	bus_dma_tag_destroy(dma->dma_tag);
3205fail_0:
3206	dma->dma_map = NULL;
3207	dma->dma_tag = NULL;
3208
3209	return (error);
3210}
3211
3212static void
3213igb_dma_free(struct adapter *adapter, struct igb_dma_alloc *dma)
3214{
3215	if (dma->dma_tag == NULL)
3216		return;
3217	if (dma->dma_map != NULL) {
3218		bus_dmamap_sync(dma->dma_tag, dma->dma_map,
3219		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
3220		bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3221		bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
3222		dma->dma_map = NULL;
3223	}
3224	bus_dma_tag_destroy(dma->dma_tag);
3225	dma->dma_tag = NULL;
3226}
3227
3228
3229/*********************************************************************
3230 *
3231 *  Allocate memory for the transmit and receive rings, and then
3232 *  the descriptors associated with each, called only once at attach.
3233 *
3234 **********************************************************************/
3235static int
3236igb_allocate_queues(struct adapter *adapter)
3237{
3238	device_t dev = adapter->dev;
3239	struct igb_queue	*que = NULL;
3240	struct tx_ring		*txr = NULL;
3241	struct rx_ring		*rxr = NULL;
3242	int rsize, tsize, error = E1000_SUCCESS;
3243	int txconf = 0, rxconf = 0;
3244
3245	/* First allocate the top level queue structs */
3246	if (!(adapter->queues =
3247	    (struct igb_queue *) malloc(sizeof(struct igb_queue) *
3248	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3249		device_printf(dev, "Unable to allocate queue memory\n");
3250		error = ENOMEM;
3251		goto fail;
3252	}
3253
3254	/* Next allocate the TX ring struct memory */
3255	if (!(adapter->tx_rings =
3256	    (struct tx_ring *) malloc(sizeof(struct tx_ring) *
3257	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3258		device_printf(dev, "Unable to allocate TX ring memory\n");
3259		error = ENOMEM;
3260		goto tx_fail;
3261	}
3262
3263	/* Now allocate the RX */
3264	if (!(adapter->rx_rings =
3265	    (struct rx_ring *) malloc(sizeof(struct rx_ring) *
3266	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3267		device_printf(dev, "Unable to allocate RX ring memory\n");
3268		error = ENOMEM;
3269		goto rx_fail;
3270	}
3271
3272	tsize = roundup2(adapter->num_tx_desc *
3273	    sizeof(union e1000_adv_tx_desc), IGB_DBA_ALIGN);
3274	/*
	 * Now set up the TX queues; txconf is needed to handle the
	 * possibility that things fail midcourse and we need to
	 * undo the memory allocations gracefully.
3278	 */
3279	for (int i = 0; i < adapter->num_queues; i++, txconf++) {
3280		/* Set up some basics */
3281		txr = &adapter->tx_rings[i];
3282		txr->adapter = adapter;
3283		txr->me = i;
3284
3285		/* Initialize the TX lock */
3286		snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
3287		    device_get_nameunit(dev), txr->me);
3288		mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF);
3289
3290		if (igb_dma_malloc(adapter, tsize,
3291			&txr->txdma, BUS_DMA_NOWAIT)) {
3292			device_printf(dev,
3293			    "Unable to allocate TX Descriptor memory\n");
3294			error = ENOMEM;
3295			goto err_tx_desc;
3296		}
3297		txr->tx_base = (struct e1000_tx_desc *)txr->txdma.dma_vaddr;
3298		bzero((void *)txr->tx_base, tsize);
3299
3300        	/* Now allocate transmit buffers for the ring */
3301        	if (igb_allocate_transmit_buffers(txr)) {
3302			device_printf(dev,
3303			    "Critical Failure setting up transmit buffers\n");
3304			error = ENOMEM;
3305			goto err_tx_desc;
3306        	}
3307#if __FreeBSD_version >= 800000
3308		/* Allocate a buf ring */
3309		txr->br = buf_ring_alloc(IGB_BR_SIZE, M_DEVBUF,
3310		    M_WAITOK, &txr->tx_mtx);
3311#endif
3312	}
3313
3314	/*
3315	 * Next the RX queues...
3316	 */
3317	rsize = roundup2(adapter->num_rx_desc *
3318	    sizeof(union e1000_adv_rx_desc), IGB_DBA_ALIGN);
3319	for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
3320		rxr = &adapter->rx_rings[i];
3321		rxr->adapter = adapter;
3322		rxr->me = i;
3323
3324		/* Initialize the RX lock */
3325		snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
		    device_get_nameunit(dev), rxr->me);
3327		mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF);
3328
3329		if (igb_dma_malloc(adapter, rsize,
3330			&rxr->rxdma, BUS_DMA_NOWAIT)) {
3331			device_printf(dev,
3332			    "Unable to allocate RxDescriptor memory\n");
3333			error = ENOMEM;
3334			goto err_rx_desc;
3335		}
3336		rxr->rx_base = (union e1000_adv_rx_desc *)rxr->rxdma.dma_vaddr;
3337		bzero((void *)rxr->rx_base, rsize);
3338
3339        	/* Allocate receive buffers for the ring*/
3340		if (igb_allocate_receive_buffers(rxr)) {
3341			device_printf(dev,
3342			    "Critical Failure setting up receive buffers\n");
3343			error = ENOMEM;
3344			goto err_rx_desc;
3345		}
3346	}
3347
3348	/*
3349	** Finally set up the queue holding structs
3350	*/
3351	for (int i = 0; i < adapter->num_queues; i++) {
3352		que = &adapter->queues[i];
3353		que->adapter = adapter;
3354		que->txr = &adapter->tx_rings[i];
3355		que->rxr = &adapter->rx_rings[i];
3356	}
3357
3358	return (0);
3359
3360err_rx_desc:
3361	for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
3362		igb_dma_free(adapter, &rxr->rxdma);
3363err_tx_desc:
3364	for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
3365		igb_dma_free(adapter, &txr->txdma);
3366	free(adapter->rx_rings, M_DEVBUF);
3367rx_fail:
3368#if __FreeBSD_version >= 800000
3369	buf_ring_free(txr->br, M_DEVBUF);
3370#endif
3371	free(adapter->tx_rings, M_DEVBUF);
3372tx_fail:
3373	free(adapter->queues, M_DEVBUF);
3374fail:
3375	return (error);
3376}
3377
3378/*********************************************************************
3379 *
3380 *  Allocate memory for tx_buffer structures. The tx_buffer stores all
3381 *  the information needed to transmit a packet on the wire. This is
 *  called only once at attach; setup is done on every reset.
3383 *
3384 **********************************************************************/
3385static int
3386igb_allocate_transmit_buffers(struct tx_ring *txr)
3387{
3388	struct adapter *adapter = txr->adapter;
3389	device_t dev = adapter->dev;
3390	struct igb_tx_buffer *txbuf;
3391	int error, i;
3392
3393	/*
3394	 * Setup DMA descriptor areas.
3395	 */
3396	if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
3397			       1, 0,			/* alignment, bounds */
3398			       BUS_SPACE_MAXADDR,	/* lowaddr */
3399			       BUS_SPACE_MAXADDR,	/* highaddr */
3400			       NULL, NULL,		/* filter, filterarg */
3401			       IGB_TSO_SIZE,		/* maxsize */
3402			       IGB_MAX_SCATTER,		/* nsegments */
3403			       PAGE_SIZE,		/* maxsegsize */
3404			       0,			/* flags */
3405			       NULL,			/* lockfunc */
3406			       NULL,			/* lockfuncarg */
3407			       &txr->txtag))) {
3408		device_printf(dev,"Unable to allocate TX DMA tag\n");
3409		goto fail;
3410	}
3411
3412	if (!(txr->tx_buffers =
3413	    (struct igb_tx_buffer *) malloc(sizeof(struct igb_tx_buffer) *
3414	    adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3415		device_printf(dev, "Unable to allocate tx_buffer memory\n");
3416		error = ENOMEM;
3417		goto fail;
3418	}
3419
3420        /* Create the descriptor buffer dma maps */
3421	txbuf = txr->tx_buffers;
3422	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3423		error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
3424		if (error != 0) {
3425			device_printf(dev, "Unable to create TX DMA map\n");
3426			goto fail;
3427		}
3428	}
3429
3430	return 0;
3431fail:
	/* Free everything; this handles the case where we failed partway through */
3433	igb_free_transmit_structures(adapter);
3434	return (error);
3435}
3436
3437/*********************************************************************
3438 *
3439 *  Initialize a transmit ring.
3440 *
3441 **********************************************************************/
3442static void
3443igb_setup_transmit_ring(struct tx_ring *txr)
3444{
3445	struct adapter *adapter = txr->adapter;
3446	struct igb_tx_buffer *txbuf;
3447	int i;
3448#ifdef DEV_NETMAP
3449	struct netmap_adapter *na = NA(adapter->ifp);
3450	struct netmap_slot *slot;
3451#endif /* DEV_NETMAP */
3452
3453	/* Clear the old descriptor contents */
3454	IGB_TX_LOCK(txr);
3455#ifdef DEV_NETMAP
3456	slot = netmap_reset(na, NR_TX, txr->me, 0);
3457#endif /* DEV_NETMAP */
3458	bzero((void *)txr->tx_base,
3459	      (sizeof(union e1000_adv_tx_desc)) * adapter->num_tx_desc);
3460	/* Reset indices */
3461	txr->next_avail_desc = 0;
3462	txr->next_to_clean = 0;
3463
3464	/* Free any existing tx buffers. */
3465        txbuf = txr->tx_buffers;
3466	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3467		if (txbuf->m_head != NULL) {
3468			bus_dmamap_sync(txr->txtag, txbuf->map,
3469			    BUS_DMASYNC_POSTWRITE);
3470			bus_dmamap_unload(txr->txtag, txbuf->map);
3471			m_freem(txbuf->m_head);
3472			txbuf->m_head = NULL;
3473		}
3474#ifdef DEV_NETMAP
3475		if (slot) {
3476			int si = netmap_idx_n2k(&na->tx_rings[txr->me], i);
3477			/* no need to set the address */
3478			netmap_load_map(txr->txtag, txbuf->map, NMB(slot + si));
3479		}
3480#endif /* DEV_NETMAP */
3481		/* clear the watch index */
3482		txbuf->next_eop = -1;
3483        }
3484
3485	/* Set number of descriptors available */
3486	txr->tx_avail = adapter->num_tx_desc;
3487
3488	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3489	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3490	IGB_TX_UNLOCK(txr);
3491}
3492
3493/*********************************************************************
3494 *
3495 *  Initialize all transmit rings.
3496 *
3497 **********************************************************************/
3498static void
3499igb_setup_transmit_structures(struct adapter *adapter)
3500{
3501	struct tx_ring *txr = adapter->tx_rings;
3502
3503	for (int i = 0; i < adapter->num_queues; i++, txr++)
3504		igb_setup_transmit_ring(txr);
3505
3506	return;
3507}
3508
3509/*********************************************************************
3510 *
3511 *  Enable transmit unit.
3512 *
3513 **********************************************************************/
3514static void
3515igb_initialize_transmit_units(struct adapter *adapter)
3516{
3517	struct tx_ring	*txr = adapter->tx_rings;
3518	struct e1000_hw *hw = &adapter->hw;
3519	u32		tctl, txdctl;
3520
3521	INIT_DEBUGOUT("igb_initialize_transmit_units: begin");
3522	tctl = txdctl = 0;
3523
3524	/* Setup the Tx Descriptor Rings */
3525	for (int i = 0; i < adapter->num_queues; i++, txr++) {
3526		u64 bus_addr = txr->txdma.dma_paddr;
3527
3528		E1000_WRITE_REG(hw, E1000_TDLEN(i),
3529		    adapter->num_tx_desc * sizeof(struct e1000_tx_desc));
3530		E1000_WRITE_REG(hw, E1000_TDBAH(i),
3531		    (uint32_t)(bus_addr >> 32));
3532		E1000_WRITE_REG(hw, E1000_TDBAL(i),
3533		    (uint32_t)bus_addr);
3534
3535		/* Setup the HW Tx Head and Tail descriptor pointers */
3536		E1000_WRITE_REG(hw, E1000_TDT(i), 0);
3537		E1000_WRITE_REG(hw, E1000_TDH(i), 0);
3538
3539		HW_DEBUGOUT2("Base = %x, Length = %x\n",
3540		    E1000_READ_REG(hw, E1000_TDBAL(i)),
3541		    E1000_READ_REG(hw, E1000_TDLEN(i)));
3542
3543		txr->queue_status = IGB_QUEUE_IDLE;
3544
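		/*
		** Note: per the shifts below, the prefetch, host and
		** write-back thresholds occupy byte-aligned fields of
		** TXDCTL (bit offsets 0, 8 and 16), with the queue
		** enable bit OR'd in last.
		*/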
3545		txdctl |= IGB_TX_PTHRESH;
3546		txdctl |= IGB_TX_HTHRESH << 8;
3547		txdctl |= IGB_TX_WTHRESH << 16;
3548		txdctl |= E1000_TXDCTL_QUEUE_ENABLE;
3549		E1000_WRITE_REG(hw, E1000_TXDCTL(i), txdctl);
3550	}
3551
3552	if (adapter->vf_ifp)
3553		return;
3554
3555	e1000_config_collision_dist(hw);
3556
3557	/* Program the Transmit Control Register */
3558	tctl = E1000_READ_REG(hw, E1000_TCTL);
3559	tctl &= ~E1000_TCTL_CT;
3560	tctl |= (E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN |
3561		   (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT));
3562
3563	/* This write will effectively turn on the transmit unit. */
3564	E1000_WRITE_REG(hw, E1000_TCTL, tctl);
3565}
3566
3567/*********************************************************************
3568 *
3569 *  Free all transmit rings.
3570 *
3571 **********************************************************************/
3572static void
3573igb_free_transmit_structures(struct adapter *adapter)
3574{
3575	struct tx_ring *txr = adapter->tx_rings;
3576
3577	for (int i = 0; i < adapter->num_queues; i++, txr++) {
3578		IGB_TX_LOCK(txr);
3579		igb_free_transmit_buffers(txr);
3580		igb_dma_free(adapter, &txr->txdma);
3581		IGB_TX_UNLOCK(txr);
3582		IGB_TX_LOCK_DESTROY(txr);
3583	}
3584	free(adapter->tx_rings, M_DEVBUF);
3585}
3586
3587/*********************************************************************
3588 *
3589 *  Free transmit ring related data structures.
3590 *
3591 **********************************************************************/
3592static void
3593igb_free_transmit_buffers(struct tx_ring *txr)
3594{
3595	struct adapter *adapter = txr->adapter;
3596	struct igb_tx_buffer *tx_buffer;
3597	int             i;
3598
3599	INIT_DEBUGOUT("free_transmit_ring: begin");
3600
3601	if (txr->tx_buffers == NULL)
3602		return;
3603
3604	tx_buffer = txr->tx_buffers;
3605	for (i = 0; i < adapter->num_tx_desc; i++, tx_buffer++) {
3606		if (tx_buffer->m_head != NULL) {
3607			bus_dmamap_sync(txr->txtag, tx_buffer->map,
3608			    BUS_DMASYNC_POSTWRITE);
3609			bus_dmamap_unload(txr->txtag,
3610			    tx_buffer->map);
3611			m_freem(tx_buffer->m_head);
3612			tx_buffer->m_head = NULL;
3613			if (tx_buffer->map != NULL) {
3614				bus_dmamap_destroy(txr->txtag,
3615				    tx_buffer->map);
3616				tx_buffer->map = NULL;
3617			}
3618		} else if (tx_buffer->map != NULL) {
3619			bus_dmamap_unload(txr->txtag,
3620			    tx_buffer->map);
3621			bus_dmamap_destroy(txr->txtag,
3622			    tx_buffer->map);
3623			tx_buffer->map = NULL;
3624		}
3625	}
3626#if __FreeBSD_version >= 800000
3627	if (txr->br != NULL)
3628		buf_ring_free(txr->br, M_DEVBUF);
3629#endif
3630	if (txr->tx_buffers != NULL) {
3631		free(txr->tx_buffers, M_DEVBUF);
3632		txr->tx_buffers = NULL;
3633	}
3634	if (txr->txtag != NULL) {
3635		bus_dma_tag_destroy(txr->txtag);
3636		txr->txtag = NULL;
3637	}
3638	return;
3639}
3640
3641/**********************************************************************
3642 *
3643 *  Setup work for hardware segmentation offload (TSO)
3644 *
3645 **********************************************************************/
3646static bool
3647igb_tso_setup(struct tx_ring *txr, struct mbuf *mp, int ehdrlen,
3648	struct ip *ip, struct tcphdr *th)
3649{
3650	struct adapter *adapter = txr->adapter;
3651	struct e1000_adv_tx_context_desc *TXD;
3652	struct igb_tx_buffer        *tx_buffer;
3653	u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0;
3654	u32 mss_l4len_idx = 0;
3655	u16 vtag = 0;
3656	int ctxd, ip_hlen, tcp_hlen;
3657
3658	ctxd = txr->next_avail_desc;
3659	tx_buffer = &txr->tx_buffers[ctxd];
3660	TXD = (struct e1000_adv_tx_context_desc *) &txr->tx_base[ctxd];
3661
3662	ip->ip_sum = 0;
3663	ip_hlen = ip->ip_hl << 2;
3664	tcp_hlen = th->th_off << 2;
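	/*
	** ip_hl and th_off are in units of 32-bit words, so the
	** shifts above convert to bytes: e.g. an ip_hl of 5 yields
	** a 20-byte IP header, a th_off of 8 a 32-byte TCP header.
	*/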
3665
3666	/* VLAN MACLEN IPLEN */
3667	if (mp->m_flags & M_VLANTAG) {
3668		vtag = htole16(mp->m_pkthdr.ether_vtag);
3669		vlan_macip_lens |= (vtag << E1000_ADVTXD_VLAN_SHIFT);
3670	}
3671
3672	vlan_macip_lens |= (ehdrlen << E1000_ADVTXD_MACLEN_SHIFT);
3673	vlan_macip_lens |= ip_hlen;
3674	TXD->vlan_macip_lens |= htole32(vlan_macip_lens);
3675
3676	/* ADV DTYPE TUCMD */
3677	type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
3678	type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP;
3679	type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4;
3680	TXD->type_tucmd_mlhl |= htole32(type_tucmd_mlhl);
3681
3682	/* MSS L4LEN IDX */
3683	mss_l4len_idx |= (mp->m_pkthdr.tso_segsz << E1000_ADVTXD_MSS_SHIFT);
3684	mss_l4len_idx |= (tcp_hlen << E1000_ADVTXD_L4LEN_SHIFT);
3685	/* 82575 needs the queue index added */
3686	if (adapter->hw.mac.type == e1000_82575)
3687		mss_l4len_idx |= txr->me << 4;
3688	TXD->mss_l4len_idx = htole32(mss_l4len_idx);
3689
3690	TXD->seqnum_seed = htole32(0);
3691	tx_buffer->m_head = NULL;
3692	tx_buffer->next_eop = -1;
3693
3694	if (++ctxd == adapter->num_tx_desc)
3695		ctxd = 0;
3696
3697	txr->tx_avail--;
3698	txr->next_avail_desc = ctxd;
3699	return TRUE;
3700}
3701
3702
3703/*********************************************************************
3704 *
3705 *  Context Descriptor setup for VLAN or CSUM
3706 *
3707 **********************************************************************/
3708
3709static bool
3710igb_tx_ctx_setup(struct tx_ring *txr, struct mbuf *mp)
3711{
3712	struct adapter *adapter = txr->adapter;
3713	struct e1000_adv_tx_context_desc *TXD;
3714	struct igb_tx_buffer        *tx_buffer;
3715	u32 vlan_macip_lens, type_tucmd_mlhl, mss_l4len_idx;
3716	struct ether_vlan_header *eh;
3717	struct ip *ip = NULL;
3718	struct ip6_hdr *ip6;
3719	int  ehdrlen, ctxd, ip_hlen = 0;
3720	u16	etype, vtag = 0;
3721	u8	ipproto = 0;
3722	bool	offload = TRUE;
3723
3724	if ((mp->m_pkthdr.csum_flags & CSUM_OFFLOAD) == 0)
3725		offload = FALSE;
3726
3727	vlan_macip_lens = type_tucmd_mlhl = mss_l4len_idx = 0;
3728	ctxd = txr->next_avail_desc;
3729	tx_buffer = &txr->tx_buffers[ctxd];
3730	TXD = (struct e1000_adv_tx_context_desc *) &txr->tx_base[ctxd];
3731
3732	/*
3733	** In advanced descriptors the vlan tag must
3734	** be placed into the context descriptor, thus
3735	** we need to be here just for that setup.
3736	*/
3737	if (mp->m_flags & M_VLANTAG) {
3738		vtag = htole16(mp->m_pkthdr.ether_vtag);
3739		vlan_macip_lens |= (vtag << E1000_ADVTXD_VLAN_SHIFT);
3740	} else if (offload == FALSE)
3741		return FALSE;
3742
3743	/*
3744	 * Determine where frame payload starts.
3745	 * Jump over vlan headers if already present,
3746	 * helpful for QinQ too.
3747	 */
3748	eh = mtod(mp, struct ether_vlan_header *);
3749	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
3750		etype = ntohs(eh->evl_proto);
3751		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3752	} else {
3753		etype = ntohs(eh->evl_encap_proto);
3754		ehdrlen = ETHER_HDR_LEN;
3755	}
3756
3757	/* Set the ether header length */
3758	vlan_macip_lens |= ehdrlen << E1000_ADVTXD_MACLEN_SHIFT;
3759
3760	switch (etype) {
3761		case ETHERTYPE_IP:
3762			ip = (struct ip *)(mp->m_data + ehdrlen);
3763			ip_hlen = ip->ip_hl << 2;
3764			if (mp->m_len < ehdrlen + ip_hlen) {
3765				offload = FALSE;
3766				break;
3767			}
3768			ipproto = ip->ip_p;
3769			type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4;
3770			break;
3771		case ETHERTYPE_IPV6:
3772			ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
3773			ip_hlen = sizeof(struct ip6_hdr);
3774			ipproto = ip6->ip6_nxt;
3775			type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV6;
3776			break;
3777		default:
3778			offload = FALSE;
3779			break;
3780	}
3781
3782	vlan_macip_lens |= ip_hlen;
3783	type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
3784
3785	switch (ipproto) {
3786		case IPPROTO_TCP:
3787			if (mp->m_pkthdr.csum_flags & CSUM_TCP)
3788				type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP;
3789			break;
3790		case IPPROTO_UDP:
3791			if (mp->m_pkthdr.csum_flags & CSUM_UDP)
3792				type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_UDP;
3793			break;
3794#if __FreeBSD_version >= 800000
3795		case IPPROTO_SCTP:
3796			if (mp->m_pkthdr.csum_flags & CSUM_SCTP)
3797				type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_SCTP;
3798			break;
3799#endif
3800		default:
3801			offload = FALSE;
3802			break;
3803	}
3804
3805	/* 82575 needs the queue index added */
3806	if (adapter->hw.mac.type == e1000_82575)
3807		mss_l4len_idx = txr->me << 4;
3808
3809	/* Now copy bits into descriptor */
3810	TXD->vlan_macip_lens |= htole32(vlan_macip_lens);
3811	TXD->type_tucmd_mlhl |= htole32(type_tucmd_mlhl);
3812	TXD->seqnum_seed = htole32(0);
3813	TXD->mss_l4len_idx = htole32(mss_l4len_idx);
3814
3815	tx_buffer->m_head = NULL;
3816	tx_buffer->next_eop = -1;
3817
3818	/* We've consumed the first desc, adjust counters */
3819	if (++ctxd == adapter->num_tx_desc)
3820		ctxd = 0;
3821	txr->next_avail_desc = ctxd;
3822	--txr->tx_avail;
3823
3824        return (offload);
3825}
3826
3827
3828/**********************************************************************
3829 *
3830 *  Examine each tx_buffer in the used queue. If the hardware is done
3831 *  processing the packet then free associated resources. The
3832 *  tx_buffer is put back on the free queue.
3833 *
 *  A TRUE return means there's work in the ring to clean; FALSE means it's empty.
3835 **********************************************************************/
3836static bool
3837igb_txeof(struct tx_ring *txr)
3838{
3839	struct adapter	*adapter = txr->adapter;
3840        int first, last, done, processed;
3841        struct igb_tx_buffer *tx_buffer;
3842        struct e1000_tx_desc   *tx_desc, *eop_desc;
3843	struct ifnet   *ifp = adapter->ifp;
3844
3845	IGB_TX_LOCK_ASSERT(txr);
3846
3847#ifdef DEV_NETMAP
3848	if (ifp->if_capenable & IFCAP_NETMAP) {
3849		struct netmap_adapter *na = NA(ifp);
3850
3851		selwakeuppri(&na->tx_rings[txr->me].si, PI_NET);
3852		IGB_TX_UNLOCK(txr);
3853		IGB_CORE_LOCK(adapter);
3854		selwakeuppri(&na->tx_si, PI_NET);
3855		IGB_CORE_UNLOCK(adapter);
3856		IGB_TX_LOCK(txr);
3857		return FALSE;
3858	}
3859#endif /* DEV_NETMAP */
3860        if (txr->tx_avail == adapter->num_tx_desc) {
3861		txr->queue_status = IGB_QUEUE_IDLE;
3862                return FALSE;
3863	}
3864
3865	processed = 0;
3866        first = txr->next_to_clean;
3867        tx_desc = &txr->tx_base[first];
3868        tx_buffer = &txr->tx_buffers[first];
3869	last = tx_buffer->next_eop;
3870        eop_desc = &txr->tx_base[last];
3871
3872	/*
	 * What this does is get the index of the
	 * first descriptor AFTER the EOP of the
	 * first packet; that way we can do the
	 * simple comparison in the inner while loop.
3877	 */
3878	if (++last == adapter->num_tx_desc)
3879 		last = 0;
3880	done = last;
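	/*
	** Example: with the EOP sitting on the last descriptor of
	** the ring, 'last' wraps to 0 here, so 'done' correctly
	** marks the slot just past the packet being cleaned.
	*/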
3881
3882        bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3883            BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
3884
3885        while (eop_desc->upper.fields.status & E1000_TXD_STAT_DD) {
3886		/* We clean the range of the packet */
3887		while (first != done) {
3888                	tx_desc->upper.data = 0;
3889                	tx_desc->lower.data = 0;
3890                	tx_desc->buffer_addr = 0;
3891                	++txr->tx_avail;
3892			++processed;
3893
3894			if (tx_buffer->m_head) {
3895				txr->bytes +=
3896				    tx_buffer->m_head->m_pkthdr.len;
3897				bus_dmamap_sync(txr->txtag,
3898				    tx_buffer->map,
3899				    BUS_DMASYNC_POSTWRITE);
3900				bus_dmamap_unload(txr->txtag,
3901				    tx_buffer->map);
3902
3903                        	m_freem(tx_buffer->m_head);
3904                        	tx_buffer->m_head = NULL;
3905                	}
3906			tx_buffer->next_eop = -1;
3907			txr->watchdog_time = ticks;
3908
3909	                if (++first == adapter->num_tx_desc)
3910				first = 0;
3911
3912	                tx_buffer = &txr->tx_buffers[first];
3913			tx_desc = &txr->tx_base[first];
3914		}
3915		++txr->packets;
3916		++ifp->if_opackets;
3917		/* See if we can continue to the next packet */
3918		last = tx_buffer->next_eop;
3919		if (last != -1) {
3920        		eop_desc = &txr->tx_base[last];
3921			/* Get new done point */
3922			if (++last == adapter->num_tx_desc) last = 0;
3923			done = last;
3924		} else
3925			break;
3926        }
3927        bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3928            BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3929
3930        txr->next_to_clean = first;
3931
3932	/*
	** Watchdog calculation: we know there's
	** work outstanding or the first return
	** would have been taken, so nothing processed
	** for too long indicates a hang.
3937	*/
3938	if ((!processed) && ((ticks - txr->watchdog_time) > IGB_WATCHDOG))
3939		txr->queue_status |= IGB_QUEUE_HUNG;
3940        /*
3941         * If we have a minimum free,
3942         * clear depleted state bit
3943         */
3944        if (txr->tx_avail >= IGB_QUEUE_THRESHOLD)
3945                txr->queue_status &= ~IGB_QUEUE_DEPLETED;
3946
3947	/* All clean, turn off the watchdog */
3948	if (txr->tx_avail == adapter->num_tx_desc) {
3949		txr->queue_status = IGB_QUEUE_IDLE;
3950		return (FALSE);
3951        }
3952
3953	return (TRUE);
3954}
3955
3956/*********************************************************************
3957 *
3958 *  Refresh mbuf buffers for RX descriptor rings
 *   - now keeps its own state, so discards due to resource
 *     exhaustion are unnecessary; if an mbuf cannot be obtained
 *     it just returns, keeping its placeholder, and can simply
 *     be called again later to retry.
3963 *
3964 **********************************************************************/
3965static void
3966igb_refresh_mbufs(struct rx_ring *rxr, int limit)
3967{
3968	struct adapter		*adapter = rxr->adapter;
3969	bus_dma_segment_t	hseg[1];
3970	bus_dma_segment_t	pseg[1];
3971	struct igb_rx_buf	*rxbuf;
3972	struct mbuf		*mh, *mp;
3973	int			i, j, nsegs, error;
3974	bool			refreshed = FALSE;
3975
3976	i = j = rxr->next_to_refresh;
3977	/*
3978	** Get one descriptor beyond
3979	** our work mark to control
3980	** the loop.
3981        */
3982	if (++j == adapter->num_rx_desc)
3983		j = 0;
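	/*
	** Note: the loop below always stays one descriptor short of
	** 'limit'; next_to_refresh is left pointing at that last,
	** unrefreshed slot, which (presumably) keeps the tail value
	** later written to RDT from ever catching the hardware head.
	*/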
3984
3985	while (j != limit) {
3986		rxbuf = &rxr->rx_buffers[i];
3987		/* No hdr mbuf used with header split off */
3988		if (rxr->hdr_split == FALSE)
3989			goto no_split;
3990		if (rxbuf->m_head == NULL) {
3991			mh = m_gethdr(M_DONTWAIT, MT_DATA);
3992			if (mh == NULL)
3993				goto update;
3994		} else
3995			mh = rxbuf->m_head;
3996
		mh->m_pkthdr.len = mh->m_len = MHLEN;
3999		mh->m_flags |= M_PKTHDR;
4000		/* Get the memory mapping */
4001		error = bus_dmamap_load_mbuf_sg(rxr->htag,
4002		    rxbuf->hmap, mh, hseg, &nsegs, BUS_DMA_NOWAIT);
4003		if (error != 0) {
4004			printf("Refresh mbufs: hdr dmamap load"
4005			    " failure - %d\n", error);
4006			m_free(mh);
4007			rxbuf->m_head = NULL;
4008			goto update;
4009		}
4010		rxbuf->m_head = mh;
4011		bus_dmamap_sync(rxr->htag, rxbuf->hmap,
4012		    BUS_DMASYNC_PREREAD);
4013		rxr->rx_base[i].read.hdr_addr =
4014		    htole64(hseg[0].ds_addr);
4015no_split:
4016		if (rxbuf->m_pack == NULL) {
4017			mp = m_getjcl(M_DONTWAIT, MT_DATA,
4018			    M_PKTHDR, adapter->rx_mbuf_sz);
4019			if (mp == NULL)
4020				goto update;
4021		} else
4022			mp = rxbuf->m_pack;
4023
4024		mp->m_pkthdr.len = mp->m_len = adapter->rx_mbuf_sz;
4025		/* Get the memory mapping */
4026		error = bus_dmamap_load_mbuf_sg(rxr->ptag,
4027		    rxbuf->pmap, mp, pseg, &nsegs, BUS_DMA_NOWAIT);
4028		if (error != 0) {
4029			printf("Refresh mbufs: payload dmamap load"
4030			    " failure - %d\n", error);
4031			m_free(mp);
4032			rxbuf->m_pack = NULL;
4033			goto update;
4034		}
4035		rxbuf->m_pack = mp;
4036		bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
4037		    BUS_DMASYNC_PREREAD);
4038		rxr->rx_base[i].read.pkt_addr =
4039		    htole64(pseg[0].ds_addr);
4040		refreshed = TRUE; /* I feel wefreshed :) */
4041
4042		i = j; /* our next is precalculated */
4043		rxr->next_to_refresh = i;
4044		if (++j == adapter->num_rx_desc)
4045			j = 0;
4046	}
4047update:
4048	if (refreshed) /* update tail */
4049		E1000_WRITE_REG(&adapter->hw,
4050		    E1000_RDT(rxr->me), rxr->next_to_refresh);
4051	return;
4052}
4053
4054
4055/*********************************************************************
4056 *
4057 *  Allocate memory for rx_buffer structures. Since we use one
4058 *  rx_buffer per received packet, the maximum number of rx_buffer's
4059 *  that we'll need is equal to the number of receive descriptors
4060 *  that we've allocated.
4061 *
4062 **********************************************************************/
4063static int
4064igb_allocate_receive_buffers(struct rx_ring *rxr)
4065{
4066	struct	adapter 	*adapter = rxr->adapter;
4067	device_t 		dev = adapter->dev;
4068	struct igb_rx_buf	*rxbuf;
4069	int             	i, bsize, error;
4070
4071	bsize = sizeof(struct igb_rx_buf) * adapter->num_rx_desc;
4072	if (!(rxr->rx_buffers =
4073	    (struct igb_rx_buf *) malloc(bsize,
4074	    M_DEVBUF, M_NOWAIT | M_ZERO))) {
4075		device_printf(dev, "Unable to allocate rx_buffer memory\n");
4076		error = ENOMEM;
4077		goto fail;
4078	}
4079
4080	if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
4081				   1, 0,		/* alignment, bounds */
4082				   BUS_SPACE_MAXADDR,	/* lowaddr */
4083				   BUS_SPACE_MAXADDR,	/* highaddr */
4084				   NULL, NULL,		/* filter, filterarg */
4085				   MSIZE,		/* maxsize */
4086				   1,			/* nsegments */
4087				   MSIZE,		/* maxsegsize */
4088				   0,			/* flags */
4089				   NULL,		/* lockfunc */
4090				   NULL,		/* lockfuncarg */
4091				   &rxr->htag))) {
4092		device_printf(dev, "Unable to create RX DMA tag\n");
4093		goto fail;
4094	}
4095
4096	if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
4097				   1, 0,		/* alignment, bounds */
4098				   BUS_SPACE_MAXADDR,	/* lowaddr */
4099				   BUS_SPACE_MAXADDR,	/* highaddr */
4100				   NULL, NULL,		/* filter, filterarg */
4101				   MJUM9BYTES,		/* maxsize */
4102				   1,			/* nsegments */
4103				   MJUM9BYTES,		/* maxsegsize */
4104				   0,			/* flags */
4105				   NULL,		/* lockfunc */
4106				   NULL,		/* lockfuncarg */
4107				   &rxr->ptag))) {
4108		device_printf(dev, "Unable to create RX payload DMA tag\n");
4109		goto fail;
4110	}
4111
4112	for (i = 0; i < adapter->num_rx_desc; i++) {
4113		rxbuf = &rxr->rx_buffers[i];
4114		error = bus_dmamap_create(rxr->htag,
4115		    BUS_DMA_NOWAIT, &rxbuf->hmap);
4116		if (error) {
4117			device_printf(dev,
4118			    "Unable to create RX head DMA maps\n");
4119			goto fail;
4120		}
4121		error = bus_dmamap_create(rxr->ptag,
4122		    BUS_DMA_NOWAIT, &rxbuf->pmap);
4123		if (error) {
4124			device_printf(dev,
4125			    "Unable to create RX packet DMA maps\n");
4126			goto fail;
4127		}
4128	}
4129
4130	return (0);
4131
4132fail:
4133	/* Frees all, but can handle partial completion */
4134	igb_free_receive_structures(adapter);
4135	return (error);
4136}
4137
4138
4139static void
4140igb_free_receive_ring(struct rx_ring *rxr)
4141{
4142	struct	adapter		*adapter = rxr->adapter;
4143	struct igb_rx_buf	*rxbuf;
4144
4145
4146	for (int i = 0; i < adapter->num_rx_desc; i++) {
4147		rxbuf = &rxr->rx_buffers[i];
4148		if (rxbuf->m_head != NULL) {
4149			bus_dmamap_sync(rxr->htag, rxbuf->hmap,
4150			    BUS_DMASYNC_POSTREAD);
4151			bus_dmamap_unload(rxr->htag, rxbuf->hmap);
4152			rxbuf->m_head->m_flags |= M_PKTHDR;
4153			m_freem(rxbuf->m_head);
4154		}
4155		if (rxbuf->m_pack != NULL) {
4156			bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
4157			    BUS_DMASYNC_POSTREAD);
4158			bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
4159			rxbuf->m_pack->m_flags |= M_PKTHDR;
4160			m_freem(rxbuf->m_pack);
4161		}
4162		rxbuf->m_head = NULL;
4163		rxbuf->m_pack = NULL;
4164	}
4165}
4166
4167
4168/*********************************************************************
4169 *
4170 *  Initialize a receive ring and its buffers.
4171 *
4172 **********************************************************************/
4173static int
4174igb_setup_receive_ring(struct rx_ring *rxr)
4175{
4176	struct	adapter		*adapter;
4177	struct  ifnet		*ifp;
4178	device_t		dev;
4179	struct igb_rx_buf	*rxbuf;
4180	bus_dma_segment_t	pseg[1], hseg[1];
4181	struct lro_ctrl		*lro = &rxr->lro;
4182	int			rsize, nsegs, error = 0;
4183#ifdef DEV_NETMAP
4184	struct netmap_adapter *na = NA(rxr->adapter->ifp);
4185	struct netmap_slot *slot;
4186#endif /* DEV_NETMAP */
4187
4188	adapter = rxr->adapter;
4189	dev = adapter->dev;
4190	ifp = adapter->ifp;
4191
4192	/* Clear the ring contents */
4193	IGB_RX_LOCK(rxr);
4194#ifdef DEV_NETMAP
4195	slot = netmap_reset(na, NR_RX, rxr->me, 0);
4196#endif /* DEV_NETMAP */
4197	rsize = roundup2(adapter->num_rx_desc *
4198	    sizeof(union e1000_adv_rx_desc), IGB_DBA_ALIGN);
4199	bzero((void *)rxr->rx_base, rsize);
4200
4201	/*
4202	** Free current RX buffer structures and their mbufs
4203	*/
4204	igb_free_receive_ring(rxr);
4205
4206	/* Configure for header split? */
4207	if (igb_header_split)
4208		rxr->hdr_split = TRUE;
4209
4210        /* Now replenish the ring mbufs */
4211	for (int j = 0; j < adapter->num_rx_desc; ++j) {
4212		struct mbuf	*mh, *mp;
4213
4214		rxbuf = &rxr->rx_buffers[j];
4215#ifdef DEV_NETMAP
4216		if (slot) {
4217			/* slot sj is mapped to the i-th NIC-ring entry */
4218			int sj = netmap_idx_n2k(&na->rx_rings[rxr->me], j);
4219			uint64_t paddr;
4220			void *addr;
4221
4222			addr = PNMB(slot + sj, &paddr);
4223			netmap_load_map(rxr->ptag, rxbuf->pmap, addr);
4224			/* Update descriptor */
4225			rxr->rx_base[j].read.pkt_addr = htole64(paddr);
4226			continue;
4227		}
4228#endif /* DEV_NETMAP */
4229		if (rxr->hdr_split == FALSE)
4230			goto skip_head;
4231
4232		/* First the header */
4233		rxbuf->m_head = m_gethdr(M_DONTWAIT, MT_DATA);
4234		if (rxbuf->m_head == NULL) {
4235			error = ENOBUFS;
4236                        goto fail;
4237		}
4238		m_adj(rxbuf->m_head, ETHER_ALIGN);
4239		mh = rxbuf->m_head;
4240		mh->m_len = mh->m_pkthdr.len = MHLEN;
4241		mh->m_flags |= M_PKTHDR;
4242		/* Get the memory mapping */
4243		error = bus_dmamap_load_mbuf_sg(rxr->htag,
4244		    rxbuf->hmap, rxbuf->m_head, hseg,
4245		    &nsegs, BUS_DMA_NOWAIT);
4246		if (error != 0) /* Nothing elegant to do here */
4247                        goto fail;
4248		bus_dmamap_sync(rxr->htag,
4249		    rxbuf->hmap, BUS_DMASYNC_PREREAD);
4250		/* Update descriptor */
4251		rxr->rx_base[j].read.hdr_addr = htole64(hseg[0].ds_addr);
4252
4253skip_head:
4254		/* Now the payload cluster */
4255		rxbuf->m_pack = m_getjcl(M_DONTWAIT, MT_DATA,
4256		    M_PKTHDR, adapter->rx_mbuf_sz);
4257		if (rxbuf->m_pack == NULL) {
4258			error = ENOBUFS;
4259                        goto fail;
4260		}
4261		mp = rxbuf->m_pack;
4262		mp->m_pkthdr.len = mp->m_len = adapter->rx_mbuf_sz;
4263		/* Get the memory mapping */
4264		error = bus_dmamap_load_mbuf_sg(rxr->ptag,
4265		    rxbuf->pmap, mp, pseg,
4266		    &nsegs, BUS_DMA_NOWAIT);
4267		if (error != 0)
4268                        goto fail;
4269		bus_dmamap_sync(rxr->ptag,
4270		    rxbuf->pmap, BUS_DMASYNC_PREREAD);
4271		/* Update descriptor */
4272		rxr->rx_base[j].read.pkt_addr = htole64(pseg[0].ds_addr);
4273        }
4274
4275	/* Setup our descriptor indices */
4276	rxr->next_to_check = 0;
4277	rxr->next_to_refresh = adapter->num_rx_desc - 1;
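	/*
	** Note: next_to_refresh starts one slot behind next_to_check
	** (which is 0), presumably so the RDT value later written from
	** it stays just behind the hardware head on a fully
	** replenished ring.
	*/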
4278	rxr->lro_enabled = FALSE;
4279	rxr->rx_split_packets = 0;
4280	rxr->rx_bytes = 0;
4281
4282	rxr->fmp = NULL;
4283	rxr->lmp = NULL;
4284	rxr->discard = FALSE;
4285
4286	bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4287	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4288
4289	/*
	** Now set up the LRO interface; we
	** also only do head split when LRO
	** is enabled, since so often they
	** are undesirable in similar setups.
4294	*/
4295	if (ifp->if_capenable & IFCAP_LRO) {
4296		error = tcp_lro_init(lro);
4297		if (error) {
4298			device_printf(dev, "LRO Initialization failed!\n");
4299			goto fail;
4300		}
4301		INIT_DEBUGOUT("RX LRO Initialized\n");
4302		rxr->lro_enabled = TRUE;
4303		lro->ifp = adapter->ifp;
4304	}
4305
4306	IGB_RX_UNLOCK(rxr);
4307	return (0);
4308
4309fail:
4310	igb_free_receive_ring(rxr);
4311	IGB_RX_UNLOCK(rxr);
4312	return (error);
4313}
4314
4315
4316/*********************************************************************
4317 *
4318 *  Initialize all receive rings.
4319 *
4320 **********************************************************************/
4321static int
4322igb_setup_receive_structures(struct adapter *adapter)
4323{
4324	struct rx_ring *rxr = adapter->rx_rings;
4325	int i;
4326
4327	for (i = 0; i < adapter->num_queues; i++, rxr++)
4328		if (igb_setup_receive_ring(rxr))
4329			goto fail;
4330
4331	return (0);
4332fail:
4333	/*
	 * Free RX buffers allocated so far; we will only handle
	 * the rings that completed, since the failing case will have
	 * cleaned up for itself. 'i' is the endpoint.
	 */
	for (int j = 0; j < i; ++j) {
		rxr = &adapter->rx_rings[j];
4340		IGB_RX_LOCK(rxr);
4341		igb_free_receive_ring(rxr);
4342		IGB_RX_UNLOCK(rxr);
4343	}
4344
4345	return (ENOBUFS);
4346}
4347
4348/*********************************************************************
4349 *
4350 *  Enable receive unit.
4351 *
4352 **********************************************************************/
4353static void
4354igb_initialize_receive_units(struct adapter *adapter)
4355{
4356	struct rx_ring	*rxr = adapter->rx_rings;
4357	struct ifnet	*ifp = adapter->ifp;
4358	struct e1000_hw *hw = &adapter->hw;
4359	u32		rctl, rxcsum, psize, srrctl = 0;
4360
4361	INIT_DEBUGOUT("igb_initialize_receive_unit: begin");
4362
4363	/*
4364	 * Make sure receives are disabled while setting
4365	 * up the descriptor ring
4366	 */
4367	rctl = E1000_READ_REG(hw, E1000_RCTL);
4368	E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
4369
4370	/*
4371	** Set up for header split
4372	*/
4373	if (igb_header_split) {
4374		/* Use a standard mbuf for the header */
4375		srrctl |= IGB_HDR_BUF << E1000_SRRCTL_BSIZEHDRSIZE_SHIFT;
4376		srrctl |= E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
4377	} else
4378		srrctl |= E1000_SRRCTL_DESCTYPE_ADV_ONEBUF;
4379
4380	/*
4381	** Set up for jumbo frames
4382	*/
4383	if (ifp->if_mtu > ETHERMTU) {
4384		rctl |= E1000_RCTL_LPE;
4385		if (adapter->rx_mbuf_sz == MJUMPAGESIZE) {
4386			srrctl |= 4096 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
4387			rctl |= E1000_RCTL_SZ_4096 | E1000_RCTL_BSEX;
4388		} else if (adapter->rx_mbuf_sz > MJUMPAGESIZE) {
4389			srrctl |= 8192 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
4390			rctl |= E1000_RCTL_SZ_8192 | E1000_RCTL_BSEX;
4391		}
4392		/* Set maximum packet len */
4393		psize = adapter->max_frame_size;
4394		/* are we on a vlan? */
4395		if (adapter->ifp->if_vlantrunk != NULL)
4396			psize += VLAN_TAG_SIZE;
4397		E1000_WRITE_REG(&adapter->hw, E1000_RLPML, psize);
4398	} else {
4399		rctl &= ~E1000_RCTL_LPE;
4400		srrctl |= 2048 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
4401		rctl |= E1000_RCTL_SZ_2048;
4402	}
4403
4404	/* Setup the Base and Length of the Rx Descriptor Rings */
4405	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4406		u64 bus_addr = rxr->rxdma.dma_paddr;
4407		u32 rxdctl;
4408
4409		E1000_WRITE_REG(hw, E1000_RDLEN(i),
4410		    adapter->num_rx_desc * sizeof(struct e1000_rx_desc));
4411		E1000_WRITE_REG(hw, E1000_RDBAH(i),
4412		    (uint32_t)(bus_addr >> 32));
4413		E1000_WRITE_REG(hw, E1000_RDBAL(i),
4414		    (uint32_t)bus_addr);
4415		E1000_WRITE_REG(hw, E1000_SRRCTL(i), srrctl);
4416		/* Enable this Queue */
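		/*
		** The mask below clears the low 20 bits of RXDCTL so
		** the prefetch, host and write-back thresholds can be
		** OR'd into their byte-aligned fields (offsets 0, 8
		** and 16), mirroring the TXDCTL setup above.
		*/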
4417		rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(i));
4418		rxdctl |= E1000_RXDCTL_QUEUE_ENABLE;
4419		rxdctl &= 0xFFF00000;
4420		rxdctl |= IGB_RX_PTHRESH;
4421		rxdctl |= IGB_RX_HTHRESH << 8;
4422		rxdctl |= IGB_RX_WTHRESH << 16;
4423		E1000_WRITE_REG(hw, E1000_RXDCTL(i), rxdctl);
4424	}
4425
4426	/*
4427	** Setup for RX MultiQueue
4428	*/
4429	rxcsum = E1000_READ_REG(hw, E1000_RXCSUM);
	if (adapter->num_queues > 1) {
4431		u32 random[10], mrqc, shift = 0;
4432		union igb_reta {
4433			u32 dword;
4434			u8  bytes[4];
4435		} reta;
4436
4437		arc4rand(&random, sizeof(random), 0);
4438		if (adapter->hw.mac.type == e1000_82575)
4439			shift = 6;
4440		/* Warning FM follows */
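		/*
		** What the loop does: it packs one queue index per
		** byte into the 128-entry redirection table, writing
		** a full RETA dword every 4th entry. With 4 queues the
		** pattern is simply 0,1,2,3 repeating; the 82575
		** apparently wants the index shifted up by 6 bits.
		*/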
4441		for (int i = 0; i < 128; i++) {
4442			reta.bytes[i & 3] =
4443			    (i % adapter->num_queues) << shift;
4444			if ((i & 3) == 3)
4445				E1000_WRITE_REG(hw,
4446				    E1000_RETA(i >> 2), reta.dword);
4447		}
4448		/* Now fill in hash table */
4449		mrqc = E1000_MRQC_ENABLE_RSS_4Q;
4450		for (int i = 0; i < 10; i++)
4451			E1000_WRITE_REG_ARRAY(hw,
4452			    E1000_RSSRK(0), i, random[i]);
4453
4454		mrqc |= (E1000_MRQC_RSS_FIELD_IPV4 |
4455		    E1000_MRQC_RSS_FIELD_IPV4_TCP);
4456		mrqc |= (E1000_MRQC_RSS_FIELD_IPV6 |
4457		    E1000_MRQC_RSS_FIELD_IPV6_TCP);
		mrqc |= (E1000_MRQC_RSS_FIELD_IPV4_UDP |
		    E1000_MRQC_RSS_FIELD_IPV6_UDP);
		mrqc |= (E1000_MRQC_RSS_FIELD_IPV6_UDP_EX |
		    E1000_MRQC_RSS_FIELD_IPV6_TCP_EX);
4462
4463		E1000_WRITE_REG(hw, E1000_MRQC, mrqc);
4464
4465		/*
4466		** NOTE: Receive Full-Packet Checksum Offload
		** is mutually exclusive with Multiqueue. However,
		** this is not the same as TCP/IP checksums, which
		** still work.
4470		*/
4471		rxcsum |= E1000_RXCSUM_PCSD;
4472#if __FreeBSD_version >= 800000
4473		/* For SCTP Offload */
4474		if ((hw->mac.type == e1000_82576)
4475		    && (ifp->if_capenable & IFCAP_RXCSUM))
4476			rxcsum |= E1000_RXCSUM_CRCOFL;
4477#endif
4478	} else {
4479		/* Non RSS setup */
4480		if (ifp->if_capenable & IFCAP_RXCSUM) {
4481			rxcsum |= E1000_RXCSUM_IPPCSE;
4482#if __FreeBSD_version >= 800000
4483			if (adapter->hw.mac.type == e1000_82576)
4484				rxcsum |= E1000_RXCSUM_CRCOFL;
4485#endif
4486		} else
4487			rxcsum &= ~E1000_RXCSUM_TUOFL;
4488	}
4489	E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum);
4490
4491	/* Setup the Receive Control Register */
4492	rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
4493	rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_LBM_NO |
4494		   E1000_RCTL_RDMTS_HALF |
4495		   (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
4496	/* Strip CRC bytes. */
4497	rctl |= E1000_RCTL_SECRC;
4498	/* Make sure VLAN Filters are off */
4499	rctl &= ~E1000_RCTL_VFE;
4500	/* Don't store bad packets */
4501	rctl &= ~E1000_RCTL_SBP;
4502
4503	/* Enable Receives */
4504	E1000_WRITE_REG(hw, E1000_RCTL, rctl);
4505
4506	/*
4507	 * Setup the HW Rx Head and Tail Descriptor Pointers
4508	 *   - needs to be after enable
4509	 */
4510	for (int i = 0; i < adapter->num_queues; i++) {
4511		rxr = &adapter->rx_rings[i];
4512		E1000_WRITE_REG(hw, E1000_RDH(i), rxr->next_to_check);
4513#ifdef DEV_NETMAP
4514		/*
4515		 * an init() while a netmap client is active must
4516		 * preserve the rx buffers passed to userspace.
4517		 * In this driver it means we adjust RDT to
		 * something different from next_to_refresh
4519		 * (which is not used in netmap mode).
4520		 */
4521		if (ifp->if_capenable & IFCAP_NETMAP) {
4522			struct netmap_adapter *na = NA(adapter->ifp);
4523			struct netmap_kring *kring = &na->rx_rings[i];
4524			int t = rxr->next_to_refresh - kring->nr_hwavail;
4525
4526			if (t >= adapter->num_rx_desc)
4527				t -= adapter->num_rx_desc;
4528			else if (t < 0)
4529				t += adapter->num_rx_desc;
4530			E1000_WRITE_REG(hw, E1000_RDT(i), t);
4531		} else
4532#endif /* DEV_NETMAP */
4533		E1000_WRITE_REG(hw, E1000_RDT(i), rxr->next_to_refresh);
4534	}
4535	return;
4536}
4537
4538/*********************************************************************
4539 *
4540 *  Free receive rings.
4541 *
4542 **********************************************************************/
4543static void
4544igb_free_receive_structures(struct adapter *adapter)
4545{
4546	struct rx_ring *rxr = adapter->rx_rings;
4547
4548	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4549		struct lro_ctrl	*lro = &rxr->lro;
4550		igb_free_receive_buffers(rxr);
4551		tcp_lro_free(lro);
4552		igb_dma_free(adapter, &rxr->rxdma);
4553	}
4554
4555	free(adapter->rx_rings, M_DEVBUF);
4556}
4557
4558/*********************************************************************
4559 *
4560 *  Free receive ring data structures.
4561 *
4562 **********************************************************************/
4563static void
4564igb_free_receive_buffers(struct rx_ring *rxr)
4565{
4566	struct adapter		*adapter = rxr->adapter;
4567	struct igb_rx_buf	*rxbuf;
4568	int i;
4569
4570	INIT_DEBUGOUT("free_receive_structures: begin");
4571
4572	/* Cleanup any existing buffers */
4573	if (rxr->rx_buffers != NULL) {
4574		for (i = 0; i < adapter->num_rx_desc; i++) {
4575			rxbuf = &rxr->rx_buffers[i];
4576			if (rxbuf->m_head != NULL) {
4577				bus_dmamap_sync(rxr->htag, rxbuf->hmap,
4578				    BUS_DMASYNC_POSTREAD);
4579				bus_dmamap_unload(rxr->htag, rxbuf->hmap);
4580				rxbuf->m_head->m_flags |= M_PKTHDR;
4581				m_freem(rxbuf->m_head);
4582			}
4583			if (rxbuf->m_pack != NULL) {
4584				bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
4585				    BUS_DMASYNC_POSTREAD);
4586				bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
4587				rxbuf->m_pack->m_flags |= M_PKTHDR;
4588				m_freem(rxbuf->m_pack);
4589			}
4590			rxbuf->m_head = NULL;
4591			rxbuf->m_pack = NULL;
4592			if (rxbuf->hmap != NULL) {
4593				bus_dmamap_destroy(rxr->htag, rxbuf->hmap);
4594				rxbuf->hmap = NULL;
4595			}
4596			if (rxbuf->pmap != NULL) {
4597				bus_dmamap_destroy(rxr->ptag, rxbuf->pmap);
4598				rxbuf->pmap = NULL;
4599			}
4600		}
4601		if (rxr->rx_buffers != NULL) {
4602			free(rxr->rx_buffers, M_DEVBUF);
4603			rxr->rx_buffers = NULL;
4604		}
4605	}
4606
4607	if (rxr->htag != NULL) {
4608		bus_dma_tag_destroy(rxr->htag);
4609		rxr->htag = NULL;
4610	}
4611	if (rxr->ptag != NULL) {
4612		bus_dma_tag_destroy(rxr->ptag);
4613		rxr->ptag = NULL;
4614	}
4615}
4616
4617static __inline void
4618igb_rx_discard(struct rx_ring *rxr, int i)
4619{
4620	struct igb_rx_buf	*rbuf;
4621
4622	rbuf = &rxr->rx_buffers[i];
4623
4624	/* Partially received? Free the chain */
4625	if (rxr->fmp != NULL) {
4626		rxr->fmp->m_flags |= M_PKTHDR;
4627		m_freem(rxr->fmp);
4628		rxr->fmp = NULL;
4629		rxr->lmp = NULL;
4630	}
4631
4632	/*
4633	** With advanced descriptors the writeback
	** clobbers the buffer addrs, so it's easier
4635	** to just free the existing mbufs and take
4636	** the normal refresh path to get new buffers
4637	** and mapping.
4638	*/
4639	if (rbuf->m_head) {
4640		m_free(rbuf->m_head);
4641		rbuf->m_head = NULL;
4642	}
4643
4644	if (rbuf->m_pack) {
4645		m_free(rbuf->m_pack);
4646		rbuf->m_pack = NULL;
4647	}
4648
4649	return;
4650}
4651
4652static __inline void
4653igb_rx_input(struct rx_ring *rxr, struct ifnet *ifp, struct mbuf *m, u32 ptype)
4654{
4655
4656	/*
	 * At the moment LRO is only for IPv4/TCP packets, and the TCP checksum
	 * of the packet should have been computed by hardware. Also, it should
	 * not have a VLAN tag in its ethernet header.
4660	 */
4661	if (rxr->lro_enabled &&
4662	    (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 &&
4663	    (ptype & E1000_RXDADV_PKTTYPE_ETQF) == 0 &&
4664	    (ptype & (E1000_RXDADV_PKTTYPE_IPV4 | E1000_RXDADV_PKTTYPE_TCP)) ==
4665	    (E1000_RXDADV_PKTTYPE_IPV4 | E1000_RXDADV_PKTTYPE_TCP) &&
4666	    (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) ==
4667	    (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) {
4668		/*
4669		 * Send to the stack if:
		 *  - LRO not enabled, or
		 *  - no LRO resources, or
		 *  - lro enqueue fails
4673		 */
4674		if (rxr->lro.lro_cnt != 0)
4675			if (tcp_lro_rx(&rxr->lro, m, 0) == 0)
4676				return;
4677	}
4678	IGB_RX_UNLOCK(rxr);
4679	(*ifp->if_input)(ifp, m);
4680	IGB_RX_LOCK(rxr);
4681}
4682
4683/*********************************************************************
4684 *
4685 *  This routine executes in interrupt context. It replenishes
4686 *  the mbufs in the descriptor and sends data which has been
4687 *  dma'ed into host memory to upper layer.
4688 *
4689 *  We loop at most count times if count is > 0, or until done if
4690 *  count < 0.
4691 *
4692 *  Return TRUE if more to clean, FALSE otherwise
4693 *********************************************************************/
4694static bool
4695igb_rxeof(struct igb_queue *que, int count, int *done)
4696{
4697	struct adapter		*adapter = que->adapter;
4698	struct rx_ring		*rxr = que->rxr;
4699	struct ifnet		*ifp = adapter->ifp;
4700	struct lro_ctrl		*lro = &rxr->lro;
4701	struct lro_entry	*queued;
4702	int			i, processed = 0, rxdone = 0;
4703	u32			ptype, staterr = 0;
4704	union e1000_adv_rx_desc	*cur;
4705
4706	IGB_RX_LOCK(rxr);
4707	/* Sync the ring. */
4708	bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4709	    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
4710
4711#ifdef DEV_NETMAP
4712	if (ifp->if_capenable & IFCAP_NETMAP) {
4713		struct netmap_adapter *na = NA(ifp);
4714
4715		na->rx_rings[rxr->me].nr_kflags |= NKR_PENDINTR;
4716		selwakeuppri(&na->rx_rings[rxr->me].si, PI_NET);
4717		IGB_RX_UNLOCK(rxr);
4718		IGB_CORE_LOCK(adapter);
4719		selwakeuppri(&na->rx_si, PI_NET);
4720		IGB_CORE_UNLOCK(adapter);
		return (FALSE);
4722	}
4723#endif /* DEV_NETMAP */
4724
4725	/* Main clean loop */
4726	for (i = rxr->next_to_check; count != 0;) {
4727		struct mbuf		*sendmp, *mh, *mp;
4728		struct igb_rx_buf	*rxbuf;
4729		u16			hlen, plen, hdr, vtag;
4730		bool			eop = FALSE;
4731
4732		cur = &rxr->rx_base[i];
4733		staterr = le32toh(cur->wb.upper.status_error);
4734		if ((staterr & E1000_RXD_STAT_DD) == 0)
4735			break;
4736		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
4737			break;
4738		count--;
4739		sendmp = mh = mp = NULL;
4740		cur->wb.upper.status_error = 0;
4741		rxbuf = &rxr->rx_buffers[i];
4742		plen = le16toh(cur->wb.upper.length);
4743		ptype = le32toh(cur->wb.lower.lo_dword.data) & IGB_PKTTYPE_MASK;
4744		if ((adapter->hw.mac.type == e1000_i350) &&
4745		    (staterr & E1000_RXDEXT_STATERR_LB))
4746			vtag = be16toh(cur->wb.upper.vlan);
4747		else
4748			vtag = le16toh(cur->wb.upper.vlan);
4749		hdr = le16toh(cur->wb.lower.lo_dword.hs_rss.hdr_info);
4750		eop = ((staterr & E1000_RXD_STAT_EOP) == E1000_RXD_STAT_EOP);
4751
4752		/* Make sure all segments of a bad packet are discarded */
4753		if (((staterr & E1000_RXDEXT_ERR_FRAME_ERR_MASK) != 0) ||
4754		    (rxr->discard)) {
4755			ifp->if_ierrors++;
4756			++rxr->rx_discarded;
4757			if (!eop) /* Catch subsequent segs */
4758				rxr->discard = TRUE;
4759			else
4760				rxr->discard = FALSE;
4761			igb_rx_discard(rxr, i);
4762			goto next_desc;
4763		}
4764
4765		/*
		** The way the hardware is configured to
		** split, it will ONLY use the header buffer
		** when header split is enabled; otherwise we
		** get normal behavior, i.e., both header and
		** payload are DMA'd into the payload buffer.
		**
		** The fmp test is to catch the case where a
		** packet spans multiple descriptors; in that
		** case only the first header is valid.
4775		*/
4776		if (rxr->hdr_split && rxr->fmp == NULL) {
4777			hlen = (hdr & E1000_RXDADV_HDRBUFLEN_MASK) >>
4778			    E1000_RXDADV_HDRBUFLEN_SHIFT;
4779			if (hlen > IGB_HDR_BUF)
4780				hlen = IGB_HDR_BUF;
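			/*
			** hlen comes from the hdr_info field of the
			** writeback descriptor and is clamped to
			** IGB_HDR_BUF, the size of the header buffer
			** that was posted for this slot.
			*/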
4781			mh = rxr->rx_buffers[i].m_head;
4782			mh->m_len = hlen;
4783			/* clear buf pointer for refresh */
4784			rxbuf->m_head = NULL;
4785			/*
			** Get the payload length; this
			** could be zero if it's a small
			** packet.
4789			*/
4790			if (plen > 0) {
4791				mp = rxr->rx_buffers[i].m_pack;
4792				mp->m_len = plen;
4793				mh->m_next = mp;
4794				/* clear buf pointer */
4795				rxbuf->m_pack = NULL;
4796				rxr->rx_split_packets++;
4797			}
4798		} else {
4799			/*
4800			** Either no header split, or a
4801			** secondary piece of a fragmented
4802			** split packet.
4803			*/
4804			mh = rxr->rx_buffers[i].m_pack;
4805			mh->m_len = plen;
4806			/* clear buf info for refresh */
4807			rxbuf->m_pack = NULL;
4808		}
4809
4810		++processed; /* So we know when to refresh */
4811
4812		/* Initial frame - setup */
4813		if (rxr->fmp == NULL) {
4814			mh->m_pkthdr.len = mh->m_len;
4815			/* Save the head of the chain */
4816			rxr->fmp = mh;
4817			rxr->lmp = mh;
4818			if (mp != NULL) {
4819				/* Add payload if split */
4820				mh->m_pkthdr.len += mp->m_len;
4821				rxr->lmp = mh->m_next;
4822			}
4823		} else {
4824			/* Chain mbuf's together */
4825			rxr->lmp->m_next = mh;
4826			rxr->lmp = rxr->lmp->m_next;
4827			rxr->fmp->m_pkthdr.len += mh->m_len;
4828		}
4829
4830		if (eop) {
4831			rxr->fmp->m_pkthdr.rcvif = ifp;
4832			ifp->if_ipackets++;
4833			rxr->rx_packets++;
4834			/* capture data for AIM */
4835			rxr->packets++;
4836			rxr->bytes += rxr->fmp->m_pkthdr.len;
4837			rxr->rx_bytes += rxr->fmp->m_pkthdr.len;
4838
4839			if ((ifp->if_capenable & IFCAP_RXCSUM) != 0)
4840				igb_rx_checksum(staterr, rxr->fmp, ptype);
4841
4842			if ((ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 &&
4843			    (staterr & E1000_RXD_STAT_VP) != 0) {
4844				rxr->fmp->m_pkthdr.ether_vtag = vtag;
4845				rxr->fmp->m_flags |= M_VLANTAG;
4846			}
4847#if __FreeBSD_version >= 800000
4848			rxr->fmp->m_pkthdr.flowid = que->msix;
4849			rxr->fmp->m_flags |= M_FLOWID;
4850#endif
4851			sendmp = rxr->fmp;
4852			/* Make sure to set M_PKTHDR. */
4853			sendmp->m_flags |= M_PKTHDR;
4854			rxr->fmp = NULL;
4855			rxr->lmp = NULL;
4856		}
4857
4858next_desc:
4859		bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4860		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4861
4862		/* Advance our pointers to the next descriptor. */
4863		if (++i == adapter->num_rx_desc)
4864			i = 0;
4865		/*
4866		** Send to the stack or LRO
4867		*/
4868		if (sendmp != NULL) {
4869			rxr->next_to_check = i;
4870			igb_rx_input(rxr, ifp, sendmp, ptype);
4871			i = rxr->next_to_check;
4872			rxdone++;
4873		}
4874
4875		/* Every 8 descriptors we go to refresh mbufs */
4876		if (processed == 8) {
4877                        igb_refresh_mbufs(rxr, i);
4878                        processed = 0;
4879		}
4880	}
4881
4882	/* Catch any remainders */
4883	if (igb_rx_unrefreshed(rxr))
4884		igb_refresh_mbufs(rxr, i);
4885
4886	rxr->next_to_check = i;
4887
4888	/*
4889	 * Flush any outstanding LRO work
4890	 */
4891	while ((queued = SLIST_FIRST(&lro->lro_active)) != NULL) {
4892		SLIST_REMOVE_HEAD(&lro->lro_active, next);
4893		tcp_lro_flush(lro, queued);
4894	}
4895
4896	if (done != NULL)
4897		*done = rxdone;
4898
4899	IGB_RX_UNLOCK(rxr);
4900	return ((staterr & E1000_RXD_STAT_DD) ? TRUE : FALSE);
4901}
4902
4903/*********************************************************************
4904 *
4905 *  Verify that the hardware indicated that the checksum is valid.
4906 *  Inform the stack about the status of checksum so that stack
4907 *  doesn't spend time verifying the checksum.
4908 *
4909 *********************************************************************/
4910static void
4911igb_rx_checksum(u32 staterr, struct mbuf *mp, u32 ptype)
4912{
4913	u16 status = (u16)staterr;
4914	u8  errors = (u8) (staterr >> 24);
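	/*
	** Per the casts above, the low 16 bits of staterr carry the
	** status flags and bits 31:24 the error flags, which is what
	** the E1000_RXD_STAT_* / E1000_RXD_ERR_* tests below expect.
	*/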
4915	int sctp;
4916
4917	/* Ignore Checksum bit is set */
4918	if (status & E1000_RXD_STAT_IXSM) {
4919		mp->m_pkthdr.csum_flags = 0;
4920		return;
4921	}
4922
4923	if ((ptype & E1000_RXDADV_PKTTYPE_ETQF) == 0 &&
4924	    (ptype & E1000_RXDADV_PKTTYPE_SCTP) != 0)
4925		sctp = 1;
4926	else
4927		sctp = 0;
4928	if (status & E1000_RXD_STAT_IPCS) {
4929		/* Did it pass? */
4930		if (!(errors & E1000_RXD_ERR_IPE)) {
4931			/* IP Checksum Good */
4932			mp->m_pkthdr.csum_flags = CSUM_IP_CHECKED;
4933			mp->m_pkthdr.csum_flags |= CSUM_IP_VALID;
4934		} else
4935			mp->m_pkthdr.csum_flags = 0;
4936	}
4937
4938	if (status & (E1000_RXD_STAT_TCPCS | E1000_RXD_STAT_UDPCS)) {
4939		u16 type = (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
4940#if __FreeBSD_version >= 800000
4941		if (sctp) /* reassign */
4942			type = CSUM_SCTP_VALID;
4943#endif
4944		/* Did it pass? */
4945		if (!(errors & E1000_RXD_ERR_TCPE)) {
4946			mp->m_pkthdr.csum_flags |= type;
4947			if (sctp == 0)
4948				mp->m_pkthdr.csum_data = htons(0xffff);
4949		}
4950	}
4951	return;
4952}
4953
4954/*
 * This routine is run via a vlan
4956 * config EVENT
4957 */
4958static void
4959igb_register_vlan(void *arg, struct ifnet *ifp, u16 vtag)
4960{
4961	struct adapter	*adapter = ifp->if_softc;
4962	u32		index, bit;
4963
4964	if (ifp->if_softc !=  arg)   /* Not our event */
4965		return;
4966
4967	if ((vtag == 0) || (vtag > 4095))       /* Invalid */
4968                return;
4969
4970	IGB_CORE_LOCK(adapter);
4971	index = (vtag >> 5) & 0x7F;
4972	bit = vtag & 0x1F;
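	/*
	** The shadow VFTA is a 4096-bit bitmap kept as 32-bit words:
	** e.g. vtag 100 maps to index 3 (100 >> 5), bit 4 (100 & 0x1F).
	*/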
4973	adapter->shadow_vfta[index] |= (1 << bit);
4974	++adapter->num_vlans;
4975	/* Change hw filter setting */
4976	if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
4977		igb_setup_vlan_hw_support(adapter);
4978	IGB_CORE_UNLOCK(adapter);
4979}
4980
4981/*
 * This routine is run via a vlan
4983 * unconfig EVENT
4984 */
4985static void
4986igb_unregister_vlan(void *arg, struct ifnet *ifp, u16 vtag)
4987{
4988	struct adapter	*adapter = ifp->if_softc;
4989	u32		index, bit;
4990
4991	if (ifp->if_softc !=  arg)
4992		return;
4993
4994	if ((vtag == 0) || (vtag > 4095))       /* Invalid */
4995                return;
4996
4997	IGB_CORE_LOCK(adapter);
4998	index = (vtag >> 5) & 0x7F;
4999	bit = vtag & 0x1F;
5000	adapter->shadow_vfta[index] &= ~(1 << bit);
5001	--adapter->num_vlans;
5002	/* Change hw filter setting */
5003	if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
5004		igb_setup_vlan_hw_support(adapter);
5005	IGB_CORE_UNLOCK(adapter);
5006}
5007
5008static void
5009igb_setup_vlan_hw_support(struct adapter *adapter)
5010{
5011	struct e1000_hw *hw = &adapter->hw;
5012	struct ifnet	*ifp = adapter->ifp;
5013	u32             reg;
5014
5015	if (adapter->vf_ifp) {
5016		e1000_rlpml_set_vf(hw,
5017		    adapter->max_frame_size + VLAN_TAG_SIZE);
5018		return;
5019	}
5020
5021	reg = E1000_READ_REG(hw, E1000_CTRL);
5022	reg |= E1000_CTRL_VME;
5023	E1000_WRITE_REG(hw, E1000_CTRL, reg);
5024
5025	/* Enable the Filter Table */
5026	if (ifp->if_capenable & IFCAP_VLAN_HWFILTER) {
5027		reg = E1000_READ_REG(hw, E1000_RCTL);
5028		reg &= ~E1000_RCTL_CFIEN;
5029		reg |= E1000_RCTL_VFE;
5030		E1000_WRITE_REG(hw, E1000_RCTL, reg);
5031	}
5032
5033	/* Update the frame size */
5034	E1000_WRITE_REG(&adapter->hw, E1000_RLPML,
5035	    adapter->max_frame_size + VLAN_TAG_SIZE);
5036
5037	/* Don't bother with table if no vlans */
5038	if ((adapter->num_vlans == 0) ||
5039	    ((ifp->if_capenable & IFCAP_VLAN_HWFILTER) == 0))
5040                return;
5041	/*
	** A soft reset zeroes out the VFTA, so
5043	** we need to repopulate it now.
5044	*/
5045	for (int i = 0; i < IGB_VFTA_SIZE; i++)
5046                if (adapter->shadow_vfta[i] != 0) {
5047			if (adapter->vf_ifp)
5048				e1000_vfta_set_vf(hw,
5049				    adapter->shadow_vfta[i], TRUE);
5050			else
5051				e1000_write_vfta(hw,
5052				    i, adapter->shadow_vfta[i]);
5053		}
5054}
5055
5056static void
5057igb_enable_intr(struct adapter *adapter)
5058{
	/* With RSS, set up what to auto-clear */
5060	if (adapter->msix_mem) {
5061		u32 mask = (adapter->que_mask | adapter->link_mask);
5062		E1000_WRITE_REG(&adapter->hw, E1000_EIAC, mask);
5063		E1000_WRITE_REG(&adapter->hw, E1000_EIAM, mask);
5064		E1000_WRITE_REG(&adapter->hw, E1000_EIMS, mask);
5065		E1000_WRITE_REG(&adapter->hw, E1000_IMS,
5066		    E1000_IMS_LSC);
5067	} else {
5068		E1000_WRITE_REG(&adapter->hw, E1000_IMS,
5069		    IMS_ENABLE_MASK);
5070	}
5071	E1000_WRITE_FLUSH(&adapter->hw);
5072
5073	return;
5074}
5075
5076static void
5077igb_disable_intr(struct adapter *adapter)
5078{
5079	if (adapter->msix_mem) {
5080		E1000_WRITE_REG(&adapter->hw, E1000_EIMC, ~0);
5081		E1000_WRITE_REG(&adapter->hw, E1000_EIAC, 0);
5082	}
5083	E1000_WRITE_REG(&adapter->hw, E1000_IMC, ~0);
5084	E1000_WRITE_FLUSH(&adapter->hw);
5085	return;
5086}
5087
5088/*
 * Bit of a misnomer: what this really means is
 * to enable OS management of the system, i.e.
 * to disable special hardware management features
5092 */
5093static void
5094igb_init_manageability(struct adapter *adapter)
5095{
5096	if (adapter->has_manage) {
5097		int manc2h = E1000_READ_REG(&adapter->hw, E1000_MANC2H);
5098		int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
5099
5100		/* disable hardware interception of ARP */
5101		manc &= ~(E1000_MANC_ARP_EN);
5102
5103                /* enable receiving management packets to the host */
5104		manc |= E1000_MANC_EN_MNG2HOST;
5105		manc2h |= 1 << 5;  /* Mng Port 623 */
5106		manc2h |= 1 << 6;  /* Mng Port 664 */
5107		E1000_WRITE_REG(&adapter->hw, E1000_MANC2H, manc2h);
5108		E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
5109	}
5110}
5111
5112/*
5113 * Give control back to hardware management
5114 * controller if there is one.
5115 */
5116static void
5117igb_release_manageability(struct adapter *adapter)
5118{
5119	if (adapter->has_manage) {
5120		int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
5121
5122		/* re-enable hardware interception of ARP */
5123		manc |= E1000_MANC_ARP_EN;
5124		manc &= ~E1000_MANC_EN_MNG2HOST;
5125
5126		E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
5127	}
5128}
5129
5130/*
5131 * igb_get_hw_control sets CTRL_EXT:DRV_LOAD bit.
5132 * For ASF and Pass Through versions of f/w this means that
5133 * the driver is loaded.
5134 *
5135 */
5136static void
5137igb_get_hw_control(struct adapter *adapter)
5138{
5139	u32 ctrl_ext;
5140
5141	if (adapter->vf_ifp)
5142		return;
5143
5144	/* Let firmware know the driver has taken over */
5145	ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
5146	E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
5147	    ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
5148}
5149
5150/*
5151 * igb_release_hw_control resets CTRL_EXT:DRV_LOAD bit.
5152 * For ASF and Pass Through versions of f/w this means that the
5153 * driver is no longer loaded.
5154 *
5155 */
5156static void
5157igb_release_hw_control(struct adapter *adapter)
5158{
5159	u32 ctrl_ext;
5160
5161	if (adapter->vf_ifp)
5162		return;
5163
	/* Let firmware take over control of h/w */
5165	ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
5166	E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
5167	    ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
5168}
5169
5170static int
5171igb_is_valid_ether_addr(uint8_t *addr)
5172{
5173	char zero_addr[6] = { 0, 0, 0, 0, 0, 0 };
5174
5175	if ((addr[0] & 1) || (!bcmp(addr, zero_addr, ETHER_ADDR_LEN))) {
5176		return (FALSE);
5177	}
5178
5179	return (TRUE);
5180}
5181
5182
5183/*
5184 * Enable PCI Wake On Lan capability
5185 */
5186static void
5187igb_enable_wakeup(device_t dev)
5188{
5189	u16     cap, status;
5190	u8      id;
5191
	/* First find the capabilities pointer */
5193	cap = pci_read_config(dev, PCIR_CAP_PTR, 2);
5194	/* Read the PM Capabilities */
5195	id = pci_read_config(dev, cap, 1);
5196	if (id != PCIY_PMG)     /* Something wrong */
5197		return;
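	/*
	** Note: this only looks at the first entry in the capability
	** list; it assumes the power-management capability is the one
	** PCIR_CAP_PTR points to directly.
	*/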
5198	/* OK, we have the power capabilities, so
5199	   now get the status register */
5200	cap += PCIR_POWER_STATUS;
5201	status = pci_read_config(dev, cap, 2);
5202	status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE;
5203	pci_write_config(dev, cap, status, 2);
5204	return;
5205}
5206
5207static void
5208igb_led_func(void *arg, int onoff)
5209{
5210	struct adapter	*adapter = arg;
5211
5212	IGB_CORE_LOCK(adapter);
5213	if (onoff) {
5214		e1000_setup_led(&adapter->hw);
5215		e1000_led_on(&adapter->hw);
5216	} else {
5217		e1000_led_off(&adapter->hw);
5218		e1000_cleanup_led(&adapter->hw);
5219	}
5220	IGB_CORE_UNLOCK(adapter);
5221}
5222
5223/**********************************************************************
5224 *
5225 *  Update the board statistics counters.
5226 *
5227 **********************************************************************/
5228static void
5229igb_update_stats_counters(struct adapter *adapter)
5230{
5231	struct ifnet		*ifp;
5232        struct e1000_hw		*hw = &adapter->hw;
5233	struct e1000_hw_stats	*stats;
5234
5235	/*
5236	** The virtual function adapter has only a
	** small controlled set of stats, so do only
5238	** those and return.
5239	*/
5240	if (adapter->vf_ifp) {
5241		igb_update_vf_stats_counters(adapter);
5242		return;
5243	}
5244
5245	stats = (struct e1000_hw_stats	*)adapter->stats;
5246
5247	if(adapter->hw.phy.media_type == e1000_media_type_copper ||
5248	   (E1000_READ_REG(hw, E1000_STATUS) & E1000_STATUS_LU)) {
5249		stats->symerrs +=
5250		    E1000_READ_REG(hw,E1000_SYMERRS);
5251		stats->sec += E1000_READ_REG(hw, E1000_SEC);
5252	}
5253
5254	stats->crcerrs += E1000_READ_REG(hw, E1000_CRCERRS);
5255	stats->mpc += E1000_READ_REG(hw, E1000_MPC);
5256	stats->scc += E1000_READ_REG(hw, E1000_SCC);
5257	stats->ecol += E1000_READ_REG(hw, E1000_ECOL);
5258
5259	stats->mcc += E1000_READ_REG(hw, E1000_MCC);
5260	stats->latecol += E1000_READ_REG(hw, E1000_LATECOL);
5261	stats->colc += E1000_READ_REG(hw, E1000_COLC);
5262	stats->dc += E1000_READ_REG(hw, E1000_DC);
5263	stats->rlec += E1000_READ_REG(hw, E1000_RLEC);
5264	stats->xonrxc += E1000_READ_REG(hw, E1000_XONRXC);
5265	stats->xontxc += E1000_READ_REG(hw, E1000_XONTXC);
5266	/*
5267	** For watchdog management we need to know if we have been
5268	** paused during the last interval, so capture that here.
5269	*/
5270        adapter->pause_frames = E1000_READ_REG(&adapter->hw, E1000_XOFFRXC);
5271        stats->xoffrxc += adapter->pause_frames;
5272	stats->xofftxc += E1000_READ_REG(hw, E1000_XOFFTXC);
5273	stats->fcruc += E1000_READ_REG(hw, E1000_FCRUC);
5274	stats->prc64 += E1000_READ_REG(hw, E1000_PRC64);
5275	stats->prc127 += E1000_READ_REG(hw, E1000_PRC127);
5276	stats->prc255 += E1000_READ_REG(hw, E1000_PRC255);
5277	stats->prc511 += E1000_READ_REG(hw, E1000_PRC511);
5278	stats->prc1023 += E1000_READ_REG(hw, E1000_PRC1023);
5279	stats->prc1522 += E1000_READ_REG(hw, E1000_PRC1522);
5280	stats->gprc += E1000_READ_REG(hw, E1000_GPRC);
5281	stats->bprc += E1000_READ_REG(hw, E1000_BPRC);
5282	stats->mprc += E1000_READ_REG(hw, E1000_MPRC);
5283	stats->gptc += E1000_READ_REG(hw, E1000_GPTC);
5284
5285	/* For the 64-bit byte counters the low dword must be read first. */
5286	/* Both registers clear on the read of the high dword */
5287
5288	stats->gorc += E1000_READ_REG(hw, E1000_GORCL) +
5289	    ((u64)E1000_READ_REG(hw, E1000_GORCH) << 32);
5290	stats->gotc += E1000_READ_REG(hw, E1000_GOTCL) +
5291	    ((u64)E1000_READ_REG(hw, E1000_GOTCH) << 32);
5292
5293	stats->rnbc += E1000_READ_REG(hw, E1000_RNBC);
5294	stats->ruc += E1000_READ_REG(hw, E1000_RUC);
5295	stats->rfc += E1000_READ_REG(hw, E1000_RFC);
5296	stats->roc += E1000_READ_REG(hw, E1000_ROC);
5297	stats->rjc += E1000_READ_REG(hw, E1000_RJC);
5298
5299	stats->tor += E1000_READ_REG(hw, E1000_TORH);
5300	stats->tot += E1000_READ_REG(hw, E1000_TOTH);
5301
5302	stats->tpr += E1000_READ_REG(hw, E1000_TPR);
5303	stats->tpt += E1000_READ_REG(hw, E1000_TPT);
5304	stats->ptc64 += E1000_READ_REG(hw, E1000_PTC64);
5305	stats->ptc127 += E1000_READ_REG(hw, E1000_PTC127);
5306	stats->ptc255 += E1000_READ_REG(hw, E1000_PTC255);
5307	stats->ptc511 += E1000_READ_REG(hw, E1000_PTC511);
5308	stats->ptc1023 += E1000_READ_REG(hw, E1000_PTC1023);
5309	stats->ptc1522 += E1000_READ_REG(hw, E1000_PTC1522);
5310	stats->mptc += E1000_READ_REG(hw, E1000_MPTC);
5311	stats->bptc += E1000_READ_REG(hw, E1000_BPTC);
5312
5313	/* Interrupt Counts */
5314
5315	stats->iac += E1000_READ_REG(hw, E1000_IAC);
5316	stats->icrxptc += E1000_READ_REG(hw, E1000_ICRXPTC);
5317	stats->icrxatc += E1000_READ_REG(hw, E1000_ICRXATC);
5318	stats->ictxptc += E1000_READ_REG(hw, E1000_ICTXPTC);
5319	stats->ictxatc += E1000_READ_REG(hw, E1000_ICTXATC);
5320	stats->ictxqec += E1000_READ_REG(hw, E1000_ICTXQEC);
5321	stats->ictxqmtc += E1000_READ_REG(hw, E1000_ICTXQMTC);
5322	stats->icrxdmtc += E1000_READ_REG(hw, E1000_ICRXDMTC);
5323	stats->icrxoc += E1000_READ_REG(hw, E1000_ICRXOC);
5324
5325	/* Host to Card Statistics */
5326
5327	stats->cbtmpc += E1000_READ_REG(hw, E1000_CBTMPC);
5328	stats->htdpmc += E1000_READ_REG(hw, E1000_HTDPMC);
5329	stats->cbrdpc += E1000_READ_REG(hw, E1000_CBRDPC);
5330	stats->cbrmpc += E1000_READ_REG(hw, E1000_CBRMPC);
5331	stats->rpthc += E1000_READ_REG(hw, E1000_RPTHC);
5332	stats->hgptc += E1000_READ_REG(hw, E1000_HGPTC);
5333	stats->htcbdpc += E1000_READ_REG(hw, E1000_HTCBDPC);
5334	stats->hgorc += (E1000_READ_REG(hw, E1000_HGORCL) +
5335	    ((u64)E1000_READ_REG(hw, E1000_HGORCH) << 32));
5336	stats->hgotc += (E1000_READ_REG(hw, E1000_HGOTCL) +
5337	    ((u64)E1000_READ_REG(hw, E1000_HGOTCH) << 32));
5338	stats->lenerrs += E1000_READ_REG(hw, E1000_LENERRS);
5339	stats->scvpc += E1000_READ_REG(hw, E1000_SCVPC);
5340	stats->hrmpc += E1000_READ_REG(hw, E1000_HRMPC);
5341
5342	stats->algnerrc += E1000_READ_REG(hw, E1000_ALGNERRC);
5343	stats->rxerrc += E1000_READ_REG(hw, E1000_RXERRC);
5344	stats->tncrs += E1000_READ_REG(hw, E1000_TNCRS);
5345	stats->cexterr += E1000_READ_REG(hw, E1000_CEXTERR);
5346	stats->tsctc += E1000_READ_REG(hw, E1000_TSCTC);
5347	stats->tsctfc += E1000_READ_REG(hw, E1000_TSCTFC);
5348
5349	ifp = adapter->ifp;
5350	ifp->if_collisions = stats->colc;
5351
5352	/* Rx Errors */
5353	ifp->if_ierrors = adapter->dropped_pkts + stats->rxerrc +
5354	    stats->crcerrs + stats->algnerrc +
5355	    stats->ruc + stats->roc + stats->mpc + stats->cexterr;
5356
5357	/* Tx Errors */
5358	ifp->if_oerrors = stats->ecol +
5359	    stats->latecol + adapter->watchdog_events;
5360
5361	/* Driver specific counters */
5362	adapter->device_control = E1000_READ_REG(hw, E1000_CTRL);
5363	adapter->rx_control = E1000_READ_REG(hw, E1000_RCTL);
5364	adapter->int_mask = E1000_READ_REG(hw, E1000_IMS);
5365	adapter->eint_mask = E1000_READ_REG(hw, E1000_EIMS);
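	/*
	** PBA packs the TX packet buffer allocation in its upper 16 bits
	** and the RX allocation in its lower 16 bits.
	*/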
5366	adapter->packet_buf_alloc_tx =
5367	    ((E1000_READ_REG(hw, E1000_PBA) & 0xffff0000) >> 16);
5368	adapter->packet_buf_alloc_rx =
5369	    (E1000_READ_REG(hw, E1000_PBA) & 0xffff);
5370}
5371
5372
5373/**********************************************************************
5374 *
5375 *  Initialize the VF board statistics counters.
5376 *
5377 **********************************************************************/
5378static void
5379igb_vf_init_stats(struct adapter *adapter)
5380{
5381	struct e1000_hw *hw = &adapter->hw;
5382	struct e1000_vf_stats	*stats;
5383
5384	stats = (struct e1000_vf_stats	*)adapter->stats;
5385	if (stats == NULL)
5386		return;
5387	stats->last_gprc = E1000_READ_REG(hw, E1000_VFGPRC);
5388	stats->last_gorc = E1000_READ_REG(hw, E1000_VFGORC);
5389	stats->last_gptc = E1000_READ_REG(hw, E1000_VFGPTC);
5390	stats->last_gotc = E1000_READ_REG(hw, E1000_VFGOTC);
5391	stats->last_mprc = E1000_READ_REG(hw, E1000_VFMPRC);
5392}
5393
5394/**********************************************************************
5395 *
5396 *  Update the VF board statistics counters.
5397 *
5398 **********************************************************************/
5399static void
5400igb_update_vf_stats_counters(struct adapter *adapter)
5401{
5402	struct e1000_hw *hw = &adapter->hw;
5403	struct e1000_vf_stats	*stats;
5404
5405	if (adapter->link_speed == 0)
5406		return;
5407
5408	stats = (struct e1000_vf_stats	*)adapter->stats;
5409
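	/*
	** UPDATE_VF_REG (a driver macro) folds each 32-bit VF counter into
	** its 64-bit running total, using the saved "last" value to account
	** for register rollover between updates.
	*/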
5410	UPDATE_VF_REG(E1000_VFGPRC,
5411	    stats->last_gprc, stats->gprc);
5412	UPDATE_VF_REG(E1000_VFGORC,
5413	    stats->last_gorc, stats->gorc);
5414	UPDATE_VF_REG(E1000_VFGPTC,
5415	    stats->last_gptc, stats->gptc);
5416	UPDATE_VF_REG(E1000_VFGOTC,
5417	    stats->last_gotc, stats->gotc);
5418	UPDATE_VF_REG(E1000_VFMPRC,
5419	    stats->last_mprc, stats->mprc);
5420}
5421
5422/* Export a single 32-bit register via a read-only sysctl. */
5423static int
5424igb_sysctl_reg_handler(SYSCTL_HANDLER_ARGS)
5425{
5426	struct adapter *adapter;
5427	u_int val;
5428
5429	adapter = oidp->oid_arg1;
5430	val = E1000_READ_REG(&adapter->hw, oidp->oid_arg2);
5431	return (sysctl_handle_int(oidp, &val, 0, req));
5432}
5433
5434/*
5435**  Tuneable interrupt rate handler
5436*/
5437static int
5438igb_sysctl_interrupt_rate_handler(SYSCTL_HANDLER_ARGS)
5439{
5440	struct igb_queue	*que = ((struct igb_queue *)oidp->oid_arg1);
5441	int			error;
5442	u32			reg, usec, rate;
5443
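	/*
	** The interval lives in bits 2-14 of EITR; it is treated as
	** microseconds and converted to an approximate interrupts/second
	** rate.  Values written to this sysctl are accepted but are not
	** programmed back into EITR.
	*/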
5444	reg = E1000_READ_REG(&que->adapter->hw, E1000_EITR(que->msix));
5445	usec = ((reg & 0x7FFC) >> 2);
5446	if (usec > 0)
5447		rate = 1000000 / usec;
5448	else
5449		rate = 0;
5450	error = sysctl_handle_int(oidp, &rate, 0, req);
5451	if (error || !req->newptr)
5452		return (error);
5453	return (0);
5454}
5455
5456/*
5457 * Add sysctl variables, one per statistic, to the system.
5458 */
5459static void
5460igb_add_hw_stats(struct adapter *adapter)
5461{
5462	device_t dev = adapter->dev;
5463
5464	struct tx_ring *txr = adapter->tx_rings;
5465	struct rx_ring *rxr = adapter->rx_rings;
5466
5467	struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev);
5468	struct sysctl_oid *tree = device_get_sysctl_tree(dev);
5469	struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree);
5470	struct e1000_hw_stats *stats = adapter->stats;
5471
5472	struct sysctl_oid *stat_node, *queue_node, *int_node, *host_node;
5473	struct sysctl_oid_list *stat_list, *queue_list, *int_list, *host_list;
5474
5475#define QUEUE_NAME_LEN 32
5476	char namebuf[QUEUE_NAME_LEN];
5477
5478	/* Driver Statistics */
5479	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "link_irq",
5480			CTLFLAG_RD, &adapter->link_irq, 0,
5481			"Link MSIX IRQ Handled");
5482	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "dropped",
5483			CTLFLAG_RD, &adapter->dropped_pkts,
5484			"Driver dropped packets");
5485	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_dma_fail",
5486			CTLFLAG_RD, &adapter->no_tx_dma_setup,
5487			"Driver tx dma failure in xmit");
5488	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_overruns",
5489			CTLFLAG_RD, &adapter->rx_overruns,
5490			"RX overruns");
5491	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "watchdog_timeouts",
5492			CTLFLAG_RD, &adapter->watchdog_events,
5493			"Watchdog timeouts");
5494
5495	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "device_control",
5496			CTLFLAG_RD, &adapter->device_control,
5497			"Device Control Register");
5498	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_control",
5499			CTLFLAG_RD, &adapter->rx_control,
5500			"Receiver Control Register");
5501	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "interrupt_mask",
5502			CTLFLAG_RD, &adapter->int_mask,
5503			"Interrupt Mask");
5504	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "extended_int_mask",
5505			CTLFLAG_RD, &adapter->eint_mask,
5506			"Extended Interrupt Mask");
5507	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_buf_alloc",
5508			CTLFLAG_RD, &adapter->packet_buf_alloc_tx,
5509			"Transmit Buffer Packet Allocation");
5510	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_buf_alloc",
5511			CTLFLAG_RD, &adapter->packet_buf_alloc_rx,
5512			"Receive Buffer Packet Allocation");
5513	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_high_water",
5514			CTLFLAG_RD, &adapter->hw.fc.high_water, 0,
5515			"Flow Control High Watermark");
5516	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_low_water",
5517			CTLFLAG_RD, &adapter->hw.fc.low_water, 0,
5518			"Flow Control Low Watermark");
5519
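	/* Per-queue statistics: one "queueN" node for each RX/TX ring pair */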
5520	for (int i = 0; i < adapter->num_queues; i++, rxr++, txr++) {
5521		struct lro_ctrl *lro = &rxr->lro;
5522
5523		snprintf(namebuf, QUEUE_NAME_LEN, "queue%d", i);
5524		queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
5525					    CTLFLAG_RD, NULL, "Queue Name");
5526		queue_list = SYSCTL_CHILDREN(queue_node);
5527
5528		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "interrupt_rate",
5529				CTLFLAG_RD, &adapter->queues[i],
5530				sizeof(&adapter->queues[i]),
5531				igb_sysctl_interrupt_rate_handler,
5532				"IU", "Interrupt Rate");
5533
5534		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_head",
5535				CTLFLAG_RD, adapter, E1000_TDH(txr->me),
5536				igb_sysctl_reg_handler, "IU",
5537 				"Transmit Descriptor Head");
5538		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_tail",
5539				CTLFLAG_RD, adapter, E1000_TDT(txr->me),
5540				igb_sysctl_reg_handler, "IU",
5541 				"Transmit Descriptor Tail");
5542		SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "no_desc_avail",
5543				CTLFLAG_RD, &txr->no_desc_avail,
5544				"Queue No Descriptor Available");
5545		SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "tx_packets",
5546				CTLFLAG_RD, &txr->tx_packets,
5547				"Queue Packets Transmitted");
5548
5549		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_head",
5550				CTLFLAG_RD, adapter, E1000_RDH(rxr->me),
5551				igb_sysctl_reg_handler, "IU",
5552				"Receive Descriptor Head");
5553		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_tail",
5554				CTLFLAG_RD, adapter, E1000_RDT(rxr->me),
5555				igb_sysctl_reg_handler, "IU",
5556				"Receive Descriptor Tail");
5557		SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "rx_packets",
5558				CTLFLAG_RD, &rxr->rx_packets,
5559				"Queue Packets Received");
5560		SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "rx_bytes",
5561				CTLFLAG_RD, &rxr->rx_bytes,
5562				"Queue Bytes Received");
5563		SYSCTL_ADD_UINT(ctx, queue_list, OID_AUTO, "lro_queued",
5564				CTLFLAG_RD, &lro->lro_queued, 0,
5565				"LRO Queued");
5566		SYSCTL_ADD_UINT(ctx, queue_list, OID_AUTO, "lro_flushed",
5567				CTLFLAG_RD, &lro->lro_flushed, 0,
5568				"LRO Flushed");
5569	}
5570
5571	/* MAC stats get their own sub node */
5572
5573	stat_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "mac_stats",
5574				    CTLFLAG_RD, NULL, "MAC Statistics");
5575	stat_list = SYSCTL_CHILDREN(stat_node);
5576
5577	/*
5578	** VF adapter has a very limited set of stats
5579	** since it's not managing the metal, so to speak.
5580	*/
5581	if (adapter->vf_ifp) {
5582		SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd",
5583				CTLFLAG_RD, &stats->gprc,
5584				"Good Packets Received");
5585		SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
5586				CTLFLAG_RD, &stats->gptc,
5587				"Good Packets Transmitted");
5588		SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd",
5589				CTLFLAG_RD, &stats->gorc,
5590				"Good Octets Received");
5591		SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_txd",
5592				CTLFLAG_RD, &stats->gotc,
5593				"Good Octets Transmitted");
5594		SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd",
5595				CTLFLAG_RD, &stats->mprc,
5596				"Multicast Packets Received");
5597		return;
5598	}
5599
5600	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "excess_coll",
5601			CTLFLAG_RD, &stats->ecol,
5602			"Excessive collisions");
5603	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "single_coll",
5604			CTLFLAG_RD, &stats->scc,
5605			"Single collisions");
5606	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "multiple_coll",
5607			CTLFLAG_RD, &stats->mcc,
5608			"Multiple collisions");
5609	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "late_coll",
5610			CTLFLAG_RD, &stats->latecol,
5611			"Late collisions");
5612	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "collision_count",
5613			CTLFLAG_RD, &stats->colc,
5614			"Collision Count");
5615	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "symbol_errors",
5616			CTLFLAG_RD, &stats->symerrs,
5617			"Symbol Errors");
5618	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "sequence_errors",
5619			CTLFLAG_RD, &stats->sec,
5620			"Sequence Errors");
5621	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "defer_count",
5622			CTLFLAG_RD, &stats->dc,
5623			"Defer Count");
5624	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "missed_packets",
5625			CTLFLAG_RD, &stats->mpc,
5626			"Missed Packets");
5627	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_no_buff",
5628			CTLFLAG_RD, &stats->rnbc,
5629			"Receive No Buffers");
5630	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_undersize",
5631			CTLFLAG_RD, &stats->ruc,
5632			"Receive Undersize");
5633	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_fragmented",
5634			CTLFLAG_RD, &stats->rfc,
5635			"Fragmented Packets Received ");
5636	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_oversize",
5637			CTLFLAG_RD, &stats->roc,
5638			"Oversized Packets Received");
5639	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_jabber",
5640			CTLFLAG_RD, &stats->rjc,
5641			"Recevied Jabber");
5642	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_errs",
5643			CTLFLAG_RD, &stats->rxerrc,
5644			"Receive Errors");
5645	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "crc_errs",
5646			CTLFLAG_RD, &stats->crcerrs,
5647			"CRC errors");
5648	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "alignment_errs",
5649			CTLFLAG_RD, &stats->algnerrc,
5650			"Alignment Errors");
5651	/* On 82575 these are collision counts */
5652	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "coll_ext_errs",
5653			CTLFLAG_RD, &stats->cexterr,
5654			"Collision/Carrier extension errors");
5655	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xon_recvd",
5656			CTLFLAG_RD, &stats->xonrxc,
5657			"XON Received");
5658	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xon_txd",
5659			CTLFLAG_RD, &stats->xontxc,
5660			"XON Transmitted");
5661	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xoff_recvd",
5662			CTLFLAG_RD, &stats->xoffrxc,
5663			"XOFF Received");
5664	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xoff_txd",
5665			CTLFLAG_RD, &stats->xofftxc,
5666			"XOFF Transmitted");
5667	/* Packet Reception Stats */
5668	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "total_pkts_recvd",
5669			CTLFLAG_RD, &stats->tpr,
5670			"Total Packets Received ");
5671	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd",
5672			CTLFLAG_RD, &stats->gprc,
5673			"Good Packets Received");
5674	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_recvd",
5675			CTLFLAG_RD, &stats->bprc,
5676			"Broadcast Packets Received");
5677	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd",
5678			CTLFLAG_RD, &stats->mprc,
5679			"Multicast Packets Received");
5680	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_64",
5681			CTLFLAG_RD, &stats->prc64,
5682			"64 byte frames received ");
5683	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_65_127",
5684			CTLFLAG_RD, &stats->prc127,
5685			"65-127 byte frames received");
5686	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_128_255",
5687			CTLFLAG_RD, &stats->prc255,
5688			"128-255 byte frames received");
5689	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_256_511",
5690			CTLFLAG_RD, &stats->prc511,
5691			"256-511 byte frames received");
5692	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_512_1023",
5693			CTLFLAG_RD, &stats->prc1023,
5694			"512-1023 byte frames received");
5695	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_1024_1522",
5696			CTLFLAG_RD, &stats->prc1522,
5697			"1023-1522 byte frames received");
5698	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd",
5699			CTLFLAG_RD, &stats->gorc,
5700			"Good Octets Received");
5701
5702	/* Packet Transmission Stats */
5703	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_txd",
5704			CTLFLAG_RD, &stats->gotc,
5705			"Good Octets Transmitted");
5706	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "total_pkts_txd",
5707			CTLFLAG_RD, &stats->tpt,
5708			"Total Packets Transmitted");
5709	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
5710			CTLFLAG_RD, &stats->gptc,
5711			"Good Packets Transmitted");
5712	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_txd",
5713			CTLFLAG_RD, &stats->bptc,
5714			"Broadcast Packets Transmitted");
5715	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_txd",
5716			CTLFLAG_RD, &stats->mptc,
5717			"Multicast Packets Transmitted");
5718	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_64",
5719			CTLFLAG_RD, &stats->ptc64,
5720			"64 byte frames transmitted ");
5721	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_65_127",
5722			CTLFLAG_RD, &stats->ptc127,
5723			"65-127 byte frames transmitted");
5724	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_128_255",
5725			CTLFLAG_RD, &stats->ptc255,
5726			"128-255 byte frames transmitted");
5727	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_256_511",
5728			CTLFLAG_RD, &stats->ptc511,
5729			"256-511 byte frames transmitted");
5730	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_512_1023",
5731			CTLFLAG_RD, &stats->ptc1023,
5732			"512-1023 byte frames transmitted");
5733	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_1024_1522",
5734			CTLFLAG_RD, &stats->ptc1522,
5735			"1024-1522 byte frames transmitted");
5736	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tso_txd",
5737			CTLFLAG_RD, &stats->tsctc,
5738			"TSO Contexts Transmitted");
5739	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tso_ctx_fail",
5740			CTLFLAG_RD, &stats->tsctfc,
5741			"TSO Contexts Failed");
5742
5743
5744	/* Interrupt Stats */
5745
5746	int_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "interrupts",
5747				    CTLFLAG_RD, NULL, "Interrupt Statistics");
5748	int_list = SYSCTL_CHILDREN(int_node);
5749
5750	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "asserts",
5751			CTLFLAG_RD, &stats->iac,
5752			"Interrupt Assertion Count");
5753
5754	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_pkt_timer",
5755			CTLFLAG_RD, &stats->icrxptc,
5756			"Interrupt Cause Rx Pkt Timer Expire Count");
5757
5758	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_abs_timer",
5759			CTLFLAG_RD, &stats->icrxatc,
5760			"Interrupt Cause Rx Abs Timer Expire Count");
5761
5762	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_pkt_timer",
5763			CTLFLAG_RD, &stats->ictxptc,
5764			"Interrupt Cause Tx Pkt Timer Expire Count");
5765
5766	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_abs_timer",
5767			CTLFLAG_RD, &stats->ictxatc,
5768			"Interrupt Cause Tx Abs Timer Expire Count");
5769
5770	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_queue_empty",
5771			CTLFLAG_RD, &stats->ictxqec,
5772			"Interrupt Cause Tx Queue Empty Count");
5773
5774	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_queue_min_thresh",
5775			CTLFLAG_RD, &stats->ictxqmtc,
5776			"Interrupt Cause Tx Queue Min Thresh Count");
5777
5778	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_desc_min_thresh",
5779			CTLFLAG_RD, &stats->icrxdmtc,
5780			"Interrupt Cause Rx Desc Min Thresh Count");
5781
5782	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_overrun",
5783			CTLFLAG_RD, &stats->icrxoc,
5784			"Interrupt Cause Receiver Overrun Count");
5785
5786	/* Host to Card Stats */
5787
5788	host_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "host",
5789				    CTLFLAG_RD, NULL,
5790				    "Host to Card Statistics");
5791
5792	host_list = SYSCTL_CHILDREN(host_node);
5793
5794	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_tx_pkt",
5795			CTLFLAG_RD, &stats->cbtmpc,
5796			"Circuit Breaker Tx Packet Count");
5797
5798	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "host_tx_pkt_discard",
5799			CTLFLAG_RD, &stats->htdpmc,
5800			"Host Transmit Discarded Packets");
5801
5802	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "rx_pkt",
5803			CTLFLAG_RD, &stats->rpthc,
5804			"Rx Packets To Host");
5805
5806	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_rx_pkts",
5807			CTLFLAG_RD, &stats->cbrmpc,
5808			"Circuit Breaker Rx Packet Count");
5809
5810	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_rx_pkt_drop",
5811			CTLFLAG_RD, &stats->cbrdpc,
5812			"Circuit Breaker Rx Dropped Count");
5813
5814	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "tx_good_pkt",
5815			CTLFLAG_RD, &stats->hgptc,
5816			"Host Good Packets Tx Count");
5817
5818	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_tx_pkt_drop",
5819			CTLFLAG_RD, &stats->htcbdpc,
5820			"Host Tx Circuit Breaker Dropped Count");
5821
5822	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "rx_good_bytes",
5823			CTLFLAG_RD, &stats->hgorc,
5824			"Host Good Octets Received Count");
5825
5826	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "tx_good_bytes",
5827			CTLFLAG_RD, &stats->hgotc,
5828			"Host Good Octets Transmit Count");
5829
5830	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "length_errors",
5831			CTLFLAG_RD, &stats->lenerrs,
5832			"Length Errors");
5833
5834	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "serdes_violation_pkt",
5835			CTLFLAG_RD, &stats->scvpc,
5836			"SerDes/SGMII Code Violation Pkt Count");
5837
5838	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "header_redir_missed",
5839			CTLFLAG_RD, &stats->hrmpc,
5840			"Header Redirection Missed Packet Count");
5841}
5842
5843
5844/**********************************************************************
5845 *
5846 *  This routine provides a way to dump out the adapter EEPROM,
5847 *  often a useful debug/service tool. Only the first 32 words are
5848 *  dumped; the contents that matter lie within that range.
5849 *
5850 **********************************************************************/
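/*
 * Example (the sysctl node name is registered elsewhere in this driver;
 * "nvm" is assumed here purely for illustration):
 *	sysctl dev.igb.0.nvm=1
 */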
5851static int
5852igb_sysctl_nvm_info(SYSCTL_HANDLER_ARGS)
5853{
5854	struct adapter *adapter;
5855	int error;
5856	int result;
5857
5858	result = -1;
5859	error = sysctl_handle_int(oidp, &result, 0, req);
5860
5861	if (error || !req->newptr)
5862		return (error);
5863
5864	/*
5865	 * This value will cause a hex dump of the
5866	 * first 32 16-bit words of the EEPROM to
5867	 * the screen.
5868	 */
5869	if (result == 1) {
5870		adapter = (struct adapter *)arg1;
5871		igb_print_nvm_info(adapter);
5872	}
5873
5874	return (error);
5875}
5876
5877static void
5878igb_print_nvm_info(struct adapter *adapter)
5879{
5880	u16	eeprom_data;
5881	int	i, j, row = 0;
5882
5883	/* It's a bit crude, but it gets the job done */
5884	printf("\nInterface EEPROM Dump:\n");
5885	printf("Offset\n0x0000  ");
5886	for (i = 0, j = 0; i < 32; i++, j++) {
5887		if (j == 8) { /* Start a new row with its offset label */
5888			j = 0; ++row;
5889			printf("\n0x00%x0  ", row);
5890		}
5891		e1000_read_nvm(&adapter->hw, i, 1, &eeprom_data);
5892		printf("%04x ", eeprom_data);
5893	}
5894	printf("\n");
5895}
5896
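/*
** Create a simple read/write integer sysctl backed by the given limit
** variable, initializing both the variable and the sysctl to 'value'.
*/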
5897static void
5898igb_set_sysctl_value(struct adapter *adapter, const char *name,
5899	const char *description, int *limit, int value)
5900{
5901	*limit = value;
5902	SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
5903	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
5904	    OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW, limit, value, description);
5905}
5906
5907/*
5908** Set flow control using sysctl:
5909** Flow control values:
5910** 	0 - off
5911**	1 - rx pause
5912**	2 - tx pause
5913**	3 - full
5914*/
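/*
** Example (the sysctl node name is registered elsewhere in this driver;
** "fc" is assumed here purely for illustration):
**	sysctl dev.igb.0.fc=3	(request full flow control)
*/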
5915static int
5916igb_set_flowcntl(SYSCTL_HANDLER_ARGS)
5917{
5918	int		error;
5919	static int	input = 3; /* default is full */
5920	struct adapter	*adapter = (struct adapter *) arg1;
5921
5922	error = sysctl_handle_int(oidp, &input, 0, req);
5923
5924	if ((error) || (req->newptr == NULL))
5925		return (error);
5926
5927	switch (input) {
5928		case e1000_fc_rx_pause:
5929		case e1000_fc_tx_pause:
5930		case e1000_fc_full:
5931		case e1000_fc_none:
5932			adapter->hw.fc.requested_mode = input;
5933			adapter->fc = input;
5934			break;
5935		default:
5936			/* Do nothing */
5937			return (error);
5938	}
5939
5940	adapter->hw.fc.current_mode = adapter->hw.fc.requested_mode;
5941	e1000_force_mac_fc(&adapter->hw);
5942	return (error);
5943}
5944
5945/*
5946** Manage DMA Coalesce:
5947** Control values:
5948** 	0/1 - off/on
5949**	Legal timer values are:
5950**	250, 500, and 1000-10000 in steps of 1000
5951*/
5952static int
5953igb_sysctl_dmac(SYSCTL_HANDLER_ARGS)
5954{
5955	struct adapter *adapter = (struct adapter *) arg1;
5956	int		error;
5957
5958	error = sysctl_handle_int(oidp, &adapter->dmac, 0, req);
5959
5960	if ((error) || (req->newptr == NULL))
5961		return (error);
5962
5963	switch (adapter->dmac) {
5964		case 0:
5965			/* Disabling */
5966			break;
5967		case 1: /* Just enable and use default */
5968			adapter->dmac = 1000;
5969			break;
5970		case 250:
5971		case 500:
5972		case 1000:
5973		case 2000:
5974		case 3000:
5975		case 4000:
5976		case 5000:
5977		case 6000:
5978		case 7000:
5979		case 8000:
5980		case 9000:
5981		case 10000:
5982			/* Legal values - allow */
5983			break;
5984		default:
5985			/* Illegal value, disable DMA coalescing */
5986			adapter->dmac = 0;
5987			return (error);
5988	}
5989	/* Reinit the interface */
5990	igb_init(adapter);
5991	return (error);
5992}
5993
5994/*
5995** Manage Energy Efficient Ethernet:
5996** Control values:
5997**     0/1 - enabled/disabled
5998*/
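/*
** Example (the sysctl node name is registered elsewhere in this driver;
** "eee_disabled" is assumed here purely for illustration):
**	sysctl dev.igb.0.eee_disabled=1
*/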
5999static int
6000igb_sysctl_eee(SYSCTL_HANDLER_ARGS)
6001{
6002	struct adapter	*adapter = (struct adapter *) arg1;
6003	int		error, value;
6004
6005	value = adapter->hw.dev_spec._82575.eee_disable;
6006	error = sysctl_handle_int(oidp, &value, 0, req);
6007	if (error || req->newptr == NULL)
6008		return (error);
6009	IGB_CORE_LOCK(adapter);
6010	adapter->hw.dev_spec._82575.eee_disable = (value != 0);
6011	igb_init_locked(adapter);
6012	IGB_CORE_UNLOCK(adapter);
6013	return (0);
6014}
6015