if_igb.c revision 239109
1/******************************************************************************
2
3  Copyright (c) 2001-2012, Intel Corporation
4  All rights reserved.
5
6  Redistribution and use in source and binary forms, with or without
7  modification, are permitted provided that the following conditions are met:
8
9   1. Redistributions of source code must retain the above copyright notice,
10      this list of conditions and the following disclaimer.
11
12   2. Redistributions in binary form must reproduce the above copyright
13      notice, this list of conditions and the following disclaimer in the
14      documentation and/or other materials provided with the distribution.
15
16   3. Neither the name of the Intel Corporation nor the names of its
17      contributors may be used to endorse or promote products derived from
18      this software without specific prior written permission.
19
20  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
24  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30  POSSIBILITY OF SUCH DAMAGE.
31
32******************************************************************************/
33/*$FreeBSD: head/sys/dev/e1000/if_igb.c 239109 2012-08-06 22:43:49Z jfv $*/
34
35
36#ifdef HAVE_KERNEL_OPTION_HEADERS
37#include "opt_device_polling.h"
38#include "opt_inet.h"
39#include "opt_inet6.h"
40#include "opt_altq.h"
41#endif
42
43#include <sys/param.h>
44#include <sys/systm.h>
45#if __FreeBSD_version >= 800000
46#include <sys/buf_ring.h>
47#endif
48#include <sys/bus.h>
49#include <sys/endian.h>
50#include <sys/kernel.h>
51#include <sys/kthread.h>
52#include <sys/malloc.h>
53#include <sys/mbuf.h>
54#include <sys/module.h>
55#include <sys/rman.h>
56#include <sys/socket.h>
57#include <sys/sockio.h>
58#include <sys/sysctl.h>
59#include <sys/taskqueue.h>
60#include <sys/eventhandler.h>
61#include <sys/pcpu.h>
62#include <sys/smp.h>
63#include <machine/smp.h>
64#include <machine/bus.h>
65#include <machine/resource.h>
66
67#include <net/bpf.h>
68#include <net/ethernet.h>
69#include <net/if.h>
70#include <net/if_arp.h>
71#include <net/if_dl.h>
72#include <net/if_media.h>
73
74#include <net/if_types.h>
75#include <net/if_vlan_var.h>
76
77#include <netinet/in_systm.h>
78#include <netinet/in.h>
79#include <netinet/if_ether.h>
80#include <netinet/ip.h>
81#include <netinet/ip6.h>
82#include <netinet/tcp.h>
83#include <netinet/tcp_lro.h>
84#include <netinet/udp.h>
85
86#include <machine/in_cksum.h>
87#include <dev/led/led.h>
88#include <dev/pci/pcivar.h>
89#include <dev/pci/pcireg.h>
90
91#include "e1000_api.h"
92#include "e1000_82575.h"
93#include "if_igb.h"
94
95/*********************************************************************
96 *  Set this to one to display debug statistics
97 *********************************************************************/
98int	igb_display_debug_stats = 0;
99
100/*********************************************************************
101 *  Driver version:
102 *********************************************************************/
103char igb_driver_version[] = "version - 2.3.5";
104
105
106/*********************************************************************
107 *  PCI Device ID Table
108 *
109 *  Used by probe to select which devices to attach to
110 *  Last field stores an index into igb_strings
111 *  Last entry must be all 0s
112 *
113 *  { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
114 *********************************************************************/
115
116static igb_vendor_info_t igb_vendor_info_array[] =
117{
118	{ 0x8086, E1000_DEV_ID_82575EB_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
119	{ 0x8086, E1000_DEV_ID_82575EB_FIBER_SERDES,
120						PCI_ANY_ID, PCI_ANY_ID, 0},
121	{ 0x8086, E1000_DEV_ID_82575GB_QUAD_COPPER,
122						PCI_ANY_ID, PCI_ANY_ID, 0},
123	{ 0x8086, E1000_DEV_ID_82576,		PCI_ANY_ID, PCI_ANY_ID, 0},
124	{ 0x8086, E1000_DEV_ID_82576_NS,	PCI_ANY_ID, PCI_ANY_ID, 0},
125	{ 0x8086, E1000_DEV_ID_82576_NS_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
126	{ 0x8086, E1000_DEV_ID_82576_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
127	{ 0x8086, E1000_DEV_ID_82576_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
128	{ 0x8086, E1000_DEV_ID_82576_SERDES_QUAD,
129						PCI_ANY_ID, PCI_ANY_ID, 0},
130	{ 0x8086, E1000_DEV_ID_82576_QUAD_COPPER,
131						PCI_ANY_ID, PCI_ANY_ID, 0},
132	{ 0x8086, E1000_DEV_ID_82576_QUAD_COPPER_ET2,
133						PCI_ANY_ID, PCI_ANY_ID, 0},
134	{ 0x8086, E1000_DEV_ID_82576_VF,	PCI_ANY_ID, PCI_ANY_ID, 0},
135	{ 0x8086, E1000_DEV_ID_82580_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
136	{ 0x8086, E1000_DEV_ID_82580_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
137	{ 0x8086, E1000_DEV_ID_82580_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
138	{ 0x8086, E1000_DEV_ID_82580_SGMII,	PCI_ANY_ID, PCI_ANY_ID, 0},
139	{ 0x8086, E1000_DEV_ID_82580_COPPER_DUAL,
140						PCI_ANY_ID, PCI_ANY_ID, 0},
141	{ 0x8086, E1000_DEV_ID_82580_QUAD_FIBER,
142						PCI_ANY_ID, PCI_ANY_ID, 0},
143	{ 0x8086, E1000_DEV_ID_DH89XXCC_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
144	{ 0x8086, E1000_DEV_ID_DH89XXCC_SGMII,	PCI_ANY_ID, PCI_ANY_ID, 0},
145	{ 0x8086, E1000_DEV_ID_DH89XXCC_SFP,	PCI_ANY_ID, PCI_ANY_ID, 0},
146	{ 0x8086, E1000_DEV_ID_DH89XXCC_BACKPLANE,
147						PCI_ANY_ID, PCI_ANY_ID, 0},
148	{ 0x8086, E1000_DEV_ID_I350_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
149	{ 0x8086, E1000_DEV_ID_I350_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
150	{ 0x8086, E1000_DEV_ID_I350_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
151	{ 0x8086, E1000_DEV_ID_I350_SGMII,	PCI_ANY_ID, PCI_ANY_ID, 0},
152	{ 0x8086, E1000_DEV_ID_I350_VF,		PCI_ANY_ID, PCI_ANY_ID, 0},
153	{ 0x8086, E1000_DEV_ID_I210_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
154	{ 0x8086, E1000_DEV_ID_I210_COPPER_IT,	PCI_ANY_ID, PCI_ANY_ID, 0},
155	{ 0x8086, E1000_DEV_ID_I210_COPPER_OEM1,
156						PCI_ANY_ID, PCI_ANY_ID, 0},
157	{ 0x8086, E1000_DEV_ID_I210_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
158	{ 0x8086, E1000_DEV_ID_I210_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
159	{ 0x8086, E1000_DEV_ID_I210_SGMII,	PCI_ANY_ID, PCI_ANY_ID, 0},
160	{ 0x8086, E1000_DEV_ID_I211_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
161	/* required last entry */
162	{ 0, 0, 0, 0, 0}
163};
164
165/*********************************************************************
166 *  Table of branding strings for all supported NICs.
167 *********************************************************************/
168
169static char *igb_strings[] = {
170	"Intel(R) PRO/1000 Network Connection"
171};
172
173/*********************************************************************
174 *  Function prototypes
175 *********************************************************************/
176static int	igb_probe(device_t);
177static int	igb_attach(device_t);
178static int	igb_detach(device_t);
179static int	igb_shutdown(device_t);
180static int	igb_suspend(device_t);
181static int	igb_resume(device_t);
182#if __FreeBSD_version >= 800000
183static int	igb_mq_start(struct ifnet *, struct mbuf *);
184static int	igb_mq_start_locked(struct ifnet *,
185		    struct tx_ring *, struct mbuf *);
186static void	igb_qflush(struct ifnet *);
187static void	igb_deferred_mq_start(void *, int);
188#else
189static void	igb_start(struct ifnet *);
190static void	igb_start_locked(struct tx_ring *, struct ifnet *ifp);
191#endif
192static int	igb_ioctl(struct ifnet *, u_long, caddr_t);
193static void	igb_init(void *);
194static void	igb_init_locked(struct adapter *);
195static void	igb_stop(void *);
196static void	igb_media_status(struct ifnet *, struct ifmediareq *);
197static int	igb_media_change(struct ifnet *);
198static void	igb_identify_hardware(struct adapter *);
199static int	igb_allocate_pci_resources(struct adapter *);
200static int	igb_allocate_msix(struct adapter *);
201static int	igb_allocate_legacy(struct adapter *);
202static int	igb_setup_msix(struct adapter *);
203static void	igb_free_pci_resources(struct adapter *);
204static void	igb_local_timer(void *);
205static void	igb_reset(struct adapter *);
206static int	igb_setup_interface(device_t, struct adapter *);
207static int	igb_allocate_queues(struct adapter *);
208static void	igb_configure_queues(struct adapter *);
209
210static int	igb_allocate_transmit_buffers(struct tx_ring *);
211static void	igb_setup_transmit_structures(struct adapter *);
212static void	igb_setup_transmit_ring(struct tx_ring *);
213static void	igb_initialize_transmit_units(struct adapter *);
214static void	igb_free_transmit_structures(struct adapter *);
215static void	igb_free_transmit_buffers(struct tx_ring *);
216
217static int	igb_allocate_receive_buffers(struct rx_ring *);
218static int	igb_setup_receive_structures(struct adapter *);
219static int	igb_setup_receive_ring(struct rx_ring *);
220static void	igb_initialize_receive_units(struct adapter *);
221static void	igb_free_receive_structures(struct adapter *);
222static void	igb_free_receive_buffers(struct rx_ring *);
223static void	igb_free_receive_ring(struct rx_ring *);
224
225static void	igb_enable_intr(struct adapter *);
226static void	igb_disable_intr(struct adapter *);
227static void	igb_update_stats_counters(struct adapter *);
228static bool	igb_txeof(struct tx_ring *);
229
230static __inline	void igb_rx_discard(struct rx_ring *, int);
231static __inline void igb_rx_input(struct rx_ring *,
232		    struct ifnet *, struct mbuf *, u32);
233
234static bool	igb_rxeof(struct igb_queue *, int, int *);
235static void	igb_rx_checksum(u32, struct mbuf *, u32);
236static bool	igb_tx_ctx_setup(struct tx_ring *, struct mbuf *);
237static bool	igb_tso_setup(struct tx_ring *, struct mbuf *, int,
238		    struct ip *, struct tcphdr *);
239static void	igb_set_promisc(struct adapter *);
240static void	igb_disable_promisc(struct adapter *);
241static void	igb_set_multi(struct adapter *);
242static void	igb_update_link_status(struct adapter *);
243static void	igb_refresh_mbufs(struct rx_ring *, int);
244
245static void	igb_register_vlan(void *, struct ifnet *, u16);
246static void	igb_unregister_vlan(void *, struct ifnet *, u16);
247static void	igb_setup_vlan_hw_support(struct adapter *);
248
249static int	igb_xmit(struct tx_ring *, struct mbuf **);
250static int	igb_dma_malloc(struct adapter *, bus_size_t,
251		    struct igb_dma_alloc *, int);
252static void	igb_dma_free(struct adapter *, struct igb_dma_alloc *);
253static int	igb_sysctl_nvm_info(SYSCTL_HANDLER_ARGS);
254static void	igb_print_nvm_info(struct adapter *);
255static int 	igb_is_valid_ether_addr(u8 *);
256static void     igb_add_hw_stats(struct adapter *);
257
258static void	igb_vf_init_stats(struct adapter *);
259static void	igb_update_vf_stats_counters(struct adapter *);
260
261/* Management and WOL Support */
262static void	igb_init_manageability(struct adapter *);
263static void	igb_release_manageability(struct adapter *);
264static void     igb_get_hw_control(struct adapter *);
265static void     igb_release_hw_control(struct adapter *);
266static void     igb_enable_wakeup(device_t);
267static void     igb_led_func(void *, int);
268
269static int	igb_irq_fast(void *);
270static void	igb_msix_que(void *);
271static void	igb_msix_link(void *);
272static void	igb_handle_que(void *context, int pending);
273static void	igb_handle_link(void *context, int pending);
274static void	igb_handle_link_locked(struct adapter *);
275
276static void	igb_set_sysctl_value(struct adapter *, const char *,
277		    const char *, int *, int);
278static int	igb_set_flowcntl(SYSCTL_HANDLER_ARGS);
279static int	igb_sysctl_dmac(SYSCTL_HANDLER_ARGS);
280static int	igb_sysctl_eee(SYSCTL_HANDLER_ARGS);
281
282#ifdef DEVICE_POLLING
283static poll_handler_t igb_poll;
284#endif /* DEVICE_POLLING */
285
286/*********************************************************************
287 *  FreeBSD Device Interface Entry Points
288 *********************************************************************/
289
290static device_method_t igb_methods[] = {
291	/* Device interface */
292	DEVMETHOD(device_probe, igb_probe),
293	DEVMETHOD(device_attach, igb_attach),
294	DEVMETHOD(device_detach, igb_detach),
295	DEVMETHOD(device_shutdown, igb_shutdown),
296	DEVMETHOD(device_suspend, igb_suspend),
297	DEVMETHOD(device_resume, igb_resume),
298	{0, 0}
299};
300
301static driver_t igb_driver = {
302	"igb", igb_methods, sizeof(struct adapter),
303};
304
305static devclass_t igb_devclass;
306DRIVER_MODULE(igb, pci, igb_driver, igb_devclass, 0, 0);
307MODULE_DEPEND(igb, pci, 1, 1, 1);
308MODULE_DEPEND(igb, ether, 1, 1, 1);
309
310/*********************************************************************
311 *  Tunable default values.
312 *********************************************************************/
313
314static SYSCTL_NODE(_hw, OID_AUTO, igb, CTLFLAG_RD, 0, "IGB driver parameters");
315
316/* Descriptor defaults */
317static int igb_rxd = IGB_DEFAULT_RXD;
318static int igb_txd = IGB_DEFAULT_TXD;
319TUNABLE_INT("hw.igb.rxd", &igb_rxd);
320TUNABLE_INT("hw.igb.txd", &igb_txd);
321SYSCTL_INT(_hw_igb, OID_AUTO, rxd, CTLFLAG_RDTUN, &igb_rxd, 0,
322    "Number of receive descriptors per queue");
323SYSCTL_INT(_hw_igb, OID_AUTO, txd, CTLFLAG_RDTUN, &igb_txd, 0,
324    "Number of transmit descriptors per queue");
325
326/*
327** AIM: Adaptive Interrupt Moderation
328** which means that the interrupt rate
329** is varied over time based on the
330** traffic for that interrupt vector
331*/
332static int igb_enable_aim = TRUE;
333TUNABLE_INT("hw.igb.enable_aim", &igb_enable_aim);
334SYSCTL_INT(_hw_igb, OID_AUTO, enable_aim, CTLFLAG_RW, &igb_enable_aim, 0,
335    "Enable adaptive interrupt moderation");
336
337/*
338 * MSIX should be the default for best performance,
339 * but this allows it to be forced off for testing.
340 */
341static int igb_enable_msix = 1;
342TUNABLE_INT("hw.igb.enable_msix", &igb_enable_msix);
343SYSCTL_INT(_hw_igb, OID_AUTO, enable_msix, CTLFLAG_RDTUN, &igb_enable_msix, 0,
344    "Enable MSI-X interrupts");
345
346/*
347** Tunable interrupt rate
348*/
349static int igb_max_interrupt_rate = 8000;
350TUNABLE_INT("hw.igb.max_interrupt_rate", &igb_max_interrupt_rate);
351SYSCTL_INT(_hw_igb, OID_AUTO, max_interrupt_rate, CTLFLAG_RDTUN,
352    &igb_max_interrupt_rate, 0, "Maximum interrupts per second");
353
354/*
355** Header split causes the packet header to
356** be DMA'd to a separate mbuf from the payload.
357** This can have memory alignment benefits, and
358** another plus is that small packets often fit
359** into the header and thus use no cluster. It's
360** a very workload-dependent feature.
361*/
362static int igb_header_split = FALSE;
363TUNABLE_INT("hw.igb.hdr_split", &igb_header_split);
364SYSCTL_INT(_hw_igb, OID_AUTO, header_split, CTLFLAG_RDTUN, &igb_header_split, 0,
365    "Enable receive mbuf header split");
366
367/*
368** This will autoconfigure based on
369** the number of CPUs if left at 0.
370*/
371static int igb_num_queues = 0;
372TUNABLE_INT("hw.igb.num_queues", &igb_num_queues);
373SYSCTL_INT(_hw_igb, OID_AUTO, num_queues, CTLFLAG_RDTUN, &igb_num_queues, 0,
374    "Number of queues to configure, 0 indicates autoconfigure");
375
376/*
377** Global variable to store last used CPU when binding queues
378** to CPUs in igb_allocate_msix.  Starts at CPU_FIRST and increments when a
379** queue is bound to a cpu.
380*/
381static int igb_last_bind_cpu = -1;
382
383/* How many packets rxeof tries to clean at a time */
384static int igb_rx_process_limit = 100;
385TUNABLE_INT("hw.igb.rx_process_limit", &igb_rx_process_limit);
386SYSCTL_INT(_hw_igb, OID_AUTO, rx_process_limit, CTLFLAG_RDTUN,
387    &igb_rx_process_limit, 0,
388    "Maximum number of received packets to process at a time, -1 means unlimited");
389
390#ifdef DEV_NETMAP	/* see ixgbe.c for details */
391#include <dev/netmap/if_igb_netmap.h>
392#endif /* DEV_NETMAP */
393/*********************************************************************
394 *  Device identification routine
395 *
396 *  igb_probe determines if the driver should be loaded on the
397 *  adapter, based on the PCI vendor/device ID of the adapter.
398 *
399 *  return BUS_PROBE_DEFAULT on success, positive on failure
400 *********************************************************************/
401
402static int
403igb_probe(device_t dev)
404{
405	char		adapter_name[60];
406	uint16_t	pci_vendor_id = 0;
407	uint16_t	pci_device_id = 0;
408	uint16_t	pci_subvendor_id = 0;
409	uint16_t	pci_subdevice_id = 0;
410	igb_vendor_info_t *ent;
411
412	INIT_DEBUGOUT("igb_probe: begin");
413
414	pci_vendor_id = pci_get_vendor(dev);
415	if (pci_vendor_id != IGB_VENDOR_ID)
416		return (ENXIO);
417
418	pci_device_id = pci_get_device(dev);
419	pci_subvendor_id = pci_get_subvendor(dev);
420	pci_subdevice_id = pci_get_subdevice(dev);
421
422	ent = igb_vendor_info_array;
423	while (ent->vendor_id != 0) {
424		if ((pci_vendor_id == ent->vendor_id) &&
425		    (pci_device_id == ent->device_id) &&
426
427		    ((pci_subvendor_id == ent->subvendor_id) ||
428		    (ent->subvendor_id == PCI_ANY_ID)) &&
429
430		    ((pci_subdevice_id == ent->subdevice_id) ||
431		    (ent->subdevice_id == PCI_ANY_ID))) {
432			sprintf(adapter_name, "%s %s",
433				igb_strings[ent->index],
434				igb_driver_version);
435			device_set_desc_copy(dev, adapter_name);
436			return (BUS_PROBE_DEFAULT);
437		}
438		ent++;
439	}
440
441	return (ENXIO);
442}
443
444/*********************************************************************
445 *  Device initialization routine
446 *
447 *  The attach entry point is called when the driver is being loaded.
448 *  This routine identifies the type of hardware, allocates all resources
449 *  and initializes the hardware.
450 *
451 *  return 0 on success, positive on failure
452 *********************************************************************/
453
454static int
455igb_attach(device_t dev)
456{
457	struct adapter	*adapter;
458	int		error = 0;
459	u16		eeprom_data;
460
461	INIT_DEBUGOUT("igb_attach: begin");
462
463	if (resource_disabled("igb", device_get_unit(dev))) {
464		device_printf(dev, "Disabled by device hint\n");
465		return (ENXIO);
466	}
467
468	adapter = device_get_softc(dev);
469	adapter->dev = adapter->osdep.dev = dev;
470	IGB_CORE_LOCK_INIT(adapter, device_get_nameunit(dev));
471
472	/* SYSCTL stuff */
473	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
474	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
475	    OID_AUTO, "nvm", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
476	    igb_sysctl_nvm_info, "I", "NVM Information");
477
478	igb_set_sysctl_value(adapter, "enable_aim",
479	    "Interrupt Moderation", &adapter->enable_aim,
480	    igb_enable_aim);
481
482	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
483	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
484	    OID_AUTO, "fc", CTLTYPE_INT|CTLFLAG_RW,
485	    adapter, 0, igb_set_flowcntl, "I", "Flow Control");
486
487	callout_init_mtx(&adapter->timer, &adapter->core_mtx, 0);
488
489	/* Determine hardware and mac info */
490	igb_identify_hardware(adapter);
491
492	/* Setup PCI resources */
493	if (igb_allocate_pci_resources(adapter)) {
494		device_printf(dev, "Allocation of PCI resources failed\n");
495		error = ENXIO;
496		goto err_pci;
497	}
498
499	/* Do Shared Code initialization */
500	if (e1000_setup_init_funcs(&adapter->hw, TRUE)) {
501		device_printf(dev, "Setup of Shared code failed\n");
502		error = ENXIO;
503		goto err_pci;
504	}
505
506	e1000_get_bus_info(&adapter->hw);
507
508	/* Sysctl for limiting the amount of work done in the taskqueue */
509	igb_set_sysctl_value(adapter, "rx_processing_limit",
510	    "max number of rx packets to process",
511	    &adapter->rx_process_limit, igb_rx_process_limit);
512
513	/*
514	 * Validate the number of transmit and receive descriptors. They
515	 * must not exceed the hardware maximum, and must be a multiple
516	 * of IGB_DBA_ALIGN.
517	 */
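	/*
	 * For example (assuming the 16-byte legacy descriptor size and the
	 * 128-byte IGB_DBA_ALIGN from the headers): 1024 descriptors * 16
	 * bytes = 16384 bytes, an exact multiple of 128 and within the
	 * IGB_MIN/MAX bounds, so a value like 1024 is accepted as-is.
	 */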
518	if (((igb_txd * sizeof(struct e1000_tx_desc)) % IGB_DBA_ALIGN) != 0 ||
519	    (igb_txd > IGB_MAX_TXD) || (igb_txd < IGB_MIN_TXD)) {
520		device_printf(dev, "Using %d TX descriptors instead of %d!\n",
521		    IGB_DEFAULT_TXD, igb_txd);
522		adapter->num_tx_desc = IGB_DEFAULT_TXD;
523	} else
524		adapter->num_tx_desc = igb_txd;
525	if (((igb_rxd * sizeof(struct e1000_rx_desc)) % IGB_DBA_ALIGN) != 0 ||
526	    (igb_rxd > IGB_MAX_RXD) || (igb_rxd < IGB_MIN_RXD)) {
527		device_printf(dev, "Using %d RX descriptors instead of %d!\n",
528		    IGB_DEFAULT_RXD, igb_rxd);
529		adapter->num_rx_desc = IGB_DEFAULT_RXD;
530	} else
531		adapter->num_rx_desc = igb_rxd;
532
533	adapter->hw.mac.autoneg = DO_AUTO_NEG;
534	adapter->hw.phy.autoneg_wait_to_complete = FALSE;
535	adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
536
537	/* Copper options */
538	if (adapter->hw.phy.media_type == e1000_media_type_copper) {
539		adapter->hw.phy.mdix = AUTO_ALL_MODES;
540		adapter->hw.phy.disable_polarity_correction = FALSE;
541		adapter->hw.phy.ms_type = IGB_MASTER_SLAVE;
542	}
543
544	/*
545	 * Set the frame limits assuming
546	 * standard Ethernet-sized frames.
547	 */
548	adapter->max_frame_size = ETHERMTU + ETHER_HDR_LEN + ETHERNET_FCS_SIZE;
549	adapter->min_frame_size = ETH_ZLEN + ETHERNET_FCS_SIZE;
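	/*
	 * With the standard constants this works out to 1518 bytes max
	 * (1500 MTU + 14 header + 4 FCS) and 64 bytes min (60 + 4 FCS).
	 */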
550
551	/*
552	** Allocate and Setup Queues
553	*/
554	if (igb_allocate_queues(adapter)) {
555		error = ENOMEM;
556		goto err_pci;
557	}
558
559	/* Allocate the appropriate stats memory */
560	if (adapter->vf_ifp) {
561		adapter->stats =
562		    (struct e1000_vf_stats *)malloc(sizeof \
563		    (struct e1000_vf_stats), M_DEVBUF, M_NOWAIT | M_ZERO);
564		igb_vf_init_stats(adapter);
565	} else
566		adapter->stats =
567		    (struct e1000_hw_stats *)malloc(sizeof \
568		    (struct e1000_hw_stats), M_DEVBUF, M_NOWAIT | M_ZERO);
569	if (adapter->stats == NULL) {
570		device_printf(dev, "Can not allocate stats memory\n");
571		error = ENOMEM;
572		goto err_late;
573	}
574
575	/* Allocate multicast array memory. */
576	adapter->mta = malloc(sizeof(u8) * ETH_ADDR_LEN *
577	    MAX_NUM_MULTICAST_ADDRESSES, M_DEVBUF, M_NOWAIT);
578	if (adapter->mta == NULL) {
579		device_printf(dev, "Can not allocate multicast setup array\n");
580		error = ENOMEM;
581		goto err_late;
582	}
583
584	/* Some adapter-specific advanced features */
585	if (adapter->hw.mac.type >= e1000_i350) {
586		SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
587		    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
588		    OID_AUTO, "dmac", CTLTYPE_INT|CTLFLAG_RW,
589		    adapter, 0, igb_sysctl_dmac, "I", "DMA Coalesce");
590		SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
591		    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
592		    OID_AUTO, "eee_disabled", CTLTYPE_INT|CTLFLAG_RW,
593		    adapter, 0, igb_sysctl_eee, "I",
594		    "Disable Energy Efficient Ethernet");
595		if (adapter->hw.phy.media_type == e1000_media_type_copper)
596			e1000_set_eee_i350(&adapter->hw);
597	}
598
599	/*
600	** Start from a known state; this is
601	** important for reading the NVM and
602	** MAC address from it.
603	*/
604	e1000_reset_hw(&adapter->hw);
605
606	/* Make sure we have a good EEPROM before we read from it */
607	if (((adapter->hw.mac.type != e1000_i210) &&
608	    (adapter->hw.mac.type != e1000_i211)) &&
609	    (e1000_validate_nvm_checksum(&adapter->hw) < 0)) {
610		/*
611		** Some PCI-E parts fail the first check due to
612		** the link being in a sleep state; call it again,
613		** and if it fails a second time it's a real issue.
614		*/
615		if (e1000_validate_nvm_checksum(&adapter->hw) < 0) {
616			device_printf(dev,
617			    "The EEPROM Checksum Is Not Valid\n");
618			error = EIO;
619			goto err_late;
620		}
621	}
622
623	/*
624	** Copy the permanent MAC address out of the EEPROM
625	*/
626	if (e1000_read_mac_addr(&adapter->hw) < 0) {
627		device_printf(dev, "EEPROM read error while reading MAC"
628		    " address\n");
629		error = EIO;
630		goto err_late;
631	}
632	/* Check its sanity */
633	if (!igb_is_valid_ether_addr(adapter->hw.mac.addr)) {
634		device_printf(dev, "Invalid MAC address\n");
635		error = EIO;
636		goto err_late;
637	}
638
639	/* Setup OS specific network interface */
640	if (igb_setup_interface(dev, adapter) != 0)
641		goto err_late;
642
643	/* Now get a good starting state */
644	igb_reset(adapter);
645
646	/* Initialize statistics */
647	igb_update_stats_counters(adapter);
648
649	adapter->hw.mac.get_link_status = 1;
650	igb_update_link_status(adapter);
651
652	/* Indicate SOL/IDER usage */
653	if (e1000_check_reset_block(&adapter->hw))
654		device_printf(dev,
655		    "PHY reset is blocked due to SOL/IDER session.\n");
656
657	/* Determine if we have to control management hardware */
658	adapter->has_manage = e1000_enable_mng_pass_thru(&adapter->hw);
659
660	/*
661	 * Setup Wake-on-Lan
662	 */
663	/* APME bit in EEPROM is mapped to WUC.APME */
664	eeprom_data = E1000_READ_REG(&adapter->hw, E1000_WUC) & E1000_WUC_APME;
665	if (eeprom_data)
666		adapter->wol = E1000_WUFC_MAG;
667
668	/* Register for VLAN events */
669	adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
670	     igb_register_vlan, adapter, EVENTHANDLER_PRI_FIRST);
671	adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
672	     igb_unregister_vlan, adapter, EVENTHANDLER_PRI_FIRST);
673
674	igb_add_hw_stats(adapter);
675
676	/* Tell the stack that the interface is not active */
677	adapter->ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
678	adapter->ifp->if_drv_flags |=  IFF_DRV_OACTIVE;
679
680	adapter->led_dev = led_create(igb_led_func, adapter,
681	    device_get_nameunit(dev));
682
683	/*
684	** Configure Interrupts
685	*/
686	if ((adapter->msix > 1) && (igb_enable_msix))
687		error = igb_allocate_msix(adapter);
688	else /* MSI or Legacy */
689		error = igb_allocate_legacy(adapter);
690	if (error)
691		goto err_late;
692
693#ifdef DEV_NETMAP
694	igb_netmap_attach(adapter);
695#endif /* DEV_NETMAP */
696	INIT_DEBUGOUT("igb_attach: end");
697
698	return (0);
699
700err_late:
701	igb_detach(dev);
702	igb_free_transmit_structures(adapter);
703	igb_free_receive_structures(adapter);
704	igb_release_hw_control(adapter);
705err_pci:
706	igb_free_pci_resources(adapter);
707	if (adapter->ifp != NULL)
708		if_free(adapter->ifp);
709	free(adapter->mta, M_DEVBUF);
710	IGB_CORE_LOCK_DESTROY(adapter);
711
712	return (error);
713}
714
715/*********************************************************************
716 *  Device removal routine
717 *
718 *  The detach entry point is called when the driver is being removed.
719 *  This routine stops the adapter and deallocates all the resources
720 *  that were allocated for driver operation.
721 *
722 *  return 0 on success, positive on failure
723 *********************************************************************/
724
725static int
726igb_detach(device_t dev)
727{
728	struct adapter	*adapter = device_get_softc(dev);
729	struct ifnet	*ifp = adapter->ifp;
730
731	INIT_DEBUGOUT("igb_detach: begin");
732
733	/* Make sure VLANS are not using driver */
734	if (adapter->ifp->if_vlantrunk != NULL) {
735		device_printf(dev,"Vlan in use, detach first\n");
736		return (EBUSY);
737	}
738
739	ether_ifdetach(adapter->ifp);
740
741	if (adapter->led_dev != NULL)
742		led_destroy(adapter->led_dev);
743
744#ifdef DEVICE_POLLING
745	if (ifp->if_capenable & IFCAP_POLLING)
746		ether_poll_deregister(ifp);
747#endif
748
749	IGB_CORE_LOCK(adapter);
750	adapter->in_detach = 1;
751	igb_stop(adapter);
752	IGB_CORE_UNLOCK(adapter);
753
754	e1000_phy_hw_reset(&adapter->hw);
755
756	/* Give control back to firmware */
757	igb_release_manageability(adapter);
758	igb_release_hw_control(adapter);
759
760	if (adapter->wol) {
761		E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
762		E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
763		igb_enable_wakeup(dev);
764	}
765
766	/* Unregister VLAN events */
767	if (adapter->vlan_attach != NULL)
768		EVENTHANDLER_DEREGISTER(vlan_config, adapter->vlan_attach);
769	if (adapter->vlan_detach != NULL)
770		EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach);
771
772	callout_drain(&adapter->timer);
773
774#ifdef DEV_NETMAP
775	netmap_detach(adapter->ifp);
776#endif /* DEV_NETMAP */
777	igb_free_pci_resources(adapter);
778	bus_generic_detach(dev);
779	if_free(ifp);
780
781	igb_free_transmit_structures(adapter);
782	igb_free_receive_structures(adapter);
783	if (adapter->mta != NULL)
784		free(adapter->mta, M_DEVBUF);
785
786	IGB_CORE_LOCK_DESTROY(adapter);
787
788	return (0);
789}
790
791/*********************************************************************
792 *
793 *  Shutdown entry point
794 *
795 **********************************************************************/
796
797static int
798igb_shutdown(device_t dev)
799{
800	return igb_suspend(dev);
801}
802
803/*
804 * Suspend/resume device methods.
805 */
806static int
807igb_suspend(device_t dev)
808{
809	struct adapter *adapter = device_get_softc(dev);
810
811	IGB_CORE_LOCK(adapter);
812
813	igb_stop(adapter);
814
815        igb_release_manageability(adapter);
816	igb_release_hw_control(adapter);
817
818        if (adapter->wol) {
819                E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
820                E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
821                igb_enable_wakeup(dev);
822        }
823
824	IGB_CORE_UNLOCK(adapter);
825
826	return bus_generic_suspend(dev);
827}
828
829static int
830igb_resume(device_t dev)
831{
832	struct adapter *adapter = device_get_softc(dev);
833	struct tx_ring	*txr = adapter->tx_rings;
834	struct ifnet *ifp = adapter->ifp;
835
836	IGB_CORE_LOCK(adapter);
837	igb_init_locked(adapter);
838	igb_init_manageability(adapter);
839
840	if ((ifp->if_flags & IFF_UP) &&
841	    (ifp->if_drv_flags & IFF_DRV_RUNNING) && adapter->link_active) {
842		for (int i = 0; i < adapter->num_queues; i++, txr++) {
843			IGB_TX_LOCK(txr);
844#if __FreeBSD_version >= 800000
845			/* Process the stack queue only if not depleted */
846			if (((txr->queue_status & IGB_QUEUE_DEPLETED) == 0) &&
847			    !drbr_empty(ifp, txr->br))
848				igb_mq_start_locked(ifp, txr, NULL);
849#else
850			if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
851				igb_start_locked(txr, ifp);
852#endif
853			IGB_TX_UNLOCK(txr);
854		}
855	}
856	IGB_CORE_UNLOCK(adapter);
857
858	return bus_generic_resume(dev);
859}
860
861
862#if __FreeBSD_version < 800000
863
864/*********************************************************************
865 *  Transmit entry point
866 *
867 *  igb_start is called by the stack to initiate a transmit.
868 *  The driver will remain in this routine as long as there are
869 *  packets to transmit and transmit resources are available.
870 *  In case resources are not available, the stack is notified
871 *  and the packet is requeued.
872 **********************************************************************/
873
874static void
875igb_start_locked(struct tx_ring *txr, struct ifnet *ifp)
876{
877	struct adapter	*adapter = ifp->if_softc;
878	struct mbuf	*m_head;
879
880	IGB_TX_LOCK_ASSERT(txr);
881
882	if ((ifp->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
883	    IFF_DRV_RUNNING)
884		return;
885	if (!adapter->link_active)
886		return;
887
888	/* Call cleanup if number of TX descriptors low */
889	if (txr->tx_avail <= IGB_TX_CLEANUP_THRESHOLD)
890		igb_txeof(txr);
891
892	while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
893		if (txr->tx_avail <= IGB_MAX_SCATTER) {
894			txr->queue_status |= IGB_QUEUE_DEPLETED;
895			break;
896		}
897		IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
898		if (m_head == NULL)
899			break;
900		/*
901		 *  Encapsulation can modify our pointer, and/or make it
902		 *  NULL on failure.  In that event, we can't requeue.
903		 */
904		if (igb_xmit(txr, &m_head)) {
905			if (m_head != NULL)
906				IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
907			if (txr->tx_avail <= IGB_MAX_SCATTER)
908				txr->queue_status |= IGB_QUEUE_DEPLETED;
909			break;
910		}
911
912		/* Send a copy of the frame to the BPF listener */
913		ETHER_BPF_MTAP(ifp, m_head);
914
915		/* Set watchdog on */
916		txr->watchdog_time = ticks;
917		txr->queue_status |= IGB_QUEUE_WORKING;
918	}
919}
920
921/*
922 * Legacy TX driver routine, called from the
923 * stack, always uses tx[0], and spins for it.
924 * Should not be used with multiqueue tx
925 */
926static void
927igb_start(struct ifnet *ifp)
928{
929	struct adapter	*adapter = ifp->if_softc;
930	struct tx_ring	*txr = adapter->tx_rings;
931
932	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
933		IGB_TX_LOCK(txr);
934		igb_start_locked(txr, ifp);
935		IGB_TX_UNLOCK(txr);
936	}
937	return;
938}
939
940#else /* __FreeBSD_version >= 800000 */
941
942/*
943** Multiqueue Transmit driver
944**
945*/
946static int
947igb_mq_start(struct ifnet *ifp, struct mbuf *m)
948{
949	struct adapter		*adapter = ifp->if_softc;
950	struct igb_queue	*que;
951	struct tx_ring		*txr;
952	int 			i, err = 0;
953
954	/* Which queue to use */
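	/*
	** If the stack tagged the mbuf with a flow id (e.g. from RSS),
	** keep that flow on one ring so its frames stay in order;
	** otherwise spread work by hashing on the current CPU.
	*/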
955	if ((m->m_flags & M_FLOWID) != 0)
956		i = m->m_pkthdr.flowid % adapter->num_queues;
957	else
958		i = curcpu % adapter->num_queues;
959
960	txr = &adapter->tx_rings[i];
961	que = &adapter->queues[i];
962	if (((txr->queue_status & IGB_QUEUE_DEPLETED) == 0) &&
963	    IGB_TX_TRYLOCK(txr)) {
964		struct mbuf *pm = NULL;
965		/*
966		** Try to enqueue first to avoid
967		** out-of-order delivery; if that
968		** fails, pass the mbuf on directly.
969		*/
970		if (m && drbr_enqueue(ifp, txr->br, m))
971			pm = m;
972		err = igb_mq_start_locked(ifp, txr, pm);
973		IGB_TX_UNLOCK(txr);
974	} else {
975		err = drbr_enqueue(ifp, txr->br, m);
976		taskqueue_enqueue(que->tq, &txr->txq_task);
977	}
978
979	return (err);
980}
981
982static int
983igb_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr, struct mbuf *m)
984{
985	struct adapter  *adapter = txr->adapter;
986        struct mbuf     *next;
987        int             err = 0, enq;
988
989	IGB_TX_LOCK_ASSERT(txr);
990
991	if (((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) ||
992	    (txr->queue_status & IGB_QUEUE_DEPLETED) ||
993	    adapter->link_active == 0) {
994		if (m != NULL)
995			err = drbr_enqueue(ifp, txr->br, m);
996		return (err);
997	}
998
999	enq = 0;
1000	if (m == NULL) {
1001		next = drbr_dequeue(ifp, txr->br);
1002	} else if (drbr_needs_enqueue(ifp, txr->br)) {
1003		if ((err = drbr_enqueue(ifp, txr->br, m)) != 0)
1004			return (err);
1005		next = drbr_dequeue(ifp, txr->br);
1006	} else
1007		next = m;
1008
1009	/* Process the queue */
1010	while (next != NULL) {
1011		if ((err = igb_xmit(txr, &next)) != 0) {
1012			if (next != NULL)
1013				err = drbr_enqueue(ifp, txr->br, next);
1014			break;
1015		}
1016		enq++;
1017		drbr_stats_update(ifp, next->m_pkthdr.len, next->m_flags);
1018		ETHER_BPF_MTAP(ifp, next);
1019		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
1020			break;
1021		next = drbr_dequeue(ifp, txr->br);
1022	}
1023	if (enq > 0) {
1024		/* Set the watchdog */
1025		txr->queue_status |= IGB_QUEUE_WORKING;
1026		txr->watchdog_time = ticks;
1027	}
1028	if (txr->tx_avail <= IGB_TX_CLEANUP_THRESHOLD)
1029		igb_txeof(txr);
1030	if (txr->tx_avail <= IGB_MAX_SCATTER)
1031		txr->queue_status |= IGB_QUEUE_DEPLETED;
1032	return (err);
1033}
1034
1035/*
1036 * Called from a taskqueue to drain queued transmit packets.
1037 */
1038static void
1039igb_deferred_mq_start(void *arg, int pending)
1040{
1041	struct tx_ring *txr = arg;
1042	struct adapter *adapter = txr->adapter;
1043	struct ifnet *ifp = adapter->ifp;
1044
1045	IGB_TX_LOCK(txr);
1046	if (!drbr_empty(ifp, txr->br))
1047		igb_mq_start_locked(ifp, txr, NULL);
1048	IGB_TX_UNLOCK(txr);
1049}
1050
1051/*
1052** Flush all ring buffers
1053*/
1054static void
1055igb_qflush(struct ifnet *ifp)
1056{
1057	struct adapter	*adapter = ifp->if_softc;
1058	struct tx_ring	*txr = adapter->tx_rings;
1059	struct mbuf	*m;
1060
1061	for (int i = 0; i < adapter->num_queues; i++, txr++) {
1062		IGB_TX_LOCK(txr);
1063		while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
1064			m_freem(m);
1065		IGB_TX_UNLOCK(txr);
1066	}
1067	if_qflush(ifp);
1068}
1069#endif /* __FreeBSD_version >= 800000 */
1070
1071/*********************************************************************
1072 *  Ioctl entry point
1073 *
1074 *  igb_ioctl is called when the user wants to configure the
1075 *  interface.
1076 *
1077 *  return 0 on success, positive on failure
1078 **********************************************************************/
1079
1080static int
1081igb_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
1082{
1083	struct adapter	*adapter = ifp->if_softc;
1084	struct ifreq	*ifr = (struct ifreq *)data;
1085#if defined(INET) || defined(INET6)
1086	struct ifaddr	*ifa = (struct ifaddr *)data;
1087#endif
1088	bool		avoid_reset = FALSE;
1089	int		error = 0;
1090
1091	if (adapter->in_detach)
1092		return (error);
1093
1094	switch (command) {
1095	case SIOCSIFADDR:
1096#ifdef INET
1097		if (ifa->ifa_addr->sa_family == AF_INET)
1098			avoid_reset = TRUE;
1099#endif
1100#ifdef INET6
1101		if (ifa->ifa_addr->sa_family == AF_INET6)
1102			avoid_reset = TRUE;
1103#endif
1104		/*
1105		** Calling init results in link renegotiation,
1106		** so we avoid doing it when possible.
1107		*/
1108		if (avoid_reset) {
1109			ifp->if_flags |= IFF_UP;
1110			if (!(ifp->if_drv_flags & IFF_DRV_RUNNING))
1111				igb_init(adapter);
1112#ifdef INET
1113			if (!(ifp->if_flags & IFF_NOARP))
1114				arp_ifinit(ifp, ifa);
1115#endif
1116		} else
1117			error = ether_ioctl(ifp, command, data);
1118		break;
1119	case SIOCSIFMTU:
1120	    {
1121		int max_frame_size;
1122
1123		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)");
1124
1125		IGB_CORE_LOCK(adapter);
1126		max_frame_size = 9234;
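		/* 9234 = 9216-byte jumbo MTU + 14-byte header + 4-byte CRC */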
1127		if (ifr->ifr_mtu > max_frame_size - ETHER_HDR_LEN -
1128		    ETHER_CRC_LEN) {
1129			IGB_CORE_UNLOCK(adapter);
1130			error = EINVAL;
1131			break;
1132		}
1133
1134		ifp->if_mtu = ifr->ifr_mtu;
1135		adapter->max_frame_size =
1136		    ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
1137		igb_init_locked(adapter);
1138		IGB_CORE_UNLOCK(adapter);
1139		break;
1140	    }
1141	case SIOCSIFFLAGS:
1142		IOCTL_DEBUGOUT("ioctl rcv'd:\
1143		    SIOCSIFFLAGS (Set Interface Flags)");
1144		IGB_CORE_LOCK(adapter);
1145		if (ifp->if_flags & IFF_UP) {
1146			if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) {
1147				if ((ifp->if_flags ^ adapter->if_flags) &
1148				    (IFF_PROMISC | IFF_ALLMULTI)) {
1149					igb_disable_promisc(adapter);
1150					igb_set_promisc(adapter);
1151				}
1152			} else
1153				igb_init_locked(adapter);
1154		} else
1155			if (ifp->if_drv_flags & IFF_DRV_RUNNING)
1156				igb_stop(adapter);
1157		adapter->if_flags = ifp->if_flags;
1158		IGB_CORE_UNLOCK(adapter);
1159		break;
1160	case SIOCADDMULTI:
1161	case SIOCDELMULTI:
1162		IOCTL_DEBUGOUT("ioctl rcv'd: SIOC(ADD|DEL)MULTI");
1163		if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1164			IGB_CORE_LOCK(adapter);
1165			igb_disable_intr(adapter);
1166			igb_set_multi(adapter);
1167#ifdef DEVICE_POLLING
1168			if (!(ifp->if_capenable & IFCAP_POLLING))
1169#endif
1170				igb_enable_intr(adapter);
1171			IGB_CORE_UNLOCK(adapter);
1172		}
1173		break;
1174	case SIOCSIFMEDIA:
1175		/* Check SOL/IDER usage */
1176		IGB_CORE_LOCK(adapter);
1177		if (e1000_check_reset_block(&adapter->hw)) {
1178			IGB_CORE_UNLOCK(adapter);
1179			device_printf(adapter->dev, "Media change is"
1180			    " blocked due to SOL/IDER session.\n");
1181			break;
1182		}
1183		IGB_CORE_UNLOCK(adapter);
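		/* FALLTHROUGH: both media ioctls share ifmedia_ioctl() */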
1184	case SIOCGIFMEDIA:
1185		IOCTL_DEBUGOUT("ioctl rcv'd: \
1186		    SIOCxIFMEDIA (Get/Set Interface Media)");
1187		error = ifmedia_ioctl(ifp, ifr, &adapter->media, command);
1188		break;
1189	case SIOCSIFCAP:
1190	    {
1191		int mask, reinit;
1192
1193		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFCAP (Set Capabilities)");
1194		reinit = 0;
1195		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
1196#ifdef DEVICE_POLLING
1197		if (mask & IFCAP_POLLING) {
1198			if (ifr->ifr_reqcap & IFCAP_POLLING) {
1199				error = ether_poll_register(igb_poll, ifp);
1200				if (error)
1201					return (error);
1202				IGB_CORE_LOCK(adapter);
1203				igb_disable_intr(adapter);
1204				ifp->if_capenable |= IFCAP_POLLING;
1205				IGB_CORE_UNLOCK(adapter);
1206			} else {
1207				error = ether_poll_deregister(ifp);
1208				/* Enable interrupt even in error case */
1209				IGB_CORE_LOCK(adapter);
1210				igb_enable_intr(adapter);
1211				ifp->if_capenable &= ~IFCAP_POLLING;
1212				IGB_CORE_UNLOCK(adapter);
1213			}
1214		}
1215#endif
1216		if (mask & IFCAP_HWCSUM) {
1217			ifp->if_capenable ^= IFCAP_HWCSUM;
1218			reinit = 1;
1219		}
1220		if (mask & IFCAP_TSO4) {
1221			ifp->if_capenable ^= IFCAP_TSO4;
1222			reinit = 1;
1223		}
1224		if (mask & IFCAP_VLAN_HWTAGGING) {
1225			ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
1226			reinit = 1;
1227		}
1228		if (mask & IFCAP_VLAN_HWFILTER) {
1229			ifp->if_capenable ^= IFCAP_VLAN_HWFILTER;
1230			reinit = 1;
1231		}
1232		if (mask & IFCAP_VLAN_HWTSO) {
1233			ifp->if_capenable ^= IFCAP_VLAN_HWTSO;
1234			reinit = 1;
1235		}
1236		if (mask & IFCAP_LRO) {
1237			ifp->if_capenable ^= IFCAP_LRO;
1238			reinit = 1;
1239		}
1240		if (reinit && (ifp->if_drv_flags & IFF_DRV_RUNNING))
1241			igb_init(adapter);
1242		VLAN_CAPABILITIES(ifp);
1243		break;
1244	    }
1245
1246	default:
1247		error = ether_ioctl(ifp, command, data);
1248		break;
1249	}
1250
1251	return (error);
1252}
1253
1254
1255/*********************************************************************
1256 *  Init entry point
1257 *
1258 *  This routine is used in two ways. It is used by the stack as
1259 *  the init entry point in the network interface structure. It is also
1260 *  used by the driver as a hw/sw initialization routine to get to a
1261 *  consistent state.
1262 *
1263 *  return 0 on success, positive on failure
1264 **********************************************************************/
1265
1266static void
1267igb_init_locked(struct adapter *adapter)
1268{
1269	struct ifnet	*ifp = adapter->ifp;
1270	device_t	dev = adapter->dev;
1271
1272	INIT_DEBUGOUT("igb_init: begin");
1273
1274	IGB_CORE_LOCK_ASSERT(adapter);
1275
1276	igb_disable_intr(adapter);
1277	callout_stop(&adapter->timer);
1278
1279	/* Get the latest mac address, User can use a LAA */
1280        bcopy(IF_LLADDR(adapter->ifp), adapter->hw.mac.addr,
1281              ETHER_ADDR_LEN);
1282
1283	/* Put the address into the Receive Address Array */
1284	e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);
1285
1286	igb_reset(adapter);
1287	igb_update_link_status(adapter);
1288
1289	E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);
1290
1291	/* Set hardware offload abilities */
1292	ifp->if_hwassist = 0;
1293	if (ifp->if_capenable & IFCAP_TXCSUM) {
1294		ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
1295#if __FreeBSD_version >= 800000
1296		if (adapter->hw.mac.type == e1000_82576)
1297			ifp->if_hwassist |= CSUM_SCTP;
1298#endif
1299	}
1300
1301	if (ifp->if_capenable & IFCAP_TSO4)
1302		ifp->if_hwassist |= CSUM_TSO;
1303
1304	/* Configure for OS presence */
1305	igb_init_manageability(adapter);
1306
1307	/* Prepare transmit descriptors and buffers */
1308	igb_setup_transmit_structures(adapter);
1309	igb_initialize_transmit_units(adapter);
1310
1311	/* Setup Multicast table */
1312	igb_set_multi(adapter);
1313
1314	/*
1315	** Figure out the desired mbuf pool
1316	** for doing jumbo/packetsplit
1317	*/
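	/*
	** For reference: MCLBYTES is a 2K cluster, MJUMPAGESIZE is one
	** page (typically 4K), and MJUM9BYTES is a 9K jumbo cluster.
	*/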
1318	if (adapter->max_frame_size <= 2048)
1319		adapter->rx_mbuf_sz = MCLBYTES;
1320	else if (adapter->max_frame_size <= 4096)
1321		adapter->rx_mbuf_sz = MJUMPAGESIZE;
1322	else
1323		adapter->rx_mbuf_sz = MJUM9BYTES;
1324
1325	/* Prepare receive descriptors and buffers */
1326	if (igb_setup_receive_structures(adapter)) {
1327		device_printf(dev, "Could not setup receive structures\n");
1328		return;
1329	}
1330	igb_initialize_receive_units(adapter);
1331
1332        /* Enable VLAN support */
1333	if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING)
1334		igb_setup_vlan_hw_support(adapter);
1335
1336	/* Don't lose promiscuous settings */
1337	igb_set_promisc(adapter);
1338
1339	ifp->if_drv_flags |= IFF_DRV_RUNNING;
1340	ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
1341
1342	callout_reset(&adapter->timer, hz, igb_local_timer, adapter);
1343	e1000_clear_hw_cntrs_base_generic(&adapter->hw);
1344
1345	if (adapter->msix > 1) /* Set up queue routing */
1346		igb_configure_queues(adapter);
1347
1348	/* this clears any pending interrupts */
1349	E1000_READ_REG(&adapter->hw, E1000_ICR);
1350#ifdef DEVICE_POLLING
1351	/*
1352	 * Only enable interrupts if we are not polling; make sure
1353	 * they are off otherwise.
1354	 */
1355	if (ifp->if_capenable & IFCAP_POLLING)
1356		igb_disable_intr(adapter);
1357	else
1358#endif /* DEVICE_POLLING */
1359	{
1360		igb_enable_intr(adapter);
1361		E1000_WRITE_REG(&adapter->hw, E1000_ICS, E1000_ICS_LSC);
1362	}
1363
1364	/* Set Energy Efficient Ethernet */
1365	if (adapter->hw.phy.media_type == e1000_media_type_copper)
1366		e1000_set_eee_i350(&adapter->hw);
1367}
1368
1369static void
1370igb_init(void *arg)
1371{
1372	struct adapter *adapter = arg;
1373
1374	IGB_CORE_LOCK(adapter);
1375	igb_init_locked(adapter);
1376	IGB_CORE_UNLOCK(adapter);
1377}
1378
1379
1380static void
1381igb_handle_que(void *context, int pending)
1382{
1383	struct igb_queue *que = context;
1384	struct adapter *adapter = que->adapter;
1385	struct tx_ring *txr = que->txr;
1386	struct ifnet	*ifp = adapter->ifp;
1387
1388	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1389		bool	more;
1390
1391		more = igb_rxeof(que, adapter->rx_process_limit, NULL);
1392
1393		IGB_TX_LOCK(txr);
1394		igb_txeof(txr);
1395#if __FreeBSD_version >= 800000
1396		/* Process the stack queue only if not depleted */
1397		if (((txr->queue_status & IGB_QUEUE_DEPLETED) == 0) &&
1398		    !drbr_empty(ifp, txr->br))
1399			igb_mq_start_locked(ifp, txr, NULL);
1400#else
1401		if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1402			igb_start_locked(txr, ifp);
1403#endif
1404		IGB_TX_UNLOCK(txr);
1405		/* Do we need another? */
1406		if (more) {
1407			taskqueue_enqueue(que->tq, &que->que_task);
1408			return;
1409		}
1410	}
1411
1412#ifdef DEVICE_POLLING
1413	if (ifp->if_capenable & IFCAP_POLLING)
1414		return;
1415#endif
1416	/* Reenable this interrupt */
1417	if (que->eims)
1418		E1000_WRITE_REG(&adapter->hw, E1000_EIMS, que->eims);
1419	else
1420		igb_enable_intr(adapter);
1421}
1422
1423/* Deal with link in a sleepable context */
1424static void
1425igb_handle_link(void *context, int pending)
1426{
1427	struct adapter *adapter = context;
1428
1429	IGB_CORE_LOCK(adapter);
1430	igb_handle_link_locked(adapter);
1431	IGB_CORE_UNLOCK(adapter);
1432}
1433
1434static void
1435igb_handle_link_locked(struct adapter *adapter)
1436{
1437	struct tx_ring	*txr = adapter->tx_rings;
1438	struct ifnet *ifp = adapter->ifp;
1439
1440	IGB_CORE_LOCK_ASSERT(adapter);
1441	adapter->hw.mac.get_link_status = 1;
1442	igb_update_link_status(adapter);
1443	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) && adapter->link_active) {
1444		for (int i = 0; i < adapter->num_queues; i++, txr++) {
1445			IGB_TX_LOCK(txr);
1446#if __FreeBSD_version >= 800000
1447			/* Process the stack queue only if not depleted */
1448			if (((txr->queue_status & IGB_QUEUE_DEPLETED) == 0) &&
1449			    !drbr_empty(ifp, txr->br))
1450				igb_mq_start_locked(ifp, txr, NULL);
1451#else
1452			if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1453				igb_start_locked(txr, ifp);
1454#endif
1455			IGB_TX_UNLOCK(txr);
1456		}
1457	}
1458}
1459
1460/*********************************************************************
1461 *
1462 *  MSI/Legacy Deferred
1463 *  Interrupt Service routine
1464 *
1465 *********************************************************************/
1466static int
1467igb_irq_fast(void *arg)
1468{
1469	struct adapter		*adapter = arg;
1470	struct igb_queue	*que = adapter->queues;
1471	u32			reg_icr;
1472
1473
1474	reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1475
1476	/* Hot eject?  */
1477	if (reg_icr == 0xffffffff)
1478		return FILTER_STRAY;
1479
1480	/* Definitely not our interrupt.  */
1481	if (reg_icr == 0x0)
1482		return FILTER_STRAY;
1483
1484	if ((reg_icr & E1000_ICR_INT_ASSERTED) == 0)
1485		return FILTER_STRAY;
1486
1487	/*
1488	 * Mask interrupts until the taskqueue is finished running.  This is
1489	 * cheap, just assume that it is needed.  This also works around the
1490	 * MSI message reordering errata on certain systems.
1491	 */
1492	igb_disable_intr(adapter);
1493	taskqueue_enqueue(que->tq, &que->que_task);
1494
1495	/* Link status change */
1496	if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC))
1497		taskqueue_enqueue(que->tq, &adapter->link_task);
1498
1499	if (reg_icr & E1000_ICR_RXO)
1500		adapter->rx_overruns++;
1501	return FILTER_HANDLED;
1502}
1503
1504#ifdef DEVICE_POLLING
1505#if __FreeBSD_version >= 800000
1506#define POLL_RETURN_COUNT(a) (a)
1507static int
1508#else
1509#define POLL_RETURN_COUNT(a)
1510static void
1511#endif
1512igb_poll(struct ifnet *ifp, enum poll_cmd cmd, int count)
1513{
1514	struct adapter		*adapter = ifp->if_softc;
1515	struct igb_queue	*que;
1516	struct tx_ring		*txr;
1517	u32			reg_icr, rx_done = 0;
1518	u32			loop = IGB_MAX_LOOP;
1519	bool			more;
1520
1521	IGB_CORE_LOCK(adapter);
1522	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
1523		IGB_CORE_UNLOCK(adapter);
1524		return POLL_RETURN_COUNT(rx_done);
1525	}
1526
1527	if (cmd == POLL_AND_CHECK_STATUS) {
1528		reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1529		/* Link status change */
1530		if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC))
1531			igb_handle_link_locked(adapter);
1532
1533		if (reg_icr & E1000_ICR_RXO)
1534			adapter->rx_overruns++;
1535	}
1536	IGB_CORE_UNLOCK(adapter);
1537
1538	for (int i = 0; i < adapter->num_queues; i++) {
1539		que = &adapter->queues[i];
1540		txr = que->txr;
1541
1542		igb_rxeof(que, count, &rx_done);
1543
1544		IGB_TX_LOCK(txr);
1545		do {
1546			more = igb_txeof(txr);
1547		} while (loop-- && more);
1548#if __FreeBSD_version >= 800000
1549		if (!drbr_empty(ifp, txr->br))
1550			igb_mq_start_locked(ifp, txr, NULL);
1551#else
1552		if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1553			igb_start_locked(txr, ifp);
1554#endif
1555		IGB_TX_UNLOCK(txr);
1556	}
1557
1558	return POLL_RETURN_COUNT(rx_done);
1559}
1560#endif /* DEVICE_POLLING */
1561
1562/*********************************************************************
1563 *
1564 *  MSIX Que Interrupt Service routine
1565 *
1566 **********************************************************************/
1567static void
1568igb_msix_que(void *arg)
1569{
1570	struct igb_queue *que = arg;
1571	struct adapter *adapter = que->adapter;
1572	struct ifnet   *ifp = adapter->ifp;
1573	struct tx_ring *txr = que->txr;
1574	struct rx_ring *rxr = que->rxr;
1575	u32		newitr = 0;
1576	bool		more_rx;
1577
1578	E1000_WRITE_REG(&adapter->hw, E1000_EIMC, que->eims);
1579	++que->irqs;
1580
1581	IGB_TX_LOCK(txr);
1582	igb_txeof(txr);
1583#if __FreeBSD_version >= 800000
1584	/* Process the stack queue only if not depleted */
1585	if (((txr->queue_status & IGB_QUEUE_DEPLETED) == 0) &&
1586	    !drbr_empty(ifp, txr->br))
1587		igb_mq_start_locked(ifp, txr, NULL);
1588#else
1589	if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1590		igb_start_locked(txr, ifp);
1591#endif
1592	IGB_TX_UNLOCK(txr);
1593
1594	more_rx = igb_rxeof(que, adapter->rx_process_limit, NULL);
1595
1596	if (adapter->enable_aim == FALSE)
1597		goto no_calc;
1598	/*
1599	** Do Adaptive Interrupt Moderation:
1600	**  - Write out last calculated setting
1601	**  - Calculate based on average size over
1602	**    the last interval.
1603	*/
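	/*
	** A rough illustration of the math below: at gigabit link speed
	** with an average packet size of ~1500 bytes, newitr becomes
	** (1500 + 24) / 2 = 762, masked to 760 before being written to
	** EITR on the next interrupt.
	*/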
1604        if (que->eitr_setting)
1605                E1000_WRITE_REG(&adapter->hw,
1606                    E1000_EITR(que->msix), que->eitr_setting);
1607
1608        que->eitr_setting = 0;
1609
1610        /* Idle, do nothing */
1611        if ((txr->bytes == 0) && (rxr->bytes == 0))
1612                goto no_calc;
1613
1614        /* Use half the default if sub-gigabit */
1615        if (adapter->link_speed != 1000)
1616                newitr = IGB_DEFAULT_ITR / 2;
1617        else {
1618		if ((txr->bytes) && (txr->packets))
1619                	newitr = txr->bytes/txr->packets;
1620		if ((rxr->bytes) && (rxr->packets))
1621			newitr = max(newitr,
1622			    (rxr->bytes / rxr->packets));
1623                newitr += 24; /* account for hardware frame, crc */
1624		/* set an upper boundary */
1625		newitr = min(newitr, 3000);
1626		/* Be nice to the mid range */
1627                if ((newitr > 300) && (newitr < 1200))
1628                        newitr = (newitr / 3);
1629                else
1630                        newitr = (newitr / 2);
1631        }
1632        newitr &= 0x7FFC;  /* Mask invalid bits */
1633        if (adapter->hw.mac.type == e1000_82575)
1634                newitr |= newitr << 16;
1635        else
1636                newitr |= E1000_EITR_CNT_IGNR;
1637
1638        /* save for next interrupt */
1639        que->eitr_setting = newitr;
1640
1641        /* Reset state */
1642        txr->bytes = 0;
1643        txr->packets = 0;
1644        rxr->bytes = 0;
1645        rxr->packets = 0;
1646
1647no_calc:
1648	/* Schedule a clean task if needed */
1649	if (more_rx)
1650		taskqueue_enqueue(que->tq, &que->que_task);
1651	else
1652		/* Reenable this interrupt */
1653		E1000_WRITE_REG(&adapter->hw, E1000_EIMS, que->eims);
1654	return;
1655}
1656
1657
1658/*********************************************************************
1659 *
1660 *  MSIX Link Interrupt Service routine
1661 *
1662 **********************************************************************/
1663
1664static void
1665igb_msix_link(void *arg)
1666{
1667	struct adapter	*adapter = arg;
1668	u32       	icr;
1669
1670	++adapter->link_irq;
1671	icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1672	if (!(icr & E1000_ICR_LSC))
1673		goto spurious;
1674	igb_handle_link(adapter, 0);
1675
1676spurious:
1677	/* Rearm */
1678	E1000_WRITE_REG(&adapter->hw, E1000_IMS, E1000_IMS_LSC);
1679	E1000_WRITE_REG(&adapter->hw, E1000_EIMS, adapter->link_mask);
1680	return;
1681}
1682
1683
1684/*********************************************************************
1685 *
1686 *  Media Ioctl callback
1687 *
1688 *  This routine is called whenever the user queries the status of
1689 *  the interface using ifconfig.
1690 *
1691 **********************************************************************/
1692static void
1693igb_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
1694{
1695	struct adapter *adapter = ifp->if_softc;
1696	u_char fiber_type = IFM_1000_SX;
1697
1698	INIT_DEBUGOUT("igb_media_status: begin");
1699
1700	IGB_CORE_LOCK(adapter);
1701	igb_update_link_status(adapter);
1702
1703	ifmr->ifm_status = IFM_AVALID;
1704	ifmr->ifm_active = IFM_ETHER;
1705
1706	if (!adapter->link_active) {
1707		IGB_CORE_UNLOCK(adapter);
1708		return;
1709	}
1710
1711	ifmr->ifm_status |= IFM_ACTIVE;
1712
1713	if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
1714	    (adapter->hw.phy.media_type == e1000_media_type_internal_serdes))
1715		ifmr->ifm_active |= fiber_type | IFM_FDX;
1716	else {
1717		switch (adapter->link_speed) {
1718		case 10:
1719			ifmr->ifm_active |= IFM_10_T;
1720			break;
1721		case 100:
1722			ifmr->ifm_active |= IFM_100_TX;
1723			break;
1724		case 1000:
1725			ifmr->ifm_active |= IFM_1000_T;
1726			break;
1727		}
1728		if (adapter->link_duplex == FULL_DUPLEX)
1729			ifmr->ifm_active |= IFM_FDX;
1730		else
1731			ifmr->ifm_active |= IFM_HDX;
1732	}
1733	IGB_CORE_UNLOCK(adapter);
1734}
1735
1736/*********************************************************************
1737 *
1738 *  Media Ioctl callback
1739 *
1740 *  This routine is called when the user changes speed/duplex using
1741 *  the media/mediaopt option with ifconfig.
1742 *
1743 **********************************************************************/
1744static int
1745igb_media_change(struct ifnet *ifp)
1746{
1747	struct adapter *adapter = ifp->if_softc;
1748	struct ifmedia  *ifm = &adapter->media;
1749
1750	INIT_DEBUGOUT("igb_media_change: begin");
1751
1752	if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
1753		return (EINVAL);
1754
1755	IGB_CORE_LOCK(adapter);
1756	switch (IFM_SUBTYPE(ifm->ifm_media)) {
1757	case IFM_AUTO:
1758		adapter->hw.mac.autoneg = DO_AUTO_NEG;
1759		adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
1760		break;
1761	case IFM_1000_LX:
1762	case IFM_1000_SX:
1763	case IFM_1000_T:
1764		adapter->hw.mac.autoneg = DO_AUTO_NEG;
1765		adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
1766		break;
1767	case IFM_100_TX:
1768		adapter->hw.mac.autoneg = FALSE;
1769		adapter->hw.phy.autoneg_advertised = 0;
1770		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1771			adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_FULL;
1772		else
1773			adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_HALF;
1774		break;
1775	case IFM_10_T:
1776		adapter->hw.mac.autoneg = FALSE;
1777		adapter->hw.phy.autoneg_advertised = 0;
1778		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1779			adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_FULL;
1780		else
1781			adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_HALF;
1782		break;
1783	default:
1784		device_printf(adapter->dev, "Unsupported media type\n");
1785	}
1786
1787	igb_init_locked(adapter);
1788	IGB_CORE_UNLOCK(adapter);
1789
1790	return (0);
1791}
1792
1793
1794/*********************************************************************
1795 *
1796 *  This routine maps the mbufs to Advanced TX descriptors.
1797 *
1798 **********************************************************************/
1799static int
1800igb_xmit(struct tx_ring *txr, struct mbuf **m_headp)
1801{
1802	struct adapter		*adapter = txr->adapter;
1803	bus_dma_segment_t	segs[IGB_MAX_SCATTER];
1804	bus_dmamap_t		map;
1805	struct igb_tx_buffer	*tx_buffer, *tx_buffer_mapped;
1806	union e1000_adv_tx_desc	*txd = NULL;
1807	struct mbuf		*m_head = *m_headp;
1808	struct ether_vlan_header *eh = NULL;
1809	struct ip		*ip = NULL;
1810	struct tcphdr		*th = NULL;
1811	u32			hdrlen, cmd_type_len, olinfo_status = 0;
1812	int			ehdrlen, poff;
1813	int			nsegs, i, first, last = 0;
1814	int			error, do_tso, remap = 1;
1815
1816	/* Set basic descriptor constants */
1817	cmd_type_len = E1000_ADVTXD_DTYP_DATA;
1818	cmd_type_len |= E1000_ADVTXD_DCMD_IFCS | E1000_ADVTXD_DCMD_DEXT;
1819	if (m_head->m_flags & M_VLANTAG)
1820		cmd_type_len |= E1000_ADVTXD_DCMD_VLE;
1821
1822retry:
1823	m_head = *m_headp;
1824	do_tso = ((m_head->m_pkthdr.csum_flags & CSUM_TSO) != 0);
1825	hdrlen = ehdrlen = poff = 0;
1826
1827	/*
1828	 * Intel recommends that the entire IP/TCP header reside in a single
1829	 * buffer. If multiple descriptors are used to describe the IP and
1830	 * TCP header, each descriptor should describe one or more
1831	 * complete headers; descriptors referencing only parts of headers
1832	 * are not supported. If all layer headers are not coalesced into
1833	 * a single buffer, each buffer should not cross a 4KB boundary,
1834	 * or be larger than the maximum read request size.
1835	 * The controller also requires the IP/TCP header to be modified for
1836	 * TSO to work, so we first get a writable mbuf chain and then
1837	 * coalesce the ethernet/IP/TCP headers into a single buffer to meet
1838	 * that requirement. This also simplifies IP/TCP/UDP checksum
1839	 * offloading, which has similar restrictions.
1840	 */
1841	if (do_tso || m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD) {
1842		if (do_tso || (m_head->m_next != NULL &&
1843		    m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD)) {
1844			if (M_WRITABLE(*m_headp) == 0) {
1845				m_head = m_dup(*m_headp, M_DONTWAIT);
1846				m_freem(*m_headp);
1847				if (m_head == NULL) {
1848					*m_headp = NULL;
1849					return (ENOBUFS);
1850				}
1851				*m_headp = m_head;
1852			}
1853		}
1854		/*
1855		 * Assume IPv4, we don't have TSO/checksum offload support
1856		 * for IPv6 yet.
1857		 */
1858		ehdrlen = sizeof(struct ether_header);
1859		m_head = m_pullup(m_head, ehdrlen);
1860		if (m_head == NULL) {
1861			*m_headp = NULL;
1862			return (ENOBUFS);
1863		}
1864		eh = mtod(m_head, struct ether_vlan_header *);
1865		if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
1866			ehdrlen = sizeof(struct ether_vlan_header);
1867			m_head = m_pullup(m_head, ehdrlen);
1868			if (m_head == NULL) {
1869				*m_headp = NULL;
1870				return (ENOBUFS);
1871			}
1872		}
1873		m_head = m_pullup(m_head, ehdrlen + sizeof(struct ip));
1874		if (m_head == NULL) {
1875			*m_headp = NULL;
1876			return (ENOBUFS);
1877		}
1878		ip = (struct ip *)(mtod(m_head, char *) + ehdrlen);
1879		poff = ehdrlen + (ip->ip_hl << 2);
1880		if (do_tso) {
1881			m_head = m_pullup(m_head, poff + sizeof(struct tcphdr));
1882			if (m_head == NULL) {
1883				*m_headp = NULL;
1884				return (ENOBUFS);
1885			}
1886			/*
1887			 * The pseudo TCP checksum does not include the TCP
1888			 * payload length, so the driver recomputes it here to
1889			 * match what the hardware expects to see, as required
1890			 * by Microsoft's Large Send specification.
1891			 */
1892			th = (struct tcphdr *)(mtod(m_head, char *) + poff);
1893			th->th_sum = in_pseudo(ip->ip_src.s_addr,
1894			    ip->ip_dst.s_addr, htons(IPPROTO_TCP));
1895			/* Keep track of the full header length */
1896			hdrlen = poff + (th->th_off << 2);
1897		} else if (m_head->m_pkthdr.csum_flags & CSUM_TCP) {
1898			m_head = m_pullup(m_head, poff + sizeof(struct tcphdr));
1899			if (m_head == NULL) {
1900				*m_headp = NULL;
1901				return (ENOBUFS);
1902			}
1903			th = (struct tcphdr *)(mtod(m_head, char *) + poff);
1904			m_head = m_pullup(m_head, poff + (th->th_off << 2));
1905			if (m_head == NULL) {
1906				*m_headp = NULL;
1907				return (ENOBUFS);
1908			}
1909			ip = (struct ip *)(mtod(m_head, char *) + ehdrlen);
1910			th = (struct tcphdr *)(mtod(m_head, char *) + poff);
1911		} else if (m_head->m_pkthdr.csum_flags & CSUM_UDP) {
1912			m_head = m_pullup(m_head, poff + sizeof(struct udphdr));
1913			if (m_head == NULL) {
1914				*m_headp = NULL;
1915				return (ENOBUFS);
1916			}
1917			ip = (struct ip *)(mtod(m_head, char *) + ehdrlen);
1918		}
1919		*m_headp = m_head;
1920	}
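
	/*
	 * Worked example (illustrative, not part of the driver): for an
	 * untagged TCP/IPv4 TSO request with no IP or TCP options, the
	 * pullups above leave ehdrlen = 14 (ethernet header),
	 * poff = 14 + 20 = 34 (start of the TCP header) and
	 * hdrlen = 34 + 20 = 54, all residing in the first mbuf as the
	 * hardware requires.
	 */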
1921
1922	/*
1923	 * Map the packet for DMA
1924	 *
1925	 * Capture the first descriptor index,
1926	 * this descriptor will have the index
1927	 * of the EOP which is the only one that
1928	 * now gets a DONE bit writeback.
1929	 */
1930	first = txr->next_avail_desc;
1931	tx_buffer = &txr->tx_buffers[first];
1932	tx_buffer_mapped = tx_buffer;
1933	map = tx_buffer->map;
1934
1935	error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
1936	    *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
1937
1938	/*
1939	 * There are two types of errors we can (try) to handle:
1940	 * - EFBIG means the mbuf chain was too long and bus_dma ran
1941	 *   out of segments.  Defragment the mbuf chain and try again.
1942	 * - ENOMEM means bus_dma could not obtain enough bounce buffers
1943	 *   at this point in time.  Defer sending and try again later.
1944	 * All other errors, in particular EINVAL, are fatal and prevent the
1945	 * mbuf chain from ever going through.  Drop it and report error.
1946	 */
1947	if (error == EFBIG && remap) {
1948		struct mbuf *m;
1949
1950		m = m_defrag(*m_headp, M_DONTWAIT);
1951		if (m == NULL) {
1952			adapter->mbuf_defrag_failed++;
1953			m_freem(*m_headp);
1954			*m_headp = NULL;
1955			return (ENOBUFS);
1956		}
1957		*m_headp = m;
1958
1959		/* Try it again, but only once */
1960		remap = 0;
1961		goto retry;
1962	} else if (error == ENOMEM) {
1963		adapter->no_tx_dma_setup++;
1964		return (error);
1965	} else if (error != 0) {
1966		adapter->no_tx_dma_setup++;
1967		m_freem(*m_headp);
1968		*m_headp = NULL;
1969		return (error);
1970	}
1971
1972	/*
1973	** Make sure we don't overrun the ring,
1974	** we need nsegs descriptors and one for
1975	** the context descriptor used for the
1976	** offloads.
1977	*/
1978        if ((nsegs + 1) > (txr->tx_avail - 2)) {
1979                txr->no_desc_avail++;
1980		bus_dmamap_unload(txr->txtag, map);
1981		return (ENOBUFS);
1982        }
1983	m_head = *m_headp;
1984
1985	/* Do hardware assists:
1986         * Set up the context descriptor, used
1987         * when any hardware offload is done.
1988         * This includes CSUM, VLAN, and TSO.
1989         * It will use the first descriptor.
1990         */
1991
1992	if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
1993		if (igb_tso_setup(txr, m_head, ehdrlen, ip, th)) {
1994			cmd_type_len |= E1000_ADVTXD_DCMD_TSE;
1995			olinfo_status |= E1000_TXD_POPTS_IXSM << 8;
1996			olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
1997		} else
1998			return (ENXIO);
1999	} else if (igb_tx_ctx_setup(txr, m_head))
2000			olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
2001
2002	/* Calculate payload length */
2003	olinfo_status |= ((m_head->m_pkthdr.len - hdrlen)
2004	    << E1000_ADVTXD_PAYLEN_SHIFT);
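
	/*
	 * Worked example (illustrative, continuing the TSO case sketched
	 * above with 54 bytes of headers): for a request carrying 32000
	 * bytes of TCP payload, m_pkthdr.len is 32054 and the PAYLEN
	 * field carries 32054 - 54 = 32000, shifted into the upper bits
	 * of olinfo_status by E1000_ADVTXD_PAYLEN_SHIFT.
	 */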
2005
2006	/* 82575 needs the queue index added */
2007	if (adapter->hw.mac.type == e1000_82575)
2008		olinfo_status |= txr->me << 4;
2009
2010	/* Set up our transmit descriptors */
2011	i = txr->next_avail_desc;
2012	for (int j = 0; j < nsegs; j++) {
2013		bus_size_t seg_len;
2014		bus_addr_t seg_addr;
2015
2016		tx_buffer = &txr->tx_buffers[i];
2017		txd = (union e1000_adv_tx_desc *)&txr->tx_base[i];
2018		seg_addr = segs[j].ds_addr;
2019		seg_len  = segs[j].ds_len;
2020
2021		txd->read.buffer_addr = htole64(seg_addr);
2022		txd->read.cmd_type_len = htole32(cmd_type_len | seg_len);
2023		txd->read.olinfo_status = htole32(olinfo_status);
2024		last = i;
2025		if (++i == adapter->num_tx_desc)
2026			i = 0;
2027		tx_buffer->m_head = NULL;
2028		tx_buffer->next_eop = -1;
2029	}
2030
2031	txr->next_avail_desc = i;
2032	txr->tx_avail -= nsegs;
2033        tx_buffer->m_head = m_head;
2034
2035	/*
2036	** Here we swap the map so the last descriptor,
2037	** which gets the completion interrupt has the
2038	** real map, and the first descriptor gets the
2039	** unused map from this descriptor.
2040	*/
2041	tx_buffer_mapped->map = tx_buffer->map;
2042	tx_buffer->map = map;
2043        bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);
2044
2045        /*
2046         * Last Descriptor of Packet
2047	 * needs End Of Packet (EOP)
2048	 * and Report Status (RS)
2049         */
2050        txd->read.cmd_type_len |=
2051	    htole32(E1000_ADVTXD_DCMD_EOP | E1000_ADVTXD_DCMD_RS);
2052	/*
2053	 * Keep track in the first buffer which
2054	 * descriptor will be written back
2055	 */
2056	tx_buffer = &txr->tx_buffers[first];
2057	tx_buffer->next_eop = last;
2058	/* Update the watchdog time early and often */
2059	txr->watchdog_time = ticks;
2060
2061	/*
2062	 * Advance the Transmit Descriptor Tail (TDT), this tells the E1000
2063	 * that this frame is available to transmit.
2064	 */
2065	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
2066	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
2067	E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), i);
2068	++txr->tx_packets;
2069
2070	return (0);
2071}
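
/*
 * Minimal caller sketch (illustrative only; the real start routines live
 * elsewhere in this file): igb_xmit() is invoked with the TX lock held and
 * takes a pointer to the mbuf pointer so it can replace the chain when it
 * defragments or pulls up headers:
 *
 *	IGB_TX_LOCK(txr);
 *	if (igb_xmit(txr, &m_head) != 0) {
 *		if (m_head != NULL)
 *			... requeue or free m_head ...
 *	}
 *	IGB_TX_UNLOCK(txr);
 */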
2072static void
2073igb_set_promisc(struct adapter *adapter)
2074{
2075	struct ifnet	*ifp = adapter->ifp;
2076	struct e1000_hw *hw = &adapter->hw;
2077	u32		reg;
2078
2079	if (adapter->vf_ifp) {
2080		e1000_promisc_set_vf(hw, e1000_promisc_enabled);
2081		return;
2082	}
2083
2084	reg = E1000_READ_REG(hw, E1000_RCTL);
2085	if (ifp->if_flags & IFF_PROMISC) {
2086		reg |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
2087		E1000_WRITE_REG(hw, E1000_RCTL, reg);
2088	} else if (ifp->if_flags & IFF_ALLMULTI) {
2089		reg |= E1000_RCTL_MPE;
2090		reg &= ~E1000_RCTL_UPE;
2091		E1000_WRITE_REG(hw, E1000_RCTL, reg);
2092	}
2093}
2094
2095static void
2096igb_disable_promisc(struct adapter *adapter)
2097{
2098	struct e1000_hw *hw = &adapter->hw;
2099	u32		reg;
2100
2101	if (adapter->vf_ifp) {
2102		e1000_promisc_set_vf(hw, e1000_promisc_disabled);
2103		return;
2104	}
2105	reg = E1000_READ_REG(hw, E1000_RCTL);
2106	reg &=  (~E1000_RCTL_UPE);
2107	reg &=  (~E1000_RCTL_MPE);
2108	E1000_WRITE_REG(hw, E1000_RCTL, reg);
2109}
2110
2111
2112/*********************************************************************
2113 *  Multicast Update
2114 *
2115 *  This routine is called whenever multicast address list is updated.
2116 *
2117 **********************************************************************/
2118
2119static void
2120igb_set_multi(struct adapter *adapter)
2121{
2122	struct ifnet	*ifp = adapter->ifp;
2123	struct ifmultiaddr *ifma;
2124	u32 reg_rctl = 0;
2125	u8  *mta;
2126
2127	int mcnt = 0;
2128
2129	IOCTL_DEBUGOUT("igb_set_multi: begin");
2130
2131	mta = adapter->mta;
2132	bzero(mta, sizeof(uint8_t) * ETH_ADDR_LEN *
2133	    MAX_NUM_MULTICAST_ADDRESSES);
2134
2135#if __FreeBSD_version < 800000
2136	IF_ADDR_LOCK(ifp);
2137#else
2138	if_maddr_rlock(ifp);
2139#endif
2140	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
2141		if (ifma->ifma_addr->sa_family != AF_LINK)
2142			continue;
2143
2144		if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
2145			break;
2146
2147		bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
2148		    &mta[mcnt * ETH_ADDR_LEN], ETH_ADDR_LEN);
2149		mcnt++;
2150	}
2151#if __FreeBSD_version < 800000
2152	IF_ADDR_UNLOCK(ifp);
2153#else
2154	if_maddr_runlock(ifp);
2155#endif
2156
2157	if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) {
2158		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2159		reg_rctl |= E1000_RCTL_MPE;
2160		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2161	} else
2162		e1000_update_mc_addr_list(&adapter->hw, mta, mcnt);
2163}
2164
2165
2166/*********************************************************************
2167 *  Timer routine:
2168 *  	This routine checks for link status,
2169 *	updates statistics, and does the watchdog.
2170 *
2171 **********************************************************************/
2172
2173static void
2174igb_local_timer(void *arg)
2175{
2176	struct adapter		*adapter = arg;
2177	device_t		dev = adapter->dev;
2178	struct ifnet		*ifp = adapter->ifp;
2179	struct tx_ring		*txr = adapter->tx_rings;
2180	struct igb_queue	*que = adapter->queues;
2181	int			hung = 0, busy = 0;
2182
2183
2184	IGB_CORE_LOCK_ASSERT(adapter);
2185
2186	igb_update_link_status(adapter);
2187	igb_update_stats_counters(adapter);
2188
2189        /*
2190        ** Check the TX queues status
2191	**	- central locked handling of OACTIVE
2192	**	- watchdog only if all queues show hung
2193        */
2194	for (int i = 0; i < adapter->num_queues; i++, que++, txr++) {
2195		if ((txr->queue_status & IGB_QUEUE_HUNG) &&
2196		    (adapter->pause_frames == 0))
2197			++hung;
2198		if (txr->queue_status & IGB_QUEUE_DEPLETED)
2199			++busy;
2200		if ((txr->queue_status & IGB_QUEUE_IDLE) == 0)
2201			taskqueue_enqueue(que->tq, &que->que_task);
2202	}
2203	if (hung == adapter->num_queues)
2204		goto timeout;
2205	if (busy == adapter->num_queues)
2206		ifp->if_drv_flags |= IFF_DRV_OACTIVE;
2207	else if ((ifp->if_drv_flags & IFF_DRV_OACTIVE) &&
2208	    (busy < adapter->num_queues))
2209		ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
2210
2211	adapter->pause_frames = 0;
2212	callout_reset(&adapter->timer, hz, igb_local_timer, adapter);
2213#ifndef DEVICE_POLLING
2214	/* Schedule all queue interrupts - deadlock protection */
2215	E1000_WRITE_REG(&adapter->hw, E1000_EICS, adapter->que_mask);
2216#endif
2217	return;
2218
2219timeout:
2220	device_printf(adapter->dev, "Watchdog timeout -- resetting\n");
2221	device_printf(dev,"Queue(%d) tdh = %d, hw tdt = %d\n", txr->me,
2222            E1000_READ_REG(&adapter->hw, E1000_TDH(txr->me)),
2223            E1000_READ_REG(&adapter->hw, E1000_TDT(txr->me)));
2224	device_printf(dev,"TX(%d) desc avail = %d,"
2225            "Next TX to Clean = %d\n",
2226            txr->me, txr->tx_avail, txr->next_to_clean);
2227	adapter->ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2228	adapter->watchdog_events++;
2229	igb_init_locked(adapter);
2230}
2231
2232static void
2233igb_update_link_status(struct adapter *adapter)
2234{
2235	struct e1000_hw *hw = &adapter->hw;
2236	struct ifnet *ifp = adapter->ifp;
2237	device_t dev = adapter->dev;
2238	struct tx_ring *txr = adapter->tx_rings;
2239	u32 link_check, thstat, ctrl;
2240
2241	link_check = thstat = ctrl = 0;
2242
2243	/* Get the cached link value or read for real */
2244        switch (hw->phy.media_type) {
2245        case e1000_media_type_copper:
2246                if (hw->mac.get_link_status) {
2247			/* Do the work to read phy */
2248                        e1000_check_for_link(hw);
2249                        link_check = !hw->mac.get_link_status;
2250                } else
2251                        link_check = TRUE;
2252                break;
2253        case e1000_media_type_fiber:
2254                e1000_check_for_link(hw);
2255                link_check = (E1000_READ_REG(hw, E1000_STATUS) &
2256                                 E1000_STATUS_LU);
2257                break;
2258        case e1000_media_type_internal_serdes:
2259                e1000_check_for_link(hw);
2260                link_check = adapter->hw.mac.serdes_has_link;
2261                break;
2262	/* VF device is type_unknown */
2263        case e1000_media_type_unknown:
2264                e1000_check_for_link(hw);
2265		link_check = !hw->mac.get_link_status;
2266		/* Fall thru */
2267        default:
2268                break;
2269        }
2270
2271	/* Check for thermal downshift or shutdown */
2272	if (hw->mac.type == e1000_i350) {
2273		thstat = E1000_READ_REG(hw, E1000_THSTAT);
2274		ctrl = E1000_READ_REG(hw, E1000_CTRL_EXT);
2275	}
2276
2277	/* Now we check if a transition has happened */
2278	if (link_check && (adapter->link_active == 0)) {
2279		e1000_get_speed_and_duplex(&adapter->hw,
2280		    &adapter->link_speed, &adapter->link_duplex);
2281		if (bootverbose)
2282			device_printf(dev, "Link is up %d Mbps %s\n",
2283			    adapter->link_speed,
2284			    ((adapter->link_duplex == FULL_DUPLEX) ?
2285			    "Full Duplex" : "Half Duplex"));
2286		adapter->link_active = 1;
2287		ifp->if_baudrate = adapter->link_speed * 1000000;
2288		if ((ctrl & E1000_CTRL_EXT_LINK_MODE_GMII) &&
2289		    (thstat & E1000_THSTAT_LINK_THROTTLE))
2290			device_printf(dev, "Link: thermal downshift\n");
2291		/* This can sleep */
2292		if_link_state_change(ifp, LINK_STATE_UP);
2293	} else if (!link_check && (adapter->link_active == 1)) {
2294		ifp->if_baudrate = adapter->link_speed = 0;
2295		adapter->link_duplex = 0;
2296		if (bootverbose)
2297			device_printf(dev, "Link is Down\n");
2298		if ((ctrl & E1000_CTRL_EXT_LINK_MODE_GMII) &&
2299		    (thstat & E1000_THSTAT_PWR_DOWN))
2300			device_printf(dev, "Link: thermal shutdown\n");
2301		adapter->link_active = 0;
2302		/* This can sleep */
2303		if_link_state_change(ifp, LINK_STATE_DOWN);
2304		/* Reset queue state */
2305		for (int i = 0; i < adapter->num_queues; i++, txr++)
2306			txr->queue_status = IGB_QUEUE_IDLE;
2307	}
2308}
2309
2310/*********************************************************************
2311 *
2312 *  This routine disables all traffic on the adapter by issuing a
2313 *  global reset on the MAC and deallocates TX/RX buffers.
2314 *
2315 **********************************************************************/
2316
2317static void
2318igb_stop(void *arg)
2319{
2320	struct adapter	*adapter = arg;
2321	struct ifnet	*ifp = adapter->ifp;
2322	struct tx_ring *txr = adapter->tx_rings;
2323
2324	IGB_CORE_LOCK_ASSERT(adapter);
2325
2326	INIT_DEBUGOUT("igb_stop: begin");
2327
2328	igb_disable_intr(adapter);
2329
2330	callout_stop(&adapter->timer);
2331
2332	/* Tell the stack that the interface is no longer active */
2333	ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2334	ifp->if_drv_flags |= IFF_DRV_OACTIVE;
2335
2336	/* Disarm watchdog timer. */
2337	for (int i = 0; i < adapter->num_queues; i++, txr++) {
2338		IGB_TX_LOCK(txr);
2339		txr->queue_status = IGB_QUEUE_IDLE;
2340		IGB_TX_UNLOCK(txr);
2341	}
2342
2343	e1000_reset_hw(&adapter->hw);
2344	E1000_WRITE_REG(&adapter->hw, E1000_WUC, 0);
2345
2346	e1000_led_off(&adapter->hw);
2347	e1000_cleanup_led(&adapter->hw);
2348}
2349
2350
2351/*********************************************************************
2352 *
2353 *  Determine hardware revision.
2354 *
2355 **********************************************************************/
2356static void
2357igb_identify_hardware(struct adapter *adapter)
2358{
2359	device_t dev = adapter->dev;
2360
2361	/* Make sure our PCI config space has the necessary stuff set */
2362	adapter->hw.bus.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2);
2363	if (!((adapter->hw.bus.pci_cmd_word & PCIM_CMD_BUSMASTEREN) &&
2364	    (adapter->hw.bus.pci_cmd_word & PCIM_CMD_MEMEN))) {
2365		INIT_DEBUGOUT("Memory Access and/or Bus Master "
2366		    "bits were not set!\n");
2367		adapter->hw.bus.pci_cmd_word |=
2368		(PCIM_CMD_BUSMASTEREN | PCIM_CMD_MEMEN);
2369		pci_write_config(dev, PCIR_COMMAND,
2370		    adapter->hw.bus.pci_cmd_word, 2);
2371	}
2372
2373	/* Save off the information about this board */
2374	adapter->hw.vendor_id = pci_get_vendor(dev);
2375	adapter->hw.device_id = pci_get_device(dev);
2376	adapter->hw.revision_id = pci_read_config(dev, PCIR_REVID, 1);
2377	adapter->hw.subsystem_vendor_id =
2378	    pci_read_config(dev, PCIR_SUBVEND_0, 2);
2379	adapter->hw.subsystem_device_id =
2380	    pci_read_config(dev, PCIR_SUBDEV_0, 2);
2381
2382	/* Set MAC type early for PCI setup */
2383	e1000_set_mac_type(&adapter->hw);
2384
2385	/* Are we a VF device? */
2386	if ((adapter->hw.mac.type == e1000_vfadapt) ||
2387	    (adapter->hw.mac.type == e1000_vfadapt_i350))
2388		adapter->vf_ifp = 1;
2389	else
2390		adapter->vf_ifp = 0;
2391}
2392
2393static int
2394igb_allocate_pci_resources(struct adapter *adapter)
2395{
2396	device_t	dev = adapter->dev;
2397	int		rid;
2398
2399	rid = PCIR_BAR(0);
2400	adapter->pci_mem = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
2401	    &rid, RF_ACTIVE);
2402	if (adapter->pci_mem == NULL) {
2403		device_printf(dev, "Unable to allocate bus resource: memory\n");
2404		return (ENXIO);
2405	}
2406	adapter->osdep.mem_bus_space_tag =
2407	    rman_get_bustag(adapter->pci_mem);
2408	adapter->osdep.mem_bus_space_handle =
2409	    rman_get_bushandle(adapter->pci_mem);
2410	adapter->hw.hw_addr = (u8 *)&adapter->osdep.mem_bus_space_handle;
2411
2412	adapter->num_queues = 1; /* Defaults for Legacy or MSI */
2413
2414	/* This will setup either MSI/X or MSI */
2415	adapter->msix = igb_setup_msix(adapter);
2416	adapter->hw.back = &adapter->osdep;
2417
2418	return (0);
2419}
2420
2421/*********************************************************************
2422 *
2423 *  Setup the Legacy or MSI Interrupt handler
2424 *
2425 **********************************************************************/
2426static int
2427igb_allocate_legacy(struct adapter *adapter)
2428{
2429	device_t		dev = adapter->dev;
2430	struct igb_queue	*que = adapter->queues;
2431	struct tx_ring		*txr = adapter->tx_rings;
2432	int			error, rid = 0;
2433
2434	/* Turn off all interrupts */
2435	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2436
2437	/* MSI RID is 1 */
2438	if (adapter->msix == 1)
2439		rid = 1;
2440
2441	/* We allocate a single interrupt resource */
2442	adapter->res = bus_alloc_resource_any(dev,
2443	    SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2444	if (adapter->res == NULL) {
2445		device_printf(dev, "Unable to allocate bus resource: "
2446		    "interrupt\n");
2447		return (ENXIO);
2448	}
2449
2450#if __FreeBSD_version >= 800000
2451	TASK_INIT(&txr->txq_task, 0, igb_deferred_mq_start, txr);
2452#endif
2453
2454	/*
2455	 * Try allocating a fast interrupt and the associated deferred
2456	 * processing contexts.
2457	 */
2458	TASK_INIT(&que->que_task, 0, igb_handle_que, que);
2459	/* Make tasklet for deferred link handling */
2460	TASK_INIT(&adapter->link_task, 0, igb_handle_link, adapter);
2461	que->tq = taskqueue_create_fast("igb_taskq", M_NOWAIT,
2462	    taskqueue_thread_enqueue, &que->tq);
2463	taskqueue_start_threads(&que->tq, 1, PI_NET, "%s taskq",
2464	    device_get_nameunit(adapter->dev));
2465	if ((error = bus_setup_intr(dev, adapter->res,
2466	    INTR_TYPE_NET | INTR_MPSAFE, igb_irq_fast, NULL,
2467	    adapter, &adapter->tag)) != 0) {
2468		device_printf(dev, "Failed to register fast interrupt "
2469			    "handler: %d\n", error);
2470		taskqueue_free(que->tq);
2471		que->tq = NULL;
2472		return (error);
2473	}
2474
2475	return (0);
2476}
2477
2478
2479/*********************************************************************
2480 *
2481 *  Setup the MSIX Queue Interrupt handlers:
2482 *
2483 **********************************************************************/
2484static int
2485igb_allocate_msix(struct adapter *adapter)
2486{
2487	device_t		dev = adapter->dev;
2488	struct igb_queue	*que = adapter->queues;
2489	int			error, rid, vector = 0;
2490
2491	/* Be sure to start with all interrupts disabled */
2492	E1000_WRITE_REG(&adapter->hw, E1000_IMC, ~0);
2493	E1000_WRITE_FLUSH(&adapter->hw);
2494
2495	for (int i = 0; i < adapter->num_queues; i++, vector++, que++) {
2496		rid = vector +1;
2497		que->res = bus_alloc_resource_any(dev,
2498		    SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2499		if (que->res == NULL) {
2500			device_printf(dev,
2501			    "Unable to allocate bus resource: "
2502			    "MSIX Queue Interrupt\n");
2503			return (ENXIO);
2504		}
2505		error = bus_setup_intr(dev, que->res,
2506	    	    INTR_TYPE_NET | INTR_MPSAFE, NULL,
2507		    igb_msix_que, que, &que->tag);
2508		if (error) {
2509			que->res = NULL;
2510			device_printf(dev, "Failed to register Queue handler");
2511			return (error);
2512		}
2513#if __FreeBSD_version >= 800504
2514		bus_describe_intr(dev, que->res, que->tag, "que %d", i);
2515#endif
2516		que->msix = vector;
2517		if (adapter->hw.mac.type == e1000_82575)
2518			que->eims = E1000_EICR_TX_QUEUE0 << i;
2519		else
2520			que->eims = 1 << vector;
2521		/*
2522		** Bind the msix vector, and thus the
2523		** rings to the corresponding cpu.
2524		*/
2525		if (adapter->num_queues > 1) {
2526			if (igb_last_bind_cpu < 0)
2527				igb_last_bind_cpu = CPU_FIRST();
2528			bus_bind_intr(dev, que->res, igb_last_bind_cpu);
2529			device_printf(dev,
2530				"Bound queue %d to cpu %d\n",
2531				i,igb_last_bind_cpu);
2532			igb_last_bind_cpu = CPU_NEXT(igb_last_bind_cpu);
2533		}
2534#if __FreeBSD_version >= 800000
2535		TASK_INIT(&que->txr->txq_task, 0, igb_deferred_mq_start,
2536		    que->txr);
2537#endif
2538		/* Make tasklet for deferred handling */
2539		TASK_INIT(&que->que_task, 0, igb_handle_que, que);
2540		que->tq = taskqueue_create("igb_que", M_NOWAIT,
2541		    taskqueue_thread_enqueue, &que->tq);
2542		taskqueue_start_threads(&que->tq, 1, PI_NET, "%s que",
2543		    device_get_nameunit(adapter->dev));
2544	}
2545
2546	/* And Link */
2547	rid = vector + 1;
2548	adapter->res = bus_alloc_resource_any(dev,
2549	    SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2550	if (adapter->res == NULL) {
2551		device_printf(dev,
2552		    "Unable to allocate bus resource: "
2553		    "MSIX Link Interrupt\n");
2554		return (ENXIO);
2555	}
2556	if ((error = bus_setup_intr(dev, adapter->res,
2557	    INTR_TYPE_NET | INTR_MPSAFE, NULL,
2558	    igb_msix_link, adapter, &adapter->tag)) != 0) {
2559		device_printf(dev, "Failed to register Link handler");
2560		return (error);
2561	}
2562#if __FreeBSD_version >= 800504
2563	bus_describe_intr(dev, adapter->res, adapter->tag, "link");
2564#endif
2565	adapter->linkvec = vector;
2566
2567	return (0);
2568}
2569
2570
2571static void
2572igb_configure_queues(struct adapter *adapter)
2573{
2574	struct	e1000_hw	*hw = &adapter->hw;
2575	struct	igb_queue	*que;
2576	u32			tmp, ivar = 0, newitr = 0;
2577
2578	/* First turn on RSS capability */
2579	if (adapter->hw.mac.type != e1000_82575)
2580		E1000_WRITE_REG(hw, E1000_GPIE,
2581		    E1000_GPIE_MSIX_MODE | E1000_GPIE_EIAME |
2582		    E1000_GPIE_PBA | E1000_GPIE_NSICR);
2583
2584	/* Turn on MSIX */
2585	switch (adapter->hw.mac.type) {
2586	case e1000_82580:
2587	case e1000_i350:
2588	case e1000_i210:
2589	case e1000_i211:
2590	case e1000_vfadapt:
2591	case e1000_vfadapt_i350:
2592		/* RX entries */
2593		for (int i = 0; i < adapter->num_queues; i++) {
2594			u32 index = i >> 1;
2595			ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2596			que = &adapter->queues[i];
2597			if (i & 1) {
2598				ivar &= 0xFF00FFFF;
2599				ivar |= (que->msix | E1000_IVAR_VALID) << 16;
2600			} else {
2601				ivar &= 0xFFFFFF00;
2602				ivar |= que->msix | E1000_IVAR_VALID;
2603			}
2604			E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2605		}
2606		/* TX entries */
2607		for (int i = 0; i < adapter->num_queues; i++) {
2608			u32 index = i >> 1;
2609			ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2610			que = &adapter->queues[i];
2611			if (i & 1) {
2612				ivar &= 0x00FFFFFF;
2613				ivar |= (que->msix | E1000_IVAR_VALID) << 24;
2614			} else {
2615				ivar &= 0xFFFF00FF;
2616				ivar |= (que->msix | E1000_IVAR_VALID) << 8;
2617			}
2618			E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2619			adapter->que_mask |= que->eims;
2620		}
2621
2622		/* And for the link interrupt */
2623		ivar = (adapter->linkvec | E1000_IVAR_VALID) << 8;
2624		adapter->link_mask = 1 << adapter->linkvec;
2625		E1000_WRITE_REG(hw, E1000_IVAR_MISC, ivar);
2626		break;
2627	case e1000_82576:
2628		/* RX entries */
2629		for (int i = 0; i < adapter->num_queues; i++) {
2630			u32 index = i & 0x7; /* Each IVAR has two entries */
2631			ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2632			que = &adapter->queues[i];
2633			if (i < 8) {
2634				ivar &= 0xFFFFFF00;
2635				ivar |= que->msix | E1000_IVAR_VALID;
2636			} else {
2637				ivar &= 0xFF00FFFF;
2638				ivar |= (que->msix | E1000_IVAR_VALID) << 16;
2639			}
2640			E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2641			adapter->que_mask |= que->eims;
2642		}
2643		/* TX entries */
2644		for (int i = 0; i < adapter->num_queues; i++) {
2645			u32 index = i & 0x7; /* Each IVAR has two entries */
2646			ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2647			que = &adapter->queues[i];
2648			if (i < 8) {
2649				ivar &= 0xFFFF00FF;
2650				ivar |= (que->msix | E1000_IVAR_VALID) << 8;
2651			} else {
2652				ivar &= 0x00FFFFFF;
2653				ivar |= (que->msix | E1000_IVAR_VALID) << 24;
2654			}
2655			E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2656			adapter->que_mask |= que->eims;
2657		}
2658
2659		/* And for the link interrupt */
2660		ivar = (adapter->linkvec | E1000_IVAR_VALID) << 8;
2661		adapter->link_mask = 1 << adapter->linkvec;
2662		E1000_WRITE_REG(hw, E1000_IVAR_MISC, ivar);
2663		break;
2664
2665	case e1000_82575:
2666                /* Enable MSI-X support */
2667		tmp = E1000_READ_REG(hw, E1000_CTRL_EXT);
2668                tmp |= E1000_CTRL_EXT_PBA_CLR;
2669                /* Auto-Mask interrupts upon ICR read. */
2670                tmp |= E1000_CTRL_EXT_EIAME;
2671                tmp |= E1000_CTRL_EXT_IRCA;
2672                E1000_WRITE_REG(hw, E1000_CTRL_EXT, tmp);
2673
2674		/* Queues */
2675		for (int i = 0; i < adapter->num_queues; i++) {
2676			que = &adapter->queues[i];
2677			tmp = E1000_EICR_RX_QUEUE0 << i;
2678			tmp |= E1000_EICR_TX_QUEUE0 << i;
2679			que->eims = tmp;
2680			E1000_WRITE_REG_ARRAY(hw, E1000_MSIXBM(0),
2681			    i, que->eims);
2682			adapter->que_mask |= que->eims;
2683		}
2684
2685		/* Link */
2686		E1000_WRITE_REG(hw, E1000_MSIXBM(adapter->linkvec),
2687		    E1000_EIMS_OTHER);
2688		adapter->link_mask |= E1000_EIMS_OTHER;
2689	default:
2690		break;
2691	}
2692
2693	/* Set the starting interrupt rate */
2694	if (igb_max_interrupt_rate > 0)
2695		newitr = (4000000 / igb_max_interrupt_rate) & 0x7FFC;
2696
2697        if (hw->mac.type == e1000_82575)
2698                newitr |= newitr << 16;
2699        else
2700                newitr |= E1000_EITR_CNT_IGNR;
2701
2702	for (int i = 0; i < adapter->num_queues; i++) {
2703		que = &adapter->queues[i];
2704		E1000_WRITE_REG(hw, E1000_EITR(que->msix), newitr);
2705	}
2706
2707	return;
2708}
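
/*
 * Illustrative sketch of the 82580/i350-class IVAR layout programmed above
 * (derived from the masks used in igb_configure_queues); each 32-bit IVAR0
 * array entry holds the vectors for a pair of queues:
 *
 *	bits  7:0   RX queue (2 * index)
 *	bits 15:8   TX queue (2 * index)
 *	bits 23:16  RX queue (2 * index + 1)
 *	bits 31:24  TX queue (2 * index + 1)
 *
 * with E1000_IVAR_VALID set in each byte that is in use.  The 82576 case
 * instead indexes IVAR0 by (queue & 0x7) and uses the low or high half of
 * the entry depending on whether the queue number is below 8.
 */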
2709
2710
2711static void
2712igb_free_pci_resources(struct adapter *adapter)
2713{
2714	struct		igb_queue *que = adapter->queues;
2715	device_t	dev = adapter->dev;
2716	int		rid;
2717
2718	/*
2719	** There is a slight possibility of a failure mode
2720	** in attach that will result in entering this function
2721	** before interrupt resources have been initialized, and
2722	** in that case we do not want to execute the loops below.
2723	** We can detect this reliably by the state of the adapter
2724	** res pointer.
2725	*/
2726	if (adapter->res == NULL)
2727		goto mem;
2728
2729	/*
2730	 * First release all the interrupt resources:
2731	 */
2732	for (int i = 0; i < adapter->num_queues; i++, que++) {
2733		rid = que->msix + 1;
2734		if (que->tag != NULL) {
2735			bus_teardown_intr(dev, que->res, que->tag);
2736			que->tag = NULL;
2737		}
2738		if (que->res != NULL)
2739			bus_release_resource(dev,
2740			    SYS_RES_IRQ, rid, que->res);
2741	}
2742
2743	/* Clean the Legacy or Link interrupt last */
2744	if (adapter->linkvec) /* we are doing MSIX */
2745		rid = adapter->linkvec + 1;
2746	else
2747		(adapter->msix != 0) ? (rid = 1):(rid = 0);
2748
2749	que = adapter->queues;
2750	if (adapter->tag != NULL) {
2751		taskqueue_drain(que->tq, &adapter->link_task);
2752		bus_teardown_intr(dev, adapter->res, adapter->tag);
2753		adapter->tag = NULL;
2754	}
2755	if (adapter->res != NULL)
2756		bus_release_resource(dev, SYS_RES_IRQ, rid, adapter->res);
2757
2758	for (int i = 0; i < adapter->num_queues; i++, que++) {
2759		if (que->tq != NULL) {
2760#if __FreeBSD_version >= 800000
2761			taskqueue_drain(que->tq, &que->txr->txq_task);
2762#endif
2763			taskqueue_drain(que->tq, &que->que_task);
2764			taskqueue_free(que->tq);
2765		}
2766	}
2767mem:
2768	if (adapter->msix)
2769		pci_release_msi(dev);
2770
2771	if (adapter->msix_mem != NULL)
2772		bus_release_resource(dev, SYS_RES_MEMORY,
2773		    PCIR_BAR(IGB_MSIX_BAR), adapter->msix_mem);
2774
2775	if (adapter->pci_mem != NULL)
2776		bus_release_resource(dev, SYS_RES_MEMORY,
2777		    PCIR_BAR(0), adapter->pci_mem);
2778
2779}
2780
2781/*
2782 * Setup Either MSI/X or MSI
2783 */
2784static int
2785igb_setup_msix(struct adapter *adapter)
2786{
2787	device_t dev = adapter->dev;
2788	int rid, want, queues, msgs, maxqueues;
2789
2790	/* tuneable override */
2791	if (igb_enable_msix == 0)
2792		goto msi;
2793
2794	/* First try MSI/X */
2795	rid = PCIR_BAR(IGB_MSIX_BAR);
2796	adapter->msix_mem = bus_alloc_resource_any(dev,
2797	    SYS_RES_MEMORY, &rid, RF_ACTIVE);
2798       	if (!adapter->msix_mem) {
2799		/* May not be enabled */
2800		device_printf(adapter->dev,
2801		    "Unable to map MSIX table \n");
2802		goto msi;
2803	}
2804
2805	msgs = pci_msix_count(dev);
2806	if (msgs == 0) { /* system has msix disabled */
2807		bus_release_resource(dev, SYS_RES_MEMORY,
2808		    PCIR_BAR(IGB_MSIX_BAR), adapter->msix_mem);
2809		adapter->msix_mem = NULL;
2810		goto msi;
2811	}
2812
2813	/* Figure out a reasonable auto config value */
2814	queues = (mp_ncpus > (msgs-1)) ? (msgs-1) : mp_ncpus;
2815
2816	/* Manual override */
2817	if (igb_num_queues != 0)
2818		queues = igb_num_queues;
2819
2820	/* Sanity check based on HW */
2821	switch (adapter->hw.mac.type) {
2822		case e1000_82575:
2823			maxqueues = 4;
2824			break;
2825		case e1000_82576:
2826		case e1000_82580:
2827		case e1000_i350:
2828			maxqueues = 8;
2829			break;
2830		case e1000_i210:
2831			maxqueues = 4;
2832			break;
2833		case e1000_i211:
2834			maxqueues = 2;
2835			break;
2836		default:  /* VF interfaces */
2837			maxqueues = 1;
2838			break;
2839	}
2840	if (queues > maxqueues)
2841		queues = maxqueues;
2842
2843	/*
2844	** One vector (RX/TX pair) per queue
2845	** plus an additional one for the Link interrupt
2846	*/
2847	want = queues + 1;
2848	if (msgs >= want)
2849		msgs = want;
2850	else {
2851               	device_printf(adapter->dev,
2852		    "MSIX Configuration Problem, "
2853		    "%d vectors configured, but %d queues wanted!\n",
2854		    msgs, want);
2855		return (0);
2856	}
2857	if ((msgs) && pci_alloc_msix(dev, &msgs) == 0) {
2858               	device_printf(adapter->dev,
2859		    "Using MSIX interrupts with %d vectors\n", msgs);
2860		adapter->num_queues = queues;
2861		return (msgs);
2862	}
2863msi:
2864       	msgs = pci_msi_count(dev);
2865	if (msgs == 1 && pci_alloc_msi(dev, &msgs) == 0) {
2866		device_printf(adapter->dev," Using MSI interrupt\n");
2867		return (msgs);
2868	}
2869	return (0);
2870}
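
/*
 * Worked example (illustrative): on a 4-CPU system where pci_msix_count()
 * reports 10 messages, no tunables are set and the MAC supports at least 4
 * queues, queues = min(4, 10 - 1) = 4 and want = queues + 1 = 5 (one RX/TX
 * pair per queue plus the link vector), so pci_alloc_msix() is asked for 5
 * vectors and adapter->num_queues becomes 4.  If MSI-X is unavailable the
 * code falls back to a single MSI vector with one queue.
 */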
2871
2872/*********************************************************************
2873 *
2874 *  Set up a fresh starting state
2875 *
2876 **********************************************************************/
2877static void
2878igb_reset(struct adapter *adapter)
2879{
2880	device_t	dev = adapter->dev;
2881	struct e1000_hw *hw = &adapter->hw;
2882	struct e1000_fc_info *fc = &hw->fc;
2883	struct ifnet	*ifp = adapter->ifp;
2884	u32		pba = 0;
2885	u16		hwm;
2886
2887	INIT_DEBUGOUT("igb_reset: begin");
2888
2889	/* Let the firmware know the OS is in control */
2890	igb_get_hw_control(adapter);
2891
2892	/*
2893	 * Packet Buffer Allocation (PBA)
2894	 * Writing PBA sets the receive portion of the buffer
2895	 * the remainder is used for the transmit buffer.
2896	 */
2897	switch (hw->mac.type) {
2898	case e1000_82575:
2899		pba = E1000_PBA_32K;
2900		break;
2901	case e1000_82576:
2902	case e1000_vfadapt:
2903		pba = E1000_READ_REG(hw, E1000_RXPBS);
2904		pba &= E1000_RXPBS_SIZE_MASK_82576;
2905		break;
2906	case e1000_82580:
2907	case e1000_i350:
2908	case e1000_vfadapt_i350:
2909		pba = E1000_READ_REG(hw, E1000_RXPBS);
2910		pba = e1000_rxpbs_adjust_82580(pba);
2911		break;
2912	case e1000_i210:
2913	case e1000_i211:
2914		pba = E1000_PBA_34K;
2915	default:
2916		break;
2917	}
2918
2919	/* Special needs in case of Jumbo frames */
2920	if ((hw->mac.type == e1000_82575) && (ifp->if_mtu > ETHERMTU)) {
2921		u32 tx_space, min_tx, min_rx;
2922		pba = E1000_READ_REG(hw, E1000_PBA);
2923		tx_space = pba >> 16;
2924		pba &= 0xffff;
2925		min_tx = (adapter->max_frame_size +
2926		    sizeof(struct e1000_tx_desc) - ETHERNET_FCS_SIZE) * 2;
2927		min_tx = roundup2(min_tx, 1024);
2928		min_tx >>= 10;
2929                min_rx = adapter->max_frame_size;
2930                min_rx = roundup2(min_rx, 1024);
2931                min_rx >>= 10;
2932		if (tx_space < min_tx &&
2933		    ((min_tx - tx_space) < pba)) {
2934			pba = pba - (min_tx - tx_space);
2935			/*
2936                         * if short on rx space, rx wins
2937                         * and must trump tx adjustment
2938			 */
2939                        if (pba < min_rx)
2940                                pba = min_rx;
2941		}
2942		E1000_WRITE_REG(hw, E1000_PBA, pba);
2943	}
2944
2945	INIT_DEBUGOUT1("igb_init: pba=%dK",pba);
2946
2947	/*
2948	 * These parameters control the automatic generation (Tx) and
2949	 * response (Rx) to Ethernet PAUSE frames.
2950	 * - High water mark should allow for at least two frames to be
2951	 *   received after sending an XOFF.
2952	 * - Low water mark works best when it is very near the high water mark.
2953	 *   This allows the receiver to restart by sending XON when it has
2954	 *   drained a bit.
2955	 */
2956	hwm = min(((pba << 10) * 9 / 10),
2957	    ((pba << 10) - 2 * adapter->max_frame_size));
2958
2959	if (hw->mac.type < e1000_82576) {
2960		fc->high_water = hwm & 0xFFF8;  /* 8-byte granularity */
2961		fc->low_water = fc->high_water - 8;
2962	} else {
2963		fc->high_water = hwm & 0xFFF0;  /* 16-byte granularity */
2964		fc->low_water = fc->high_water - 16;
2965	}
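
	/*
	 * Worked example (illustrative, assuming an 82575 with pba = 32K
	 * and a max_frame_size of 1518): hwm = min(32768 * 9 / 10,
	 * 32768 - 2 * 1518) = min(29491, 29732) = 29491, so
	 * fc->high_water = 29491 & 0xFFF8 = 29488 bytes and
	 * fc->low_water = 29480 bytes.
	 */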
2966
2967	fc->pause_time = IGB_FC_PAUSE_TIME;
2968	fc->send_xon = TRUE;
2969	if (adapter->fc)
2970		fc->requested_mode = adapter->fc;
2971	else
2972		fc->requested_mode = e1000_fc_default;
2973
2974	/* Issue a global reset */
2975	e1000_reset_hw(hw);
2976	E1000_WRITE_REG(hw, E1000_WUC, 0);
2977
2978	if (e1000_init_hw(hw) < 0)
2979		device_printf(dev, "Hardware Initialization Failed\n");
2980
2981	/* Setup DMA Coalescing */
2982	if ((hw->mac.type > e1000_82580) &&
2983	    (hw->mac.type != e1000_i211)) {
2984		u32 dmac;
2985		u32 reg = ~E1000_DMACR_DMAC_EN;
2986
2987		if (adapter->dmac == 0) { /* Disabling it */
2988			E1000_WRITE_REG(hw, E1000_DMACR, reg);
2989			goto reset_out;
2990		}
2991
2992		/* Set starting thresholds */
2993		E1000_WRITE_REG(hw, E1000_DMCTXTH, 0);
2994		E1000_WRITE_REG(hw, E1000_DMCRTRH, 0);
2995
2996		hwm = 64 * pba - adapter->max_frame_size / 16;
2997		if (hwm < 64 * (pba - 6))
2998			hwm = 64 * (pba - 6);
2999		reg = E1000_READ_REG(hw, E1000_FCRTC);
3000		reg &= ~E1000_FCRTC_RTH_COAL_MASK;
3001		reg |= ((hwm << E1000_FCRTC_RTH_COAL_SHIFT)
3002		    & E1000_FCRTC_RTH_COAL_MASK);
3003		E1000_WRITE_REG(hw, E1000_FCRTC, reg);
3004
3005
3006		dmac = pba - adapter->max_frame_size / 512;
3007		if (dmac < pba - 10)
3008			dmac = pba - 10;
3009		reg = E1000_READ_REG(hw, E1000_DMACR);
3010		reg &= ~E1000_DMACR_DMACTHR_MASK;
3011		reg |= ((dmac << E1000_DMACR_DMACTHR_SHIFT)
3012		    & E1000_DMACR_DMACTHR_MASK);
3013		/* transition to L0x or L1 if available..*/
3014		reg |= (E1000_DMACR_DMAC_EN | E1000_DMACR_DMAC_LX_MASK);
3015		/* timer = value in adapter->dmac in 32usec intervals */
3016		reg |= (adapter->dmac >> 5);
3017		E1000_WRITE_REG(hw, E1000_DMACR, reg);
3018
3019		/* Set the interval before transition */
3020		reg = E1000_READ_REG(hw, E1000_DMCTLX);
3021		reg |= 0x80000004;
3022		E1000_WRITE_REG(hw, E1000_DMCTLX, reg);
3023
3024		/* free space in tx packet buffer to wake from DMA coal */
3025		E1000_WRITE_REG(hw, E1000_DMCTXTH,
3026		    (20480 - (2 * adapter->max_frame_size)) >> 6);
3027
3028		/* make low power state decision controlled by DMA coal */
3029		reg = E1000_READ_REG(hw, E1000_PCIEMISC);
3030		reg &= ~E1000_PCIEMISC_LX_DECISION;
3031		E1000_WRITE_REG(hw, E1000_PCIEMISC, reg);
3032		device_printf(dev, "DMA Coalescing enabled\n");
3033
3034	} else if (hw->mac.type == e1000_82580) {
3035		u32 reg = E1000_READ_REG(hw, E1000_PCIEMISC);
3036		E1000_WRITE_REG(hw, E1000_DMACR, 0);
3037		E1000_WRITE_REG(hw, E1000_PCIEMISC,
3038		    reg & ~E1000_PCIEMISC_LX_DECISION);
3039	}
3040
3041reset_out:
3042	E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);
3043	e1000_get_phy_info(hw);
3044	e1000_check_for_link(hw);
3045	return;
3046}
3047
3048/*********************************************************************
3049 *
3050 *  Setup networking device structure and register an interface.
3051 *
3052 **********************************************************************/
3053static int
3054igb_setup_interface(device_t dev, struct adapter *adapter)
3055{
3056	struct ifnet   *ifp;
3057
3058	INIT_DEBUGOUT("igb_setup_interface: begin");
3059
3060	ifp = adapter->ifp = if_alloc(IFT_ETHER);
3061	if (ifp == NULL) {
3062		device_printf(dev, "can not allocate ifnet structure\n");
3063		return (-1);
3064	}
3065	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
3066	ifp->if_init =  igb_init;
3067	ifp->if_softc = adapter;
3068	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
3069	ifp->if_ioctl = igb_ioctl;
3070#if __FreeBSD_version >= 800000
3071	ifp->if_transmit = igb_mq_start;
3072	ifp->if_qflush = igb_qflush;
3073#else
3074	ifp->if_start = igb_start;
3075	IFQ_SET_MAXLEN(&ifp->if_snd, adapter->num_tx_desc - 1);
3076	ifp->if_snd.ifq_drv_maxlen = adapter->num_tx_desc - 1;
3077	IFQ_SET_READY(&ifp->if_snd);
3078#endif
3079
3080	ether_ifattach(ifp, adapter->hw.mac.addr);
3081
3082	ifp->if_capabilities = ifp->if_capenable = 0;
3083
3084	ifp->if_capabilities = IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM;
3085	ifp->if_capabilities |= IFCAP_TSO4;
3086	ifp->if_capabilities |= IFCAP_JUMBO_MTU;
3087	ifp->if_capenable = ifp->if_capabilities;
3088
3089	/* Don't enable LRO by default */
3090	ifp->if_capabilities |= IFCAP_LRO;
3091
3092#ifdef DEVICE_POLLING
3093	ifp->if_capabilities |= IFCAP_POLLING;
3094#endif
3095
3096	/*
3097	 * Tell the upper layer(s) we
3098	 * support full VLAN capability.
3099	 */
3100	ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);
3101	ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING
3102			     |  IFCAP_VLAN_HWTSO
3103			     |  IFCAP_VLAN_MTU;
3104	ifp->if_capenable |= IFCAP_VLAN_HWTAGGING
3105			  |  IFCAP_VLAN_HWTSO
3106			  |  IFCAP_VLAN_MTU;
3107
3108	/*
3109	** Don't turn this on by default, if vlans are
3110	** created on another pseudo device (eg. lagg)
3111	** then vlan events are not passed thru, breaking
3112	** operation, but with HW FILTER off it works. If
3113	** using vlans directly on the igb driver you can
3114	** enable this and get full hardware tag filtering.
3115	*/
3116	ifp->if_capabilities |= IFCAP_VLAN_HWFILTER;
3117
3118	/*
3119	 * Specify the media types supported by this adapter and register
3120	 * callbacks to update media and link information
3121	 */
3122	ifmedia_init(&adapter->media, IFM_IMASK,
3123	    igb_media_change, igb_media_status);
3124	if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
3125	    (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
3126		ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_SX | IFM_FDX,
3127			    0, NULL);
3128		ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_SX, 0, NULL);
3129	} else {
3130		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T, 0, NULL);
3131		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T | IFM_FDX,
3132			    0, NULL);
3133		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX,
3134			    0, NULL);
3135		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
3136			    0, NULL);
3137		if (adapter->hw.phy.type != e1000_phy_ife) {
3138			ifmedia_add(&adapter->media,
3139				IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
3140			ifmedia_add(&adapter->media,
3141				IFM_ETHER | IFM_1000_T, 0, NULL);
3142		}
3143	}
3144	ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
3145	ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
3146	return (0);
3147}
3148
3149
3150/*
3151 * Manage DMA'able memory.
3152 */
3153static void
3154igb_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
3155{
3156	if (error)
3157		return;
3158	*(bus_addr_t *) arg = segs[0].ds_addr;
3159}
3160
3161static int
3162igb_dma_malloc(struct adapter *adapter, bus_size_t size,
3163        struct igb_dma_alloc *dma, int mapflags)
3164{
3165	int error;
3166
3167	error = bus_dma_tag_create(bus_get_dma_tag(adapter->dev), /* parent */
3168				IGB_DBA_ALIGN, 0,	/* alignment, bounds */
3169				BUS_SPACE_MAXADDR,	/* lowaddr */
3170				BUS_SPACE_MAXADDR,	/* highaddr */
3171				NULL, NULL,		/* filter, filterarg */
3172				size,			/* maxsize */
3173				1,			/* nsegments */
3174				size,			/* maxsegsize */
3175				0,			/* flags */
3176				NULL,			/* lockfunc */
3177				NULL,			/* lockarg */
3178				&dma->dma_tag);
3179	if (error) {
3180		device_printf(adapter->dev,
3181		    "%s: bus_dma_tag_create failed: %d\n",
3182		    __func__, error);
3183		goto fail_0;
3184	}
3185
3186	error = bus_dmamem_alloc(dma->dma_tag, (void**) &dma->dma_vaddr,
3187	    BUS_DMA_NOWAIT | BUS_DMA_COHERENT, &dma->dma_map);
3188	if (error) {
3189		device_printf(adapter->dev,
3190		    "%s: bus_dmamem_alloc(%ju) failed: %d\n",
3191		    __func__, (uintmax_t)size, error);
3192		goto fail_2;
3193	}
3194
3195	dma->dma_paddr = 0;
3196	error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
3197	    size, igb_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT);
3198	if (error || dma->dma_paddr == 0) {
3199		device_printf(adapter->dev,
3200		    "%s: bus_dmamap_load failed: %d\n",
3201		    __func__, error);
3202		goto fail_3;
3203	}
3204
3205	return (0);
3206
3207fail_3:
3208	bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3209fail_2:
3210	bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
3211	bus_dma_tag_destroy(dma->dma_tag);
3212fail_0:
3213	dma->dma_map = NULL;
3214	dma->dma_tag = NULL;
3215
3216	return (error);
3217}
3218
3219static void
3220igb_dma_free(struct adapter *adapter, struct igb_dma_alloc *dma)
3221{
3222	if (dma->dma_tag == NULL)
3223		return;
3224	if (dma->dma_map != NULL) {
3225		bus_dmamap_sync(dma->dma_tag, dma->dma_map,
3226		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
3227		bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3228		bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
3229		dma->dma_map = NULL;
3230	}
3231	bus_dma_tag_destroy(dma->dma_tag);
3232	dma->dma_tag = NULL;
3233}
3234
3235
3236/*********************************************************************
3237 *
3238 *  Allocate memory for the transmit and receive rings, and then
3239 *  the descriptors associated with each, called only once at attach.
3240 *
3241 **********************************************************************/
3242static int
3243igb_allocate_queues(struct adapter *adapter)
3244{
3245	device_t dev = adapter->dev;
3246	struct igb_queue	*que = NULL;
3247	struct tx_ring		*txr = NULL;
3248	struct rx_ring		*rxr = NULL;
3249	int rsize, tsize, error = E1000_SUCCESS;
3250	int txconf = 0, rxconf = 0;
3251
3252	/* First allocate the top level queue structs */
3253	if (!(adapter->queues =
3254	    (struct igb_queue *) malloc(sizeof(struct igb_queue) *
3255	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3256		device_printf(dev, "Unable to allocate queue memory\n");
3257		error = ENOMEM;
3258		goto fail;
3259	}
3260
3261	/* Next allocate the TX ring struct memory */
3262	if (!(adapter->tx_rings =
3263	    (struct tx_ring *) malloc(sizeof(struct tx_ring) *
3264	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3265		device_printf(dev, "Unable to allocate TX ring memory\n");
3266		error = ENOMEM;
3267		goto tx_fail;
3268	}
3269
3270	/* Now allocate the RX */
3271	if (!(adapter->rx_rings =
3272	    (struct rx_ring *) malloc(sizeof(struct rx_ring) *
3273	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3274		device_printf(dev, "Unable to allocate RX ring memory\n");
3275		error = ENOMEM;
3276		goto rx_fail;
3277	}
3278
3279	tsize = roundup2(adapter->num_tx_desc *
3280	    sizeof(union e1000_adv_tx_desc), IGB_DBA_ALIGN);
3281	/*
3282	 * Now set up the TX queues, txconf is needed to handle the
3283	 * possibility that things fail midcourse and we need to
3284	 * undo memory gracefully
3285	 */
3286	for (int i = 0; i < adapter->num_queues; i++, txconf++) {
3287		/* Set up some basics */
3288		txr = &adapter->tx_rings[i];
3289		txr->adapter = adapter;
3290		txr->me = i;
3291
3292		/* Initialize the TX lock */
3293		snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
3294		    device_get_nameunit(dev), txr->me);
3295		mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF);
3296
3297		if (igb_dma_malloc(adapter, tsize,
3298			&txr->txdma, BUS_DMA_NOWAIT)) {
3299			device_printf(dev,
3300			    "Unable to allocate TX Descriptor memory\n");
3301			error = ENOMEM;
3302			goto err_tx_desc;
3303		}
3304		txr->tx_base = (struct e1000_tx_desc *)txr->txdma.dma_vaddr;
3305		bzero((void *)txr->tx_base, tsize);
3306
3307        	/* Now allocate transmit buffers for the ring */
3308        	if (igb_allocate_transmit_buffers(txr)) {
3309			device_printf(dev,
3310			    "Critical Failure setting up transmit buffers\n");
3311			error = ENOMEM;
3312			goto err_tx_desc;
3313        	}
3314#if __FreeBSD_version >= 800000
3315		/* Allocate a buf ring */
3316		txr->br = buf_ring_alloc(IGB_BR_SIZE, M_DEVBUF,
3317		    M_WAITOK, &txr->tx_mtx);
3318#endif
3319	}
3320
3321	/*
3322	 * Next the RX queues...
3323	 */
3324	rsize = roundup2(adapter->num_rx_desc *
3325	    sizeof(union e1000_adv_rx_desc), IGB_DBA_ALIGN);
3326	for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
3327		rxr = &adapter->rx_rings[i];
3328		rxr->adapter = adapter;
3329		rxr->me = i;
3330
3331		/* Initialize the RX lock */
3332		snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
3333		    device_get_nameunit(dev), rxr->me);
3334		mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF);
3335
3336		if (igb_dma_malloc(adapter, rsize,
3337			&rxr->rxdma, BUS_DMA_NOWAIT)) {
3338			device_printf(dev,
3339			    "Unable to allocate RxDescriptor memory\n");
3340			error = ENOMEM;
3341			goto err_rx_desc;
3342		}
3343		rxr->rx_base = (union e1000_adv_rx_desc *)rxr->rxdma.dma_vaddr;
3344		bzero((void *)rxr->rx_base, rsize);
3345
3346        	/* Allocate receive buffers for the ring*/
3347		if (igb_allocate_receive_buffers(rxr)) {
3348			device_printf(dev,
3349			    "Critical Failure setting up receive buffers\n");
3350			error = ENOMEM;
3351			goto err_rx_desc;
3352		}
3353	}
3354
3355	/*
3356	** Finally set up the queue holding structs
3357	*/
3358	for (int i = 0; i < adapter->num_queues; i++) {
3359		que = &adapter->queues[i];
3360		que->adapter = adapter;
3361		que->txr = &adapter->tx_rings[i];
3362		que->rxr = &adapter->rx_rings[i];
3363	}
3364
3365	return (0);
3366
3367err_rx_desc:
3368	for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
3369		igb_dma_free(adapter, &rxr->rxdma);
3370err_tx_desc:
3371	for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
3372		igb_dma_free(adapter, &txr->txdma);
3373	free(adapter->rx_rings, M_DEVBUF);
3374rx_fail:
3375#if __FreeBSD_version >= 800000
3376	buf_ring_free(txr->br, M_DEVBUF);
3377#endif
3378	free(adapter->tx_rings, M_DEVBUF);
3379tx_fail:
3380	free(adapter->queues, M_DEVBUF);
3381fail:
3382	return (error);
3383}
3384
3385/*********************************************************************
3386 *
3387 *  Allocate memory for tx_buffer structures. The tx_buffer stores all
3388 *  the information needed to transmit a packet on the wire. This is
3389 *  called only once at attach, setup is done every reset.
3390 *
3391 **********************************************************************/
3392static int
3393igb_allocate_transmit_buffers(struct tx_ring *txr)
3394{
3395	struct adapter *adapter = txr->adapter;
3396	device_t dev = adapter->dev;
3397	struct igb_tx_buffer *txbuf;
3398	int error, i;
3399
3400	/*
3401	 * Setup DMA descriptor areas.
3402	 */
3403	if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
3404			       1, 0,			/* alignment, bounds */
3405			       BUS_SPACE_MAXADDR,	/* lowaddr */
3406			       BUS_SPACE_MAXADDR,	/* highaddr */
3407			       NULL, NULL,		/* filter, filterarg */
3408			       IGB_TSO_SIZE,		/* maxsize */
3409			       IGB_MAX_SCATTER,		/* nsegments */
3410			       PAGE_SIZE,		/* maxsegsize */
3411			       0,			/* flags */
3412			       NULL,			/* lockfunc */
3413			       NULL,			/* lockfuncarg */
3414			       &txr->txtag))) {
3415		device_printf(dev,"Unable to allocate TX DMA tag\n");
3416		goto fail;
3417	}
3418
3419	if (!(txr->tx_buffers =
3420	    (struct igb_tx_buffer *) malloc(sizeof(struct igb_tx_buffer) *
3421	    adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3422		device_printf(dev, "Unable to allocate tx_buffer memory\n");
3423		error = ENOMEM;
3424		goto fail;
3425	}
3426
3427        /* Create the descriptor buffer dma maps */
3428	txbuf = txr->tx_buffers;
3429	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3430		error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
3431		if (error != 0) {
3432			device_printf(dev, "Unable to create TX DMA map\n");
3433			goto fail;
3434		}
3435	}
3436
3437	return 0;
3438fail:
3439	/* We free all; this handles the case where we fail in the middle */
3440	igb_free_transmit_structures(adapter);
3441	return (error);
3442}
3443
3444/*********************************************************************
3445 *
3446 *  Initialize a transmit ring.
3447 *
3448 **********************************************************************/
3449static void
3450igb_setup_transmit_ring(struct tx_ring *txr)
3451{
3452	struct adapter *adapter = txr->adapter;
3453	struct igb_tx_buffer *txbuf;
3454	int i;
3455#ifdef DEV_NETMAP
3456	struct netmap_adapter *na = NA(adapter->ifp);
3457	struct netmap_slot *slot;
3458#endif /* DEV_NETMAP */
3459
3460	/* Clear the old descriptor contents */
3461	IGB_TX_LOCK(txr);
3462#ifdef DEV_NETMAP
3463	slot = netmap_reset(na, NR_TX, txr->me, 0);
3464#endif /* DEV_NETMAP */
3465	bzero((void *)txr->tx_base,
3466	      (sizeof(union e1000_adv_tx_desc)) * adapter->num_tx_desc);
3467	/* Reset indices */
3468	txr->next_avail_desc = 0;
3469	txr->next_to_clean = 0;
3470
3471	/* Free any existing tx buffers. */
3472        txbuf = txr->tx_buffers;
3473	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3474		if (txbuf->m_head != NULL) {
3475			bus_dmamap_sync(txr->txtag, txbuf->map,
3476			    BUS_DMASYNC_POSTWRITE);
3477			bus_dmamap_unload(txr->txtag, txbuf->map);
3478			m_freem(txbuf->m_head);
3479			txbuf->m_head = NULL;
3480		}
3481#ifdef DEV_NETMAP
3482		if (slot) {
3483			int si = netmap_idx_n2k(&na->tx_rings[txr->me], i);
3484			/* no need to set the address */
3485			netmap_load_map(txr->txtag, txbuf->map, NMB(slot + si));
3486		}
3487#endif /* DEV_NETMAP */
3488		/* clear the watch index */
3489		txbuf->next_eop = -1;
3490        }
3491
3492	/* Set number of descriptors available */
3493	txr->tx_avail = adapter->num_tx_desc;
3494
3495	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3496	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3497	IGB_TX_UNLOCK(txr);
3498}
3499
3500/*********************************************************************
3501 *
3502 *  Initialize all transmit rings.
3503 *
3504 **********************************************************************/
3505static void
3506igb_setup_transmit_structures(struct adapter *adapter)
3507{
3508	struct tx_ring *txr = adapter->tx_rings;
3509
3510	for (int i = 0; i < adapter->num_queues; i++, txr++)
3511		igb_setup_transmit_ring(txr);
3512
3513	return;
3514}
3515
3516/*********************************************************************
3517 *
3518 *  Enable transmit unit.
3519 *
3520 **********************************************************************/
3521static void
3522igb_initialize_transmit_units(struct adapter *adapter)
3523{
3524	struct tx_ring	*txr = adapter->tx_rings;
3525	struct e1000_hw *hw = &adapter->hw;
3526	u32		tctl, txdctl;
3527
3528	INIT_DEBUGOUT("igb_initialize_transmit_units: begin");
3529	tctl = txdctl = 0;
3530
3531	/* Setup the Tx Descriptor Rings */
3532	for (int i = 0; i < adapter->num_queues; i++, txr++) {
3533		u64 bus_addr = txr->txdma.dma_paddr;
3534
3535		E1000_WRITE_REG(hw, E1000_TDLEN(i),
3536		    adapter->num_tx_desc * sizeof(struct e1000_tx_desc));
3537		E1000_WRITE_REG(hw, E1000_TDBAH(i),
3538		    (uint32_t)(bus_addr >> 32));
3539		E1000_WRITE_REG(hw, E1000_TDBAL(i),
3540		    (uint32_t)bus_addr);
3541
3542		/* Setup the HW Tx Head and Tail descriptor pointers */
3543		E1000_WRITE_REG(hw, E1000_TDT(i), 0);
3544		E1000_WRITE_REG(hw, E1000_TDH(i), 0);
3545
3546		HW_DEBUGOUT2("Base = %x, Length = %x\n",
3547		    E1000_READ_REG(hw, E1000_TDBAL(i)),
3548		    E1000_READ_REG(hw, E1000_TDLEN(i)));
3549
3550		txr->queue_status = IGB_QUEUE_IDLE;
3551
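		/*
		** TXDCTL packs the prefetch (PTHRESH), host (HTHRESH)
		** and write-back (WTHRESH) thresholds into separate
		** byte fields, with the queue enable bit on top.
		*/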
3552		txdctl |= IGB_TX_PTHRESH;
3553		txdctl |= IGB_TX_HTHRESH << 8;
3554		txdctl |= IGB_TX_WTHRESH << 16;
3555		txdctl |= E1000_TXDCTL_QUEUE_ENABLE;
3556		E1000_WRITE_REG(hw, E1000_TXDCTL(i), txdctl);
3557	}
3558
3559	if (adapter->vf_ifp)
3560		return;
3561
3562	e1000_config_collision_dist(hw);
3563
3564	/* Program the Transmit Control Register */
3565	tctl = E1000_READ_REG(hw, E1000_TCTL);
3566	tctl &= ~E1000_TCTL_CT;
3567	tctl |= (E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN |
3568		   (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT));
3569
3570	/* This write will effectively turn on the transmit unit. */
3571	E1000_WRITE_REG(hw, E1000_TCTL, tctl);
3572}
3573
3574/*********************************************************************
3575 *
3576 *  Free all transmit rings.
3577 *
3578 **********************************************************************/
3579static void
3580igb_free_transmit_structures(struct adapter *adapter)
3581{
3582	struct tx_ring *txr = adapter->tx_rings;
3583
3584	for (int i = 0; i < adapter->num_queues; i++, txr++) {
3585		IGB_TX_LOCK(txr);
3586		igb_free_transmit_buffers(txr);
3587		igb_dma_free(adapter, &txr->txdma);
3588		IGB_TX_UNLOCK(txr);
3589		IGB_TX_LOCK_DESTROY(txr);
3590	}
3591	free(adapter->tx_rings, M_DEVBUF);
3592}
3593
3594/*********************************************************************
3595 *
3596 *  Free transmit ring related data structures.
3597 *
3598 **********************************************************************/
3599static void
3600igb_free_transmit_buffers(struct tx_ring *txr)
3601{
3602	struct adapter *adapter = txr->adapter;
3603	struct igb_tx_buffer *tx_buffer;
3604	int             i;
3605
3606	INIT_DEBUGOUT("free_transmit_ring: begin");
3607
3608	if (txr->tx_buffers == NULL)
3609		return;
3610
3611	tx_buffer = txr->tx_buffers;
3612	for (i = 0; i < adapter->num_tx_desc; i++, tx_buffer++) {
3613		if (tx_buffer->m_head != NULL) {
3614			bus_dmamap_sync(txr->txtag, tx_buffer->map,
3615			    BUS_DMASYNC_POSTWRITE);
3616			bus_dmamap_unload(txr->txtag,
3617			    tx_buffer->map);
3618			m_freem(tx_buffer->m_head);
3619			tx_buffer->m_head = NULL;
3620			if (tx_buffer->map != NULL) {
3621				bus_dmamap_destroy(txr->txtag,
3622				    tx_buffer->map);
3623				tx_buffer->map = NULL;
3624			}
3625		} else if (tx_buffer->map != NULL) {
3626			bus_dmamap_unload(txr->txtag,
3627			    tx_buffer->map);
3628			bus_dmamap_destroy(txr->txtag,
3629			    tx_buffer->map);
3630			tx_buffer->map = NULL;
3631		}
3632	}
3633#if __FreeBSD_version >= 800000
3634	if (txr->br != NULL)
3635		buf_ring_free(txr->br, M_DEVBUF);
3636#endif
3637	if (txr->tx_buffers != NULL) {
3638		free(txr->tx_buffers, M_DEVBUF);
3639		txr->tx_buffers = NULL;
3640	}
3641	if (txr->txtag != NULL) {
3642		bus_dma_tag_destroy(txr->txtag);
3643		txr->txtag = NULL;
3644	}
3645	return;
3646}
3647
3648/**********************************************************************
3649 *
3650 *  Setup work for hardware segmentation offload (TSO)
3651 *
3652 **********************************************************************/
3653static bool
3654igb_tso_setup(struct tx_ring *txr, struct mbuf *mp, int ehdrlen,
3655	struct ip *ip, struct tcphdr *th)
3656{
3657	struct adapter *adapter = txr->adapter;
3658	struct e1000_adv_tx_context_desc *TXD;
3659	struct igb_tx_buffer        *tx_buffer;
3660	u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0;
3661	u32 mss_l4len_idx = 0;
3662	u16 vtag = 0;
3663	int ctxd, ip_hlen, tcp_hlen;
3664
3665	ctxd = txr->next_avail_desc;
3666	tx_buffer = &txr->tx_buffers[ctxd];
3667	TXD = (struct e1000_adv_tx_context_desc *) &txr->tx_base[ctxd];
3668
3669	ip->ip_sum = 0;
3670	ip_hlen = ip->ip_hl << 2;
3671	tcp_hlen = th->th_off << 2;
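	/*
	** The context descriptor carries the MAC, IP and TCP header
	** lengths plus the MSS; the hardware uses these to replicate
	** and fix up the headers for each segment it generates.
	*/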
3672
3673	/* VLAN MACLEN IPLEN */
3674	if (mp->m_flags & M_VLANTAG) {
3675		vtag = htole16(mp->m_pkthdr.ether_vtag);
3676		vlan_macip_lens |= (vtag << E1000_ADVTXD_VLAN_SHIFT);
3677	}
3678
3679	vlan_macip_lens |= (ehdrlen << E1000_ADVTXD_MACLEN_SHIFT);
3680	vlan_macip_lens |= ip_hlen;
3681	TXD->vlan_macip_lens |= htole32(vlan_macip_lens);
3682
3683	/* ADV DTYPE TUCMD */
3684	type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
3685	type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP;
3686	type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4;
3687	TXD->type_tucmd_mlhl |= htole32(type_tucmd_mlhl);
3688
3689	/* MSS L4LEN IDX */
3690	mss_l4len_idx |= (mp->m_pkthdr.tso_segsz << E1000_ADVTXD_MSS_SHIFT);
3691	mss_l4len_idx |= (tcp_hlen << E1000_ADVTXD_L4LEN_SHIFT);
3692	/* 82575 needs the queue index added */
3693	if (adapter->hw.mac.type == e1000_82575)
3694		mss_l4len_idx |= txr->me << 4;
3695	TXD->mss_l4len_idx = htole32(mss_l4len_idx);
3696
3697	TXD->seqnum_seed = htole32(0);
3698	tx_buffer->m_head = NULL;
3699	tx_buffer->next_eop = -1;
3700
3701	if (++ctxd == adapter->num_tx_desc)
3702		ctxd = 0;
3703
3704	txr->tx_avail--;
3705	txr->next_avail_desc = ctxd;
3706	return TRUE;
3707}
3708
3709
3710/*********************************************************************
3711 *
3712 *  Context Descriptor setup for VLAN or CSUM
3713 *
3714 **********************************************************************/
3715
3716static bool
3717igb_tx_ctx_setup(struct tx_ring *txr, struct mbuf *mp)
3718{
3719	struct adapter *adapter = txr->adapter;
3720	struct e1000_adv_tx_context_desc *TXD;
3721	struct igb_tx_buffer        *tx_buffer;
3722	u32 vlan_macip_lens, type_tucmd_mlhl, mss_l4len_idx;
3723	struct ether_vlan_header *eh;
3724	struct ip *ip = NULL;
3725	struct ip6_hdr *ip6;
3726	int  ehdrlen, ctxd, ip_hlen = 0;
3727	u16	etype, vtag = 0;
3728	u8	ipproto = 0;
3729	bool	offload = TRUE;
3730
3731	if ((mp->m_pkthdr.csum_flags & CSUM_OFFLOAD) == 0)
3732		offload = FALSE;
3733
3734	vlan_macip_lens = type_tucmd_mlhl = mss_l4len_idx = 0;
3735	ctxd = txr->next_avail_desc;
3736	tx_buffer = &txr->tx_buffers[ctxd];
3737	TXD = (struct e1000_adv_tx_context_desc *) &txr->tx_base[ctxd];
3738
3739	/*
3740	** In advanced descriptors the vlan tag must
3741	** be placed into the context descriptor, thus
3742	** we need to be here just for that setup.
3743	*/
3744	if (mp->m_flags & M_VLANTAG) {
3745		vtag = htole16(mp->m_pkthdr.ether_vtag);
3746		vlan_macip_lens |= (vtag << E1000_ADVTXD_VLAN_SHIFT);
3747	} else if (offload == FALSE)
3748		return FALSE;
3749
3750	/*
3751	 * Determine where frame payload starts.
3752	 * Jump over vlan headers if already present,
3753	 * helpful for QinQ too.
3754	 */
3755	eh = mtod(mp, struct ether_vlan_header *);
3756	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
3757		etype = ntohs(eh->evl_proto);
3758		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3759	} else {
3760		etype = ntohs(eh->evl_encap_proto);
3761		ehdrlen = ETHER_HDR_LEN;
3762	}
3763
3764	/* Set the ether header length */
3765	vlan_macip_lens |= ehdrlen << E1000_ADVTXD_MACLEN_SHIFT;
3766
3767	switch (etype) {
3768		case ETHERTYPE_IP:
3769			ip = (struct ip *)(mp->m_data + ehdrlen);
3770			ip_hlen = ip->ip_hl << 2;
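			/*
			** If the IP header is not contiguous in the
			** first mbuf we skip offload rather than
			** pulling the header up.
			*/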
3771			if (mp->m_len < ehdrlen + ip_hlen) {
3772				offload = FALSE;
3773				break;
3774			}
3775			ipproto = ip->ip_p;
3776			type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4;
3777			break;
3778		case ETHERTYPE_IPV6:
3779			ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
3780			ip_hlen = sizeof(struct ip6_hdr);
3781			ipproto = ip6->ip6_nxt;
3782			type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV6;
3783			break;
3784		default:
3785			offload = FALSE;
3786			break;
3787	}
3788
3789	vlan_macip_lens |= ip_hlen;
3790	type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
3791
3792	switch (ipproto) {
3793		case IPPROTO_TCP:
3794			if (mp->m_pkthdr.csum_flags & CSUM_TCP)
3795				type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP;
3796			break;
3797		case IPPROTO_UDP:
3798			if (mp->m_pkthdr.csum_flags & CSUM_UDP)
3799				type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_UDP;
3800			break;
3801#if __FreeBSD_version >= 800000
3802		case IPPROTO_SCTP:
3803			if (mp->m_pkthdr.csum_flags & CSUM_SCTP)
3804				type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_SCTP;
3805			break;
3806#endif
3807		default:
3808			offload = FALSE;
3809			break;
3810	}
3811
3812	/* 82575 needs the queue index added */
3813	if (adapter->hw.mac.type == e1000_82575)
3814		mss_l4len_idx = txr->me << 4;
3815
3816	/* Now copy bits into descriptor */
3817	TXD->vlan_macip_lens |= htole32(vlan_macip_lens);
3818	TXD->type_tucmd_mlhl |= htole32(type_tucmd_mlhl);
3819	TXD->seqnum_seed = htole32(0);
3820	TXD->mss_l4len_idx = htole32(mss_l4len_idx);
3821
3822	tx_buffer->m_head = NULL;
3823	tx_buffer->next_eop = -1;
3824
3825	/* We've consumed the first desc, adjust counters */
3826	if (++ctxd == adapter->num_tx_desc)
3827		ctxd = 0;
3828	txr->next_avail_desc = ctxd;
3829	--txr->tx_avail;
3830
3831        return (offload);
3832}
3833
3834
3835/**********************************************************************
3836 *
3837 *  Examine each tx_buffer in the used queue. If the hardware is done
3838 *  processing the packet then free associated resources. The
3839 *  tx_buffer is put back on the free queue.
3840 *
3841 *  A TRUE return means there is work in the ring to clean, FALSE means it is empty.
3842 **********************************************************************/
3843static bool
3844igb_txeof(struct tx_ring *txr)
3845{
3846	struct adapter	*adapter = txr->adapter;
3847        int first, last, done, processed;
3848        struct igb_tx_buffer *tx_buffer;
3849        struct e1000_tx_desc   *tx_desc, *eop_desc;
3850	struct ifnet   *ifp = adapter->ifp;
3851
3852	IGB_TX_LOCK_ASSERT(txr);
3853
3854#ifdef DEV_NETMAP
3855	if (ifp->if_capenable & IFCAP_NETMAP) {
3856		struct netmap_adapter *na = NA(ifp);
3857
3858		selwakeuppri(&na->tx_rings[txr->me].si, PI_NET);
3859		IGB_TX_UNLOCK(txr);
3860		IGB_CORE_LOCK(adapter);
3861		selwakeuppri(&na->tx_si, PI_NET);
3862		IGB_CORE_UNLOCK(adapter);
3863		IGB_TX_LOCK(txr);
3864		return FALSE;
3865	}
3866#endif /* DEV_NETMAP */
3867        if (txr->tx_avail == adapter->num_tx_desc) {
3868		txr->queue_status = IGB_QUEUE_IDLE;
3869                return FALSE;
3870	}
3871
3872	processed = 0;
3873        first = txr->next_to_clean;
3874        tx_desc = &txr->tx_base[first];
3875        tx_buffer = &txr->tx_buffers[first];
3876	last = tx_buffer->next_eop;
3877        eop_desc = &txr->tx_base[last];
3878
3879	/*
3880	 * What this does is get the index of the
3881	 * first descriptor AFTER the EOP of the
3882	 * first packet, that way we can do the
3883	 * simple comparison on the inner while loop.
3884	 */
3885	if (++last == adapter->num_tx_desc)
3886 		last = 0;
3887	done = last;
3888
3889        bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3890            BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
3891
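	/*
	** Outer loop: advance packet by packet as long as the EOP
	** descriptor's DD bit is set; inner loop: release every
	** descriptor and mbuf belonging to that packet.
	*/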
3892        while (eop_desc->upper.fields.status & E1000_TXD_STAT_DD) {
3893		/* We clean the range of the packet */
3894		while (first != done) {
3895                	tx_desc->upper.data = 0;
3896                	tx_desc->lower.data = 0;
3897                	tx_desc->buffer_addr = 0;
3898                	++txr->tx_avail;
3899			++processed;
3900
3901			if (tx_buffer->m_head) {
3902				txr->bytes +=
3903				    tx_buffer->m_head->m_pkthdr.len;
3904				bus_dmamap_sync(txr->txtag,
3905				    tx_buffer->map,
3906				    BUS_DMASYNC_POSTWRITE);
3907				bus_dmamap_unload(txr->txtag,
3908				    tx_buffer->map);
3909
3910                        	m_freem(tx_buffer->m_head);
3911                        	tx_buffer->m_head = NULL;
3912                	}
3913			tx_buffer->next_eop = -1;
3914			txr->watchdog_time = ticks;
3915
3916	                if (++first == adapter->num_tx_desc)
3917				first = 0;
3918
3919	                tx_buffer = &txr->tx_buffers[first];
3920			tx_desc = &txr->tx_base[first];
3921		}
3922		++txr->packets;
3923		++ifp->if_opackets;
3924		/* See if we can continue to the next packet */
3925		last = tx_buffer->next_eop;
3926		if (last != -1) {
3927        		eop_desc = &txr->tx_base[last];
3928			/* Get new done point */
3929			if (++last == adapter->num_tx_desc) last = 0;
3930			done = last;
3931		} else
3932			break;
3933        }
3934        bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3935            BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3936
3937        txr->next_to_clean = first;
3938
3939	/*
3940	** Watchdog calculation: we know there is
3941	** work outstanding or the early return above
3942	** would have been taken, so nothing processed
3943	** for too long indicates a hang.
3944	*/
3945	if ((!processed) && ((ticks - txr->watchdog_time) > IGB_WATCHDOG))
3946		txr->queue_status |= IGB_QUEUE_HUNG;
3947        /*
3948         * If we have a minimum free,
3949         * clear depleted state bit
3950         */
3951        if (txr->tx_avail >= IGB_QUEUE_THRESHOLD)
3952                txr->queue_status &= ~IGB_QUEUE_DEPLETED;
3953
3954	/* All clean, turn off the watchdog */
3955	if (txr->tx_avail == adapter->num_tx_desc) {
3956		txr->queue_status = IGB_QUEUE_IDLE;
3957		return (FALSE);
3958        }
3959
3960	return (TRUE);
3961}
3962
3963/*********************************************************************
3964 *
3965 *  Refresh mbuf buffers for RX descriptor rings
3966 *   - now keeps its own state so discards due to resource
3967 *     exhaustion are unnecessary; if an mbuf cannot be obtained
3968 *     it just returns, keeping its placeholder, so it can simply
3969 *     be called again later to retry.
3970 *
3971 **********************************************************************/
3972static void
3973igb_refresh_mbufs(struct rx_ring *rxr, int limit)
3974{
3975	struct adapter		*adapter = rxr->adapter;
3976	bus_dma_segment_t	hseg[1];
3977	bus_dma_segment_t	pseg[1];
3978	struct igb_rx_buf	*rxbuf;
3979	struct mbuf		*mh, *mp;
3980	int			i, j, nsegs, error;
3981	bool			refreshed = FALSE;
3982
3983	i = j = rxr->next_to_refresh;
3984	/*
3985	** Get one descriptor beyond
3986	** our work mark to control
3987	** the loop.
3988        */
3989	if (++j == adapter->num_rx_desc)
3990		j = 0;
3991
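	/*
	** 'i' is the slot being refreshed while 'j' runs one
	** descriptor ahead; the loop stops when 'j' reaches the
	** caller-supplied limit, so the ring is never fully filled.
	*/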
3992	while (j != limit) {
3993		rxbuf = &rxr->rx_buffers[i];
3994		/* No hdr mbuf used with header split off */
3995		if (rxr->hdr_split == FALSE)
3996			goto no_split;
3997		if (rxbuf->m_head == NULL) {
3998			mh = m_gethdr(M_DONTWAIT, MT_DATA);
3999			if (mh == NULL)
4000				goto update;
4001		} else
4002			mh = rxbuf->m_head;
4003
4004		mh->m_pkthdr.len = mh->m_len = MHLEN;
4006		mh->m_flags |= M_PKTHDR;
4007		/* Get the memory mapping */
4008		error = bus_dmamap_load_mbuf_sg(rxr->htag,
4009		    rxbuf->hmap, mh, hseg, &nsegs, BUS_DMA_NOWAIT);
4010		if (error != 0) {
4011			printf("Refresh mbufs: hdr dmamap load"
4012			    " failure - %d\n", error);
4013			m_free(mh);
4014			rxbuf->m_head = NULL;
4015			goto update;
4016		}
4017		rxbuf->m_head = mh;
4018		bus_dmamap_sync(rxr->htag, rxbuf->hmap,
4019		    BUS_DMASYNC_PREREAD);
4020		rxr->rx_base[i].read.hdr_addr =
4021		    htole64(hseg[0].ds_addr);
4022no_split:
4023		if (rxbuf->m_pack == NULL) {
4024			mp = m_getjcl(M_DONTWAIT, MT_DATA,
4025			    M_PKTHDR, adapter->rx_mbuf_sz);
4026			if (mp == NULL)
4027				goto update;
4028		} else
4029			mp = rxbuf->m_pack;
4030
4031		mp->m_pkthdr.len = mp->m_len = adapter->rx_mbuf_sz;
4032		/* Get the memory mapping */
4033		error = bus_dmamap_load_mbuf_sg(rxr->ptag,
4034		    rxbuf->pmap, mp, pseg, &nsegs, BUS_DMA_NOWAIT);
4035		if (error != 0) {
4036			printf("Refresh mbufs: payload dmamap load"
4037			    " failure - %d\n", error);
4038			m_free(mp);
4039			rxbuf->m_pack = NULL;
4040			goto update;
4041		}
4042		rxbuf->m_pack = mp;
4043		bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
4044		    BUS_DMASYNC_PREREAD);
4045		rxr->rx_base[i].read.pkt_addr =
4046		    htole64(pseg[0].ds_addr);
4047		refreshed = TRUE; /* I feel wefreshed :) */
4048
4049		i = j; /* our next is precalculated */
4050		rxr->next_to_refresh = i;
4051		if (++j == adapter->num_rx_desc)
4052			j = 0;
4053	}
4054update:
4055	if (refreshed) /* update tail */
4056		E1000_WRITE_REG(&adapter->hw,
4057		    E1000_RDT(rxr->me), rxr->next_to_refresh);
4058	return;
4059}
4060
4061
4062/*********************************************************************
4063 *
4064 *  Allocate memory for rx_buffer structures. Since we use one
4065 *  rx_buffer per received packet, the maximum number of rx_buffer's
4066 *  that we'll need is equal to the number of receive descriptors
4067 *  that we've allocated.
4068 *
4069 **********************************************************************/
4070static int
4071igb_allocate_receive_buffers(struct rx_ring *rxr)
4072{
4073	struct	adapter 	*adapter = rxr->adapter;
4074	device_t 		dev = adapter->dev;
4075	struct igb_rx_buf	*rxbuf;
4076	int             	i, bsize, error;
4077
4078	bsize = sizeof(struct igb_rx_buf) * adapter->num_rx_desc;
4079	if (!(rxr->rx_buffers =
4080	    (struct igb_rx_buf *) malloc(bsize,
4081	    M_DEVBUF, M_NOWAIT | M_ZERO))) {
4082		device_printf(dev, "Unable to allocate rx_buffer memory\n");
4083		error = ENOMEM;
4084		goto fail;
4085	}
4086
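	/*
	** Two DMA tags are used: a small one (MSIZE) for the
	** header-split header buffers and a jumbo-capable one
	** (MJUM9BYTES) for the payload clusters.
	*/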
4087	if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
4088				   1, 0,		/* alignment, bounds */
4089				   BUS_SPACE_MAXADDR,	/* lowaddr */
4090				   BUS_SPACE_MAXADDR,	/* highaddr */
4091				   NULL, NULL,		/* filter, filterarg */
4092				   MSIZE,		/* maxsize */
4093				   1,			/* nsegments */
4094				   MSIZE,		/* maxsegsize */
4095				   0,			/* flags */
4096				   NULL,		/* lockfunc */
4097				   NULL,		/* lockfuncarg */
4098				   &rxr->htag))) {
4099		device_printf(dev, "Unable to create RX DMA tag\n");
4100		goto fail;
4101	}
4102
4103	if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
4104				   1, 0,		/* alignment, bounds */
4105				   BUS_SPACE_MAXADDR,	/* lowaddr */
4106				   BUS_SPACE_MAXADDR,	/* highaddr */
4107				   NULL, NULL,		/* filter, filterarg */
4108				   MJUM9BYTES,		/* maxsize */
4109				   1,			/* nsegments */
4110				   MJUM9BYTES,		/* maxsegsize */
4111				   0,			/* flags */
4112				   NULL,		/* lockfunc */
4113				   NULL,		/* lockfuncarg */
4114				   &rxr->ptag))) {
4115		device_printf(dev, "Unable to create RX payload DMA tag\n");
4116		goto fail;
4117	}
4118
4119	for (i = 0; i < adapter->num_rx_desc; i++) {
4120		rxbuf = &rxr->rx_buffers[i];
4121		error = bus_dmamap_create(rxr->htag,
4122		    BUS_DMA_NOWAIT, &rxbuf->hmap);
4123		if (error) {
4124			device_printf(dev,
4125			    "Unable to create RX head DMA maps\n");
4126			goto fail;
4127		}
4128		error = bus_dmamap_create(rxr->ptag,
4129		    BUS_DMA_NOWAIT, &rxbuf->pmap);
4130		if (error) {
4131			device_printf(dev,
4132			    "Unable to create RX packet DMA maps\n");
4133			goto fail;
4134		}
4135	}
4136
4137	return (0);
4138
4139fail:
4140	/* Frees all, but can handle partial completion */
4141	igb_free_receive_structures(adapter);
4142	return (error);
4143}
4144
4145
4146static void
4147igb_free_receive_ring(struct rx_ring *rxr)
4148{
4149	struct	adapter		*adapter = rxr->adapter;
4150	struct igb_rx_buf	*rxbuf;
4151
4152
4153	for (int i = 0; i < adapter->num_rx_desc; i++) {
4154		rxbuf = &rxr->rx_buffers[i];
4155		if (rxbuf->m_head != NULL) {
4156			bus_dmamap_sync(rxr->htag, rxbuf->hmap,
4157			    BUS_DMASYNC_POSTREAD);
4158			bus_dmamap_unload(rxr->htag, rxbuf->hmap);
4159			rxbuf->m_head->m_flags |= M_PKTHDR;
4160			m_freem(rxbuf->m_head);
4161		}
4162		if (rxbuf->m_pack != NULL) {
4163			bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
4164			    BUS_DMASYNC_POSTREAD);
4165			bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
4166			rxbuf->m_pack->m_flags |= M_PKTHDR;
4167			m_freem(rxbuf->m_pack);
4168		}
4169		rxbuf->m_head = NULL;
4170		rxbuf->m_pack = NULL;
4171	}
4172}
4173
4174
4175/*********************************************************************
4176 *
4177 *  Initialize a receive ring and its buffers.
4178 *
4179 **********************************************************************/
4180static int
4181igb_setup_receive_ring(struct rx_ring *rxr)
4182{
4183	struct	adapter		*adapter;
4184	struct  ifnet		*ifp;
4185	device_t		dev;
4186	struct igb_rx_buf	*rxbuf;
4187	bus_dma_segment_t	pseg[1], hseg[1];
4188	struct lro_ctrl		*lro = &rxr->lro;
4189	int			rsize, nsegs, error = 0;
4190#ifdef DEV_NETMAP
4191	struct netmap_adapter *na = NA(rxr->adapter->ifp);
4192	struct netmap_slot *slot;
4193#endif /* DEV_NETMAP */
4194
4195	adapter = rxr->adapter;
4196	dev = adapter->dev;
4197	ifp = adapter->ifp;
4198
4199	/* Clear the ring contents */
4200	IGB_RX_LOCK(rxr);
4201#ifdef DEV_NETMAP
4202	slot = netmap_reset(na, NR_RX, rxr->me, 0);
4203#endif /* DEV_NETMAP */
4204	rsize = roundup2(adapter->num_rx_desc *
4205	    sizeof(union e1000_adv_rx_desc), IGB_DBA_ALIGN);
4206	bzero((void *)rxr->rx_base, rsize);
4207
4208	/*
4209	** Free current RX buffer structures and their mbufs
4210	*/
4211	igb_free_receive_ring(rxr);
4212
4213	/* Configure for header split? */
4214	if (igb_header_split)
4215		rxr->hdr_split = TRUE;
4216
4217        /* Now replenish the ring mbufs */
4218	for (int j = 0; j < adapter->num_rx_desc; ++j) {
4219		struct mbuf	*mh, *mp;
4220
4221		rxbuf = &rxr->rx_buffers[j];
4222#ifdef DEV_NETMAP
4223		if (slot) {
4224			/* slot sj is mapped to the i-th NIC-ring entry */
4225			int sj = netmap_idx_n2k(&na->rx_rings[rxr->me], j);
4226			uint64_t paddr;
4227			void *addr;
4228
4229			addr = PNMB(slot + sj, &paddr);
4230			netmap_load_map(rxr->ptag, rxbuf->pmap, addr);
4231			/* Update descriptor */
4232			rxr->rx_base[j].read.pkt_addr = htole64(paddr);
4233			continue;
4234		}
4235#endif /* DEV_NETMAP */
4236		if (rxr->hdr_split == FALSE)
4237			goto skip_head;
4238
4239		/* First the header */
4240		rxbuf->m_head = m_gethdr(M_DONTWAIT, MT_DATA);
4241		if (rxbuf->m_head == NULL) {
4242			error = ENOBUFS;
4243                        goto fail;
4244		}
4245		m_adj(rxbuf->m_head, ETHER_ALIGN);
4246		mh = rxbuf->m_head;
4247		mh->m_len = mh->m_pkthdr.len = MHLEN;
4248		mh->m_flags |= M_PKTHDR;
4249		/* Get the memory mapping */
4250		error = bus_dmamap_load_mbuf_sg(rxr->htag,
4251		    rxbuf->hmap, rxbuf->m_head, hseg,
4252		    &nsegs, BUS_DMA_NOWAIT);
4253		if (error != 0) /* Nothing elegant to do here */
4254                        goto fail;
4255		bus_dmamap_sync(rxr->htag,
4256		    rxbuf->hmap, BUS_DMASYNC_PREREAD);
4257		/* Update descriptor */
4258		rxr->rx_base[j].read.hdr_addr = htole64(hseg[0].ds_addr);
4259
4260skip_head:
4261		/* Now the payload cluster */
4262		rxbuf->m_pack = m_getjcl(M_DONTWAIT, MT_DATA,
4263		    M_PKTHDR, adapter->rx_mbuf_sz);
4264		if (rxbuf->m_pack == NULL) {
4265			error = ENOBUFS;
4266                        goto fail;
4267		}
4268		mp = rxbuf->m_pack;
4269		mp->m_pkthdr.len = mp->m_len = adapter->rx_mbuf_sz;
4270		/* Get the memory mapping */
4271		error = bus_dmamap_load_mbuf_sg(rxr->ptag,
4272		    rxbuf->pmap, mp, pseg,
4273		    &nsegs, BUS_DMA_NOWAIT);
4274		if (error != 0)
4275                        goto fail;
4276		bus_dmamap_sync(rxr->ptag,
4277		    rxbuf->pmap, BUS_DMASYNC_PREREAD);
4278		/* Update descriptor */
4279		rxr->rx_base[j].read.pkt_addr = htole64(pseg[0].ds_addr);
4280        }
4281
4282	/* Setup our descriptor indices */
4283	rxr->next_to_check = 0;
4284	rxr->next_to_refresh = adapter->num_rx_desc - 1;
4285	rxr->lro_enabled = FALSE;
4286	rxr->rx_split_packets = 0;
4287	rxr->rx_bytes = 0;
4288
4289	rxr->fmp = NULL;
4290	rxr->lmp = NULL;
4291	rxr->discard = FALSE;
4292
4293	bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4294	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4295
4296	/*
4297	** Now set up the LRO interface; we
4298	** also only do header split when LRO
4299	** is enabled, since it is so often
4300	** undesirable in other setups.
4301	*/
4302	if (ifp->if_capenable & IFCAP_LRO) {
4303		error = tcp_lro_init(lro);
4304		if (error) {
4305			device_printf(dev, "LRO Initialization failed!\n");
4306			goto fail;
4307		}
4308		INIT_DEBUGOUT("RX LRO Initialized\n");
4309		rxr->lro_enabled = TRUE;
4310		lro->ifp = adapter->ifp;
4311	}
4312
4313	IGB_RX_UNLOCK(rxr);
4314	return (0);
4315
4316fail:
4317	igb_free_receive_ring(rxr);
4318	IGB_RX_UNLOCK(rxr);
4319	return (error);
4320}
4321
4322
4323/*********************************************************************
4324 *
4325 *  Initialize all receive rings.
4326 *
4327 **********************************************************************/
4328static int
4329igb_setup_receive_structures(struct adapter *adapter)
4330{
4331	struct rx_ring *rxr = adapter->rx_rings;
4332	int i;
4333
4334	for (i = 0; i < adapter->num_queues; i++, rxr++)
4335		if (igb_setup_receive_ring(rxr))
4336			goto fail;
4337
4338	return (0);
4339fail:
4340	/*
4341	 * Free the RX buffers allocated so far; we only handle
4342	 * the rings that completed, since the failing case has
4343	 * cleaned up after itself. 'i' is the endpoint.
4344	 */
4345	for (int j = 0; j < i; ++j) {
4346		rxr = &adapter->rx_rings[j];
4347		IGB_RX_LOCK(rxr);
4348		igb_free_receive_ring(rxr);
4349		IGB_RX_UNLOCK(rxr);
4350	}
4351
4352	return (ENOBUFS);
4353}
4354
4355/*********************************************************************
4356 *
4357 *  Enable receive unit.
4358 *
4359 **********************************************************************/
4360static void
4361igb_initialize_receive_units(struct adapter *adapter)
4362{
4363	struct rx_ring	*rxr = adapter->rx_rings;
4364	struct ifnet	*ifp = adapter->ifp;
4365	struct e1000_hw *hw = &adapter->hw;
4366	u32		rctl, rxcsum, psize, srrctl = 0;
4367
4368	INIT_DEBUGOUT("igb_initialize_receive_units: begin");
4369
4370	/*
4371	 * Make sure receives are disabled while setting
4372	 * up the descriptor ring
4373	 */
4374	rctl = E1000_READ_REG(hw, E1000_RCTL);
4375	E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
4376
4377	/*
4378	** Set up for header split
4379	*/
4380	if (igb_header_split) {
4381		/* Use a standard mbuf for the header */
4382		srrctl |= IGB_HDR_BUF << E1000_SRRCTL_BSIZEHDRSIZE_SHIFT;
4383		srrctl |= E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
4384	} else
4385		srrctl |= E1000_SRRCTL_DESCTYPE_ADV_ONEBUF;
4386
4387	/*
4388	** Set up for jumbo frames
4389	*/
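	/* SRRCTL expresses the receive buffer size in 1KB units */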
4390	if (ifp->if_mtu > ETHERMTU) {
4391		rctl |= E1000_RCTL_LPE;
4392		if (adapter->rx_mbuf_sz == MJUMPAGESIZE) {
4393			srrctl |= 4096 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
4394			rctl |= E1000_RCTL_SZ_4096 | E1000_RCTL_BSEX;
4395		} else if (adapter->rx_mbuf_sz > MJUMPAGESIZE) {
4396			srrctl |= 8192 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
4397			rctl |= E1000_RCTL_SZ_8192 | E1000_RCTL_BSEX;
4398		}
4399		/* Set maximum packet len */
4400		psize = adapter->max_frame_size;
4401		/* are we on a vlan? */
4402		if (adapter->ifp->if_vlantrunk != NULL)
4403			psize += VLAN_TAG_SIZE;
4404		E1000_WRITE_REG(&adapter->hw, E1000_RLPML, psize);
4405	} else {
4406		rctl &= ~E1000_RCTL_LPE;
4407		srrctl |= 2048 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
4408		rctl |= E1000_RCTL_SZ_2048;
4409	}
4410
4411	/* Setup the Base and Length of the Rx Descriptor Rings */
4412	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4413		u64 bus_addr = rxr->rxdma.dma_paddr;
4414		u32 rxdctl;
4415
4416		E1000_WRITE_REG(hw, E1000_RDLEN(i),
4417		    adapter->num_rx_desc * sizeof(struct e1000_rx_desc));
4418		E1000_WRITE_REG(hw, E1000_RDBAH(i),
4419		    (uint32_t)(bus_addr >> 32));
4420		E1000_WRITE_REG(hw, E1000_RDBAL(i),
4421		    (uint32_t)bus_addr);
4422		E1000_WRITE_REG(hw, E1000_SRRCTL(i), srrctl);
4423		/* Enable this Queue */
4424		rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(i));
4425		rxdctl |= E1000_RXDCTL_QUEUE_ENABLE;
4426		rxdctl &= 0xFFF00000;
4427		rxdctl |= IGB_RX_PTHRESH;
4428		rxdctl |= IGB_RX_HTHRESH << 8;
4429		rxdctl |= IGB_RX_WTHRESH << 16;
4430		E1000_WRITE_REG(hw, E1000_RXDCTL(i), rxdctl);
4431	}
4432
4433	/*
4434	** Setup for RX MultiQueue
4435	*/
4436	rxcsum = E1000_READ_REG(hw, E1000_RXCSUM);
4437	if (adapter->num_queues > 1) {
4438		u32 random[10], mrqc, shift = 0;
4439		union igb_reta {
4440			u32 dword;
4441			u8  bytes[4];
4442		} reta;
4443
4444		arc4rand(&random, sizeof(random), 0);
4445		if (adapter->hw.mac.type == e1000_82575)
4446			shift = 6;
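		/*
		** Populate the 128-entry redirection table round-robin
		** across the queues; entries are packed four per
		** register and written when each group of four fills.
		** The 82575 keeps the queue index in higher bits of
		** each entry, hence the shift.
		*/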
4447		/* Warning FM follows */
4448		for (int i = 0; i < 128; i++) {
4449			reta.bytes[i & 3] =
4450			    (i % adapter->num_queues) << shift;
4451			if ((i & 3) == 3)
4452				E1000_WRITE_REG(hw,
4453				    E1000_RETA(i >> 2), reta.dword);
4454		}
4455		/* Now fill in hash table */
4456		mrqc = E1000_MRQC_ENABLE_RSS_4Q;
4457		for (int i = 0; i < 10; i++)
4458			E1000_WRITE_REG_ARRAY(hw,
4459			    E1000_RSSRK(0), i, random[i]);
4460
4461		mrqc |= (E1000_MRQC_RSS_FIELD_IPV4 |
4462		    E1000_MRQC_RSS_FIELD_IPV4_TCP);
4463		mrqc |= (E1000_MRQC_RSS_FIELD_IPV6 |
4464		    E1000_MRQC_RSS_FIELD_IPV6_TCP);
4465		mrqc |= (E1000_MRQC_RSS_FIELD_IPV4_UDP |
4466		    E1000_MRQC_RSS_FIELD_IPV6_UDP);
4467		mrqc |= (E1000_MRQC_RSS_FIELD_IPV6_UDP_EX |
4468		    E1000_MRQC_RSS_FIELD_IPV6_TCP_EX);
4469
4470		E1000_WRITE_REG(hw, E1000_MRQC, mrqc);
4471
4472		/*
4473		** NOTE: Receive Full-Packet Checksum Offload
4474		** is mutually exclusive with Multiqueue. However
4475		** this is not the same as TCP/IP checksums which
4476		** still work.
4477		*/
4478		rxcsum |= E1000_RXCSUM_PCSD;
4479#if __FreeBSD_version >= 800000
4480		/* For SCTP Offload */
4481		if ((hw->mac.type == e1000_82576)
4482		    && (ifp->if_capenable & IFCAP_RXCSUM))
4483			rxcsum |= E1000_RXCSUM_CRCOFL;
4484#endif
4485	} else {
4486		/* Non RSS setup */
4487		if (ifp->if_capenable & IFCAP_RXCSUM) {
4488			rxcsum |= E1000_RXCSUM_IPPCSE;
4489#if __FreeBSD_version >= 800000
4490			if (adapter->hw.mac.type == e1000_82576)
4491				rxcsum |= E1000_RXCSUM_CRCOFL;
4492#endif
4493		} else
4494			rxcsum &= ~E1000_RXCSUM_TUOFL;
4495	}
4496	E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum);
4497
4498	/* Setup the Receive Control Register */
4499	rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
4500	rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_LBM_NO |
4501		   E1000_RCTL_RDMTS_HALF |
4502		   (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
4503	/* Strip CRC bytes. */
4504	rctl |= E1000_RCTL_SECRC;
4505	/* Make sure VLAN Filters are off */
4506	rctl &= ~E1000_RCTL_VFE;
4507	/* Don't store bad packets */
4508	rctl &= ~E1000_RCTL_SBP;
4509
4510	/* Enable Receives */
4511	E1000_WRITE_REG(hw, E1000_RCTL, rctl);
4512
4513	/*
4514	 * Setup the HW Rx Head and Tail Descriptor Pointers
4515	 *   - needs to be after enable
4516	 */
4517	for (int i = 0; i < adapter->num_queues; i++) {
4518		rxr = &adapter->rx_rings[i];
4519		E1000_WRITE_REG(hw, E1000_RDH(i), rxr->next_to_check);
4520#ifdef DEV_NETMAP
4521		/*
4522		 * an init() while a netmap client is active must
4523		 * preserve the rx buffers passed to userspace.
4524		 * In this driver it means we adjust RDT to
4525		 * something different from next_to_refresh
4526		 * (which is not used in netmap mode).
4527		 */
4528		if (ifp->if_capenable & IFCAP_NETMAP) {
4529			struct netmap_adapter *na = NA(adapter->ifp);
4530			struct netmap_kring *kring = &na->rx_rings[i];
4531			int t = rxr->next_to_refresh - kring->nr_hwavail;
4532
4533			if (t >= adapter->num_rx_desc)
4534				t -= adapter->num_rx_desc;
4535			else if (t < 0)
4536				t += adapter->num_rx_desc;
4537			E1000_WRITE_REG(hw, E1000_RDT(i), t);
4538		} else
4539#endif /* DEV_NETMAP */
4540		E1000_WRITE_REG(hw, E1000_RDT(i), rxr->next_to_refresh);
4541	}
4542	return;
4543}
4544
4545/*********************************************************************
4546 *
4547 *  Free receive rings.
4548 *
4549 **********************************************************************/
4550static void
4551igb_free_receive_structures(struct adapter *adapter)
4552{
4553	struct rx_ring *rxr = adapter->rx_rings;
4554
4555	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4556		struct lro_ctrl	*lro = &rxr->lro;
4557		igb_free_receive_buffers(rxr);
4558		tcp_lro_free(lro);
4559		igb_dma_free(adapter, &rxr->rxdma);
4560	}
4561
4562	free(adapter->rx_rings, M_DEVBUF);
4563}
4564
4565/*********************************************************************
4566 *
4567 *  Free receive ring data structures.
4568 *
4569 **********************************************************************/
4570static void
4571igb_free_receive_buffers(struct rx_ring *rxr)
4572{
4573	struct adapter		*adapter = rxr->adapter;
4574	struct igb_rx_buf	*rxbuf;
4575	int i;
4576
4577	INIT_DEBUGOUT("free_receive_structures: begin");
4578
4579	/* Cleanup any existing buffers */
4580	if (rxr->rx_buffers != NULL) {
4581		for (i = 0; i < adapter->num_rx_desc; i++) {
4582			rxbuf = &rxr->rx_buffers[i];
4583			if (rxbuf->m_head != NULL) {
4584				bus_dmamap_sync(rxr->htag, rxbuf->hmap,
4585				    BUS_DMASYNC_POSTREAD);
4586				bus_dmamap_unload(rxr->htag, rxbuf->hmap);
4587				rxbuf->m_head->m_flags |= M_PKTHDR;
4588				m_freem(rxbuf->m_head);
4589			}
4590			if (rxbuf->m_pack != NULL) {
4591				bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
4592				    BUS_DMASYNC_POSTREAD);
4593				bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
4594				rxbuf->m_pack->m_flags |= M_PKTHDR;
4595				m_freem(rxbuf->m_pack);
4596			}
4597			rxbuf->m_head = NULL;
4598			rxbuf->m_pack = NULL;
4599			if (rxbuf->hmap != NULL) {
4600				bus_dmamap_destroy(rxr->htag, rxbuf->hmap);
4601				rxbuf->hmap = NULL;
4602			}
4603			if (rxbuf->pmap != NULL) {
4604				bus_dmamap_destroy(rxr->ptag, rxbuf->pmap);
4605				rxbuf->pmap = NULL;
4606			}
4607		}
4608		if (rxr->rx_buffers != NULL) {
4609			free(rxr->rx_buffers, M_DEVBUF);
4610			rxr->rx_buffers = NULL;
4611		}
4612	}
4613
4614	if (rxr->htag != NULL) {
4615		bus_dma_tag_destroy(rxr->htag);
4616		rxr->htag = NULL;
4617	}
4618	if (rxr->ptag != NULL) {
4619		bus_dma_tag_destroy(rxr->ptag);
4620		rxr->ptag = NULL;
4621	}
4622}
4623
4624static __inline void
4625igb_rx_discard(struct rx_ring *rxr, int i)
4626{
4627	struct igb_rx_buf	*rbuf;
4628
4629	rbuf = &rxr->rx_buffers[i];
4630
4631	/* Partially received? Free the chain */
4632	if (rxr->fmp != NULL) {
4633		rxr->fmp->m_flags |= M_PKTHDR;
4634		m_freem(rxr->fmp);
4635		rxr->fmp = NULL;
4636		rxr->lmp = NULL;
4637	}
4638
4639	/*
4640	** With advanced descriptors the writeback
4641	** clobbers the buffer addrs, so it's easier
4642	** to just free the existing mbufs and take
4643	** the normal refresh path to get new buffers
4644	** and mapping.
4645	*/
4646	if (rbuf->m_head) {
4647		m_free(rbuf->m_head);
4648		rbuf->m_head = NULL;
4649	}
4650
4651	if (rbuf->m_pack) {
4652		m_free(rbuf->m_pack);
4653		rbuf->m_pack = NULL;
4654	}
4655
4656	return;
4657}
4658
4659static __inline void
4660igb_rx_input(struct rx_ring *rxr, struct ifnet *ifp, struct mbuf *m, u32 ptype)
4661{
4662
4663	/*
4664	 * At the moment LRO is only done for IPv4/TCP packets whose TCP
4665	 * checksum has been computed by hardware. The packet also must not
4666	 * carry a VLAN tag in its ethernet header.
4667	 */
4668	if (rxr->lro_enabled &&
4669	    (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 &&
4670	    (ptype & E1000_RXDADV_PKTTYPE_ETQF) == 0 &&
4671	    (ptype & (E1000_RXDADV_PKTTYPE_IPV4 | E1000_RXDADV_PKTTYPE_TCP)) ==
4672	    (E1000_RXDADV_PKTTYPE_IPV4 | E1000_RXDADV_PKTTYPE_TCP) &&
4673	    (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) ==
4674	    (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) {
4675		/*
4676		 * Send to the stack if:
4677		 **  - LRO not enabled, or
4678		 **  - no LRO resources, or
4679		 **  - lro enqueue fails
4680		 */
4681		if (rxr->lro.lro_cnt != 0)
4682			if (tcp_lro_rx(&rxr->lro, m, 0) == 0)
4683				return;
4684	}
4685	IGB_RX_UNLOCK(rxr);
4686	(*ifp->if_input)(ifp, m);
4687	IGB_RX_LOCK(rxr);
4688}
4689
4690/*********************************************************************
4691 *
4692 *  This routine executes in interrupt context. It replenishes
4693 *  the mbufs in the descriptor and sends data which has been
4694 *  dma'ed into host memory to upper layer.
4695 *
4696 *  We loop at most count times if count is > 0, or until done if
4697 *  count < 0.
4698 *
4699 *  Return TRUE if more to clean, FALSE otherwise
4700 *********************************************************************/
4701static bool
4702igb_rxeof(struct igb_queue *que, int count, int *done)
4703{
4704	struct adapter		*adapter = que->adapter;
4705	struct rx_ring		*rxr = que->rxr;
4706	struct ifnet		*ifp = adapter->ifp;
4707	struct lro_ctrl		*lro = &rxr->lro;
4708	struct lro_entry	*queued;
4709	int			i, processed = 0, rxdone = 0;
4710	u32			ptype, staterr = 0;
4711	union e1000_adv_rx_desc	*cur;
4712
4713	IGB_RX_LOCK(rxr);
4714	/* Sync the ring. */
4715	bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4716	    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
4717
4718#ifdef DEV_NETMAP
4719	if (ifp->if_capenable & IFCAP_NETMAP) {
4720		struct netmap_adapter *na = NA(ifp);
4721
4722		na->rx_rings[rxr->me].nr_kflags |= NKR_PENDINTR;
4723		selwakeuppri(&na->rx_rings[rxr->me].si, PI_NET);
4724		IGB_RX_UNLOCK(rxr);
4725		IGB_CORE_LOCK(adapter);
4726		selwakeuppri(&na->rx_si, PI_NET);
4727		IGB_CORE_UNLOCK(adapter);
4728		return (FALSE);
4729	}
4730#endif /* DEV_NETMAP */
4731
4732	/* Main clean loop */
4733	for (i = rxr->next_to_check; count != 0;) {
4734		struct mbuf		*sendmp, *mh, *mp;
4735		struct igb_rx_buf	*rxbuf;
4736		u16			hlen, plen, hdr, vtag;
4737		bool			eop = FALSE;
4738
4739		cur = &rxr->rx_base[i];
4740		staterr = le32toh(cur->wb.upper.status_error);
4741		if ((staterr & E1000_RXD_STAT_DD) == 0)
4742			break;
4743		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
4744			break;
4745		count--;
4746		sendmp = mh = mp = NULL;
4747		cur->wb.upper.status_error = 0;
4748		rxbuf = &rxr->rx_buffers[i];
4749		plen = le16toh(cur->wb.upper.length);
4750		ptype = le32toh(cur->wb.lower.lo_dword.data) & IGB_PKTTYPE_MASK;
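		/*
		** On i350, VLAN tags on loopback packets apparently
		** arrive byte-swapped, hence the be16toh below.
		*/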
4751		if ((adapter->hw.mac.type == e1000_i350) &&
4752		    (staterr & E1000_RXDEXT_STATERR_LB))
4753			vtag = be16toh(cur->wb.upper.vlan);
4754		else
4755			vtag = le16toh(cur->wb.upper.vlan);
4756		hdr = le16toh(cur->wb.lower.lo_dword.hs_rss.hdr_info);
4757		eop = ((staterr & E1000_RXD_STAT_EOP) == E1000_RXD_STAT_EOP);
4758
4759		/* Make sure all segments of a bad packet are discarded */
4760		if (((staterr & E1000_RXDEXT_ERR_FRAME_ERR_MASK) != 0) ||
4761		    (rxr->discard)) {
4762			ifp->if_ierrors++;
4763			++rxr->rx_discarded;
4764			if (!eop) /* Catch subsequent segs */
4765				rxr->discard = TRUE;
4766			else
4767				rxr->discard = FALSE;
4768			igb_rx_discard(rxr, i);
4769			goto next_desc;
4770		}
4771
4772		/*
4773		** The way the hardware is configured to
4774		** split, it will ONLY use the header buffer
4775		** when header split is enabled, otherwise we
4776		** get normal behavior, ie, both header and
4777		** payload are DMA'd into the payload buffer.
4778		**
4779		** The fmp test is to catch the case where a
4780		** packet spans multiple descriptors, in that
4781		** case only the first header is valid.
4782		*/
4783		if (rxr->hdr_split && rxr->fmp == NULL) {
4784			hlen = (hdr & E1000_RXDADV_HDRBUFLEN_MASK) >>
4785			    E1000_RXDADV_HDRBUFLEN_SHIFT;
4786			if (hlen > IGB_HDR_BUF)
4787				hlen = IGB_HDR_BUF;
4788			mh = rxr->rx_buffers[i].m_head;
4789			mh->m_len = hlen;
4790			/* clear buf pointer for refresh */
4791			rxbuf->m_head = NULL;
4792			/*
4793			** Get the payload length, this
4794			** could be zero if its a small
4795			** packet.
4796			*/
4797			if (plen > 0) {
4798				mp = rxr->rx_buffers[i].m_pack;
4799				mp->m_len = plen;
4800				mh->m_next = mp;
4801				/* clear buf pointer */
4802				rxbuf->m_pack = NULL;
4803				rxr->rx_split_packets++;
4804			}
4805		} else {
4806			/*
4807			** Either no header split, or a
4808			** secondary piece of a fragmented
4809			** split packet.
4810			*/
4811			mh = rxr->rx_buffers[i].m_pack;
4812			mh->m_len = plen;
4813			/* clear buf info for refresh */
4814			rxbuf->m_pack = NULL;
4815		}
4816
4817		++processed; /* So we know when to refresh */
4818
4819		/* Initial frame - setup */
4820		if (rxr->fmp == NULL) {
4821			mh->m_pkthdr.len = mh->m_len;
4822			/* Save the head of the chain */
4823			rxr->fmp = mh;
4824			rxr->lmp = mh;
4825			if (mp != NULL) {
4826				/* Add payload if split */
4827				mh->m_pkthdr.len += mp->m_len;
4828				rxr->lmp = mh->m_next;
4829			}
4830		} else {
4831			/* Chain mbuf's together */
4832			rxr->lmp->m_next = mh;
4833			rxr->lmp = rxr->lmp->m_next;
4834			rxr->fmp->m_pkthdr.len += mh->m_len;
4835		}
4836
4837		if (eop) {
4838			rxr->fmp->m_pkthdr.rcvif = ifp;
4839			ifp->if_ipackets++;
4840			rxr->rx_packets++;
4841			/* capture data for AIM */
4842			rxr->packets++;
4843			rxr->bytes += rxr->fmp->m_pkthdr.len;
4844			rxr->rx_bytes += rxr->fmp->m_pkthdr.len;
4845
4846			if ((ifp->if_capenable & IFCAP_RXCSUM) != 0)
4847				igb_rx_checksum(staterr, rxr->fmp, ptype);
4848
4849			if ((ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 &&
4850			    (staterr & E1000_RXD_STAT_VP) != 0) {
4851				rxr->fmp->m_pkthdr.ether_vtag = vtag;
4852				rxr->fmp->m_flags |= M_VLANTAG;
4853			}
4854#if __FreeBSD_version >= 800000
4855			rxr->fmp->m_pkthdr.flowid = que->msix;
4856			rxr->fmp->m_flags |= M_FLOWID;
4857#endif
4858			sendmp = rxr->fmp;
4859			/* Make sure to set M_PKTHDR. */
4860			sendmp->m_flags |= M_PKTHDR;
4861			rxr->fmp = NULL;
4862			rxr->lmp = NULL;
4863		}
4864
4865next_desc:
4866		bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4867		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4868
4869		/* Advance our pointers to the next descriptor. */
4870		if (++i == adapter->num_rx_desc)
4871			i = 0;
4872		/*
4873		** Send to the stack or LRO
4874		*/
4875		if (sendmp != NULL) {
4876			rxr->next_to_check = i;
4877			igb_rx_input(rxr, ifp, sendmp, ptype);
4878			i = rxr->next_to_check;
4879			rxdone++;
4880		}
4881
4882		/* Every 8 descriptors we go to refresh mbufs */
4883		if (processed == 8) {
4884                        igb_refresh_mbufs(rxr, i);
4885                        processed = 0;
4886		}
4887	}
4888
4889	/* Catch any remainders */
4890	if (igb_rx_unrefreshed(rxr))
4891		igb_refresh_mbufs(rxr, i);
4892
4893	rxr->next_to_check = i;
4894
4895	/*
4896	 * Flush any outstanding LRO work
4897	 */
4898	while ((queued = SLIST_FIRST(&lro->lro_active)) != NULL) {
4899		SLIST_REMOVE_HEAD(&lro->lro_active, next);
4900		tcp_lro_flush(lro, queued);
4901	}
4902
4903	if (done != NULL)
4904		*done += rxdone;
4905
4906	IGB_RX_UNLOCK(rxr);
4907	return ((staterr & E1000_RXD_STAT_DD) ? TRUE : FALSE);
4908}
4909
4910/*********************************************************************
4911 *
4912 *  Verify that the hardware indicated that the checksum is valid.
4913 *  Inform the stack about the status of checksum so that stack
4914 *  doesn't spend time verifying the checksum.
4915 *
4916 *********************************************************************/
4917static void
4918igb_rx_checksum(u32 staterr, struct mbuf *mp, u32 ptype)
4919{
4920	u16 status = (u16)staterr;
4921	u8  errors = (u8) (staterr >> 24);
4922	int sctp;
4923
4924	/* Ignore Checksum bit is set */
4925	if (status & E1000_RXD_STAT_IXSM) {
4926		mp->m_pkthdr.csum_flags = 0;
4927		return;
4928	}
4929
4930	if ((ptype & E1000_RXDADV_PKTTYPE_ETQF) == 0 &&
4931	    (ptype & E1000_RXDADV_PKTTYPE_SCTP) != 0)
4932		sctp = 1;
4933	else
4934		sctp = 0;
4935	if (status & E1000_RXD_STAT_IPCS) {
4936		/* Did it pass? */
4937		if (!(errors & E1000_RXD_ERR_IPE)) {
4938			/* IP Checksum Good */
4939			mp->m_pkthdr.csum_flags = CSUM_IP_CHECKED;
4940			mp->m_pkthdr.csum_flags |= CSUM_IP_VALID;
4941		} else
4942			mp->m_pkthdr.csum_flags = 0;
4943	}
4944
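	/*
	** SCTP uses a CRC32c rather than the ones-complement
	** Internet checksum, so it gets its own CSUM_SCTP_VALID
	** flag and csum_data is left alone.
	*/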
4945	if (status & (E1000_RXD_STAT_TCPCS | E1000_RXD_STAT_UDPCS)) {
4946		u16 type = (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
4947#if __FreeBSD_version >= 800000
4948		if (sctp) /* reassign */
4949			type = CSUM_SCTP_VALID;
4950#endif
4951		/* Did it pass? */
4952		if (!(errors & E1000_RXD_ERR_TCPE)) {
4953			mp->m_pkthdr.csum_flags |= type;
4954			if (sctp == 0)
4955				mp->m_pkthdr.csum_data = htons(0xffff);
4956		}
4957	}
4958	return;
4959}
4960
4961/*
4962 * This routine is run via a vlan
4963 * config EVENT
4964 */
4965static void
4966igb_register_vlan(void *arg, struct ifnet *ifp, u16 vtag)
4967{
4968	struct adapter	*adapter = ifp->if_softc;
4969	u32		index, bit;
4970
4971	if (ifp->if_softc !=  arg)   /* Not our event */
4972		return;
4973
4974	if ((vtag == 0) || (vtag > 4095))       /* Invalid */
4975                return;
4976
4977	IGB_CORE_LOCK(adapter);
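	/*
	** Each VLAN id maps to one bit of the 128 x 32-bit shadow
	** VFTA: the upper bits select the dword, the low 5 bits
	** the bit within it.
	*/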
4978	index = (vtag >> 5) & 0x7F;
4979	bit = vtag & 0x1F;
4980	adapter->shadow_vfta[index] |= (1 << bit);
4981	++adapter->num_vlans;
4982	/* Change hw filter setting */
4983	if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
4984		igb_setup_vlan_hw_support(adapter);
4985	IGB_CORE_UNLOCK(adapter);
4986}
4987
4988/*
4989 * This routine is run via a vlan
4990 * unconfig EVENT
4991 */
4992static void
4993igb_unregister_vlan(void *arg, struct ifnet *ifp, u16 vtag)
4994{
4995	struct adapter	*adapter = ifp->if_softc;
4996	u32		index, bit;
4997
4998	if (ifp->if_softc !=  arg)
4999		return;
5000
5001	if ((vtag == 0) || (vtag > 4095))       /* Invalid */
5002                return;
5003
5004	IGB_CORE_LOCK(adapter);
5005	index = (vtag >> 5) & 0x7F;
5006	bit = vtag & 0x1F;
5007	adapter->shadow_vfta[index] &= ~(1 << bit);
5008	--adapter->num_vlans;
5009	/* Change hw filter setting */
5010	if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
5011		igb_setup_vlan_hw_support(adapter);
5012	IGB_CORE_UNLOCK(adapter);
5013}
5014
5015static void
5016igb_setup_vlan_hw_support(struct adapter *adapter)
5017{
5018	struct e1000_hw *hw = &adapter->hw;
5019	struct ifnet	*ifp = adapter->ifp;
5020	u32             reg;
5021
5022	if (adapter->vf_ifp) {
5023		e1000_rlpml_set_vf(hw,
5024		    adapter->max_frame_size + VLAN_TAG_SIZE);
5025		return;
5026	}
5027
5028	reg = E1000_READ_REG(hw, E1000_CTRL);
5029	reg |= E1000_CTRL_VME;
5030	E1000_WRITE_REG(hw, E1000_CTRL, reg);
5031
5032	/* Enable the Filter Table */
5033	if (ifp->if_capenable & IFCAP_VLAN_HWFILTER) {
5034		reg = E1000_READ_REG(hw, E1000_RCTL);
5035		reg &= ~E1000_RCTL_CFIEN;
5036		reg |= E1000_RCTL_VFE;
5037		E1000_WRITE_REG(hw, E1000_RCTL, reg);
5038	}
5039
5040	/* Update the frame size */
5041	E1000_WRITE_REG(&adapter->hw, E1000_RLPML,
5042	    adapter->max_frame_size + VLAN_TAG_SIZE);
5043
5044	/* Don't bother with table if no vlans */
5045	if ((adapter->num_vlans == 0) ||
5046	    ((ifp->if_capenable & IFCAP_VLAN_HWFILTER) == 0))
5047                return;
5048	/*
5049	** A soft reset zeroes out the VFTA, so
5050	** we need to repopulate it now.
5051	*/
5052	for (int i = 0; i < IGB_VFTA_SIZE; i++)
5053                if (adapter->shadow_vfta[i] != 0) {
5054			if (adapter->vf_ifp)
5055				e1000_vfta_set_vf(hw,
5056				    adapter->shadow_vfta[i], TRUE);
5057			else
5058				e1000_write_vfta(hw,
5059				    i, adapter->shadow_vfta[i]);
5060		}
5061}
5062
5063static void
5064igb_enable_intr(struct adapter *adapter)
5065{
5066	/* With RSS set up what to auto clear */
5067	if (adapter->msix_mem) {
5068		u32 mask = (adapter->que_mask | adapter->link_mask);
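		/*
		** EIAC selects which extended causes are auto-cleared
		** and EIAM which are auto-masked; the link-status-change
		** interrupt still arrives through the legacy IMS path.
		*/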
5069		E1000_WRITE_REG(&adapter->hw, E1000_EIAC, mask);
5070		E1000_WRITE_REG(&adapter->hw, E1000_EIAM, mask);
5071		E1000_WRITE_REG(&adapter->hw, E1000_EIMS, mask);
5072		E1000_WRITE_REG(&adapter->hw, E1000_IMS,
5073		    E1000_IMS_LSC);
5074	} else {
5075		E1000_WRITE_REG(&adapter->hw, E1000_IMS,
5076		    IMS_ENABLE_MASK);
5077	}
5078	E1000_WRITE_FLUSH(&adapter->hw);
5079
5080	return;
5081}
5082
5083static void
5084igb_disable_intr(struct adapter *adapter)
5085{
5086	if (adapter->msix_mem) {
5087		E1000_WRITE_REG(&adapter->hw, E1000_EIMC, ~0);
5088		E1000_WRITE_REG(&adapter->hw, E1000_EIAC, 0);
5089	}
5090	E1000_WRITE_REG(&adapter->hw, E1000_IMC, ~0);
5091	E1000_WRITE_FLUSH(&adapter->hw);
5092	return;
5093}
5094
5095/*
5096 * Bit of a misnomer: what this really means is
5097 * to enable OS management of the system, i.e.
5098 * to disable the special hardware management features
5099 */
5100static void
5101igb_init_manageability(struct adapter *adapter)
5102{
5103	if (adapter->has_manage) {
5104		int manc2h = E1000_READ_REG(&adapter->hw, E1000_MANC2H);
5105		int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
5106
5107		/* disable hardware interception of ARP */
5108		manc &= ~(E1000_MANC_ARP_EN);
5109
5110                /* enable receiving management packets to the host */
5111		manc |= E1000_MANC_EN_MNG2HOST;
5112		manc2h |= 1 << 5;  /* Mng Port 623 */
5113		manc2h |= 1 << 6;  /* Mng Port 664 */
5114		E1000_WRITE_REG(&adapter->hw, E1000_MANC2H, manc2h);
5115		E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
5116	}
5117}
5118
5119/*
5120 * Give control back to hardware management
5121 * controller if there is one.
5122 */
5123static void
5124igb_release_manageability(struct adapter *adapter)
5125{
5126	if (adapter->has_manage) {
5127		int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
5128
5129		/* re-enable hardware interception of ARP */
5130		manc |= E1000_MANC_ARP_EN;
5131		manc &= ~E1000_MANC_EN_MNG2HOST;
5132
5133		E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
5134	}
5135}
5136
5137/*
5138 * igb_get_hw_control sets CTRL_EXT:DRV_LOAD bit.
5139 * For ASF and Pass Through versions of f/w this means that
5140 * the driver is loaded.
5141 *
5142 */
5143static void
5144igb_get_hw_control(struct adapter *adapter)
5145{
5146	u32 ctrl_ext;
5147
5148	if (adapter->vf_ifp)
5149		return;
5150
5151	/* Let firmware know the driver has taken over */
5152	ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
5153	E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
5154	    ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
5155}
5156
5157/*
5158 * igb_release_hw_control resets CTRL_EXT:DRV_LOAD bit.
5159 * For ASF and Pass Through versions of f/w this means that the
5160 * driver is no longer loaded.
5161 *
5162 */
5163static void
5164igb_release_hw_control(struct adapter *adapter)
5165{
5166	u32 ctrl_ext;
5167
5168	if (adapter->vf_ifp)
5169		return;
5170
5171	/* Let firmware take over control of h/w */
5172	ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
5173	E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
5174	    ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
5175}
5176
5177static int
5178igb_is_valid_ether_addr(uint8_t *addr)
5179{
5180	char zero_addr[6] = { 0, 0, 0, 0, 0, 0 };
5181
5182	if ((addr[0] & 1) || (!bcmp(addr, zero_addr, ETHER_ADDR_LEN))) {
5183		return (FALSE);
5184	}
5185
5186	return (TRUE);
5187}
5188
5189
5190/*
5191 * Enable PCI Wake On Lan capability
5192 */
5193static void
5194igb_enable_wakeup(device_t dev)
5195{
5196	u16     cap, status;
5197	u8      id;
5198
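	/*
	** Note: only the first capability in the list is examined;
	** if it is not the power-management capability this simply
	** returns without enabling PME.
	*/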
5199	/* First find the capabilities pointer */
5200	cap = pci_read_config(dev, PCIR_CAP_PTR, 2);
5201	/* Read the PM Capabilities */
5202	id = pci_read_config(dev, cap, 1);
5203	if (id != PCIY_PMG)     /* Something wrong */
5204		return;
5205	/* OK, we have the power capabilities, so
5206	   now get the status register */
5207	cap += PCIR_POWER_STATUS;
5208	status = pci_read_config(dev, cap, 2);
5209	status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE;
5210	pci_write_config(dev, cap, status, 2);
5211	return;
5212}
5213
5214static void
5215igb_led_func(void *arg, int onoff)
5216{
5217	struct adapter	*adapter = arg;
5218
5219	IGB_CORE_LOCK(adapter);
5220	if (onoff) {
5221		e1000_setup_led(&adapter->hw);
5222		e1000_led_on(&adapter->hw);
5223	} else {
5224		e1000_led_off(&adapter->hw);
5225		e1000_cleanup_led(&adapter->hw);
5226	}
5227	IGB_CORE_UNLOCK(adapter);
5228}
5229
5230/**********************************************************************
5231 *
5232 *  Update the board statistics counters.
5233 *
5234 **********************************************************************/
5235static void
5236igb_update_stats_counters(struct adapter *adapter)
5237{
5238	struct ifnet		*ifp;
5239        struct e1000_hw		*hw = &adapter->hw;
5240	struct e1000_hw_stats	*stats;
5241
5242	/*
5243	** The virtual function adapter has only a
5244	** small controlled set of stats, do only
5245	** those and return.
5246	*/
5247	if (adapter->vf_ifp) {
5248		igb_update_vf_stats_counters(adapter);
5249		return;
5250	}
5251
5252	stats = (struct e1000_hw_stats	*)adapter->stats;
5253
5254	if (adapter->hw.phy.media_type == e1000_media_type_copper ||
5255	   (E1000_READ_REG(hw, E1000_STATUS) & E1000_STATUS_LU)) {
5256		stats->symerrs +=
5257		    E1000_READ_REG(hw,E1000_SYMERRS);
5258		stats->sec += E1000_READ_REG(hw, E1000_SEC);
5259	}
5260
5261	stats->crcerrs += E1000_READ_REG(hw, E1000_CRCERRS);
5262	stats->mpc += E1000_READ_REG(hw, E1000_MPC);
5263	stats->scc += E1000_READ_REG(hw, E1000_SCC);
5264	stats->ecol += E1000_READ_REG(hw, E1000_ECOL);
5265
5266	stats->mcc += E1000_READ_REG(hw, E1000_MCC);
5267	stats->latecol += E1000_READ_REG(hw, E1000_LATECOL);
5268	stats->colc += E1000_READ_REG(hw, E1000_COLC);
5269	stats->dc += E1000_READ_REG(hw, E1000_DC);
5270	stats->rlec += E1000_READ_REG(hw, E1000_RLEC);
5271	stats->xonrxc += E1000_READ_REG(hw, E1000_XONRXC);
5272	stats->xontxc += E1000_READ_REG(hw, E1000_XONTXC);
5273	/*
5274	** For watchdog management we need to know if we have been
5275	** paused during the last interval, so capture that here.
5276	*/
5277        adapter->pause_frames = E1000_READ_REG(&adapter->hw, E1000_XOFFRXC);
5278        stats->xoffrxc += adapter->pause_frames;
5279	stats->xofftxc += E1000_READ_REG(hw, E1000_XOFFTXC);
5280	stats->fcruc += E1000_READ_REG(hw, E1000_FCRUC);
5281	stats->prc64 += E1000_READ_REG(hw, E1000_PRC64);
5282	stats->prc127 += E1000_READ_REG(hw, E1000_PRC127);
5283	stats->prc255 += E1000_READ_REG(hw, E1000_PRC255);
5284	stats->prc511 += E1000_READ_REG(hw, E1000_PRC511);
5285	stats->prc1023 += E1000_READ_REG(hw, E1000_PRC1023);
5286	stats->prc1522 += E1000_READ_REG(hw, E1000_PRC1522);
5287	stats->gprc += E1000_READ_REG(hw, E1000_GPRC);
5288	stats->bprc += E1000_READ_REG(hw, E1000_BPRC);
5289	stats->mprc += E1000_READ_REG(hw, E1000_MPRC);
5290	stats->gptc += E1000_READ_REG(hw, E1000_GPTC);
5291
5292	/* For the 64-bit byte counters the low dword must be read first. */
5293	/* Both registers clear on the read of the high dword */
5294
5295	stats->gorc += E1000_READ_REG(hw, E1000_GORCL) +
5296	    ((u64)E1000_READ_REG(hw, E1000_GORCH) << 32);
5297	stats->gotc += E1000_READ_REG(hw, E1000_GOTCL) +
5298	    ((u64)E1000_READ_REG(hw, E1000_GOTCH) << 32);
5299
5300	stats->rnbc += E1000_READ_REG(hw, E1000_RNBC);
5301	stats->ruc += E1000_READ_REG(hw, E1000_RUC);
5302	stats->rfc += E1000_READ_REG(hw, E1000_RFC);
5303	stats->roc += E1000_READ_REG(hw, E1000_ROC);
5304	stats->rjc += E1000_READ_REG(hw, E1000_RJC);
5305
5306	stats->tor += E1000_READ_REG(hw, E1000_TORH);
5307	stats->tot += E1000_READ_REG(hw, E1000_TOTH);
5308
5309	stats->tpr += E1000_READ_REG(hw, E1000_TPR);
5310	stats->tpt += E1000_READ_REG(hw, E1000_TPT);
5311	stats->ptc64 += E1000_READ_REG(hw, E1000_PTC64);
5312	stats->ptc127 += E1000_READ_REG(hw, E1000_PTC127);
5313	stats->ptc255 += E1000_READ_REG(hw, E1000_PTC255);
5314	stats->ptc511 += E1000_READ_REG(hw, E1000_PTC511);
5315	stats->ptc1023 += E1000_READ_REG(hw, E1000_PTC1023);
5316	stats->ptc1522 += E1000_READ_REG(hw, E1000_PTC1522);
5317	stats->mptc += E1000_READ_REG(hw, E1000_MPTC);
5318	stats->bptc += E1000_READ_REG(hw, E1000_BPTC);
5319
5320	/* Interrupt Counts */
5321
5322	stats->iac += E1000_READ_REG(hw, E1000_IAC);
5323	stats->icrxptc += E1000_READ_REG(hw, E1000_ICRXPTC);
5324	stats->icrxatc += E1000_READ_REG(hw, E1000_ICRXATC);
5325	stats->ictxptc += E1000_READ_REG(hw, E1000_ICTXPTC);
5326	stats->ictxatc += E1000_READ_REG(hw, E1000_ICTXATC);
5327	stats->ictxqec += E1000_READ_REG(hw, E1000_ICTXQEC);
5328	stats->ictxqmtc += E1000_READ_REG(hw, E1000_ICTXQMTC);
5329	stats->icrxdmtc += E1000_READ_REG(hw, E1000_ICRXDMTC);
5330	stats->icrxoc += E1000_READ_REG(hw, E1000_ICRXOC);
5331
5332	/* Host to Card Statistics */
5333
5334	stats->cbtmpc += E1000_READ_REG(hw, E1000_CBTMPC);
5335	stats->htdpmc += E1000_READ_REG(hw, E1000_HTDPMC);
5336	stats->cbrdpc += E1000_READ_REG(hw, E1000_CBRDPC);
5337	stats->cbrmpc += E1000_READ_REG(hw, E1000_CBRMPC);
5338	stats->rpthc += E1000_READ_REG(hw, E1000_RPTHC);
5339	stats->hgptc += E1000_READ_REG(hw, E1000_HGPTC);
5340	stats->htcbdpc += E1000_READ_REG(hw, E1000_HTCBDPC);
5341	stats->hgorc += (E1000_READ_REG(hw, E1000_HGORCL) +
5342	    ((u64)E1000_READ_REG(hw, E1000_HGORCH) << 32));
5343	stats->hgotc += (E1000_READ_REG(hw, E1000_HGOTCL) +
5344	    ((u64)E1000_READ_REG(hw, E1000_HGOTCH) << 32));
5345	stats->lenerrs += E1000_READ_REG(hw, E1000_LENERRS);
5346	stats->scvpc += E1000_READ_REG(hw, E1000_SCVPC);
5347	stats->hrmpc += E1000_READ_REG(hw, E1000_HRMPC);
5348
5349	stats->algnerrc += E1000_READ_REG(hw, E1000_ALGNERRC);
5350	stats->rxerrc += E1000_READ_REG(hw, E1000_RXERRC);
5351	stats->tncrs += E1000_READ_REG(hw, E1000_TNCRS);
5352	stats->cexterr += E1000_READ_REG(hw, E1000_CEXTERR);
5353	stats->tsctc += E1000_READ_REG(hw, E1000_TSCTC);
5354	stats->tsctfc += E1000_READ_REG(hw, E1000_TSCTFC);
5355
5356	ifp = adapter->ifp;
5357	ifp->if_collisions = stats->colc;
5358
5359	/* Rx Errors */
5360	ifp->if_ierrors = adapter->dropped_pkts + stats->rxerrc +
5361	    stats->crcerrs + stats->algnerrc +
5362	    stats->ruc + stats->roc + stats->mpc + stats->cexterr;
5363
5364	/* Tx Errors */
5365	ifp->if_oerrors = stats->ecol +
5366	    stats->latecol + adapter->watchdog_events;
5367
5368	/* Driver specific counters */
5369	adapter->device_control = E1000_READ_REG(hw, E1000_CTRL);
5370	adapter->rx_control = E1000_READ_REG(hw, E1000_RCTL);
5371	adapter->int_mask = E1000_READ_REG(hw, E1000_IMS);
5372	adapter->eint_mask = E1000_READ_REG(hw, E1000_EIMS);
5373	adapter->packet_buf_alloc_tx =
5374	    ((E1000_READ_REG(hw, E1000_PBA) & 0xffff0000) >> 16);
5375	adapter->packet_buf_alloc_rx =
5376	    (E1000_READ_REG(hw, E1000_PBA) & 0xffff);
5377}
5378
5379
5380/**********************************************************************
5381 *
5382 *  Initialize the VF board statistics counters.
5383 *
5384 **********************************************************************/
5385static void
5386igb_vf_init_stats(struct adapter *adapter)
5387{
5388	struct e1000_hw *hw = &adapter->hw;
5389	struct e1000_vf_stats	*stats;
5390
5391	stats = (struct e1000_vf_stats *)adapter->stats;
5392	if (stats == NULL)
5393		return;
5394	stats->last_gprc = E1000_READ_REG(hw, E1000_VFGPRC);
5395	stats->last_gorc = E1000_READ_REG(hw, E1000_VFGORC);
5396	stats->last_gptc = E1000_READ_REG(hw, E1000_VFGPTC);
5397	stats->last_gotc = E1000_READ_REG(hw, E1000_VFGOTC);
5398	stats->last_mprc = E1000_READ_REG(hw, E1000_VFMPRC);
5399}
5400
5401/**********************************************************************
5402 *
5403 *  Update the VF board statistics counters.
5404 *
5405 **********************************************************************/
5406static void
5407igb_update_vf_stats_counters(struct adapter *adapter)
5408{
5409	struct e1000_hw *hw = &adapter->hw;
5410	struct e1000_vf_stats	*stats;
5411
5412	if (adapter->link_speed == 0)
5413		return;
5414
5415	stats = (struct e1000_vf_stats *)adapter->stats;
5416
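	/*
	** The VF counters are free-running rather than clear-on-read, so
	** UPDATE_VF_REG() extends each 32-bit register into its 64-bit
	** soft counter using the snapshot from igb_vf_init_stats(),
	** accounting for 32-bit wrap along the way.
	*/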
5417	UPDATE_VF_REG(E1000_VFGPRC,
5418	    stats->last_gprc, stats->gprc);
5419	UPDATE_VF_REG(E1000_VFGORC,
5420	    stats->last_gorc, stats->gorc);
5421	UPDATE_VF_REG(E1000_VFGPTC,
5422	    stats->last_gptc, stats->gptc);
5423	UPDATE_VF_REG(E1000_VFGOTC,
5424	    stats->last_gotc, stats->gotc);
5425	UPDATE_VF_REG(E1000_VFMPRC,
5426	    stats->last_mprc, stats->mprc);
5427}
5428
5429/* Export a single 32-bit register via a read-only sysctl. */
5430static int
5431igb_sysctl_reg_handler(SYSCTL_HANDLER_ARGS)
5432{
5433	struct adapter *adapter;
5434	u_int val;
5435
5436	adapter = oidp->oid_arg1;
5437	val = E1000_READ_REG(&adapter->hw, oidp->oid_arg2);
5438	return (sysctl_handle_int(oidp, &val, 0, req));
5439}
5440
5441/*
5442** Tunable interrupt rate handler
5443*/
5444static int
5445igb_sysctl_interrupt_rate_handler(SYSCTL_HANDLER_ARGS)
5446{
5447	struct igb_queue	*que = ((struct igb_queue *)oidp->oid_arg1);
5448	int			error;
5449	u32			reg, usec, rate;
5450
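	/*
	** The interval lives in bits 2..14 of EITR; shifting it down yields
	** the interval in microseconds as programmed by this driver, so the
	** reported rate is simply 1000000 / interval (0 when unthrottled).
	*/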
5451	reg = E1000_READ_REG(&que->adapter->hw, E1000_EITR(que->msix));
5452	usec = ((reg & 0x7FFC) >> 2);
5453	if (usec > 0)
5454		rate = 1000000 / usec;
5455	else
5456		rate = 0;
5457	error = sysctl_handle_int(oidp, &rate, 0, req);
5458	if (error || !req->newptr)
5459		return (error);
5460	return 0;
5461}
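
/*
** Usage sketch (the unit and queue numbers are hypothetical): this handler
** is attached per queue in igb_add_hw_stats() below, so the current rate
** can be read with, e.g.:
**	sysctl dev.igb.0.queue0.interrupt_rate
*/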
5462
5463/*
5464 * Add sysctl variables, one per statistic, to the system.
5465 */
5466static void
5467igb_add_hw_stats(struct adapter *adapter)
5468{
5469	device_t dev = adapter->dev;
5470
5471	struct tx_ring *txr = adapter->tx_rings;
5472	struct rx_ring *rxr = adapter->rx_rings;
5473
5474	struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev);
5475	struct sysctl_oid *tree = device_get_sysctl_tree(dev);
5476	struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree);
5477	struct e1000_hw_stats *stats = adapter->stats;
5478
5479	struct sysctl_oid *stat_node, *queue_node, *int_node, *host_node;
5480	struct sysctl_oid_list *stat_list, *queue_list, *int_list, *host_list;
5481
5482#define QUEUE_NAME_LEN 32
5483	char namebuf[QUEUE_NAME_LEN];
5484
5485	/* Driver Statistics */
5486	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "link_irq",
5487			CTLFLAG_RD, &adapter->link_irq, 0,
5488			"Link MSIX IRQ Handled");
5489	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "dropped",
5490			CTLFLAG_RD, &adapter->dropped_pkts,
5491			"Driver dropped packets");
5492	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_dma_fail",
5493			CTLFLAG_RD, &adapter->no_tx_dma_setup,
5494			"Driver tx dma failure in xmit");
5495	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_overruns",
5496			CTLFLAG_RD, &adapter->rx_overruns,
5497			"RX overruns");
5498	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "watchdog_timeouts",
5499			CTLFLAG_RD, &adapter->watchdog_events,
5500			"Watchdog timeouts");
5501
5502	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "device_control",
5503			CTLFLAG_RD, &adapter->device_control,
5504			"Device Control Register");
5505	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_control",
5506			CTLFLAG_RD, &adapter->rx_control,
5507			"Receiver Control Register");
5508	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "interrupt_mask",
5509			CTLFLAG_RD, &adapter->int_mask,
5510			"Interrupt Mask");
5511	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "extended_int_mask",
5512			CTLFLAG_RD, &adapter->eint_mask,
5513			"Extended Interrupt Mask");
5514	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_buf_alloc",
5515			CTLFLAG_RD, &adapter->packet_buf_alloc_tx,
5516			"Transmit Buffer Packet Allocation");
5517	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_buf_alloc",
5518			CTLFLAG_RD, &adapter->packet_buf_alloc_rx,
5519			"Receive Buffer Packet Allocation");
5520	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_high_water",
5521			CTLFLAG_RD, &adapter->hw.fc.high_water, 0,
5522			"Flow Control High Watermark");
5523	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_low_water",
5524			CTLFLAG_RD, &adapter->hw.fc.low_water, 0,
5525			"Flow Control Low Watermark");
5526
5527	for (int i = 0; i < adapter->num_queues; i++, rxr++, txr++) {
5528		struct lro_ctrl *lro = &rxr->lro;
5529
5530		snprintf(namebuf, QUEUE_NAME_LEN, "queue%d", i);
5531		queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
5532					    CTLFLAG_RD, NULL, "Queue Name");
5533		queue_list = SYSCTL_CHILDREN(queue_node);
5534
5535		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "interrupt_rate",
5536				CTLFLAG_RD, &adapter->queues[i],
5537				sizeof(&adapter->queues[i]),
5538				igb_sysctl_interrupt_rate_handler,
5539				"IU", "Interrupt Rate");
5540
5541		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_head",
5542				CTLFLAG_RD, adapter, E1000_TDH(txr->me),
5543				igb_sysctl_reg_handler, "IU",
5544 				"Transmit Descriptor Head");
5545		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_tail",
5546				CTLFLAG_RD, adapter, E1000_TDT(txr->me),
5547				igb_sysctl_reg_handler, "IU",
5548 				"Transmit Descriptor Tail");
5549		SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "no_desc_avail",
5550				CTLFLAG_RD, &txr->no_desc_avail,
5551				"Queue No Descriptor Available");
5552		SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "tx_packets",
5553				CTLFLAG_RD, &txr->tx_packets,
5554				"Queue Packets Transmitted");
5555
5556		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_head",
5557				CTLFLAG_RD, adapter, E1000_RDH(rxr->me),
5558				igb_sysctl_reg_handler, "IU",
5559				"Receive Descriptor Head");
5560		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_tail",
5561				CTLFLAG_RD, adapter, E1000_RDT(rxr->me),
5562				igb_sysctl_reg_handler, "IU",
5563				"Receive Descriptor Tail");
5564		SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "rx_packets",
5565				CTLFLAG_RD, &rxr->rx_packets,
5566				"Queue Packets Received");
5567		SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "rx_bytes",
5568				CTLFLAG_RD, &rxr->rx_bytes,
5569				"Queue Bytes Received");
5570		SYSCTL_ADD_UINT(ctx, queue_list, OID_AUTO, "lro_queued",
5571				CTLFLAG_RD, &lro->lro_queued, 0,
5572				"LRO Queued");
5573		SYSCTL_ADD_UINT(ctx, queue_list, OID_AUTO, "lro_flushed",
5574				CTLFLAG_RD, &lro->lro_flushed, 0,
5575				"LRO Flushed");
5576	}
5577
5578	/* MAC stats get their own sub node */
5579
5580	stat_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "mac_stats",
5581				    CTLFLAG_RD, NULL, "MAC Statistics");
5582	stat_list = SYSCTL_CHILDREN(stat_node);
5583
5584	/*
5585	** The VF adapter has a very limited set of stats
5586	** since it's not managing the metal, so to speak.
5587	*/
5588	if (adapter->vf_ifp) {
5589		SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd",
5590				CTLFLAG_RD, &stats->gprc,
5591				"Good Packets Received");
5592		SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
5593				CTLFLAG_RD, &stats->gptc,
5594				"Good Packets Transmitted");
5595		SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd",
5596				CTLFLAG_RD, &stats->gorc,
5597				"Good Octets Received");
5598		SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_txd",
5599				CTLFLAG_RD, &stats->gotc,
5600				"Good Octets Transmitted");
5601		SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd",
5602				CTLFLAG_RD, &stats->mprc,
5603				"Multicast Packets Received");
5604		return;
5605	}
5606
5607	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "excess_coll",
5608			CTLFLAG_RD, &stats->ecol,
5609			"Excessive collisions");
5610	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "single_coll",
5611			CTLFLAG_RD, &stats->scc,
5612			"Single collisions");
5613	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "multiple_coll",
5614			CTLFLAG_RD, &stats->mcc,
5615			"Multiple collisions");
5616	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "late_coll",
5617			CTLFLAG_RD, &stats->latecol,
5618			"Late collisions");
5619	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "collision_count",
5620			CTLFLAG_RD, &stats->colc,
5621			"Collision Count");
5622	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "symbol_errors",
5623			CTLFLAG_RD, &stats->symerrs,
5624			"Symbol Errors");
5625	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "sequence_errors",
5626			CTLFLAG_RD, &stats->sec,
5627			"Sequence Errors");
5628	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "defer_count",
5629			CTLFLAG_RD, &stats->dc,
5630			"Defer Count");
5631	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "missed_packets",
5632			CTLFLAG_RD, &stats->mpc,
5633			"Missed Packets");
5634	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_no_buff",
5635			CTLFLAG_RD, &stats->rnbc,
5636			"Receive No Buffers");
5637	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_undersize",
5638			CTLFLAG_RD, &stats->ruc,
5639			"Receive Undersize");
5640	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_fragmented",
5641			CTLFLAG_RD, &stats->rfc,
5642			"Fragmented Packets Received ");
5643	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_oversize",
5644			CTLFLAG_RD, &stats->roc,
5645			"Oversized Packets Received");
5646	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_jabber",
5647			CTLFLAG_RD, &stats->rjc,
5648			"Recevied Jabber");
5649	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_errs",
5650			CTLFLAG_RD, &stats->rxerrc,
5651			"Receive Errors");
5652	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "crc_errs",
5653			CTLFLAG_RD, &stats->crcerrs,
5654			"CRC errors");
5655	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "alignment_errs",
5656			CTLFLAG_RD, &stats->algnerrc,
5657			"Alignment Errors");
5658	/* On 82575 these are collision counts */
5659	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "coll_ext_errs",
5660			CTLFLAG_RD, &stats->cexterr,
5661			"Collision/Carrier extension errors");
5662	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xon_recvd",
5663			CTLFLAG_RD, &stats->xonrxc,
5664			"XON Received");
5665	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xon_txd",
5666			CTLFLAG_RD, &stats->xontxc,
5667			"XON Transmitted");
5668	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xoff_recvd",
5669			CTLFLAG_RD, &stats->xoffrxc,
5670			"XOFF Received");
5671	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xoff_txd",
5672			CTLFLAG_RD, &stats->xofftxc,
5673			"XOFF Transmitted");
5674	/* Packet Reception Stats */
5675	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "total_pkts_recvd",
5676			CTLFLAG_RD, &stats->tpr,
5677			"Total Packets Received ");
5678	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd",
5679			CTLFLAG_RD, &stats->gprc,
5680			"Good Packets Received");
5681	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_recvd",
5682			CTLFLAG_RD, &stats->bprc,
5683			"Broadcast Packets Received");
5684	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd",
5685			CTLFLAG_RD, &stats->mprc,
5686			"Multicast Packets Received");
5687	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_64",
5688			CTLFLAG_RD, &stats->prc64,
5689			"64 byte frames received ");
5690	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_65_127",
5691			CTLFLAG_RD, &stats->prc127,
5692			"65-127 byte frames received");
5693	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_128_255",
5694			CTLFLAG_RD, &stats->prc255,
5695			"128-255 byte frames received");
5696	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_256_511",
5697			CTLFLAG_RD, &stats->prc511,
5698			"256-511 byte frames received");
5699	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_512_1023",
5700			CTLFLAG_RD, &stats->prc1023,
5701			"512-1023 byte frames received");
5702	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_1024_1522",
5703			CTLFLAG_RD, &stats->prc1522,
5704			"1023-1522 byte frames received");
5705	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd",
5706			CTLFLAG_RD, &stats->gorc,
5707			"Good Octets Received");
5708
5709	/* Packet Transmission Stats */
5710	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_txd",
5711			CTLFLAG_RD, &stats->gotc,
5712			"Good Octets Transmitted");
5713	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "total_pkts_txd",
5714			CTLFLAG_RD, &stats->tpt,
5715			"Total Packets Transmitted");
5716	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
5717			CTLFLAG_RD, &stats->gptc,
5718			"Good Packets Transmitted");
5719	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_txd",
5720			CTLFLAG_RD, &stats->bptc,
5721			"Broadcast Packets Transmitted");
5722	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_txd",
5723			CTLFLAG_RD, &stats->mptc,
5724			"Multicast Packets Transmitted");
5725	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_64",
5726			CTLFLAG_RD, &stats->ptc64,
5727			"64 byte frames transmitted ");
5728	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_65_127",
5729			CTLFLAG_RD, &stats->ptc127,
5730			"65-127 byte frames transmitted");
5731	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_128_255",
5732			CTLFLAG_RD, &stats->ptc255,
5733			"128-255 byte frames transmitted");
5734	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_256_511",
5735			CTLFLAG_RD, &stats->ptc511,
5736			"256-511 byte frames transmitted");
5737	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_512_1023",
5738			CTLFLAG_RD, &stats->ptc1023,
5739			"512-1023 byte frames transmitted");
5740	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_1024_1522",
5741			CTLFLAG_RD, &stats->ptc1522,
5742			"1024-1522 byte frames transmitted");
5743	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tso_txd",
5744			CTLFLAG_RD, &stats->tsctc,
5745			"TSO Contexts Transmitted");
5746	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tso_ctx_fail",
5747			CTLFLAG_RD, &stats->tsctfc,
5748			"TSO Contexts Failed");
5749
5750
5751	/* Interrupt Stats */
5752
5753	int_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "interrupts",
5754				    CTLFLAG_RD, NULL, "Interrupt Statistics");
5755	int_list = SYSCTL_CHILDREN(int_node);
5756
5757	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "asserts",
5758			CTLFLAG_RD, &stats->iac,
5759			"Interrupt Assertion Count");
5760
5761	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_pkt_timer",
5762			CTLFLAG_RD, &stats->icrxptc,
5763			"Interrupt Cause Rx Pkt Timer Expire Count");
5764
5765	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_abs_timer",
5766			CTLFLAG_RD, &stats->icrxatc,
5767			"Interrupt Cause Rx Abs Timer Expire Count");
5768
5769	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_pkt_timer",
5770			CTLFLAG_RD, &stats->ictxptc,
5771			"Interrupt Cause Tx Pkt Timer Expire Count");
5772
5773	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_abs_timer",
5774			CTLFLAG_RD, &stats->ictxatc,
5775			"Interrupt Cause Tx Abs Timer Expire Count");
5776
5777	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_queue_empty",
5778			CTLFLAG_RD, &stats->ictxqec,
5779			"Interrupt Cause Tx Queue Empty Count");
5780
5781	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_queue_min_thresh",
5782			CTLFLAG_RD, &stats->ictxqmtc,
5783			"Interrupt Cause Tx Queue Min Thresh Count");
5784
5785	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_desc_min_thresh",
5786			CTLFLAG_RD, &stats->icrxdmtc,
5787			"Interrupt Cause Rx Desc Min Thresh Count");
5788
5789	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_overrun",
5790			CTLFLAG_RD, &stats->icrxoc,
5791			"Interrupt Cause Receiver Overrun Count");
5792
5793	/* Host to Card Stats */
5794
5795	host_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "host",
5796				    CTLFLAG_RD, NULL,
5797				    "Host to Card Statistics");
5798
5799	host_list = SYSCTL_CHILDREN(host_node);
5800
5801	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_tx_pkt",
5802			CTLFLAG_RD, &stats->cbtmpc,
5803			"Circuit Breaker Tx Packet Count");
5804
5805	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "host_tx_pkt_discard",
5806			CTLFLAG_RD, &stats->htdpmc,
5807			"Host Transmit Discarded Packets");
5808
5809	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "rx_pkt",
5810			CTLFLAG_RD, &stats->rpthc,
5811			"Rx Packets To Host");
5812
5813	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_rx_pkts",
5814			CTLFLAG_RD, &stats->cbrmpc,
5815			"Circuit Breaker Rx Packet Count");
5816
5817	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_rx_pkt_drop",
5818			CTLFLAG_RD, &stats->cbrdpc,
5819			"Circuit Breaker Rx Dropped Count");
5820
5821	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "tx_good_pkt",
5822			CTLFLAG_RD, &stats->hgptc,
5823			"Host Good Packets Tx Count");
5824
5825	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_tx_pkt_drop",
5826			CTLFLAG_RD, &stats->htcbdpc,
5827			"Host Tx Circuit Breaker Dropped Count");
5828
5829	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "rx_good_bytes",
5830			CTLFLAG_RD, &stats->hgorc,
5831			"Host Good Octets Received Count");
5832
5833	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "tx_good_bytes",
5834			CTLFLAG_RD, &stats->hgotc,
5835			"Host Good Octets Transmit Count");
5836
5837	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "length_errors",
5838			CTLFLAG_RD, &stats->lenerrs,
5839			"Length Errors");
5840
5841	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "serdes_violation_pkt",
5842			CTLFLAG_RD, &stats->scvpc,
5843			"SerDes/SGMII Code Violation Pkt Count");
5844
5845	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "header_redir_missed",
5846			CTLFLAG_RD, &stats->hrmpc,
5847			"Header Redirection Missed Packet Count");
5848}
5849
5850
5851/**********************************************************************
5852 *
5853 *  This routine provides a way to dump out the adapter eeprom,
5854 *  often a useful debug/service tool. It dumps only the first
5855 *  32 words, which is where the content that matters lives.
5856 *
5857 **********************************************************************/
5858static int
5859igb_sysctl_nvm_info(SYSCTL_HANDLER_ARGS)
5860{
5861	struct adapter *adapter;
5862	int error;
5863	int result;
5864
5865	result = -1;
5866	error = sysctl_handle_int(oidp, &result, 0, req);
5867
5868	if (error || !req->newptr)
5869		return (error);
5870
5871	/*
5872	 * This value will cause a hex dump of the
5873	 * first 32 16-bit words of the EEPROM to
5874	 * the screen.
5875	 */
5876	if (result == 1) {
5877		adapter = (struct adapter *)arg1;
5878		igb_print_nvm_info(adapter);
5879	}
5880
5881	return (error);
5882}
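
/*
** Usage sketch (assuming this handler is hooked to an oid named "nvm"
** elsewhere in the driver; the oid name and unit number are assumptions):
** writing 1 triggers the dump, reads simply return -1.
**	sysctl dev.igb.0.nvm=1
*/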
5883
5884static void
5885igb_print_nvm_info(struct adapter *adapter)
5886{
5887	u16	eeprom_data;
5888	int	i, j, row = 0;
5889
5890	/* It's a bit crude, but it gets the job done */
5891	printf("\nInterface EEPROM Dump:\n");
5892	printf("Offset\n0x0000  ");
5893	for (i = 0, j = 0; i < 32; i++, j++) {
5894		if (j == 8) { /* Start a new row with its offset label */
5895			j = 0; ++row;
5896			printf("\n0x00%x0  ", row);
5897		}
5898		e1000_read_nvm(&adapter->hw, i, 1, &eeprom_data);
5899		printf("%04x ", eeprom_data);
5900	}
5901	printf("\n");
5902}
5903
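/*
** Helper: record a tunable's value in the given limit variable and expose
** it as a read/write integer sysctl under the device's sysctl tree.
*/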
5904static void
5905igb_set_sysctl_value(struct adapter *adapter, const char *name,
5906	const char *description, int *limit, int value)
5907{
5908	*limit = value;
5909	SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
5910	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
5911	    OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW, limit, value, description);
5912}
5913
5914/*
5915** Set flow control using sysctl:
5916** Flow control values:
5917** 	0 - off
5918**	1 - rx pause
5919**	2 - tx pause
5920**	3 - full
5921*/
5922static int
5923igb_set_flowcntl(SYSCTL_HANDLER_ARGS)
5924{
5925	int		error;
5926	static int	input = 3; /* default is full */
5927	struct adapter	*adapter = (struct adapter *) arg1;
5928
5929	error = sysctl_handle_int(oidp, &input, 0, req);
5930
5931	if ((error) || (req->newptr == NULL))
5932		return (error);
5933
5934	switch (input) {
5935		case e1000_fc_rx_pause:
5936		case e1000_fc_tx_pause:
5937		case e1000_fc_full:
5938		case e1000_fc_none:
5939			adapter->hw.fc.requested_mode = input;
5940			adapter->fc = input;
5941			break;
5942		default:
5943			/* Do nothing */
5944			return (error);
5945	}
5946
5947	adapter->hw.fc.current_mode = adapter->hw.fc.requested_mode;
5948	e1000_force_mac_fc(&adapter->hw);
5949	return (error);
5950}
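
/*
** Usage sketch (assuming this handler is hooked to an oid named "fc"
** elsewhere in the driver; the oid name and unit number are assumptions):
**	sysctl dev.igb.0.fc=3	(request full flow control)
**	sysctl dev.igb.0.fc=0	(turn flow control off)
*/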
5951
5952/*
5953** Manage DMA Coalesce:
5954** Control values:
5955** 	0/1 - off/on
5956**	Legal timer values are:
5957**	250, 500, or 1000-10000 in steps of 1000
5958*/
5959static int
5960igb_sysctl_dmac(SYSCTL_HANDLER_ARGS)
5961{
5962	struct adapter *adapter = (struct adapter *) arg1;
5963	int		error;
5964
5965	error = sysctl_handle_int(oidp, &adapter->dmac, 0, req);
5966
5967	if ((error) || (req->newptr == NULL))
5968		return (error);
5969
5970	switch (adapter->dmac) {
5971		case 0:
5972			/*Disabling */
5973			break;
5974		case 1: /* Just enable and use default */
5975			adapter->dmac = 1000;
5976			break;
5977		case 250:
5978		case 500:
5979		case 1000:
5980		case 2000:
5981		case 3000:
5982		case 4000:
5983		case 5000:
5984		case 6000:
5985		case 7000:
5986		case 8000:
5987		case 9000:
5988		case 10000:
5989			/* Legal values - allow */
5990			break;
5991		default:
5992			/* Do nothing, illegal value */
5993			adapter->dmac = 0;
5994			return (error);
5995	}
5996	/* Reinit the interface */
5997	igb_init(adapter);
5998	return (error);
5999}
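
/*
** Usage sketch (assuming this handler is hooked to an oid named "dmac"
** elsewhere in the driver; the oid name and unit number are assumptions):
**	sysctl dev.igb.0.dmac=1000	(enable, timer value 1000)
**	sysctl dev.igb.0.dmac=0		(disable DMA coalescing)
*/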
6000
6001/*
6002** Manage Energy Efficient Ethernet:
6003** Control values:
6004**     0/1 - enabled/disabled
6005*/
6006static int
6007igb_sysctl_eee(SYSCTL_HANDLER_ARGS)
6008{
6009	struct adapter	*adapter = (struct adapter *) arg1;
6010	int		error, value;
6011
6012	value = adapter->hw.dev_spec._82575.eee_disable;
6013	error = sysctl_handle_int(oidp, &value, 0, req);
6014	if (error || req->newptr == NULL)
6015		return (error);
6016	IGB_CORE_LOCK(adapter);
6017	adapter->hw.dev_spec._82575.eee_disable = (value != 0);
6018	igb_init_locked(adapter);
6019	IGB_CORE_UNLOCK(adapter);
6020	return (0);
6021}
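
/*
** Usage sketch (the oid name "eee_disabled" and the unit number are
** assumptions; the actual oid is set up elsewhere in the driver):
**	sysctl dev.igb.0.eee_disabled=1		(turn EEE off)
*/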
6022