if_igb.c revision 246482
1/******************************************************************************
2
3  Copyright (c) 2001-2012, Intel Corporation
4  All rights reserved.
5
6  Redistribution and use in source and binary forms, with or without
7  modification, are permitted provided that the following conditions are met:
8
9   1. Redistributions of source code must retain the above copyright notice,
10      this list of conditions and the following disclaimer.
11
12   2. Redistributions in binary form must reproduce the above copyright
13      notice, this list of conditions and the following disclaimer in the
14      documentation and/or other materials provided with the distribution.
15
16   3. Neither the name of the Intel Corporation nor the names of its
17      contributors may be used to endorse or promote products derived from
18      this software without specific prior written permission.
19
20  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
24  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30  POSSIBILITY OF SUCH DAMAGE.
31
32******************************************************************************/
33/*$FreeBSD: head/sys/dev/e1000/if_igb.c 246482 2013-02-07 15:20:54Z rrs $*/
34
35
36#ifdef HAVE_KERNEL_OPTION_HEADERS
37#include "opt_device_polling.h"
38#include "opt_inet.h"
39#include "opt_inet6.h"
40#include "opt_altq.h"
41#endif
42
43#include <sys/param.h>
44#include <sys/systm.h>
45#if __FreeBSD_version >= 800000
46#include <sys/buf_ring.h>
47#endif
48#include <sys/bus.h>
49#include <sys/endian.h>
50#include <sys/kernel.h>
51#include <sys/kthread.h>
52#include <sys/malloc.h>
53#include <sys/mbuf.h>
54#include <sys/module.h>
55#include <sys/rman.h>
56#include <sys/socket.h>
57#include <sys/sockio.h>
58#include <sys/sysctl.h>
59#include <sys/taskqueue.h>
60#include <sys/eventhandler.h>
61#include <sys/pcpu.h>
62#include <sys/smp.h>
63#include <machine/smp.h>
64#include <machine/bus.h>
65#include <machine/resource.h>
66
67#include <net/bpf.h>
68#include <net/ethernet.h>
69#include <net/if.h>
70#include <net/if_arp.h>
71#include <net/if_dl.h>
72#include <net/if_media.h>
73
74#include <net/if_types.h>
75#include <net/if_vlan_var.h>
76
77#include <netinet/in_systm.h>
78#include <netinet/in.h>
79#include <netinet/if_ether.h>
80#include <netinet/ip.h>
81#include <netinet/ip6.h>
82#include <netinet/tcp.h>
83#include <netinet/tcp_lro.h>
84#include <netinet/udp.h>
85
86#include <machine/in_cksum.h>
87#include <dev/led/led.h>
88#include <dev/pci/pcivar.h>
89#include <dev/pci/pcireg.h>
90
91#include "e1000_api.h"
92#include "e1000_82575.h"
93#include "if_igb.h"
94
95/*********************************************************************
96 *  Set this to one to display debug statistics
97 *********************************************************************/
98int	igb_display_debug_stats = 0;
99
100/*********************************************************************
101 *  Driver version:
102 *********************************************************************/
103char igb_driver_version[] = "version - 2.3.5";
104
105
106/*********************************************************************
107 *  PCI Device ID Table
108 *
109 *  Used by probe to select the devices to load on
110 *  Last field stores an index into igb_strings
111 *  Last entry must be all 0s
112 *
113 *  { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
114 *********************************************************************/
115
116static igb_vendor_info_t igb_vendor_info_array[] =
117{
118	{ 0x8086, E1000_DEV_ID_82575EB_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
119	{ 0x8086, E1000_DEV_ID_82575EB_FIBER_SERDES,
120						PCI_ANY_ID, PCI_ANY_ID, 0},
121	{ 0x8086, E1000_DEV_ID_82575GB_QUAD_COPPER,
122						PCI_ANY_ID, PCI_ANY_ID, 0},
123	{ 0x8086, E1000_DEV_ID_82576,		PCI_ANY_ID, PCI_ANY_ID, 0},
124	{ 0x8086, E1000_DEV_ID_82576_NS,	PCI_ANY_ID, PCI_ANY_ID, 0},
125	{ 0x8086, E1000_DEV_ID_82576_NS_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
126	{ 0x8086, E1000_DEV_ID_82576_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
127	{ 0x8086, E1000_DEV_ID_82576_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
128	{ 0x8086, E1000_DEV_ID_82576_SERDES_QUAD,
129						PCI_ANY_ID, PCI_ANY_ID, 0},
130	{ 0x8086, E1000_DEV_ID_82576_QUAD_COPPER,
131						PCI_ANY_ID, PCI_ANY_ID, 0},
132	{ 0x8086, E1000_DEV_ID_82576_QUAD_COPPER_ET2,
133						PCI_ANY_ID, PCI_ANY_ID, 0},
134	{ 0x8086, E1000_DEV_ID_82576_VF,	PCI_ANY_ID, PCI_ANY_ID, 0},
135	{ 0x8086, E1000_DEV_ID_82580_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
136	{ 0x8086, E1000_DEV_ID_82580_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
137	{ 0x8086, E1000_DEV_ID_82580_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
138	{ 0x8086, E1000_DEV_ID_82580_SGMII,	PCI_ANY_ID, PCI_ANY_ID, 0},
139	{ 0x8086, E1000_DEV_ID_82580_COPPER_DUAL,
140						PCI_ANY_ID, PCI_ANY_ID, 0},
141	{ 0x8086, E1000_DEV_ID_82580_QUAD_FIBER,
142						PCI_ANY_ID, PCI_ANY_ID, 0},
143	{ 0x8086, E1000_DEV_ID_DH89XXCC_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
144	{ 0x8086, E1000_DEV_ID_DH89XXCC_SGMII,	PCI_ANY_ID, PCI_ANY_ID, 0},
145	{ 0x8086, E1000_DEV_ID_DH89XXCC_SFP,	PCI_ANY_ID, PCI_ANY_ID, 0},
146	{ 0x8086, E1000_DEV_ID_DH89XXCC_BACKPLANE,
147						PCI_ANY_ID, PCI_ANY_ID, 0},
148	{ 0x8086, E1000_DEV_ID_I350_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
149	{ 0x8086, E1000_DEV_ID_I350_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
150	{ 0x8086, E1000_DEV_ID_I350_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
151	{ 0x8086, E1000_DEV_ID_I350_SGMII,	PCI_ANY_ID, PCI_ANY_ID, 0},
152	{ 0x8086, E1000_DEV_ID_I350_VF,		PCI_ANY_ID, PCI_ANY_ID, 0},
153	{ 0x8086, E1000_DEV_ID_I210_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
154	{ 0x8086, E1000_DEV_ID_I210_COPPER_IT,	PCI_ANY_ID, PCI_ANY_ID, 0},
155	{ 0x8086, E1000_DEV_ID_I210_COPPER_OEM1,
156						PCI_ANY_ID, PCI_ANY_ID, 0},
157	{ 0x8086, E1000_DEV_ID_I210_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
158	{ 0x8086, E1000_DEV_ID_I210_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
159	{ 0x8086, E1000_DEV_ID_I210_SGMII,	PCI_ANY_ID, PCI_ANY_ID, 0},
160	{ 0x8086, E1000_DEV_ID_I211_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
161	/* required last entry */
162	{ 0, 0, 0, 0, 0}
163};
164
165/*********************************************************************
166 *  Table of branding strings for all supported NICs.
167 *********************************************************************/
168
169static char *igb_strings[] = {
170	"Intel(R) PRO/1000 Network Connection"
171};
172
173/*********************************************************************
174 *  Function prototypes
175 *********************************************************************/
176static int	igb_probe(device_t);
177static int	igb_attach(device_t);
178static int	igb_detach(device_t);
179static int	igb_shutdown(device_t);
180static int	igb_suspend(device_t);
181static int	igb_resume(device_t);
182#if __FreeBSD_version >= 800000
183static int	igb_mq_start(struct ifnet *, struct mbuf *);
184static int	igb_mq_start_locked(struct ifnet *, struct tx_ring *);
185static void	igb_qflush(struct ifnet *);
186static void	igb_deferred_mq_start(void *, int);
187#else
188static void	igb_start(struct ifnet *);
189static void	igb_start_locked(struct tx_ring *, struct ifnet *ifp);
190#endif
191static int	igb_ioctl(struct ifnet *, u_long, caddr_t);
192static void	igb_init(void *);
193static void	igb_init_locked(struct adapter *);
194static void	igb_stop(void *);
195static void	igb_media_status(struct ifnet *, struct ifmediareq *);
196static int	igb_media_change(struct ifnet *);
197static void	igb_identify_hardware(struct adapter *);
198static int	igb_allocate_pci_resources(struct adapter *);
199static int	igb_allocate_msix(struct adapter *);
200static int	igb_allocate_legacy(struct adapter *);
201static int	igb_setup_msix(struct adapter *);
202static void	igb_free_pci_resources(struct adapter *);
203static void	igb_local_timer(void *);
204static void	igb_reset(struct adapter *);
205static int	igb_setup_interface(device_t, struct adapter *);
206static int	igb_allocate_queues(struct adapter *);
207static void	igb_configure_queues(struct adapter *);
208
209static int	igb_allocate_transmit_buffers(struct tx_ring *);
210static void	igb_setup_transmit_structures(struct adapter *);
211static void	igb_setup_transmit_ring(struct tx_ring *);
212static void	igb_initialize_transmit_units(struct adapter *);
213static void	igb_free_transmit_structures(struct adapter *);
214static void	igb_free_transmit_buffers(struct tx_ring *);
215
216static int	igb_allocate_receive_buffers(struct rx_ring *);
217static int	igb_setup_receive_structures(struct adapter *);
218static int	igb_setup_receive_ring(struct rx_ring *);
219static void	igb_initialize_receive_units(struct adapter *);
220static void	igb_free_receive_structures(struct adapter *);
221static void	igb_free_receive_buffers(struct rx_ring *);
222static void	igb_free_receive_ring(struct rx_ring *);
223
224static void	igb_enable_intr(struct adapter *);
225static void	igb_disable_intr(struct adapter *);
226static void	igb_update_stats_counters(struct adapter *);
227static bool	igb_txeof(struct tx_ring *);
228
229static __inline	void igb_rx_discard(struct rx_ring *, int);
230static __inline void igb_rx_input(struct rx_ring *,
231		    struct ifnet *, struct mbuf *, u32);
232
233static bool	igb_rxeof(struct igb_queue *, int, int *);
234static void	igb_rx_checksum(u32, struct mbuf *, u32);
235static bool	igb_tx_ctx_setup(struct tx_ring *, struct mbuf *);
236static bool	igb_tso_setup(struct tx_ring *, struct mbuf *, int,
237		    struct ip *, struct tcphdr *);
238static void	igb_set_promisc(struct adapter *);
239static void	igb_disable_promisc(struct adapter *);
240static void	igb_set_multi(struct adapter *);
241static void	igb_update_link_status(struct adapter *);
242static void	igb_refresh_mbufs(struct rx_ring *, int);
243
244static void	igb_register_vlan(void *, struct ifnet *, u16);
245static void	igb_unregister_vlan(void *, struct ifnet *, u16);
246static void	igb_setup_vlan_hw_support(struct adapter *);
247
248static int	igb_xmit(struct tx_ring *, struct mbuf **);
249static int	igb_dma_malloc(struct adapter *, bus_size_t,
250		    struct igb_dma_alloc *, int);
251static void	igb_dma_free(struct adapter *, struct igb_dma_alloc *);
252static int	igb_sysctl_nvm_info(SYSCTL_HANDLER_ARGS);
253static void	igb_print_nvm_info(struct adapter *);
254static int 	igb_is_valid_ether_addr(u8 *);
255static void     igb_add_hw_stats(struct adapter *);
256
257static void	igb_vf_init_stats(struct adapter *);
258static void	igb_update_vf_stats_counters(struct adapter *);
259
260/* Management and WOL Support */
261static void	igb_init_manageability(struct adapter *);
262static void	igb_release_manageability(struct adapter *);
263static void     igb_get_hw_control(struct adapter *);
264static void     igb_release_hw_control(struct adapter *);
265static void     igb_enable_wakeup(device_t);
266static void     igb_led_func(void *, int);
267
268static int	igb_irq_fast(void *);
269static void	igb_msix_que(void *);
270static void	igb_msix_link(void *);
271static void	igb_handle_que(void *context, int pending);
272static void	igb_handle_link(void *context, int pending);
273static void	igb_handle_link_locked(struct adapter *);
274
275static void	igb_set_sysctl_value(struct adapter *, const char *,
276		    const char *, int *, int);
277static int	igb_set_flowcntl(SYSCTL_HANDLER_ARGS);
278static int	igb_sysctl_dmac(SYSCTL_HANDLER_ARGS);
279static int	igb_sysctl_eee(SYSCTL_HANDLER_ARGS);
280
281#ifdef DEVICE_POLLING
282static poll_handler_t igb_poll;
283#endif /* DEVICE_POLLING */
284
285/*********************************************************************
286 *  FreeBSD Device Interface Entry Points
287 *********************************************************************/
288
289static device_method_t igb_methods[] = {
290	/* Device interface */
291	DEVMETHOD(device_probe, igb_probe),
292	DEVMETHOD(device_attach, igb_attach),
293	DEVMETHOD(device_detach, igb_detach),
294	DEVMETHOD(device_shutdown, igb_shutdown),
295	DEVMETHOD(device_suspend, igb_suspend),
296	DEVMETHOD(device_resume, igb_resume),
297	DEVMETHOD_END
298};
299
300static driver_t igb_driver = {
301	"igb", igb_methods, sizeof(struct adapter),
302};
303
304static devclass_t igb_devclass;
305DRIVER_MODULE(igb, pci, igb_driver, igb_devclass, 0, 0);
306MODULE_DEPEND(igb, pci, 1, 1, 1);
307MODULE_DEPEND(igb, ether, 1, 1, 1);
308
309/*********************************************************************
310 *  Tunable default values.
311 *********************************************************************/
312
313static SYSCTL_NODE(_hw, OID_AUTO, igb, CTLFLAG_RD, 0, "IGB driver parameters");
314
315/* Descriptor defaults */
316static int igb_rxd = IGB_DEFAULT_RXD;
317static int igb_txd = IGB_DEFAULT_TXD;
318TUNABLE_INT("hw.igb.rxd", &igb_rxd);
319TUNABLE_INT("hw.igb.txd", &igb_txd);
320SYSCTL_INT(_hw_igb, OID_AUTO, rxd, CTLFLAG_RDTUN, &igb_rxd, 0,
321    "Number of receive descriptors per queue");
322SYSCTL_INT(_hw_igb, OID_AUTO, txd, CTLFLAG_RDTUN, &igb_txd, 0,
323    "Number of transmit descriptors per queue");
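
/*
 * Example usage (illustrative, not from the original sources): these are
 * loader(8) tunables, so they are normally set from /boot/loader.conf, e.g.
 *
 *	hw.igb.rxd=2048
 *	hw.igb.txd=2048
 *
 * Values that are out of range, or whose descriptor table size is not a
 * multiple of the descriptor alignment, are replaced with
 * IGB_DEFAULT_RXD/IGB_DEFAULT_TXD by the validation code in igb_attach().
 */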
324
325/*
326** AIM: Adaptive Interrupt Moderation
327** which means that the interrupt rate
328** is varied over time based on the
329** traffic for that interrupt vector
330*/
331static int igb_enable_aim = TRUE;
332TUNABLE_INT("hw.igb.enable_aim", &igb_enable_aim);
333SYSCTL_INT(_hw_igb, OID_AUTO, enable_aim, CTLFLAG_RW, &igb_enable_aim, 0,
334    "Enable adaptive interrupt moderation");
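
/*
 * Example usage (illustrative, not from the original sources): unlike the
 * CTLFLAG_RDTUN knobs above, this oid is CTLFLAG_RW, so the global default
 * can be flipped at runtime with sysctl(8), e.g.
 *
 *	sysctl hw.igb.enable_aim=0
 *
 * Note that igb_msix_que() consults the per-adapter adapter->enable_aim,
 * which is seeded from this default in igb_attach() via
 * igb_set_sysctl_value().
 */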
335
336/*
337 * MSIX should be the default for best performance,
338 * but this allows it to be forced off for testing.
339 */
340static int igb_enable_msix = 1;
341TUNABLE_INT("hw.igb.enable_msix", &igb_enable_msix);
342SYSCTL_INT(_hw_igb, OID_AUTO, enable_msix, CTLFLAG_RDTUN, &igb_enable_msix, 0,
343    "Enable MSI-X interrupts");
344
345/*
346** Tuneable Interrupt rate
347*/
348static int igb_max_interrupt_rate = 8000;
349TUNABLE_INT("hw.igb.max_interrupt_rate", &igb_max_interrupt_rate);
350SYSCTL_INT(_hw_igb, OID_AUTO, max_interrupt_rate, CTLFLAG_RDTUN,
351    &igb_max_interrupt_rate, 0, "Maximum interrupts per second");
352
353#if __FreeBSD_version >= 800000
354/*
355** Tuneable number of buffers in the buf-ring (drbr_xxx)
356*/
357static int igb_buf_ring_size = IGB_BR_SIZE;
358TUNABLE_INT("hw.igb.buf_ring_size", &igb_buf_ring_size);
359SYSCTL_INT(_hw_igb, OID_AUTO, buf_ring_size, CTLFLAG_RDTUN,
360    &igb_buf_ring_size, 0, "Size of the bufring");
361#endif
362
363/*
364** Header split causes the packet header to
365** be DMA'd to a separate mbuf from the payload.
366** This can have memory alignment benefits, and
367** another plus is that small packets often fit
368** into the header and thus use no cluster. It is
369** a very workload-dependent feature.
370*/
371static int igb_header_split = FALSE;
372TUNABLE_INT("hw.igb.hdr_split", &igb_header_split);
373SYSCTL_INT(_hw_igb, OID_AUTO, header_split, CTLFLAG_RDTUN, &igb_header_split, 0,
374    "Enable receive mbuf header split");
375
376/*
377** This will autoconfigure based on
378** the number of CPUs if left at 0.
379*/
380static int igb_num_queues = 0;
381TUNABLE_INT("hw.igb.num_queues", &igb_num_queues);
382SYSCTL_INT(_hw_igb, OID_AUTO, num_queues, CTLFLAG_RDTUN, &igb_num_queues, 0,
383    "Number of queues to configure, 0 indicates autoconfigure");
384
385/*
386** Global variable to store last used CPU when binding queues
387** to CPUs in igb_allocate_msix.  Starts at CPU_FIRST and increments when a
388** queue is bound to a cpu.
389*/
390static int igb_last_bind_cpu = -1;
391
392/* How many packets rxeof tries to clean at a time */
393static int igb_rx_process_limit = 100;
394TUNABLE_INT("hw.igb.rx_process_limit", &igb_rx_process_limit);
395SYSCTL_INT(_hw_igb, OID_AUTO, rx_process_limit, CTLFLAG_RDTUN,
396    &igb_rx_process_limit, 0,
397    "Maximum number of received packets to process at a time, -1 means unlimited");
398
399#ifdef DEV_NETMAP	/* see ixgbe.c for details */
400#include <dev/netmap/if_igb_netmap.h>
401#endif /* DEV_NETMAP */
402/*********************************************************************
403 *  Device identification routine
404 *
405 *  igb_probe determines if the driver should be loaded for a given
406 *  adapter based on the PCI vendor/device ID of the adapter.
407 *
408 *  return BUS_PROBE_DEFAULT on success, positive on failure
409 *********************************************************************/
410
411static int
412igb_probe(device_t dev)
413{
414	char		adapter_name[60];
415	uint16_t	pci_vendor_id = 0;
416	uint16_t	pci_device_id = 0;
417	uint16_t	pci_subvendor_id = 0;
418	uint16_t	pci_subdevice_id = 0;
419	igb_vendor_info_t *ent;
420
421	INIT_DEBUGOUT("igb_probe: begin");
422
423	pci_vendor_id = pci_get_vendor(dev);
424	if (pci_vendor_id != IGB_VENDOR_ID)
425		return (ENXIO);
426
427	pci_device_id = pci_get_device(dev);
428	pci_subvendor_id = pci_get_subvendor(dev);
429	pci_subdevice_id = pci_get_subdevice(dev);
430
431	ent = igb_vendor_info_array;
432	while (ent->vendor_id != 0) {
433		if ((pci_vendor_id == ent->vendor_id) &&
434		    (pci_device_id == ent->device_id) &&
435
436		    ((pci_subvendor_id == ent->subvendor_id) ||
437		    (ent->subvendor_id == PCI_ANY_ID)) &&
438
439		    ((pci_subdevice_id == ent->subdevice_id) ||
440		    (ent->subdevice_id == PCI_ANY_ID))) {
441			sprintf(adapter_name, "%s %s",
442				igb_strings[ent->index],
443				igb_driver_version);
444			device_set_desc_copy(dev, adapter_name);
445			return (BUS_PROBE_DEFAULT);
446		}
447		ent++;
448	}
449
450	return (ENXIO);
451}
452
453/*********************************************************************
454 *  Device initialization routine
455 *
456 *  The attach entry point is called when the driver is being loaded.
457 *  This routine identifies the type of hardware, allocates all resources
458 *  and initializes the hardware.
459 *
460 *  return 0 on success, positive on failure
461 *********************************************************************/
462
463static int
464igb_attach(device_t dev)
465{
466	struct adapter	*adapter;
467	int		error = 0;
468	u16		eeprom_data;
469
470	INIT_DEBUGOUT("igb_attach: begin");
471
472	if (resource_disabled("igb", device_get_unit(dev))) {
473		device_printf(dev, "Disabled by device hint\n");
474		return (ENXIO);
475	}
476
477	adapter = device_get_softc(dev);
478	adapter->dev = adapter->osdep.dev = dev;
479	IGB_CORE_LOCK_INIT(adapter, device_get_nameunit(dev));
480
481	/* SYSCTL stuff */
482	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
483	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
484	    OID_AUTO, "nvm", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
485	    igb_sysctl_nvm_info, "I", "NVM Information");
486
487	igb_set_sysctl_value(adapter, "enable_aim",
488	    "Interrupt Moderation", &adapter->enable_aim,
489	    igb_enable_aim);
490
491	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
492	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
493	    OID_AUTO, "fc", CTLTYPE_INT|CTLFLAG_RW,
494	    adapter, 0, igb_set_flowcntl, "I", "Flow Control");
495
496	callout_init_mtx(&adapter->timer, &adapter->core_mtx, 0);
497
498	/* Determine hardware and mac info */
499	igb_identify_hardware(adapter);
500
501	/* Setup PCI resources */
502	if (igb_allocate_pci_resources(adapter)) {
503		device_printf(dev, "Allocation of PCI resources failed\n");
504		error = ENXIO;
505		goto err_pci;
506	}
507
508	/* Do Shared Code initialization */
509	if (e1000_setup_init_funcs(&adapter->hw, TRUE)) {
510		device_printf(dev, "Setup of Shared code failed\n");
511		error = ENXIO;
512		goto err_pci;
513	}
514
515	e1000_get_bus_info(&adapter->hw);
516
517	/* Sysctl for limiting the amount of work done in the taskqueue */
518	igb_set_sysctl_value(adapter, "rx_processing_limit",
519	    "max number of rx packets to process",
520	    &adapter->rx_process_limit, igb_rx_process_limit);
521
522	/*
523	 * Validate the number of transmit and receive descriptors. It
524	 * must not exceed the hardware maximum, and must be a multiple
525	 * of IGB_DBA_ALIGN.
526	 */
527	if (((igb_txd * sizeof(struct e1000_tx_desc)) % IGB_DBA_ALIGN) != 0 ||
528	    (igb_txd > IGB_MAX_TXD) || (igb_txd < IGB_MIN_TXD)) {
529		device_printf(dev, "Using %d TX descriptors instead of %d!\n",
530		    IGB_DEFAULT_TXD, igb_txd);
531		adapter->num_tx_desc = IGB_DEFAULT_TXD;
532	} else
533		adapter->num_tx_desc = igb_txd;
534	if (((igb_rxd * sizeof(struct e1000_rx_desc)) % IGB_DBA_ALIGN) != 0 ||
535	    (igb_rxd > IGB_MAX_RXD) || (igb_rxd < IGB_MIN_RXD)) {
536		device_printf(dev, "Using %d RX descriptors instead of %d!\n",
537		    IGB_DEFAULT_RXD, igb_rxd);
538		adapter->num_rx_desc = IGB_DEFAULT_RXD;
539	} else
540		adapter->num_rx_desc = igb_rxd;
541
542	adapter->hw.mac.autoneg = DO_AUTO_NEG;
543	adapter->hw.phy.autoneg_wait_to_complete = FALSE;
544	adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
545
546	/* Copper options */
547	if (adapter->hw.phy.media_type == e1000_media_type_copper) {
548		adapter->hw.phy.mdix = AUTO_ALL_MODES;
549		adapter->hw.phy.disable_polarity_correction = FALSE;
550		adapter->hw.phy.ms_type = IGB_MASTER_SLAVE;
551	}
552
553	/*
554	 * Set the frame limits assuming
555	 * standard ethernet sized frames.
556	 */
557	adapter->max_frame_size = ETHERMTU + ETHER_HDR_LEN + ETHERNET_FCS_SIZE;
558	adapter->min_frame_size = ETH_ZLEN + ETHERNET_FCS_SIZE;
559
560	/*
561	** Allocate and Setup Queues
562	*/
563	if (igb_allocate_queues(adapter)) {
564		error = ENOMEM;
565		goto err_pci;
566	}
567
568	/* Allocate the appropriate stats memory */
569	if (adapter->vf_ifp) {
570		adapter->stats =
571		    (struct e1000_vf_stats *)malloc(sizeof \
572		    (struct e1000_vf_stats), M_DEVBUF, M_NOWAIT | M_ZERO);
573		igb_vf_init_stats(adapter);
574	} else
575		adapter->stats =
576		    (struct e1000_hw_stats *)malloc(sizeof \
577		    (struct e1000_hw_stats), M_DEVBUF, M_NOWAIT | M_ZERO);
578	if (adapter->stats == NULL) {
579		device_printf(dev, "Can not allocate stats memory\n");
580		error = ENOMEM;
581		goto err_late;
582	}
583
584	/* Allocate multicast array memory. */
585	adapter->mta = malloc(sizeof(u8) * ETH_ADDR_LEN *
586	    MAX_NUM_MULTICAST_ADDRESSES, M_DEVBUF, M_NOWAIT);
587	if (adapter->mta == NULL) {
588		device_printf(dev, "Can not allocate multicast setup array\n");
589		error = ENOMEM;
590		goto err_late;
591	}
592
593	/* Some adapter-specific advanced features */
594	if (adapter->hw.mac.type >= e1000_i350) {
595		SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
596		    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
597		    OID_AUTO, "dmac", CTLTYPE_INT|CTLFLAG_RW,
598		    adapter, 0, igb_sysctl_dmac, "I", "DMA Coalesce");
599		SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
600		    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
601		    OID_AUTO, "eee_disabled", CTLTYPE_INT|CTLFLAG_RW,
602		    adapter, 0, igb_sysctl_eee, "I",
603		    "Disable Energy Efficient Ethernet");
604		if (adapter->hw.phy.media_type == e1000_media_type_copper)
605			e1000_set_eee_i350(&adapter->hw);
606	}
607
608	/*
609	** Start from a known state; this is
610	** important when reading the NVM and
611	** MAC address from it.
612	*/
613	e1000_reset_hw(&adapter->hw);
614
615	/* Make sure we have a good EEPROM before we read from it */
616	if (((adapter->hw.mac.type != e1000_i210) &&
617	    (adapter->hw.mac.type != e1000_i211)) &&
618	    (e1000_validate_nvm_checksum(&adapter->hw) < 0)) {
619		/*
620		** Some PCI-E parts fail the first check due to
621		** the link being in a sleep state; call it again,
622		** and if it fails a second time it's a real issue.
623		*/
624		if (e1000_validate_nvm_checksum(&adapter->hw) < 0) {
625			device_printf(dev,
626			    "The EEPROM Checksum Is Not Valid\n");
627			error = EIO;
628			goto err_late;
629		}
630	}
631
632	/*
633	** Copy the permanent MAC address out of the EEPROM
634	*/
635	if (e1000_read_mac_addr(&adapter->hw) < 0) {
636		device_printf(dev, "EEPROM read error while reading MAC"
637		    " address\n");
638		error = EIO;
639		goto err_late;
640	}
641	/* Check its sanity */
642	if (!igb_is_valid_ether_addr(adapter->hw.mac.addr)) {
643		device_printf(dev, "Invalid MAC address\n");
644		error = EIO;
645		goto err_late;
646	}
647
648	/* Setup OS specific network interface */
649	if (igb_setup_interface(dev, adapter) != 0)
650		goto err_late;
651
652	/* Now get a good starting state */
653	igb_reset(adapter);
654
655	/* Initialize statistics */
656	igb_update_stats_counters(adapter);
657
658	adapter->hw.mac.get_link_status = 1;
659	igb_update_link_status(adapter);
660
661	/* Indicate SOL/IDER usage */
662	if (e1000_check_reset_block(&adapter->hw))
663		device_printf(dev,
664		    "PHY reset is blocked due to SOL/IDER session.\n");
665
666	/* Determine if we have to control management hardware */
667	adapter->has_manage = e1000_enable_mng_pass_thru(&adapter->hw);
668
669	/*
670	 * Setup Wake-on-Lan
671	 */
672	/* APME bit in EEPROM is mapped to WUC.APME */
673	eeprom_data = E1000_READ_REG(&adapter->hw, E1000_WUC) & E1000_WUC_APME;
674	if (eeprom_data)
675		adapter->wol = E1000_WUFC_MAG;
676
677	/* Register for VLAN events */
678	adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
679	     igb_register_vlan, adapter, EVENTHANDLER_PRI_FIRST);
680	adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
681	     igb_unregister_vlan, adapter, EVENTHANDLER_PRI_FIRST);
682
683	igb_add_hw_stats(adapter);
684
685	/* Tell the stack that the interface is not active */
686	adapter->ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
687	adapter->ifp->if_drv_flags |=  IFF_DRV_OACTIVE;
688
689	adapter->led_dev = led_create(igb_led_func, adapter,
690	    device_get_nameunit(dev));
691
692	/*
693	** Configure Interrupts
694	*/
695	if ((adapter->msix > 1) && (igb_enable_msix))
696		error = igb_allocate_msix(adapter);
697	else /* MSI or Legacy */
698		error = igb_allocate_legacy(adapter);
699	if (error)
700		goto err_late;
701
702#ifdef DEV_NETMAP
703	igb_netmap_attach(adapter);
704#endif /* DEV_NETMAP */
705	INIT_DEBUGOUT("igb_attach: end");
706
707	return (0);
708
709err_late:
710	igb_detach(dev);
711	igb_free_transmit_structures(adapter);
712	igb_free_receive_structures(adapter);
713	igb_release_hw_control(adapter);
714err_pci:
715	igb_free_pci_resources(adapter);
716	if (adapter->ifp != NULL)
717		if_free(adapter->ifp);
718	free(adapter->mta, M_DEVBUF);
719	IGB_CORE_LOCK_DESTROY(adapter);
720
721	return (error);
722}
723
724/*********************************************************************
725 *  Device removal routine
726 *
727 *  The detach entry point is called when the driver is being removed.
728 *  This routine stops the adapter and deallocates all the resources
729 *  that were allocated for driver operation.
730 *
731 *  return 0 on success, positive on failure
732 *********************************************************************/
733
734static int
735igb_detach(device_t dev)
736{
737	struct adapter	*adapter = device_get_softc(dev);
738	struct ifnet	*ifp = adapter->ifp;
739
740	INIT_DEBUGOUT("igb_detach: begin");
741
742	/* Make sure VLANS are not using driver */
743	if (adapter->ifp->if_vlantrunk != NULL) {
744		device_printf(dev,"Vlan in use, detach first\n");
745		return (EBUSY);
746	}
747
748	ether_ifdetach(adapter->ifp);
749
750	if (adapter->led_dev != NULL)
751		led_destroy(adapter->led_dev);
752
753#ifdef DEVICE_POLLING
754	if (ifp->if_capenable & IFCAP_POLLING)
755		ether_poll_deregister(ifp);
756#endif
757
758	IGB_CORE_LOCK(adapter);
759	adapter->in_detach = 1;
760	igb_stop(adapter);
761	IGB_CORE_UNLOCK(adapter);
762
763	e1000_phy_hw_reset(&adapter->hw);
764
765	/* Give control back to firmware */
766	igb_release_manageability(adapter);
767	igb_release_hw_control(adapter);
768
769	if (adapter->wol) {
770		E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
771		E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
772		igb_enable_wakeup(dev);
773	}
774
775	/* Unregister VLAN events */
776	if (adapter->vlan_attach != NULL)
777		EVENTHANDLER_DEREGISTER(vlan_config, adapter->vlan_attach);
778	if (adapter->vlan_detach != NULL)
779		EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach);
780
781	callout_drain(&adapter->timer);
782
783#ifdef DEV_NETMAP
784	netmap_detach(adapter->ifp);
785#endif /* DEV_NETMAP */
786	igb_free_pci_resources(adapter);
787	bus_generic_detach(dev);
788	if_free(ifp);
789
790	igb_free_transmit_structures(adapter);
791	igb_free_receive_structures(adapter);
792	if (adapter->mta != NULL)
793		free(adapter->mta, M_DEVBUF);
794
795	IGB_CORE_LOCK_DESTROY(adapter);
796
797	return (0);
798}
799
800/*********************************************************************
801 *
802 *  Shutdown entry point
803 *
804 **********************************************************************/
805
806static int
807igb_shutdown(device_t dev)
808{
809	return igb_suspend(dev);
810}
811
812/*
813 * Suspend/resume device methods.
814 */
815static int
816igb_suspend(device_t dev)
817{
818	struct adapter *adapter = device_get_softc(dev);
819
820	IGB_CORE_LOCK(adapter);
821
822	igb_stop(adapter);
823
824        igb_release_manageability(adapter);
825	igb_release_hw_control(adapter);
826
827        if (adapter->wol) {
828                E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
829                E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
830                igb_enable_wakeup(dev);
831        }
832
833	IGB_CORE_UNLOCK(adapter);
834
835	return bus_generic_suspend(dev);
836}
837
838static int
839igb_resume(device_t dev)
840{
841	struct adapter *adapter = device_get_softc(dev);
842	struct tx_ring	*txr = adapter->tx_rings;
843	struct ifnet *ifp = adapter->ifp;
844
845	IGB_CORE_LOCK(adapter);
846	igb_init_locked(adapter);
847	igb_init_manageability(adapter);
848
849	if ((ifp->if_flags & IFF_UP) &&
850	    (ifp->if_drv_flags & IFF_DRV_RUNNING) && adapter->link_active) {
851		for (int i = 0; i < adapter->num_queues; i++, txr++) {
852			IGB_TX_LOCK(txr);
853#if __FreeBSD_version >= 800000
854			/* Process the stack queue only if not depleted */
855			if (((txr->queue_status & IGB_QUEUE_DEPLETED) == 0) &&
856			    !drbr_empty(ifp, txr->br))
857				igb_mq_start_locked(ifp, txr);
858#else
859			if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
860				igb_start_locked(txr, ifp);
861#endif
862			IGB_TX_UNLOCK(txr);
863		}
864	}
865	IGB_CORE_UNLOCK(adapter);
866
867	return bus_generic_resume(dev);
868}
869
870
871#if __FreeBSD_version < 800000
872
873/*********************************************************************
874 *  Transmit entry point
875 *
876 *  igb_start is called by the stack to initiate a transmit.
877 *  The driver will remain in this routine as long as there are
878 *  packets to transmit and transmit resources are available.
879 *  In case resources are not available, the stack is notified and
880 *  the packet is requeued.
881 **********************************************************************/
882
883static void
884igb_start_locked(struct tx_ring *txr, struct ifnet *ifp)
885{
886	struct adapter	*adapter = ifp->if_softc;
887	struct mbuf	*m_head;
888
889	IGB_TX_LOCK_ASSERT(txr);
890
891	if ((ifp->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
892	    IFF_DRV_RUNNING)
893		return;
894	if (!adapter->link_active)
895		return;
896
897	/* Call cleanup if number of TX descriptors low */
898	if (txr->tx_avail <= IGB_TX_CLEANUP_THRESHOLD)
899		igb_txeof(txr);
900
901	while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
902		if (txr->tx_avail <= IGB_MAX_SCATTER) {
903			txr->queue_status |= IGB_QUEUE_DEPLETED;
904			break;
905		}
906		IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
907		if (m_head == NULL)
908			break;
909		/*
910		 *  Encapsulation can modify our pointer, and/or make it
911		 *  NULL on failure.  In that event, we can't requeue.
912		 */
913		if (igb_xmit(txr, &m_head)) {
914			if (m_head != NULL)
915				IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
916			if (txr->tx_avail <= IGB_MAX_SCATTER)
917				txr->queue_status |= IGB_QUEUE_DEPLETED;
918			break;
919		}
920
921		/* Send a copy of the frame to the BPF listener */
922		ETHER_BPF_MTAP(ifp, m_head);
923
924		/* Set watchdog on */
925		txr->watchdog_time = ticks;
926		txr->queue_status |= IGB_QUEUE_WORKING;
927	}
928}
929
930/*
931 * Legacy TX driver routine, called from the
932 * stack, always uses tx[0], and spins for it.
933 * Should not be used with multiqueue tx
934 */
935static void
936igb_start(struct ifnet *ifp)
937{
938	struct adapter	*adapter = ifp->if_softc;
939	struct tx_ring	*txr = adapter->tx_rings;
940
941	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
942		IGB_TX_LOCK(txr);
943		igb_start_locked(txr, ifp);
944		IGB_TX_UNLOCK(txr);
945	}
946	return;
947}
948
949#else /* __FreeBSD_version >= 800000 */
950
951/*
952** Multiqueue Transmit driver
953**
954*/
955static int
956igb_mq_start(struct ifnet *ifp, struct mbuf *m)
957{
958	struct adapter		*adapter = ifp->if_softc;
959	struct igb_queue	*que;
960	struct tx_ring		*txr;
961	int 			i, err = 0;
962
963	/* Which queue to use */
964	if ((m->m_flags & M_FLOWID) != 0)
965		i = m->m_pkthdr.flowid % adapter->num_queues;
966	else
967		i = curcpu % adapter->num_queues;
968
969	txr = &adapter->tx_rings[i];
970	que = &adapter->queues[i];
971	if (((txr->queue_status & IGB_QUEUE_DEPLETED) == 0) &&
972	    IGB_TX_TRYLOCK(txr)) {
973		/*
974		** Enqueue to the ring first so that
975		** frames are sent in order, then
976		** drain the ring under the lock.
977		*/
978		if (m != NULL)
979			drbr_enqueue(ifp, txr->br, m);
980		err = igb_mq_start_locked(ifp, txr);
981		IGB_TX_UNLOCK(txr);
982	} else {
983		if (m != NULL)
984			err = drbr_enqueue(ifp, txr->br, m);
985		taskqueue_enqueue(que->tq, &txr->txq_task);
986	}
987
988	return (err);
989}
990
991static int
992igb_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr)
993{
994	struct adapter  *adapter = txr->adapter;
995        struct mbuf     *next;
996        int             err = 0, enq;
997
998	IGB_TX_LOCK_ASSERT(txr);
999
1000	if (((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) ||
1001	    (txr->queue_status & IGB_QUEUE_DEPLETED) ||
1002	    adapter->link_active == 0)
1003		return (err);
1004
1005	enq = 0;
1006
1007	/* Process the queue */
1008	while ((next = drbr_peek(ifp, txr->br)) != NULL) {
1009		if ((err = igb_xmit(txr, &next)) != 0) {
1010			if (next == NULL) {
1011				/* It was freed, move forward */
1012				drbr_advance(ifp, txr->br);
1013			} else {
1014				/*
1015				 * Still have one left, it may not be
1016				 * the same since the transmit function
1017				 * may have changed it.
1018				 */
1019				drbr_putback(ifp, txr->br, next);
1020			}
1021			break;
1022		}
1023		drbr_advance(ifp, txr->br);
1024		enq++;
1025		ifp->if_obytes += next->m_pkthdr.len;
1026		if (next->m_flags & M_MCAST)
1027			ifp->if_omcasts++;
1028		ETHER_BPF_MTAP(ifp, next);
1029		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
1030			break;
1031	}
1032	if (enq > 0) {
1033		/* Set the watchdog */
1034		txr->queue_status |= IGB_QUEUE_WORKING;
1035		txr->watchdog_time = ticks;
1036	}
1037	if (txr->tx_avail <= IGB_TX_CLEANUP_THRESHOLD)
1038		igb_txeof(txr);
1039	if (txr->tx_avail <= IGB_MAX_SCATTER)
1040		txr->queue_status |= IGB_QUEUE_DEPLETED;
1041	return (err);
1042}
1043
1044/*
1045 * Called from a taskqueue to drain queued transmit packets.
1046 */
1047static void
1048igb_deferred_mq_start(void *arg, int pending)
1049{
1050	struct tx_ring *txr = arg;
1051	struct adapter *adapter = txr->adapter;
1052	struct ifnet *ifp = adapter->ifp;
1053
1054	IGB_TX_LOCK(txr);
1055	if (!drbr_empty(ifp, txr->br))
1056		igb_mq_start_locked(ifp, txr);
1057	IGB_TX_UNLOCK(txr);
1058}
1059
1060/*
1061** Flush all ring buffers
1062*/
1063static void
1064igb_qflush(struct ifnet *ifp)
1065{
1066	struct adapter	*adapter = ifp->if_softc;
1067	struct tx_ring	*txr = adapter->tx_rings;
1068	struct mbuf	*m;
1069
1070	for (int i = 0; i < adapter->num_queues; i++, txr++) {
1071		IGB_TX_LOCK(txr);
1072		while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
1073			m_freem(m);
1074		IGB_TX_UNLOCK(txr);
1075	}
1076	if_qflush(ifp);
1077}
1078#endif /* __FreeBSD_version >= 800000 */
1079
1080/*********************************************************************
1081 *  Ioctl entry point
1082 *
1083 *  igb_ioctl is called when the user wants to configure the
1084 *  interface.
1085 *
1086 *  return 0 on success, positive on failure
1087 **********************************************************************/
1088
1089static int
1090igb_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
1091{
1092	struct adapter	*adapter = ifp->if_softc;
1093	struct ifreq	*ifr = (struct ifreq *)data;
1094#if defined(INET) || defined(INET6)
1095	struct ifaddr	*ifa = (struct ifaddr *)data;
1096#endif
1097	bool		avoid_reset = FALSE;
1098	int		error = 0;
1099
1100	if (adapter->in_detach)
1101		return (error);
1102
1103	switch (command) {
1104	case SIOCSIFADDR:
1105#ifdef INET
1106		if (ifa->ifa_addr->sa_family == AF_INET)
1107			avoid_reset = TRUE;
1108#endif
1109#ifdef INET6
1110		if (ifa->ifa_addr->sa_family == AF_INET6)
1111			avoid_reset = TRUE;
1112#endif
1113		/*
1114		** Calling init results in link renegotiation,
1115		** so we avoid doing it when possible.
1116		*/
1117		if (avoid_reset) {
1118			ifp->if_flags |= IFF_UP;
1119			if (!(ifp->if_drv_flags & IFF_DRV_RUNNING))
1120				igb_init(adapter);
1121#ifdef INET
1122			if (!(ifp->if_flags & IFF_NOARP))
1123				arp_ifinit(ifp, ifa);
1124#endif
1125		} else
1126			error = ether_ioctl(ifp, command, data);
1127		break;
1128	case SIOCSIFMTU:
1129	    {
1130		int max_frame_size;
1131
1132		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)");
1133
1134		IGB_CORE_LOCK(adapter);
1135		max_frame_size = 9234;
1136		if (ifr->ifr_mtu > max_frame_size - ETHER_HDR_LEN -
1137		    ETHER_CRC_LEN) {
1138			IGB_CORE_UNLOCK(adapter);
1139			error = EINVAL;
1140			break;
1141		}
1142
1143		ifp->if_mtu = ifr->ifr_mtu;
1144		adapter->max_frame_size =
1145		    ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
1146		igb_init_locked(adapter);
1147		IGB_CORE_UNLOCK(adapter);
1148		break;
1149	    }
1150	case SIOCSIFFLAGS:
1151		IOCTL_DEBUGOUT("ioctl rcv'd:\
1152		    SIOCSIFFLAGS (Set Interface Flags)");
1153		IGB_CORE_LOCK(adapter);
1154		if (ifp->if_flags & IFF_UP) {
1155			if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) {
1156				if ((ifp->if_flags ^ adapter->if_flags) &
1157				    (IFF_PROMISC | IFF_ALLMULTI)) {
1158					igb_disable_promisc(adapter);
1159					igb_set_promisc(adapter);
1160				}
1161			} else
1162				igb_init_locked(adapter);
1163		} else
1164			if (ifp->if_drv_flags & IFF_DRV_RUNNING)
1165				igb_stop(adapter);
1166		adapter->if_flags = ifp->if_flags;
1167		IGB_CORE_UNLOCK(adapter);
1168		break;
1169	case SIOCADDMULTI:
1170	case SIOCDELMULTI:
1171		IOCTL_DEBUGOUT("ioctl rcv'd: SIOC(ADD|DEL)MULTI");
1172		if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1173			IGB_CORE_LOCK(adapter);
1174			igb_disable_intr(adapter);
1175			igb_set_multi(adapter);
1176#ifdef DEVICE_POLLING
1177			if (!(ifp->if_capenable & IFCAP_POLLING))
1178#endif
1179				igb_enable_intr(adapter);
1180			IGB_CORE_UNLOCK(adapter);
1181		}
1182		break;
1183	case SIOCSIFMEDIA:
1184		/* Check SOL/IDER usage */
1185		IGB_CORE_LOCK(adapter);
1186		if (e1000_check_reset_block(&adapter->hw)) {
1187			IGB_CORE_UNLOCK(adapter);
1188			device_printf(adapter->dev, "Media change is"
1189			    " blocked due to SOL/IDER session.\n");
1190			break;
1191		}
1192		IGB_CORE_UNLOCK(adapter);
1193	case SIOCGIFMEDIA:
1194		IOCTL_DEBUGOUT("ioctl rcv'd: \
1195		    SIOCxIFMEDIA (Get/Set Interface Media)");
1196		error = ifmedia_ioctl(ifp, ifr, &adapter->media, command);
1197		break;
1198	case SIOCSIFCAP:
1199	    {
1200		int mask, reinit;
1201
1202		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFCAP (Set Capabilities)");
1203		reinit = 0;
1204		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
1205#ifdef DEVICE_POLLING
1206		if (mask & IFCAP_POLLING) {
1207			if (ifr->ifr_reqcap & IFCAP_POLLING) {
1208				error = ether_poll_register(igb_poll, ifp);
1209				if (error)
1210					return (error);
1211				IGB_CORE_LOCK(adapter);
1212				igb_disable_intr(adapter);
1213				ifp->if_capenable |= IFCAP_POLLING;
1214				IGB_CORE_UNLOCK(adapter);
1215			} else {
1216				error = ether_poll_deregister(ifp);
1217				/* Enable interrupt even in error case */
1218				IGB_CORE_LOCK(adapter);
1219				igb_enable_intr(adapter);
1220				ifp->if_capenable &= ~IFCAP_POLLING;
1221				IGB_CORE_UNLOCK(adapter);
1222			}
1223		}
1224#endif
1225		if (mask & IFCAP_HWCSUM) {
1226			ifp->if_capenable ^= IFCAP_HWCSUM;
1227			reinit = 1;
1228		}
1229		if (mask & IFCAP_TSO4) {
1230			ifp->if_capenable ^= IFCAP_TSO4;
1231			reinit = 1;
1232		}
1233		if (mask & IFCAP_VLAN_HWTAGGING) {
1234			ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
1235			reinit = 1;
1236		}
1237		if (mask & IFCAP_VLAN_HWFILTER) {
1238			ifp->if_capenable ^= IFCAP_VLAN_HWFILTER;
1239			reinit = 1;
1240		}
1241		if (mask & IFCAP_VLAN_HWTSO) {
1242			ifp->if_capenable ^= IFCAP_VLAN_HWTSO;
1243			reinit = 1;
1244		}
1245		if (mask & IFCAP_LRO) {
1246			ifp->if_capenable ^= IFCAP_LRO;
1247			reinit = 1;
1248		}
1249		if (reinit && (ifp->if_drv_flags & IFF_DRV_RUNNING))
1250			igb_init(adapter);
1251		VLAN_CAPABILITIES(ifp);
1252		break;
1253	    }
1254
1255	default:
1256		error = ether_ioctl(ifp, command, data);
1257		break;
1258	}
1259
1260	return (error);
1261}
1262
1263
1264/*********************************************************************
1265 *  Init entry point
1266 *
1267 *  This routine is used in two ways. It is used by the stack as
1268 *  init entry point in network interface structure. It is also used
1269 *  by the driver as a hw/sw initialization routine to get to a
1270 *  consistent state.
1271 *
1272 *  return 0 on success, positive on failure
1273 **********************************************************************/
1274
1275static void
1276igb_init_locked(struct adapter *adapter)
1277{
1278	struct ifnet	*ifp = adapter->ifp;
1279	device_t	dev = adapter->dev;
1280
1281	INIT_DEBUGOUT("igb_init: begin");
1282
1283	IGB_CORE_LOCK_ASSERT(adapter);
1284
1285	igb_disable_intr(adapter);
1286	callout_stop(&adapter->timer);
1287
1288	/* Get the latest mac address, User can use a LAA */
1289        bcopy(IF_LLADDR(adapter->ifp), adapter->hw.mac.addr,
1290              ETHER_ADDR_LEN);
1291
1292	/* Put the address into the Receive Address Array */
1293	e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);
1294
1295	igb_reset(adapter);
1296	igb_update_link_status(adapter);
1297
1298	E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);
1299
1300	/* Set hardware offload abilities */
1301	ifp->if_hwassist = 0;
1302	if (ifp->if_capenable & IFCAP_TXCSUM) {
1303		ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
1304#if __FreeBSD_version >= 800000
1305		if (adapter->hw.mac.type == e1000_82576)
1306			ifp->if_hwassist |= CSUM_SCTP;
1307#endif
1308	}
1309
1310	if (ifp->if_capenable & IFCAP_TSO4)
1311		ifp->if_hwassist |= CSUM_TSO;
1312
1313	/* Configure for OS presence */
1314	igb_init_manageability(adapter);
1315
1316	/* Prepare transmit descriptors and buffers */
1317	igb_setup_transmit_structures(adapter);
1318	igb_initialize_transmit_units(adapter);
1319
1320	/* Setup Multicast table */
1321	igb_set_multi(adapter);
1322
1323	/*
1324	** Figure out the desired mbuf pool
1325	** for doing jumbo/packetsplit
1326	*/
1327	if (adapter->max_frame_size <= 2048)
1328		adapter->rx_mbuf_sz = MCLBYTES;
1329	else if (adapter->max_frame_size <= 4096)
1330		adapter->rx_mbuf_sz = MJUMPAGESIZE;
1331	else
1332		adapter->rx_mbuf_sz = MJUM9BYTES;
1333
1334	/* Prepare receive descriptors and buffers */
1335	if (igb_setup_receive_structures(adapter)) {
1336		device_printf(dev, "Could not setup receive structures\n");
1337		return;
1338	}
1339	igb_initialize_receive_units(adapter);
1340
1341        /* Enable VLAN support */
1342	if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING)
1343		igb_setup_vlan_hw_support(adapter);
1344
1345	/* Don't lose promiscuous settings */
1346	igb_set_promisc(adapter);
1347
1348	ifp->if_drv_flags |= IFF_DRV_RUNNING;
1349	ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
1350
1351	callout_reset(&adapter->timer, hz, igb_local_timer, adapter);
1352	e1000_clear_hw_cntrs_base_generic(&adapter->hw);
1353
1354	if (adapter->msix > 1) /* Set up queue routing */
1355		igb_configure_queues(adapter);
1356
1357	/* this clears any pending interrupts */
1358	E1000_READ_REG(&adapter->hw, E1000_ICR);
1359#ifdef DEVICE_POLLING
1360	/*
1361	 * Only enable interrupts if we are not polling, make sure
1362	 * they are off otherwise.
1363	 */
1364	if (ifp->if_capenable & IFCAP_POLLING)
1365		igb_disable_intr(adapter);
1366	else
1367#endif /* DEVICE_POLLING */
1368	{
1369		igb_enable_intr(adapter);
1370		E1000_WRITE_REG(&adapter->hw, E1000_ICS, E1000_ICS_LSC);
1371	}
1372
1373	/* Set Energy Efficient Ethernet */
1374	if (adapter->hw.phy.media_type == e1000_media_type_copper)
1375		e1000_set_eee_i350(&adapter->hw);
1376}
1377
1378static void
1379igb_init(void *arg)
1380{
1381	struct adapter *adapter = arg;
1382
1383	IGB_CORE_LOCK(adapter);
1384	igb_init_locked(adapter);
1385	IGB_CORE_UNLOCK(adapter);
1386}
1387
1388
1389static void
1390igb_handle_que(void *context, int pending)
1391{
1392	struct igb_queue *que = context;
1393	struct adapter *adapter = que->adapter;
1394	struct tx_ring *txr = que->txr;
1395	struct ifnet	*ifp = adapter->ifp;
1396
1397	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1398		bool	more;
1399
1400		more = igb_rxeof(que, adapter->rx_process_limit, NULL);
1401
1402		IGB_TX_LOCK(txr);
1403		igb_txeof(txr);
1404#if __FreeBSD_version >= 800000
1405		/* Process the stack queue only if not depleted */
1406		if (((txr->queue_status & IGB_QUEUE_DEPLETED) == 0) &&
1407		    !drbr_empty(ifp, txr->br))
1408			igb_mq_start_locked(ifp, txr);
1409#else
1410		if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1411			igb_start_locked(txr, ifp);
1412#endif
1413		IGB_TX_UNLOCK(txr);
1414		/* Do we need another? */
1415		if (more) {
1416			taskqueue_enqueue(que->tq, &que->que_task);
1417			return;
1418		}
1419	}
1420
1421#ifdef DEVICE_POLLING
1422	if (ifp->if_capenable & IFCAP_POLLING)
1423		return;
1424#endif
1425	/* Reenable this interrupt */
1426	if (que->eims)
1427		E1000_WRITE_REG(&adapter->hw, E1000_EIMS, que->eims);
1428	else
1429		igb_enable_intr(adapter);
1430}
1431
1432/* Deal with link in a sleepable context */
1433static void
1434igb_handle_link(void *context, int pending)
1435{
1436	struct adapter *adapter = context;
1437
1438	IGB_CORE_LOCK(adapter);
1439	igb_handle_link_locked(adapter);
1440	IGB_CORE_UNLOCK(adapter);
1441}
1442
1443static void
1444igb_handle_link_locked(struct adapter *adapter)
1445{
1446	struct tx_ring	*txr = adapter->tx_rings;
1447	struct ifnet *ifp = adapter->ifp;
1448
1449	IGB_CORE_LOCK_ASSERT(adapter);
1450	adapter->hw.mac.get_link_status = 1;
1451	igb_update_link_status(adapter);
1452	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) && adapter->link_active) {
1453		for (int i = 0; i < adapter->num_queues; i++, txr++) {
1454			IGB_TX_LOCK(txr);
1455#if __FreeBSD_version >= 800000
1456			/* Process the stack queue only if not depleted */
1457			if (((txr->queue_status & IGB_QUEUE_DEPLETED) == 0) &&
1458			    !drbr_empty(ifp, txr->br))
1459				igb_mq_start_locked(ifp, txr);
1460#else
1461			if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1462				igb_start_locked(txr, ifp);
1463#endif
1464			IGB_TX_UNLOCK(txr);
1465		}
1466	}
1467}
1468
1469/*********************************************************************
1470 *
1471 *  MSI/Legacy Deferred
1472 *  Interrupt Service routine
1473 *
1474 *********************************************************************/
1475static int
1476igb_irq_fast(void *arg)
1477{
1478	struct adapter		*adapter = arg;
1479	struct igb_queue	*que = adapter->queues;
1480	u32			reg_icr;
1481
1482
1483	reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1484
1485	/* Hot eject?  */
1486	if (reg_icr == 0xffffffff)
1487		return FILTER_STRAY;
1488
1489	/* Definitely not our interrupt.  */
1490	if (reg_icr == 0x0)
1491		return FILTER_STRAY;
1492
1493	if ((reg_icr & E1000_ICR_INT_ASSERTED) == 0)
1494		return FILTER_STRAY;
1495
1496	/*
1497	 * Mask interrupts until the taskqueue is finished running.  This is
1498	 * cheap, just assume that it is needed.  This also works around the
1499	 * MSI message reordering errata on certain systems.
1500	 */
1501	igb_disable_intr(adapter);
1502	taskqueue_enqueue(que->tq, &que->que_task);
1503
1504	/* Link status change */
1505	if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC))
1506		taskqueue_enqueue(que->tq, &adapter->link_task);
1507
1508	if (reg_icr & E1000_ICR_RXO)
1509		adapter->rx_overruns++;
1510	return FILTER_HANDLED;
1511}
1512
1513#ifdef DEVICE_POLLING
1514#if __FreeBSD_version >= 800000
1515#define POLL_RETURN_COUNT(a) (a)
1516static int
1517#else
1518#define POLL_RETURN_COUNT(a)
1519static void
1520#endif
1521igb_poll(struct ifnet *ifp, enum poll_cmd cmd, int count)
1522{
1523	struct adapter		*adapter = ifp->if_softc;
1524	struct igb_queue	*que;
1525	struct tx_ring		*txr;
1526	u32			reg_icr, rx_done = 0;
1527	u32			loop = IGB_MAX_LOOP;
1528	bool			more;
1529
1530	IGB_CORE_LOCK(adapter);
1531	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
1532		IGB_CORE_UNLOCK(adapter);
1533		return POLL_RETURN_COUNT(rx_done);
1534	}
1535
1536	if (cmd == POLL_AND_CHECK_STATUS) {
1537		reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1538		/* Link status change */
1539		if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC))
1540			igb_handle_link_locked(adapter);
1541
1542		if (reg_icr & E1000_ICR_RXO)
1543			adapter->rx_overruns++;
1544	}
1545	IGB_CORE_UNLOCK(adapter);
1546
1547	for (int i = 0; i < adapter->num_queues; i++) {
1548		que = &adapter->queues[i];
1549		txr = que->txr;
1550
1551		igb_rxeof(que, count, &rx_done);
1552
1553		IGB_TX_LOCK(txr);
1554		do {
1555			more = igb_txeof(txr);
1556		} while (loop-- && more);
1557#if __FreeBSD_version >= 800000
1558		if (!drbr_empty(ifp, txr->br))
1559			igb_mq_start_locked(ifp, txr);
1560#else
1561		if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1562			igb_start_locked(txr, ifp);
1563#endif
1564		IGB_TX_UNLOCK(txr);
1565	}
1566
1567	return POLL_RETURN_COUNT(rx_done);
1568}
1569#endif /* DEVICE_POLLING */
1570
1571/*********************************************************************
1572 *
1573 *  MSIX Que Interrupt Service routine
1574 *
1575 **********************************************************************/
1576static void
1577igb_msix_que(void *arg)
1578{
1579	struct igb_queue *que = arg;
1580	struct adapter *adapter = que->adapter;
1581	struct ifnet   *ifp = adapter->ifp;
1582	struct tx_ring *txr = que->txr;
1583	struct rx_ring *rxr = que->rxr;
1584	u32		newitr = 0;
1585	bool		more_rx;
1586
1587	E1000_WRITE_REG(&adapter->hw, E1000_EIMC, que->eims);
1588	++que->irqs;
1589
1590	IGB_TX_LOCK(txr);
1591	igb_txeof(txr);
1592#if __FreeBSD_version >= 800000
1593	/* Process the stack queue only if not depleted */
1594	if (((txr->queue_status & IGB_QUEUE_DEPLETED) == 0) &&
1595	    !drbr_empty(ifp, txr->br))
1596		igb_mq_start_locked(ifp, txr);
1597#else
1598	if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1599		igb_start_locked(txr, ifp);
1600#endif
1601	IGB_TX_UNLOCK(txr);
1602
1603	more_rx = igb_rxeof(que, adapter->rx_process_limit, NULL);
1604
1605	if (adapter->enable_aim == FALSE)
1606		goto no_calc;
1607	/*
1608	** Do Adaptive Interrupt Moderation:
1609        **  - Write out last calculated setting
1610	**  - Calculate based on average size over
1611	**    the last interval.
1612	*/
1613        if (que->eitr_setting)
1614                E1000_WRITE_REG(&adapter->hw,
1615                    E1000_EITR(que->msix), que->eitr_setting);
1616
1617        que->eitr_setting = 0;
1618
1619        /* Idle, do nothing */
1620        if ((txr->bytes == 0) && (rxr->bytes == 0))
1621                goto no_calc;
1622
1623        /* Use half the default if sub-gig */
1624        if (adapter->link_speed != 1000)
1625                newitr = IGB_DEFAULT_ITR / 2;
1626        else {
1627		if ((txr->bytes) && (txr->packets))
1628                	newitr = txr->bytes/txr->packets;
1629		if ((rxr->bytes) && (rxr->packets))
1630			newitr = max(newitr,
1631			    (rxr->bytes / rxr->packets));
1632                newitr += 24; /* account for hardware frame, crc */
1633		/* set an upper boundary */
1634		newitr = min(newitr, 3000);
1635		/* Be nice to the mid range */
1636                if ((newitr > 300) && (newitr < 1200))
1637                        newitr = (newitr / 3);
1638                else
1639                        newitr = (newitr / 2);
1640        }
1641        newitr &= 0x7FFC;  /* Mask invalid bits */
1642        if (adapter->hw.mac.type == e1000_82575)
1643                newitr |= newitr << 16;
1644        else
1645                newitr |= E1000_EITR_CNT_IGNR;
1646
1647        /* save for next interrupt */
1648        que->eitr_setting = newitr;
1649
1650        /* Reset state */
1651        txr->bytes = 0;
1652        txr->packets = 0;
1653        rxr->bytes = 0;
1654        rxr->packets = 0;
1655
1656no_calc:
1657	/* Schedule a clean task if needed*/
1658	if (more_rx)
1659		taskqueue_enqueue(que->tq, &que->que_task);
1660	else
1661		/* Reenable this interrupt */
1662		E1000_WRITE_REG(&adapter->hw, E1000_EIMS, que->eims);
1663	return;
1664}
1665
1666
1667/*********************************************************************
1668 *
1669 *  MSIX Link Interrupt Service routine
1670 *
1671 **********************************************************************/
1672
1673static void
1674igb_msix_link(void *arg)
1675{
1676	struct adapter	*adapter = arg;
1677	u32       	icr;
1678
1679	++adapter->link_irq;
1680	icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1681	if (!(icr & E1000_ICR_LSC))
1682		goto spurious;
1683	igb_handle_link(adapter, 0);
1684
1685spurious:
1686	/* Rearm */
1687	E1000_WRITE_REG(&adapter->hw, E1000_IMS, E1000_IMS_LSC);
1688	E1000_WRITE_REG(&adapter->hw, E1000_EIMS, adapter->link_mask);
1689	return;
1690}
1691
1692
1693/*********************************************************************
1694 *
1695 *  Media Ioctl callback
1696 *
1697 *  This routine is called whenever the user queries the status of
1698 *  the interface using ifconfig.
1699 *
1700 **********************************************************************/
1701static void
1702igb_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
1703{
1704	struct adapter *adapter = ifp->if_softc;
1705	u_char fiber_type = IFM_1000_SX;
1706
1707	INIT_DEBUGOUT("igb_media_status: begin");
1708
1709	IGB_CORE_LOCK(adapter);
1710	igb_update_link_status(adapter);
1711
1712	ifmr->ifm_status = IFM_AVALID;
1713	ifmr->ifm_active = IFM_ETHER;
1714
1715	if (!adapter->link_active) {
1716		IGB_CORE_UNLOCK(adapter);
1717		return;
1718	}
1719
1720	ifmr->ifm_status |= IFM_ACTIVE;
1721
1722	if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
1723	    (adapter->hw.phy.media_type == e1000_media_type_internal_serdes))
1724		ifmr->ifm_active |= fiber_type | IFM_FDX;
1725	else {
1726		switch (adapter->link_speed) {
1727		case 10:
1728			ifmr->ifm_active |= IFM_10_T;
1729			break;
1730		case 100:
1731			ifmr->ifm_active |= IFM_100_TX;
1732			break;
1733		case 1000:
1734			ifmr->ifm_active |= IFM_1000_T;
1735			break;
1736		}
1737		if (adapter->link_duplex == FULL_DUPLEX)
1738			ifmr->ifm_active |= IFM_FDX;
1739		else
1740			ifmr->ifm_active |= IFM_HDX;
1741	}
1742	IGB_CORE_UNLOCK(adapter);
1743}
1744
1745/*********************************************************************
1746 *
1747 *  Media Ioctl callback
1748 *
1749 *  This routine is called when the user changes speed/duplex using
1750 *  media/mediaopt option with ifconfig.
1751 *
1752 **********************************************************************/
1753static int
1754igb_media_change(struct ifnet *ifp)
1755{
1756	struct adapter *adapter = ifp->if_softc;
1757	struct ifmedia  *ifm = &adapter->media;
1758
1759	INIT_DEBUGOUT("igb_media_change: begin");
1760
1761	if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
1762		return (EINVAL);
1763
1764	IGB_CORE_LOCK(adapter);
1765	switch (IFM_SUBTYPE(ifm->ifm_media)) {
1766	case IFM_AUTO:
1767		adapter->hw.mac.autoneg = DO_AUTO_NEG;
1768		adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
1769		break;
1770	case IFM_1000_LX:
1771	case IFM_1000_SX:
1772	case IFM_1000_T:
1773		adapter->hw.mac.autoneg = DO_AUTO_NEG;
1774		adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
1775		break;
1776	case IFM_100_TX:
1777		adapter->hw.mac.autoneg = FALSE;
1778		adapter->hw.phy.autoneg_advertised = 0;
1779		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1780			adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_FULL;
1781		else
1782			adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_HALF;
1783		break;
1784	case IFM_10_T:
1785		adapter->hw.mac.autoneg = FALSE;
1786		adapter->hw.phy.autoneg_advertised = 0;
1787		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1788			adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_FULL;
1789		else
1790			adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_HALF;
1791		break;
1792	default:
1793		device_printf(adapter->dev, "Unsupported media type\n");
1794	}
1795
1796	igb_init_locked(adapter);
1797	IGB_CORE_UNLOCK(adapter);
1798
1799	return (0);
1800}
1801
1802
1803/*********************************************************************
1804 *
1805 *  This routine maps the mbufs to Advanced TX descriptors.
1806 *
1807 **********************************************************************/
1808static int
1809igb_xmit(struct tx_ring *txr, struct mbuf **m_headp)
1810{
1811	struct adapter		*adapter = txr->adapter;
1812	bus_dma_segment_t	segs[IGB_MAX_SCATTER];
1813	bus_dmamap_t		map;
1814	struct igb_tx_buffer	*tx_buffer, *tx_buffer_mapped;
1815	union e1000_adv_tx_desc	*txd = NULL;
1816	struct mbuf		*m_head = *m_headp;
1817	struct ether_vlan_header *eh = NULL;
1818	struct ip		*ip = NULL;
1819	struct tcphdr		*th = NULL;
1820	u32			hdrlen, cmd_type_len, olinfo_status = 0;
1821	int			ehdrlen, poff;
1822	int			nsegs, i, first, last = 0;
1823	int			error, do_tso, remap = 1;
1824
1825	/* Set basic descriptor constants */
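	/*
	 * DTYP_DATA marks an advanced data descriptor, IFCS has the
	 * hardware insert the Ethernet FCS, and DEXT selects the
	 * extended (advanced) descriptor format; VLE below requests
	 * insertion of the VLAN tag carried in the mbuf.
	 */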
1826	cmd_type_len = E1000_ADVTXD_DTYP_DATA;
1827	cmd_type_len |= E1000_ADVTXD_DCMD_IFCS | E1000_ADVTXD_DCMD_DEXT;
1828	if (m_head->m_flags & M_VLANTAG)
1829		cmd_type_len |= E1000_ADVTXD_DCMD_VLE;
1830
1831retry:
1832	m_head = *m_headp;
1833	do_tso = ((m_head->m_pkthdr.csum_flags & CSUM_TSO) != 0);
1834	hdrlen = ehdrlen = poff = 0;
1835
1836	/*
1837	 * Intel recommends entire IP/TCP header length reside in a single
1838	 * buffer. If multiple descriptors are used to describe the IP and
1839	 * TCP header, each descriptor should describe one or more
1840	 * complete headers; descriptors referencing only parts of headers
1841	 * are not supported. If all layer headers are not coalesced into
1842	 * a single buffer, each buffer should not cross a 4KB boundary,
1843	 * or be larger than the maximum read request size.
1844	 * The controller also requires modifying the IP/TCP header to make
1845	 * TSO work, so we first get a writable mbuf chain and then coalesce
1846	 * the ethernet/IP/TCP header into a single buffer to meet the
1847	 * controller's requirement. This also simplifies IP/TCP/UDP checksum
1848	 * offloading, which has similar restrictions.
1849	 */
1850	if (do_tso || m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD) {
1851		if (do_tso || (m_head->m_next != NULL &&
1852		    m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD)) {
1853			if (M_WRITABLE(*m_headp) == 0) {
1854				m_head = m_dup(*m_headp, M_NOWAIT);
1855				m_freem(*m_headp);
1856				if (m_head == NULL) {
1857					*m_headp = NULL;
1858					return (ENOBUFS);
1859				}
1860				*m_headp = m_head;
1861			}
1862		}
1863		/*
1864		 * Assume IPv4; we don't have TSO/checksum offload support
1865		 * for IPv6 yet.
1866		 */
1867		ehdrlen = sizeof(struct ether_header);
1868		m_head = m_pullup(m_head, ehdrlen);
1869		if (m_head == NULL) {
1870			*m_headp = NULL;
1871			return (ENOBUFS);
1872		}
1873		eh = mtod(m_head, struct ether_vlan_header *);
1874		if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
1875			ehdrlen = sizeof(struct ether_vlan_header);
1876			m_head = m_pullup(m_head, ehdrlen);
1877			if (m_head == NULL) {
1878				*m_headp = NULL;
1879				return (ENOBUFS);
1880			}
1881		}
1882		m_head = m_pullup(m_head, ehdrlen + sizeof(struct ip));
1883		if (m_head == NULL) {
1884			*m_headp = NULL;
1885			return (ENOBUFS);
1886		}
1887		ip = (struct ip *)(mtod(m_head, char *) + ehdrlen);
1888		poff = ehdrlen + (ip->ip_hl << 2);
1889		if (do_tso) {
1890			m_head = m_pullup(m_head, poff + sizeof(struct tcphdr));
1891			if (m_head == NULL) {
1892				*m_headp = NULL;
1893				return (ENOBUFS);
1894			}
1895			/*
1896			 * The TCP pseudo header checksum used for TSO must not
1897			 * include the TCP payload length, so the driver
1898			 * recomputes it here to the value the hardware expects,
1899			 * per Microsoft's Large Send specification.
1900			 */
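			/*
			 * Note: in_pseudo() is given only the addresses and
			 * protocol (a zero TCP length), so the seeded th_sum
			 * excludes the payload length as required for TSO.
			 */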
1901			th = (struct tcphdr *)(mtod(m_head, char *) + poff);
1902			th->th_sum = in_pseudo(ip->ip_src.s_addr,
1903			    ip->ip_dst.s_addr, htons(IPPROTO_TCP));
1904			/* Keep track of the full header length */
1905			hdrlen = poff + (th->th_off << 2);
1906		} else if (m_head->m_pkthdr.csum_flags & CSUM_TCP) {
1907			m_head = m_pullup(m_head, poff + sizeof(struct tcphdr));
1908			if (m_head == NULL) {
1909				*m_headp = NULL;
1910				return (ENOBUFS);
1911			}
1912			th = (struct tcphdr *)(mtod(m_head, char *) + poff);
1913			m_head = m_pullup(m_head, poff + (th->th_off << 2));
1914			if (m_head == NULL) {
1915				*m_headp = NULL;
1916				return (ENOBUFS);
1917			}
1918			ip = (struct ip *)(mtod(m_head, char *) + ehdrlen);
1919			th = (struct tcphdr *)(mtod(m_head, char *) + poff);
1920		} else if (m_head->m_pkthdr.csum_flags & CSUM_UDP) {
1921			m_head = m_pullup(m_head, poff + sizeof(struct udphdr));
1922			if (m_head == NULL) {
1923				*m_headp = NULL;
1924				return (ENOBUFS);
1925			}
1926			ip = (struct ip *)(mtod(m_head, char *) + ehdrlen);
1927		}
1928		*m_headp = m_head;
1929	}
1930
1931	/*
1932	 * Map the packet for DMA
1933	 *
1934	 * Capture the first descriptor index; this
1935	 * descriptor will record the index of the EOP,
1936	 * which is the only one that now gets a DONE
1937	 * bit writeback.
1938	 */
1939	first = txr->next_avail_desc;
1940	tx_buffer = &txr->tx_buffers[first];
1941	tx_buffer_mapped = tx_buffer;
1942	map = tx_buffer->map;
1943
1944	error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
1945	    *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
1946
1947	/*
1948	 * There are two types of errors we can (try) to handle:
1949	 * - EFBIG means the mbuf chain was too long and bus_dma ran
1950	 *   out of segments.  Defragment the mbuf chain and try again.
1951	 * - ENOMEM means bus_dma could not obtain enough bounce buffers
1952	 *   at this point in time.  Defer sending and try again later.
1953	 * All other errors, in particular EINVAL, are fatal and prevent the
1954	 * mbuf chain from ever going through.  Drop it and report error.
1955	 */
1956	if (error == EFBIG && remap) {
1957		struct mbuf *m;
1958
1959		m = m_defrag(*m_headp, M_NOWAIT);
1960		if (m == NULL) {
1961			adapter->mbuf_defrag_failed++;
1962			m_freem(*m_headp);
1963			*m_headp = NULL;
1964			return (ENOBUFS);
1965		}
1966		*m_headp = m;
1967
1968		/* Try it again, but only once */
1969		remap = 0;
1970		goto retry;
1971	} else if (error == ENOMEM) {
1972		adapter->no_tx_dma_setup++;
1973		return (error);
1974	} else if (error != 0) {
1975		adapter->no_tx_dma_setup++;
1976		m_freem(*m_headp);
1977		*m_headp = NULL;
1978		return (error);
1979	}
1980
1981	/*
1982	** Make sure we don't overrun the ring;
1983	** we need nsegs descriptors plus one for
1984	** the context descriptor used for the
1985	** offloads.
1986	*/
1987        if ((nsegs + 1) > (txr->tx_avail - 2)) {
1988                txr->no_desc_avail++;
1989		bus_dmamap_unload(txr->txtag, map);
1990		return (ENOBUFS);
1991        }
1992	m_head = *m_headp;
1993
1994	/* Do hardware assists:
1995         * Set up the context descriptor, used
1996         * when any hardware offload is done.
1997         * This includes CSUM, VLAN, and TSO.
1998         * It will use the first descriptor.
1999         */
2000
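	/*
	 * In olinfo_status the POPTS field starts at bit 8: IXSM asks the
	 * hardware to insert the IP checksum and TXSM the TCP/UDP checksum.
	 */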
2001	if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
2002		if (igb_tso_setup(txr, m_head, ehdrlen, ip, th)) {
2003			cmd_type_len |= E1000_ADVTXD_DCMD_TSE;
2004			olinfo_status |= E1000_TXD_POPTS_IXSM << 8;
2005			olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
2006		} else
2007			return (ENXIO);
2008	} else if (igb_tx_ctx_setup(txr, m_head))
2009			olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
2010
2011	/* Calculate payload length */
2012	olinfo_status |= ((m_head->m_pkthdr.len - hdrlen)
2013	    << E1000_ADVTXD_PAYLEN_SHIFT);
2014
2015	/* 82575 needs the queue index added */
2016	if (adapter->hw.mac.type == e1000_82575)
2017		olinfo_status |= txr->me << 4;
2018
2019	/* Set up our transmit descriptors */
2020	i = txr->next_avail_desc;
2021	for (int j = 0; j < nsegs; j++) {
2022		bus_size_t seg_len;
2023		bus_addr_t seg_addr;
2024
2025		tx_buffer = &txr->tx_buffers[i];
2026		txd = (union e1000_adv_tx_desc *)&txr->tx_base[i];
2027		seg_addr = segs[j].ds_addr;
2028		seg_len  = segs[j].ds_len;
2029
2030		txd->read.buffer_addr = htole64(seg_addr);
2031		txd->read.cmd_type_len = htole32(cmd_type_len | seg_len);
2032		txd->read.olinfo_status = htole32(olinfo_status);
2033		last = i;
2034		if (++i == adapter->num_tx_desc)
2035			i = 0;
2036		tx_buffer->m_head = NULL;
2037		tx_buffer->next_eop = -1;
2038	}
2039
2040	txr->next_avail_desc = i;
2041	txr->tx_avail -= nsegs;
2042        tx_buffer->m_head = m_head;
2043
2044	/*
2045	** Here we swap the map so the last descriptor,
2046	** which gets the completion interrupt has the
2047	** real map, and the first descriptor gets the
2048	** unused map from this descriptor.
2049	*/
2050	tx_buffer_mapped->map = tx_buffer->map;
2051	tx_buffer->map = map;
2052        bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);
2053
2054        /*
2055         * Last Descriptor of Packet
2056	 * needs End Of Packet (EOP)
2057	 * and Report Status (RS)
2058         */
2059        txd->read.cmd_type_len |=
2060	    htole32(E1000_ADVTXD_DCMD_EOP | E1000_ADVTXD_DCMD_RS);
2061	/*
2062	 * Keep track in the first buffer which
2063	 * descriptor will be written back
2064	 */
2065	tx_buffer = &txr->tx_buffers[first];
2066	tx_buffer->next_eop = last;
2067	/* Update the watchdog time early and often */
2068	txr->watchdog_time = ticks;
2069
2070	/*
2071	 * Advance the Transmit Descriptor Tail (TDT), this tells the E1000
2072	 * that this frame is available to transmit.
2073	 */
2074	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
2075	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
2076	E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), i);
2077	++txr->tx_packets;
2078
2079	return (0);
2080}
2081static void
2082igb_set_promisc(struct adapter *adapter)
2083{
2084	struct ifnet	*ifp = adapter->ifp;
2085	struct e1000_hw *hw = &adapter->hw;
2086	u32		reg;
2087
2088	if (adapter->vf_ifp) {
2089		e1000_promisc_set_vf(hw, e1000_promisc_enabled);
2090		return;
2091	}
2092
2093	reg = E1000_READ_REG(hw, E1000_RCTL);
2094	if (ifp->if_flags & IFF_PROMISC) {
2095		reg |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
2096		E1000_WRITE_REG(hw, E1000_RCTL, reg);
2097	} else if (ifp->if_flags & IFF_ALLMULTI) {
2098		reg |= E1000_RCTL_MPE;
2099		reg &= ~E1000_RCTL_UPE;
2100		E1000_WRITE_REG(hw, E1000_RCTL, reg);
2101	}
2102}
2103
2104static void
2105igb_disable_promisc(struct adapter *adapter)
2106{
2107	struct e1000_hw *hw = &adapter->hw;
2108	u32		reg;
2109
2110	if (adapter->vf_ifp) {
2111		e1000_promisc_set_vf(hw, e1000_promisc_disabled);
2112		return;
2113	}
2114	reg = E1000_READ_REG(hw, E1000_RCTL);
2115	reg &=  (~E1000_RCTL_UPE);
2116	reg &=  (~E1000_RCTL_MPE);
2117	E1000_WRITE_REG(hw, E1000_RCTL, reg);
2118}
2119
2120
2121/*********************************************************************
2122 *  Multicast Update
2123 *
2124 *  This routine is called whenever the multicast address list is updated.
2125 *
2126 **********************************************************************/
2127
2128static void
2129igb_set_multi(struct adapter *adapter)
2130{
2131	struct ifnet	*ifp = adapter->ifp;
2132	struct ifmultiaddr *ifma;
2133	u32 reg_rctl = 0;
2134	u8  *mta;
2135
2136	int mcnt = 0;
2137
2138	IOCTL_DEBUGOUT("igb_set_multi: begin");
2139
2140	mta = adapter->mta;
2141	bzero(mta, sizeof(uint8_t) * ETH_ADDR_LEN *
2142	    MAX_NUM_MULTICAST_ADDRESSES);
2143
2144#if __FreeBSD_version < 800000
2145	IF_ADDR_LOCK(ifp);
2146#else
2147	if_maddr_rlock(ifp);
2148#endif
2149	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
2150		if (ifma->ifma_addr->sa_family != AF_LINK)
2151			continue;
2152
2153		if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
2154			break;
2155
2156		bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
2157		    &mta[mcnt * ETH_ADDR_LEN], ETH_ADDR_LEN);
2158		mcnt++;
2159	}
2160#if __FreeBSD_version < 800000
2161	IF_ADDR_UNLOCK(ifp);
2162#else
2163	if_maddr_runlock(ifp);
2164#endif
2165
2166	if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) {
2167		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2168		reg_rctl |= E1000_RCTL_MPE;
2169		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2170	} else
2171		e1000_update_mc_addr_list(&adapter->hw, mta, mcnt);
2172}
2173
2174
2175/*********************************************************************
2176 *  Timer routine:
2177 *  	This routine checks for link status,
2178 *	updates statistics, and does the watchdog.
2179 *
2180 **********************************************************************/
2181
2182static void
2183igb_local_timer(void *arg)
2184{
2185	struct adapter		*adapter = arg;
2186	device_t		dev = adapter->dev;
2187	struct ifnet		*ifp = adapter->ifp;
2188	struct tx_ring		*txr = adapter->tx_rings;
2189	struct igb_queue	*que = adapter->queues;
2190	int			hung = 0, busy = 0;
2191
2192
2193	IGB_CORE_LOCK_ASSERT(adapter);
2194
2195	igb_update_link_status(adapter);
2196	igb_update_stats_counters(adapter);
2197
2198        /*
2199        ** Check the TX queues status
2200	**	- central locked handling of OACTIVE
2201	**	- watchdog only if all queues show hung
2202        */
2203	for (int i = 0; i < adapter->num_queues; i++, que++, txr++) {
2204		if ((txr->queue_status & IGB_QUEUE_HUNG) &&
2205		    (adapter->pause_frames == 0))
2206			++hung;
2207		if (txr->queue_status & IGB_QUEUE_DEPLETED)
2208			++busy;
2209		if ((txr->queue_status & IGB_QUEUE_IDLE) == 0)
2210			taskqueue_enqueue(que->tq, &que->que_task);
2211	}
2212	if (hung == adapter->num_queues)
2213		goto timeout;
2214	if (busy == adapter->num_queues)
2215		ifp->if_drv_flags |= IFF_DRV_OACTIVE;
2216	else if ((ifp->if_drv_flags & IFF_DRV_OACTIVE) &&
2217	    (busy < adapter->num_queues))
2218		ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
2219
2220	adapter->pause_frames = 0;
2221	callout_reset(&adapter->timer, hz, igb_local_timer, adapter);
2222#ifndef DEVICE_POLLING
2223	/* Schedule all queue interrupts - deadlock protection */
2224	E1000_WRITE_REG(&adapter->hw, E1000_EICS, adapter->que_mask);
2225#endif
2226	return;
2227
2228timeout:
2229	device_printf(adapter->dev, "Watchdog timeout -- resetting\n");
2230	device_printf(dev,"Queue(%d) tdh = %d, hw tdt = %d\n", txr->me,
2231            E1000_READ_REG(&adapter->hw, E1000_TDH(txr->me)),
2232            E1000_READ_REG(&adapter->hw, E1000_TDT(txr->me)));
2233	device_printf(dev, "TX(%d) desc avail = %d, "
2234            "Next TX to Clean = %d\n",
2235            txr->me, txr->tx_avail, txr->next_to_clean);
2236	adapter->ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2237	adapter->watchdog_events++;
2238	igb_init_locked(adapter);
2239}
2240
2241static void
2242igb_update_link_status(struct adapter *adapter)
2243{
2244	struct e1000_hw *hw = &adapter->hw;
2245	struct ifnet *ifp = adapter->ifp;
2246	device_t dev = adapter->dev;
2247	struct tx_ring *txr = adapter->tx_rings;
2248	u32 link_check, thstat, ctrl;
2249
2250	link_check = thstat = ctrl = 0;
2251
2252	/* Get the cached link value or read for real */
2253        switch (hw->phy.media_type) {
2254        case e1000_media_type_copper:
2255                if (hw->mac.get_link_status) {
2256			/* Do the work to read phy */
2257                        e1000_check_for_link(hw);
2258                        link_check = !hw->mac.get_link_status;
2259                } else
2260                        link_check = TRUE;
2261                break;
2262        case e1000_media_type_fiber:
2263                e1000_check_for_link(hw);
2264                link_check = (E1000_READ_REG(hw, E1000_STATUS) &
2265                                 E1000_STATUS_LU);
2266                break;
2267        case e1000_media_type_internal_serdes:
2268                e1000_check_for_link(hw);
2269                link_check = adapter->hw.mac.serdes_has_link;
2270                break;
2271	/* VF device is type_unknown */
2272        case e1000_media_type_unknown:
2273                e1000_check_for_link(hw);
2274		link_check = !hw->mac.get_link_status;
2275		/* Fall thru */
2276        default:
2277                break;
2278        }
2279
2280	/* Check for thermal downshift or shutdown */
2281	if (hw->mac.type == e1000_i350) {
2282		thstat = E1000_READ_REG(hw, E1000_THSTAT);
2283		ctrl = E1000_READ_REG(hw, E1000_CTRL_EXT);
2284	}
2285
2286	/* Now we check if a transition has happened */
2287	if (link_check && (adapter->link_active == 0)) {
2288		e1000_get_speed_and_duplex(&adapter->hw,
2289		    &adapter->link_speed, &adapter->link_duplex);
2290		if (bootverbose)
2291			device_printf(dev, "Link is up %d Mbps %s\n",
2292			    adapter->link_speed,
2293			    ((adapter->link_duplex == FULL_DUPLEX) ?
2294			    "Full Duplex" : "Half Duplex"));
2295		adapter->link_active = 1;
2296		ifp->if_baudrate = adapter->link_speed * 1000000;
2297		if ((ctrl & E1000_CTRL_EXT_LINK_MODE_GMII) &&
2298		    (thstat & E1000_THSTAT_LINK_THROTTLE))
2299			device_printf(dev, "Link: thermal downshift\n");
2300		/* This can sleep */
2301		if_link_state_change(ifp, LINK_STATE_UP);
2302	} else if (!link_check && (adapter->link_active == 1)) {
2303		ifp->if_baudrate = adapter->link_speed = 0;
2304		adapter->link_duplex = 0;
2305		if (bootverbose)
2306			device_printf(dev, "Link is Down\n");
2307		if ((ctrl & E1000_CTRL_EXT_LINK_MODE_GMII) &&
2308		    (thstat & E1000_THSTAT_PWR_DOWN))
2309			device_printf(dev, "Link: thermal shutdown\n");
2310		adapter->link_active = 0;
2311		/* This can sleep */
2312		if_link_state_change(ifp, LINK_STATE_DOWN);
2313		/* Reset queue state */
2314		for (int i = 0; i < adapter->num_queues; i++, txr++)
2315			txr->queue_status = IGB_QUEUE_IDLE;
2316	}
2317}
2318
2319/*********************************************************************
2320 *
2321 *  This routine disables all traffic on the adapter by issuing a
2322 *  global reset on the MAC and deallocates TX/RX buffers.
2323 *
2324 **********************************************************************/
2325
2326static void
2327igb_stop(void *arg)
2328{
2329	struct adapter	*adapter = arg;
2330	struct ifnet	*ifp = adapter->ifp;
2331	struct tx_ring *txr = adapter->tx_rings;
2332
2333	IGB_CORE_LOCK_ASSERT(adapter);
2334
2335	INIT_DEBUGOUT("igb_stop: begin");
2336
2337	igb_disable_intr(adapter);
2338
2339	callout_stop(&adapter->timer);
2340
2341	/* Tell the stack that the interface is no longer active */
2342	ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2343	ifp->if_drv_flags |= IFF_DRV_OACTIVE;
2344
2345	/* Disarm watchdog timer. */
2346	for (int i = 0; i < adapter->num_queues; i++, txr++) {
2347		IGB_TX_LOCK(txr);
2348		txr->queue_status = IGB_QUEUE_IDLE;
2349		IGB_TX_UNLOCK(txr);
2350	}
2351
2352	e1000_reset_hw(&adapter->hw);
2353	E1000_WRITE_REG(&adapter->hw, E1000_WUC, 0);
2354
2355	e1000_led_off(&adapter->hw);
2356	e1000_cleanup_led(&adapter->hw);
2357}
2358
2359
2360/*********************************************************************
2361 *
2362 *  Determine hardware revision.
2363 *
2364 **********************************************************************/
2365static void
2366igb_identify_hardware(struct adapter *adapter)
2367{
2368	device_t dev = adapter->dev;
2369
2370	/* Make sure our PCI config space has the necessary stuff set */
2371	adapter->hw.bus.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2);
2372	if (!((adapter->hw.bus.pci_cmd_word & PCIM_CMD_BUSMASTEREN) &&
2373	    (adapter->hw.bus.pci_cmd_word & PCIM_CMD_MEMEN))) {
2374		INIT_DEBUGOUT("Memory Access and/or Bus Master "
2375		    "bits were not set!\n");
2376		adapter->hw.bus.pci_cmd_word |=
2377		(PCIM_CMD_BUSMASTEREN | PCIM_CMD_MEMEN);
2378		pci_write_config(dev, PCIR_COMMAND,
2379		    adapter->hw.bus.pci_cmd_word, 2);
2380	}
2381
2382	/* Save off the information about this board */
2383	adapter->hw.vendor_id = pci_get_vendor(dev);
2384	adapter->hw.device_id = pci_get_device(dev);
2385	adapter->hw.revision_id = pci_read_config(dev, PCIR_REVID, 1);
2386	adapter->hw.subsystem_vendor_id =
2387	    pci_read_config(dev, PCIR_SUBVEND_0, 2);
2388	adapter->hw.subsystem_device_id =
2389	    pci_read_config(dev, PCIR_SUBDEV_0, 2);
2390
2391	/* Set MAC type early for PCI setup */
2392	e1000_set_mac_type(&adapter->hw);
2393
2394	/* Are we a VF device? */
2395	if ((adapter->hw.mac.type == e1000_vfadapt) ||
2396	    (adapter->hw.mac.type == e1000_vfadapt_i350))
2397		adapter->vf_ifp = 1;
2398	else
2399		adapter->vf_ifp = 0;
2400}
2401
2402static int
2403igb_allocate_pci_resources(struct adapter *adapter)
2404{
2405	device_t	dev = adapter->dev;
2406	int		rid;
2407
2408	rid = PCIR_BAR(0);
2409	adapter->pci_mem = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
2410	    &rid, RF_ACTIVE);
2411	if (adapter->pci_mem == NULL) {
2412		device_printf(dev, "Unable to allocate bus resource: memory\n");
2413		return (ENXIO);
2414	}
2415	adapter->osdep.mem_bus_space_tag =
2416	    rman_get_bustag(adapter->pci_mem);
2417	adapter->osdep.mem_bus_space_handle =
2418	    rman_get_bushandle(adapter->pci_mem);
2419	adapter->hw.hw_addr = (u8 *)&adapter->osdep.mem_bus_space_handle;
2420
2421	adapter->num_queues = 1; /* Defaults for Legacy or MSI */
2422
2423	/* This will setup either MSI/X or MSI */
2424	adapter->msix = igb_setup_msix(adapter);
2425	adapter->hw.back = &adapter->osdep;
2426
2427	return (0);
2428}
2429
2430/*********************************************************************
2431 *
2432 *  Setup the Legacy or MSI Interrupt handler
2433 *
2434 **********************************************************************/
2435static int
2436igb_allocate_legacy(struct adapter *adapter)
2437{
2438	device_t		dev = adapter->dev;
2439	struct igb_queue	*que = adapter->queues;
2440	struct tx_ring		*txr = adapter->tx_rings;
2441	int			error, rid = 0;
2442
2443	/* Turn off all interrupts */
2444	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2445
2446	/* MSI RID is 1 */
2447	if (adapter->msix == 1)
2448		rid = 1;
2449
2450	/* We allocate a single interrupt resource */
2451	adapter->res = bus_alloc_resource_any(dev,
2452	    SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2453	if (adapter->res == NULL) {
2454		device_printf(dev, "Unable to allocate bus resource: "
2455		    "interrupt\n");
2456		return (ENXIO);
2457	}
2458
2459#if __FreeBSD_version >= 800000
2460	TASK_INIT(&txr->txq_task, 0, igb_deferred_mq_start, txr);
2461#endif
2462
2463	/*
2464	 * Try allocating a fast interrupt and the associated deferred
2465	 * processing contexts.
2466	 */
2467	TASK_INIT(&que->que_task, 0, igb_handle_que, que);
2468	/* Make tasklet for deferred link handling */
2469	TASK_INIT(&adapter->link_task, 0, igb_handle_link, adapter);
2470	que->tq = taskqueue_create_fast("igb_taskq", M_NOWAIT,
2471	    taskqueue_thread_enqueue, &que->tq);
2472	taskqueue_start_threads(&que->tq, 1, PI_NET, "%s taskq",
2473	    device_get_nameunit(adapter->dev));
2474	if ((error = bus_setup_intr(dev, adapter->res,
2475	    INTR_TYPE_NET | INTR_MPSAFE, igb_irq_fast, NULL,
2476	    adapter, &adapter->tag)) != 0) {
2477		device_printf(dev, "Failed to register fast interrupt "
2478			    "handler: %d\n", error);
2479		taskqueue_free(que->tq);
2480		que->tq = NULL;
2481		return (error);
2482	}
2483
2484	return (0);
2485}
2486
2487
2488/*********************************************************************
2489 *
2490 *  Setup the MSIX Queue Interrupt handlers:
2491 *
2492 **********************************************************************/
2493static int
2494igb_allocate_msix(struct adapter *adapter)
2495{
2496	device_t		dev = adapter->dev;
2497	struct igb_queue	*que = adapter->queues;
2498	int			error, rid, vector = 0;
2499
2500	/* Be sure to start with all interrupts disabled */
2501	E1000_WRITE_REG(&adapter->hw, E1000_IMC, ~0);
2502	E1000_WRITE_FLUSH(&adapter->hw);
2503
2504	for (int i = 0; i < adapter->num_queues; i++, vector++, que++) {
2505		rid = vector + 1;
2506		que->res = bus_alloc_resource_any(dev,
2507		    SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2508		if (que->res == NULL) {
2509			device_printf(dev,
2510			    "Unable to allocate bus resource: "
2511			    "MSIX Queue Interrupt\n");
2512			return (ENXIO);
2513		}
2514		error = bus_setup_intr(dev, que->res,
2515	    	    INTR_TYPE_NET | INTR_MPSAFE, NULL,
2516		    igb_msix_que, que, &que->tag);
2517		if (error) {
2518			que->res = NULL;
2519			device_printf(dev, "Failed to register Queue handler\n");
2520			return (error);
2521		}
2522#if __FreeBSD_version >= 800504
2523		bus_describe_intr(dev, que->res, que->tag, "que %d", i);
2524#endif
2525		que->msix = vector;
2526		if (adapter->hw.mac.type == e1000_82575)
2527			que->eims = E1000_EICR_TX_QUEUE0 << i;
2528		else
2529			que->eims = 1 << vector;
2530		/*
2531		** Bind the msix vector, and thus the
2532		** rings to the corresponding cpu.
2533		*/
2534		if (adapter->num_queues > 1) {
2535			if (igb_last_bind_cpu < 0)
2536				igb_last_bind_cpu = CPU_FIRST();
2537			bus_bind_intr(dev, que->res, igb_last_bind_cpu);
2538			device_printf(dev,
2539				"Bound queue %d to cpu %d\n",
2540				i, igb_last_bind_cpu);
2541			igb_last_bind_cpu = CPU_NEXT(igb_last_bind_cpu);
2542		}
2543#if __FreeBSD_version >= 800000
2544		TASK_INIT(&que->txr->txq_task, 0, igb_deferred_mq_start,
2545		    que->txr);
2546#endif
2547		/* Make tasklet for deferred handling */
2548		TASK_INIT(&que->que_task, 0, igb_handle_que, que);
2549		que->tq = taskqueue_create("igb_que", M_NOWAIT,
2550		    taskqueue_thread_enqueue, &que->tq);
2551		taskqueue_start_threads(&que->tq, 1, PI_NET, "%s que",
2552		    device_get_nameunit(adapter->dev));
2553	}
2554
2555	/* And Link */
2556	rid = vector + 1;
2557	adapter->res = bus_alloc_resource_any(dev,
2558	    SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2559	if (adapter->res == NULL) {
2560		device_printf(dev,
2561		    "Unable to allocate bus resource: "
2562		    "MSIX Link Interrupt\n");
2563		return (ENXIO);
2564	}
2565	if ((error = bus_setup_intr(dev, adapter->res,
2566	    INTR_TYPE_NET | INTR_MPSAFE, NULL,
2567	    igb_msix_link, adapter, &adapter->tag)) != 0) {
2568		device_printf(dev, "Failed to register Link handler\n");
2569		return (error);
2570	}
2571#if __FreeBSD_version >= 800504
2572	bus_describe_intr(dev, adapter->res, adapter->tag, "link");
2573#endif
2574	adapter->linkvec = vector;
2575
2576	return (0);
2577}
2578
2579
2580static void
2581igb_configure_queues(struct adapter *adapter)
2582{
2583	struct	e1000_hw	*hw = &adapter->hw;
2584	struct	igb_queue	*que;
2585	u32			tmp, ivar = 0, newitr = 0;
2586
2587	/* First turn on RSS capability */
2588	if (adapter->hw.mac.type != e1000_82575)
2589		E1000_WRITE_REG(hw, E1000_GPIE,
2590		    E1000_GPIE_MSIX_MODE | E1000_GPIE_EIAME |
2591		    E1000_GPIE_PBA | E1000_GPIE_NSICR);
2592
2593	/* Turn on MSIX */
2594	switch (adapter->hw.mac.type) {
2595	case e1000_82580:
2596	case e1000_i350:
2597	case e1000_i210:
2598	case e1000_i211:
2599	case e1000_vfadapt:
2600	case e1000_vfadapt_i350:
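		/*
		 * Each 32-bit IVAR register packs entries for two queues:
		 * RX vectors go in bytes 0 and 2, TX vectors in bytes 1 and
		 * 3, and E1000_IVAR_VALID arms an entry; hence index = i >> 1
		 * and the odd/even byte selection below.
		 */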
2601		/* RX entries */
2602		for (int i = 0; i < adapter->num_queues; i++) {
2603			u32 index = i >> 1;
2604			ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2605			que = &adapter->queues[i];
2606			if (i & 1) {
2607				ivar &= 0xFF00FFFF;
2608				ivar |= (que->msix | E1000_IVAR_VALID) << 16;
2609			} else {
2610				ivar &= 0xFFFFFF00;
2611				ivar |= que->msix | E1000_IVAR_VALID;
2612			}
2613			E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2614		}
2615		/* TX entries */
2616		for (int i = 0; i < adapter->num_queues; i++) {
2617			u32 index = i >> 1;
2618			ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2619			que = &adapter->queues[i];
2620			if (i & 1) {
2621				ivar &= 0x00FFFFFF;
2622				ivar |= (que->msix | E1000_IVAR_VALID) << 24;
2623			} else {
2624				ivar &= 0xFFFF00FF;
2625				ivar |= (que->msix | E1000_IVAR_VALID) << 8;
2626			}
2627			E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2628			adapter->que_mask |= que->eims;
2629		}
2630
2631		/* And for the link interrupt */
2632		ivar = (adapter->linkvec | E1000_IVAR_VALID) << 8;
2633		adapter->link_mask = 1 << adapter->linkvec;
2634		E1000_WRITE_REG(hw, E1000_IVAR_MISC, ivar);
2635		break;
2636	case e1000_82576:
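		/*
		 * On the 82576 the IVAR index wraps at 8 (i & 0x7): queues
		 * 0-7 use the low two bytes of each register (RX byte 0,
		 * TX byte 1) and queues 8-15 use the upper two bytes.
		 */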
2637		/* RX entries */
2638		for (int i = 0; i < adapter->num_queues; i++) {
2639			u32 index = i & 0x7; /* Each IVAR has two entries */
2640			ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2641			que = &adapter->queues[i];
2642			if (i < 8) {
2643				ivar &= 0xFFFFFF00;
2644				ivar |= que->msix | E1000_IVAR_VALID;
2645			} else {
2646				ivar &= 0xFF00FFFF;
2647				ivar |= (que->msix | E1000_IVAR_VALID) << 16;
2648			}
2649			E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2650			adapter->que_mask |= que->eims;
2651		}
2652		/* TX entries */
2653		for (int i = 0; i < adapter->num_queues; i++) {
2654			u32 index = i & 0x7; /* Each IVAR has two entries */
2655			ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2656			que = &adapter->queues[i];
2657			if (i < 8) {
2658				ivar &= 0xFFFF00FF;
2659				ivar |= (que->msix | E1000_IVAR_VALID) << 8;
2660			} else {
2661				ivar &= 0x00FFFFFF;
2662				ivar |= (que->msix | E1000_IVAR_VALID) << 24;
2663			}
2664			E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2665			adapter->que_mask |= que->eims;
2666		}
2667
2668		/* And for the link interrupt */
2669		ivar = (adapter->linkvec | E1000_IVAR_VALID) << 8;
2670		adapter->link_mask = 1 << adapter->linkvec;
2671		E1000_WRITE_REG(hw, E1000_IVAR_MISC, ivar);
2672		break;
2673
2674	case e1000_82575:
2675                /* enable MSI-X support */
2676		tmp = E1000_READ_REG(hw, E1000_CTRL_EXT);
2677                tmp |= E1000_CTRL_EXT_PBA_CLR;
2678                /* Auto-Mask interrupts upon ICR read. */
2679                tmp |= E1000_CTRL_EXT_EIAME;
2680                tmp |= E1000_CTRL_EXT_IRCA;
2681                E1000_WRITE_REG(hw, E1000_CTRL_EXT, tmp);
2682
2683		/* Queues */
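		/*
		 * On the 82575 each queue's RX and TX causes share one
		 * MSIXBM bitmap entry, so both EICR bits are OR'd into
		 * que->eims before being written.
		 */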
2684		for (int i = 0; i < adapter->num_queues; i++) {
2685			que = &adapter->queues[i];
2686			tmp = E1000_EICR_RX_QUEUE0 << i;
2687			tmp |= E1000_EICR_TX_QUEUE0 << i;
2688			que->eims = tmp;
2689			E1000_WRITE_REG_ARRAY(hw, E1000_MSIXBM(0),
2690			    i, que->eims);
2691			adapter->que_mask |= que->eims;
2692		}
2693
2694		/* Link */
2695		E1000_WRITE_REG(hw, E1000_MSIXBM(adapter->linkvec),
2696		    E1000_EIMS_OTHER);
2697		adapter->link_mask |= E1000_EIMS_OTHER;
2698	default:
2699		break;
2700	}
2701
2702	/* Set the starting interrupt rate */
2703	if (igb_max_interrupt_rate > 0)
2704		newitr = (4000000 / igb_max_interrupt_rate) & 0x7FFC;
2705
2706        if (hw->mac.type == e1000_82575)
2707                newitr |= newitr << 16;
2708        else
2709                newitr |= E1000_EITR_CNT_IGNR;
2710
2711	for (int i = 0; i < adapter->num_queues; i++) {
2712		que = &adapter->queues[i];
2713		E1000_WRITE_REG(hw, E1000_EITR(que->msix), newitr);
2714	}
2715
2716	return;
2717}
2718
2719
2720static void
2721igb_free_pci_resources(struct adapter *adapter)
2722{
2723	struct		igb_queue *que = adapter->queues;
2724	device_t	dev = adapter->dev;
2725	int		rid;
2726
2727	/*
2728	** There is a slight possibility of a failure mode
2729	** in attach that will result in entering this function
2730	** before interrupt resources have been initialized, and
2731	** in that case we do not want to execute the loops below.
2732	** We can detect this reliably by the state of the adapter's
2733	** res pointer.
2734	*/
2735	if (adapter->res == NULL)
2736		goto mem;
2737
2738	/*
2739	 * First release all the interrupt resources:
2740	 */
2741	for (int i = 0; i < adapter->num_queues; i++, que++) {
2742		rid = que->msix + 1;
2743		if (que->tag != NULL) {
2744			bus_teardown_intr(dev, que->res, que->tag);
2745			que->tag = NULL;
2746		}
2747		if (que->res != NULL)
2748			bus_release_resource(dev,
2749			    SYS_RES_IRQ, rid, que->res);
2750	}
2751
2752	/* Clean the Legacy or Link interrupt last */
2753	if (adapter->linkvec) /* we are doing MSIX */
2754		rid = adapter->linkvec + 1;
2755	else
2756		(adapter->msix != 0) ? (rid = 1):(rid = 0);
2757
2758	que = adapter->queues;
2759	if (adapter->tag != NULL) {
2760		taskqueue_drain(que->tq, &adapter->link_task);
2761		bus_teardown_intr(dev, adapter->res, adapter->tag);
2762		adapter->tag = NULL;
2763	}
2764	if (adapter->res != NULL)
2765		bus_release_resource(dev, SYS_RES_IRQ, rid, adapter->res);
2766
2767	for (int i = 0; i < adapter->num_queues; i++, que++) {
2768		if (que->tq != NULL) {
2769#if __FreeBSD_version >= 800000
2770			taskqueue_drain(que->tq, &que->txr->txq_task);
2771#endif
2772			taskqueue_drain(que->tq, &que->que_task);
2773			taskqueue_free(que->tq);
2774		}
2775	}
2776mem:
2777	if (adapter->msix)
2778		pci_release_msi(dev);
2779
2780	if (adapter->msix_mem != NULL)
2781		bus_release_resource(dev, SYS_RES_MEMORY,
2782		    PCIR_BAR(IGB_MSIX_BAR), adapter->msix_mem);
2783
2784	if (adapter->pci_mem != NULL)
2785		bus_release_resource(dev, SYS_RES_MEMORY,
2786		    PCIR_BAR(0), adapter->pci_mem);
2787
2788}
2789
2790/*
2791 * Setup Either MSI/X or MSI
2792 */
2793static int
2794igb_setup_msix(struct adapter *adapter)
2795{
2796	device_t dev = adapter->dev;
2797	int rid, want, queues, msgs, maxqueues;
2798
2799	/* tuneable override */
2800	if (igb_enable_msix == 0)
2801		goto msi;
2802
2803	/* First try MSI/X */
2804	rid = PCIR_BAR(IGB_MSIX_BAR);
2805	adapter->msix_mem = bus_alloc_resource_any(dev,
2806	    SYS_RES_MEMORY, &rid, RF_ACTIVE);
2807       	if (!adapter->msix_mem) {
2808		/* May not be enabled */
2809		device_printf(adapter->dev,
2810		    "Unable to map MSIX table\n");
2811		goto msi;
2812	}
2813
2814	msgs = pci_msix_count(dev);
2815	if (msgs == 0) { /* system has msix disabled */
2816		bus_release_resource(dev, SYS_RES_MEMORY,
2817		    PCIR_BAR(IGB_MSIX_BAR), adapter->msix_mem);
2818		adapter->msix_mem = NULL;
2819		goto msi;
2820	}
2821
2822	/* Figure out a reasonable auto config value */
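	/* (msgs - 1 reserves one vector for the link interrupt) */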
2823	queues = (mp_ncpus > (msgs-1)) ? (msgs-1) : mp_ncpus;
2824
2825	/* Manual override */
2826	if (igb_num_queues != 0)
2827		queues = igb_num_queues;
2828
2829	/* Sanity check based on HW */
2830	switch (adapter->hw.mac.type) {
2831		case e1000_82575:
2832			maxqueues = 4;
2833			break;
2834		case e1000_82576:
2835		case e1000_82580:
2836		case e1000_i350:
2837			maxqueues = 8;
2838			break;
2839		case e1000_i210:
2840			maxqueues = 4;
2841			break;
2842		case e1000_i211:
2843			maxqueues = 2;
2844			break;
2845		default:  /* VF interfaces */
2846			maxqueues = 1;
2847			break;
2848	}
2849	if (queues > maxqueues)
2850		queues = maxqueues;
2851
2852	/*
2853	** One vector (RX/TX pair) per queue
2854	** plus an additional one for the Link interrupt
2855	*/
2856	want = queues + 1;
2857	if (msgs >= want)
2858		msgs = want;
2859	else {
2860               	device_printf(adapter->dev,
2861		    "MSIX Configuration Problem, "
2862		    "%d vectors configured, but %d queues wanted!\n",
2863		    msgs, want);
2864		return (0);
2865	}
2866	if ((msgs) && pci_alloc_msix(dev, &msgs) == 0) {
2867               	device_printf(adapter->dev,
2868		    "Using MSIX interrupts with %d vectors\n", msgs);
2869		adapter->num_queues = queues;
2870		return (msgs);
2871	}
2872msi:
2873       	msgs = pci_msi_count(dev);
2874	if (msgs == 1 && pci_alloc_msi(dev, &msgs) == 0) {
2875		device_printf(adapter->dev, "Using MSI interrupt\n");
2876		return (msgs);
2877	}
2878	return (0);
2879}
2880
2881/*********************************************************************
2882 *
2883 *  Set up a fresh starting state
2884 *
2885 **********************************************************************/
2886static void
2887igb_reset(struct adapter *adapter)
2888{
2889	device_t	dev = adapter->dev;
2890	struct e1000_hw *hw = &adapter->hw;
2891	struct e1000_fc_info *fc = &hw->fc;
2892	struct ifnet	*ifp = adapter->ifp;
2893	u32		pba = 0;
2894	u16		hwm;
2895
2896	INIT_DEBUGOUT("igb_reset: begin");
2897
2898	/* Let the firmware know the OS is in control */
2899	igb_get_hw_control(adapter);
2900
2901	/*
2902	 * Packet Buffer Allocation (PBA)
2903	 * Writing PBA sets the receive portion of the buffer;
2904	 * the remainder is used for the transmit buffer.
2905	 */
2906	switch (hw->mac.type) {
2907	case e1000_82575:
2908		pba = E1000_PBA_32K;
2909		break;
2910	case e1000_82576:
2911	case e1000_vfadapt:
2912		pba = E1000_READ_REG(hw, E1000_RXPBS);
2913		pba &= E1000_RXPBS_SIZE_MASK_82576;
2914		break;
2915	case e1000_82580:
2916	case e1000_i350:
2917	case e1000_vfadapt_i350:
2918		pba = E1000_READ_REG(hw, E1000_RXPBS);
2919		pba = e1000_rxpbs_adjust_82580(pba);
2920		break;
2921	case e1000_i210:
2922	case e1000_i211:
2923		pba = E1000_PBA_34K;
2924	default:
2925		break;
2926	}
2927
2928	/* Special needs in case of Jumbo frames */
2929	if ((hw->mac.type == e1000_82575) && (ifp->if_mtu > ETHERMTU)) {
2930		u32 tx_space, min_tx, min_rx;
2931		pba = E1000_READ_REG(hw, E1000_PBA);
2932		tx_space = pba >> 16;
2933		pba &= 0xffff;
2934		min_tx = (adapter->max_frame_size +
2935		    sizeof(struct e1000_tx_desc) - ETHERNET_FCS_SIZE) * 2;
2936		min_tx = roundup2(min_tx, 1024);
2937		min_tx >>= 10;
2938                min_rx = adapter->max_frame_size;
2939                min_rx = roundup2(min_rx, 1024);
2940                min_rx >>= 10;
2941		if (tx_space < min_tx &&
2942		    ((min_tx - tx_space) < pba)) {
2943			pba = pba - (min_tx - tx_space);
2944			/*
2945                         * if short on rx space, rx wins
2946                         * and must trump tx adjustment
2947			 */
2948                        if (pba < min_rx)
2949                                pba = min_rx;
2950		}
2951		E1000_WRITE_REG(hw, E1000_PBA, pba);
2952	}
2953
2954	INIT_DEBUGOUT1("igb_init: pba=%dK",pba);
2955
2956	/*
2957	 * These parameters control the automatic generation (Tx) and
2958	 * response (Rx) to Ethernet PAUSE frames.
2959	 * - High water mark should allow for at least two frames to be
2960	 *   received after sending an XOFF.
2961	 * - Low water mark works best when it is very near the high water mark.
2962	 *   This allows the receiver to restart by sending XON when it has
2963	 *   drained a bit.
2964	 */
2965	hwm = min(((pba << 10) * 9 / 10),
2966	    ((pba << 10) - 2 * adapter->max_frame_size));
2967
2968	if (hw->mac.type < e1000_82576) {
2969		fc->high_water = hwm & 0xFFF8;  /* 8-byte granularity */
2970		fc->low_water = fc->high_water - 8;
2971	} else {
2972		fc->high_water = hwm & 0xFFF0;  /* 16-byte granularity */
2973		fc->low_water = fc->high_water - 16;
2974	}
2975
2976	fc->pause_time = IGB_FC_PAUSE_TIME;
2977	fc->send_xon = TRUE;
2978	if (adapter->fc)
2979		fc->requested_mode = adapter->fc;
2980	else
2981		fc->requested_mode = e1000_fc_default;
2982
2983	/* Issue a global reset */
2984	e1000_reset_hw(hw);
2985	E1000_WRITE_REG(hw, E1000_WUC, 0);
2986
2987	if (e1000_init_hw(hw) < 0)
2988		device_printf(dev, "Hardware Initialization Failed\n");
2989
2990	/* Setup DMA Coalescing */
2991	if ((hw->mac.type > e1000_82580) &&
2992	    (hw->mac.type != e1000_i211)) {
2993		u32 dmac;
2994		u32 reg = ~E1000_DMACR_DMAC_EN;
2995
2996		if (adapter->dmac == 0) { /* Disabling it */
2997			E1000_WRITE_REG(hw, E1000_DMACR, reg);
2998			goto reset_out;
2999		}
3000
3001		/* Set starting thresholds */
3002		E1000_WRITE_REG(hw, E1000_DMCTXTH, 0);
3003		E1000_WRITE_REG(hw, E1000_DMCRTRH, 0);
3004
3005		hwm = 64 * pba - adapter->max_frame_size / 16;
3006		if (hwm < 64 * (pba - 6))
3007			hwm = 64 * (pba - 6);
3008		reg = E1000_READ_REG(hw, E1000_FCRTC);
3009		reg &= ~E1000_FCRTC_RTH_COAL_MASK;
3010		reg |= ((hwm << E1000_FCRTC_RTH_COAL_SHIFT)
3011		    & E1000_FCRTC_RTH_COAL_MASK);
3012		E1000_WRITE_REG(hw, E1000_FCRTC, reg);
3013
3014
3015		dmac = pba - adapter->max_frame_size / 512;
3016		if (dmac < pba - 10)
3017			dmac = pba - 10;
3018		reg = E1000_READ_REG(hw, E1000_DMACR);
3019		reg &= ~E1000_DMACR_DMACTHR_MASK;
3020		reg = ((dmac << E1000_DMACR_DMACTHR_SHIFT)
3021		    & E1000_DMACR_DMACTHR_MASK);
3022		/* transition to L0s or L1 if available */
3023		reg |= (E1000_DMACR_DMAC_EN | E1000_DMACR_DMAC_LX_MASK);
3024		/* timer = value in adapter->dmac in 32usec intervals */
3025		reg |= (adapter->dmac >> 5);
3026		E1000_WRITE_REG(hw, E1000_DMACR, reg);
3027
3028		/* Set the interval before transition */
3029		reg = E1000_READ_REG(hw, E1000_DMCTLX);
3030		reg |= 0x80000004;
3031		E1000_WRITE_REG(hw, E1000_DMCTLX, reg);
3032
3033		/* free space in tx packet buffer to wake from DMA coal */
3034		E1000_WRITE_REG(hw, E1000_DMCTXTH,
3035		    (20480 - (2 * adapter->max_frame_size)) >> 6);
3036
3037		/* make low power state decision controlled by DMA coal */
3038		reg = E1000_READ_REG(hw, E1000_PCIEMISC);
3039		reg &= ~E1000_PCIEMISC_LX_DECISION;
3040		E1000_WRITE_REG(hw, E1000_PCIEMISC, reg);
3041		device_printf(dev, "DMA Coalescing enabled\n");
3042
3043	} else if (hw->mac.type == e1000_82580) {
3044		u32 reg = E1000_READ_REG(hw, E1000_PCIEMISC);
3045		E1000_WRITE_REG(hw, E1000_DMACR, 0);
3046		E1000_WRITE_REG(hw, E1000_PCIEMISC,
3047		    reg & ~E1000_PCIEMISC_LX_DECISION);
3048	}
3049
3050reset_out:
3051	E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);
3052	e1000_get_phy_info(hw);
3053	e1000_check_for_link(hw);
3054	return;
3055}
3056
3057/*********************************************************************
3058 *
3059 *  Setup networking device structure and register an interface.
3060 *
3061 **********************************************************************/
3062static int
3063igb_setup_interface(device_t dev, struct adapter *adapter)
3064{
3065	struct ifnet   *ifp;
3066
3067	INIT_DEBUGOUT("igb_setup_interface: begin");
3068
3069	ifp = adapter->ifp = if_alloc(IFT_ETHER);
3070	if (ifp == NULL) {
3071		device_printf(dev, "can not allocate ifnet structure\n");
3072		return (-1);
3073	}
3074	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
3075	ifp->if_init =  igb_init;
3076	ifp->if_softc = adapter;
3077	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
3078	ifp->if_ioctl = igb_ioctl;
3079#if __FreeBSD_version >= 800000
3080	ifp->if_transmit = igb_mq_start;
3081	ifp->if_qflush = igb_qflush;
3082#else
3083	ifp->if_start = igb_start;
3084	IFQ_SET_MAXLEN(&ifp->if_snd, adapter->num_tx_desc - 1);
3085	ifp->if_snd.ifq_drv_maxlen = adapter->num_tx_desc - 1;
3086	IFQ_SET_READY(&ifp->if_snd);
3087#endif
3088
3089	ether_ifattach(ifp, adapter->hw.mac.addr);
3090
3091	ifp->if_capabilities = ifp->if_capenable = 0;
3092
3093	ifp->if_capabilities = IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM;
3094	ifp->if_capabilities |= IFCAP_TSO4;
3095	ifp->if_capabilities |= IFCAP_JUMBO_MTU;
3096	ifp->if_capenable = ifp->if_capabilities;
3097
3098	/* Advertise LRO capability, but don't enable it by default */
3099	ifp->if_capabilities |= IFCAP_LRO;
3100
3101#ifdef DEVICE_POLLING
3102	ifp->if_capabilities |= IFCAP_POLLING;
3103#endif
3104
3105	/*
3106	 * Tell the upper layer(s) we
3107	 * support full VLAN capability.
3108	 */
3109	ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);
3110	ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING
3111			     |  IFCAP_VLAN_HWTSO
3112			     |  IFCAP_VLAN_MTU;
3113	ifp->if_capenable |= IFCAP_VLAN_HWTAGGING
3114			  |  IFCAP_VLAN_HWTSO
3115			  |  IFCAP_VLAN_MTU;
3116
3117	/*
3118	** Don't turn this on by default: if vlans are
3119	** created on another pseudo device (e.g. lagg),
3120	** vlan events are not passed through, breaking
3121	** operation, but with HW FILTER off it works. If
3122	** you use vlans directly on the igb driver you can
3123	** enable this and get full hardware tag filtering.
3124	*/
3125	ifp->if_capabilities |= IFCAP_VLAN_HWFILTER;
3126
3127	/*
3128	 * Specify the media types supported by this adapter and register
3129	 * callbacks to update media and link information
3130	 */
3131	ifmedia_init(&adapter->media, IFM_IMASK,
3132	    igb_media_change, igb_media_status);
3133	if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
3134	    (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
3135		ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_SX | IFM_FDX,
3136			    0, NULL);
3137		ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_SX, 0, NULL);
3138	} else {
3139		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T, 0, NULL);
3140		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T | IFM_FDX,
3141			    0, NULL);
3142		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX,
3143			    0, NULL);
3144		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
3145			    0, NULL);
3146		if (adapter->hw.phy.type != e1000_phy_ife) {
3147			ifmedia_add(&adapter->media,
3148				IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
3149			ifmedia_add(&adapter->media,
3150				IFM_ETHER | IFM_1000_T, 0, NULL);
3151		}
3152	}
3153	ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
3154	ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
3155	return (0);
3156}
3157
3158
3159/*
3160 * Manage DMA'able memory.
3161 */
3162static void
3163igb_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
3164{
3165	if (error)
3166		return;
3167	*(bus_addr_t *) arg = segs[0].ds_addr;
3168}
3169
3170static int
3171igb_dma_malloc(struct adapter *adapter, bus_size_t size,
3172        struct igb_dma_alloc *dma, int mapflags)
3173{
3174	int error;
3175
3176	error = bus_dma_tag_create(bus_get_dma_tag(adapter->dev), /* parent */
3177				IGB_DBA_ALIGN, 0,	/* alignment, bounds */
3178				BUS_SPACE_MAXADDR,	/* lowaddr */
3179				BUS_SPACE_MAXADDR,	/* highaddr */
3180				NULL, NULL,		/* filter, filterarg */
3181				size,			/* maxsize */
3182				1,			/* nsegments */
3183				size,			/* maxsegsize */
3184				0,			/* flags */
3185				NULL,			/* lockfunc */
3186				NULL,			/* lockarg */
3187				&dma->dma_tag);
3188	if (error) {
3189		device_printf(adapter->dev,
3190		    "%s: bus_dma_tag_create failed: %d\n",
3191		    __func__, error);
3192		goto fail_0;
3193	}
3194
3195	error = bus_dmamem_alloc(dma->dma_tag, (void**) &dma->dma_vaddr,
3196	    BUS_DMA_NOWAIT | BUS_DMA_COHERENT, &dma->dma_map);
3197	if (error) {
3198		device_printf(adapter->dev,
3199		    "%s: bus_dmamem_alloc(%ju) failed: %d\n",
3200		    __func__, (uintmax_t)size, error);
3201		goto fail_2;
3202	}
3203
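	/*
	 * igb_dmamap_cb() stores the loaded physical address in dma_paddr;
	 * a value still zero after the load indicates failure.
	 */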
3204	dma->dma_paddr = 0;
3205	error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
3206	    size, igb_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT);
3207	if (error || dma->dma_paddr == 0) {
3208		device_printf(adapter->dev,
3209		    "%s: bus_dmamap_load failed: %d\n",
3210		    __func__, error);
3211		goto fail_3;
3212	}
3213
3214	return (0);
3215
3216fail_3:
3217	bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3218fail_2:
3219	bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
3220	bus_dma_tag_destroy(dma->dma_tag);
3221fail_0:
3222	dma->dma_map = NULL;
3223	dma->dma_tag = NULL;
3224
3225	return (error);
3226}
3227
3228static void
3229igb_dma_free(struct adapter *adapter, struct igb_dma_alloc *dma)
3230{
3231	if (dma->dma_tag == NULL)
3232		return;
3233	if (dma->dma_map != NULL) {
3234		bus_dmamap_sync(dma->dma_tag, dma->dma_map,
3235		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
3236		bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3237		bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
3238		dma->dma_map = NULL;
3239	}
3240	bus_dma_tag_destroy(dma->dma_tag);
3241	dma->dma_tag = NULL;
3242}
3243
3244
3245/*********************************************************************
3246 *
3247 *  Allocate memory for the transmit and receive rings, and then
3248 *  the descriptors associated with each, called only once at attach.
3249 *
3250 **********************************************************************/
3251static int
3252igb_allocate_queues(struct adapter *adapter)
3253{
3254	device_t dev = adapter->dev;
3255	struct igb_queue	*que = NULL;
3256	struct tx_ring		*txr = NULL;
3257	struct rx_ring		*rxr = NULL;
3258	int rsize, tsize, error = E1000_SUCCESS;
3259	int txconf = 0, rxconf = 0;
3260
3261	/* First allocate the top level queue structs */
3262	if (!(adapter->queues =
3263	    (struct igb_queue *) malloc(sizeof(struct igb_queue) *
3264	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3265		device_printf(dev, "Unable to allocate queue memory\n");
3266		error = ENOMEM;
3267		goto fail;
3268	}
3269
3270	/* Next allocate the TX ring struct memory */
3271	if (!(adapter->tx_rings =
3272	    (struct tx_ring *) malloc(sizeof(struct tx_ring) *
3273	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3274		device_printf(dev, "Unable to allocate TX ring memory\n");
3275		error = ENOMEM;
3276		goto tx_fail;
3277	}
3278
3279	/* Now allocate the RX */
3280	if (!(adapter->rx_rings =
3281	    (struct rx_ring *) malloc(sizeof(struct rx_ring) *
3282	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3283		device_printf(dev, "Unable to allocate RX ring memory\n");
3284		error = ENOMEM;
3285		goto rx_fail;
3286	}
3287
3288	tsize = roundup2(adapter->num_tx_desc *
3289	    sizeof(union e1000_adv_tx_desc), IGB_DBA_ALIGN);
3290	/*
3291	 * Now set up the TX queues, txconf is needed to handle the
3292	 * possibility that things fail midcourse and we need to
3293	 * undo memory gracefully
3294	 */
3295	for (int i = 0; i < adapter->num_queues; i++, txconf++) {
3296		/* Set up some basics */
3297		txr = &adapter->tx_rings[i];
3298		txr->adapter = adapter;
3299		txr->me = i;
3300
3301		/* Initialize the TX lock */
3302		snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
3303		    device_get_nameunit(dev), txr->me);
3304		mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF);
3305
3306		if (igb_dma_malloc(adapter, tsize,
3307			&txr->txdma, BUS_DMA_NOWAIT)) {
3308			device_printf(dev,
3309			    "Unable to allocate TX Descriptor memory\n");
3310			error = ENOMEM;
3311			goto err_tx_desc;
3312		}
3313		txr->tx_base = (struct e1000_tx_desc *)txr->txdma.dma_vaddr;
3314		bzero((void *)txr->tx_base, tsize);
3315
3316        	/* Now allocate transmit buffers for the ring */
3317        	if (igb_allocate_transmit_buffers(txr)) {
3318			device_printf(dev,
3319			    "Critical Failure setting up transmit buffers\n");
3320			error = ENOMEM;
3321			goto err_tx_desc;
3322        	}
3323#if __FreeBSD_version >= 800000
3324		/* Allocate a buf ring */
3325		txr->br = buf_ring_alloc(igb_buf_ring_size, M_DEVBUF,
3326		    M_WAITOK, &txr->tx_mtx);
3327#endif
3328	}
3329
3330	/*
3331	 * Next the RX queues...
3332	 */
3333	rsize = roundup2(adapter->num_rx_desc *
3334	    sizeof(union e1000_adv_rx_desc), IGB_DBA_ALIGN);
3335	for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
3336		rxr = &adapter->rx_rings[i];
3337		rxr->adapter = adapter;
3338		rxr->me = i;
3339
3340		/* Initialize the RX lock */
3341		snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
3342		    device_get_nameunit(dev), rxr->me);
3343		mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF);
3344
3345		if (igb_dma_malloc(adapter, rsize,
3346			&rxr->rxdma, BUS_DMA_NOWAIT)) {
3347			device_printf(dev,
3348			    "Unable to allocate RxDescriptor memory\n");
3349			error = ENOMEM;
3350			goto err_rx_desc;
3351		}
3352		rxr->rx_base = (union e1000_adv_rx_desc *)rxr->rxdma.dma_vaddr;
3353		bzero((void *)rxr->rx_base, rsize);
3354
3355        	/* Allocate receive buffers for the ring */
3356		if (igb_allocate_receive_buffers(rxr)) {
3357			device_printf(dev,
3358			    "Critical Failure setting up receive buffers\n");
3359			error = ENOMEM;
3360			goto err_rx_desc;
3361		}
3362	}
3363
3364	/*
3365	** Finally set up the queue holding structs
3366	*/
3367	for (int i = 0; i < adapter->num_queues; i++) {
3368		que = &adapter->queues[i];
3369		que->adapter = adapter;
3370		que->txr = &adapter->tx_rings[i];
3371		que->rxr = &adapter->rx_rings[i];
3372	}
3373
3374	return (0);
3375
3376err_rx_desc:
3377	for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
3378		igb_dma_free(adapter, &rxr->rxdma);
3379err_tx_desc:
3380	for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
3381		igb_dma_free(adapter, &txr->txdma);
3382	free(adapter->rx_rings, M_DEVBUF);
3383rx_fail:
3384#if __FreeBSD_version >= 800000
3385	buf_ring_free(txr->br, M_DEVBUF);
3386#endif
3387	free(adapter->tx_rings, M_DEVBUF);
3388tx_fail:
3389	free(adapter->queues, M_DEVBUF);
3390fail:
3391	return (error);
3392}
3393
3394/*********************************************************************
3395 *
3396 *  Allocate memory for tx_buffer structures. The tx_buffer stores all
3397 *  the information needed to transmit a packet on the wire. This is
3398 *  called only once at attach; setup is done on every reset.
3399 *
3400 **********************************************************************/
3401static int
3402igb_allocate_transmit_buffers(struct tx_ring *txr)
3403{
3404	struct adapter *adapter = txr->adapter;
3405	device_t dev = adapter->dev;
3406	struct igb_tx_buffer *txbuf;
3407	int error, i;
3408
3409	/*
3410	 * Setup DMA descriptor areas.
3411	 */
3412	if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
3413			       1, 0,			/* alignment, bounds */
3414			       BUS_SPACE_MAXADDR,	/* lowaddr */
3415			       BUS_SPACE_MAXADDR,	/* highaddr */
3416			       NULL, NULL,		/* filter, filterarg */
3417			       IGB_TSO_SIZE,		/* maxsize */
3418			       IGB_MAX_SCATTER,		/* nsegments */
3419			       PAGE_SIZE,		/* maxsegsize */
3420			       0,			/* flags */
3421			       NULL,			/* lockfunc */
3422			       NULL,			/* lockfuncarg */
3423			       &txr->txtag))) {
3424		device_printf(dev,"Unable to allocate TX DMA tag\n");
3425		goto fail;
3426	}
3427
3428	if (!(txr->tx_buffers =
3429	    (struct igb_tx_buffer *) malloc(sizeof(struct igb_tx_buffer) *
3430	    adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3431		device_printf(dev, "Unable to allocate tx_buffer memory\n");
3432		error = ENOMEM;
3433		goto fail;
3434	}
3435
3436        /* Create the descriptor buffer dma maps */
3437	txbuf = txr->tx_buffers;
3438	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3439		error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
3440		if (error != 0) {
3441			device_printf(dev, "Unable to create TX DMA map\n");
3442			goto fail;
3443		}
3444	}
3445
3446	return 0;
3447fail:
3448	/* Free everything; this handles the case where we failed partway through */
3449	igb_free_transmit_structures(adapter);
3450	return (error);
3451}
3452
3453/*********************************************************************
3454 *
3455 *  Initialize a transmit ring.
3456 *
3457 **********************************************************************/
3458static void
3459igb_setup_transmit_ring(struct tx_ring *txr)
3460{
3461	struct adapter *adapter = txr->adapter;
3462	struct igb_tx_buffer *txbuf;
3463	int i;
3464#ifdef DEV_NETMAP
3465	struct netmap_adapter *na = NA(adapter->ifp);
3466	struct netmap_slot *slot;
3467#endif /* DEV_NETMAP */
3468
3469	/* Clear the old descriptor contents */
3470	IGB_TX_LOCK(txr);
3471#ifdef DEV_NETMAP
3472	slot = netmap_reset(na, NR_TX, txr->me, 0);
3473#endif /* DEV_NETMAP */
3474	bzero((void *)txr->tx_base,
3475	      (sizeof(union e1000_adv_tx_desc)) * adapter->num_tx_desc);
3476	/* Reset indices */
3477	txr->next_avail_desc = 0;
3478	txr->next_to_clean = 0;
3479
3480	/* Free any existing tx buffers. */
3481        txbuf = txr->tx_buffers;
3482	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3483		if (txbuf->m_head != NULL) {
3484			bus_dmamap_sync(txr->txtag, txbuf->map,
3485			    BUS_DMASYNC_POSTWRITE);
3486			bus_dmamap_unload(txr->txtag, txbuf->map);
3487			m_freem(txbuf->m_head);
3488			txbuf->m_head = NULL;
3489		}
3490#ifdef DEV_NETMAP
3491		if (slot) {
3492			int si = netmap_idx_n2k(&na->tx_rings[txr->me], i);
3493			/* no need to set the address */
3494			netmap_load_map(txr->txtag, txbuf->map, NMB(slot + si));
3495		}
3496#endif /* DEV_NETMAP */
3497		/* clear the watch index */
3498		txbuf->next_eop = -1;
3499        }
3500
3501	/* Set number of descriptors available */
3502	txr->tx_avail = adapter->num_tx_desc;
3503
3504	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3505	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3506	IGB_TX_UNLOCK(txr);
3507}
3508
3509/*********************************************************************
3510 *
3511 *  Initialize all transmit rings.
3512 *
3513 **********************************************************************/
3514static void
3515igb_setup_transmit_structures(struct adapter *adapter)
3516{
3517	struct tx_ring *txr = adapter->tx_rings;
3518
3519	for (int i = 0; i < adapter->num_queues; i++, txr++)
3520		igb_setup_transmit_ring(txr);
3521
3522	return;
3523}
3524
3525/*********************************************************************
3526 *
3527 *  Enable transmit unit.
3528 *
3529 **********************************************************************/
3530static void
3531igb_initialize_transmit_units(struct adapter *adapter)
3532{
3533	struct tx_ring	*txr = adapter->tx_rings;
3534	struct e1000_hw *hw = &adapter->hw;
3535	u32		tctl, txdctl;
3536
3537	INIT_DEBUGOUT("igb_initialize_transmit_units: begin");
3538	tctl = txdctl = 0;
3539
3540	/* Setup the Tx Descriptor Rings */
3541	for (int i = 0; i < adapter->num_queues; i++, txr++) {
3542		u64 bus_addr = txr->txdma.dma_paddr;
3543
3544		E1000_WRITE_REG(hw, E1000_TDLEN(i),
3545		    adapter->num_tx_desc * sizeof(struct e1000_tx_desc));
3546		E1000_WRITE_REG(hw, E1000_TDBAH(i),
3547		    (uint32_t)(bus_addr >> 32));
3548		E1000_WRITE_REG(hw, E1000_TDBAL(i),
3549		    (uint32_t)bus_addr);
3550
3551		/* Setup the HW Tx Head and Tail descriptor pointers */
3552		E1000_WRITE_REG(hw, E1000_TDT(i), 0);
3553		E1000_WRITE_REG(hw, E1000_TDH(i), 0);
3554
3555		HW_DEBUGOUT2("Base = %x, Length = %x\n",
3556		    E1000_READ_REG(hw, E1000_TDBAL(i)),
3557		    E1000_READ_REG(hw, E1000_TDLEN(i)));
3558
3559		txr->queue_status = IGB_QUEUE_IDLE;
3560
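		/*
		** Pack the prefetch, host and write-back thresholds
		** into TXDCTL (bit offsets 0, 8 and 16) and set the
		** queue enable bit.
		*/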
3561		txdctl |= IGB_TX_PTHRESH;
3562		txdctl |= IGB_TX_HTHRESH << 8;
3563		txdctl |= IGB_TX_WTHRESH << 16;
3564		txdctl |= E1000_TXDCTL_QUEUE_ENABLE;
3565		E1000_WRITE_REG(hw, E1000_TXDCTL(i), txdctl);
3566	}
3567
3568	if (adapter->vf_ifp)
3569		return;
3570
3571	e1000_config_collision_dist(hw);
3572
3573	/* Program the Transmit Control Register */
3574	tctl = E1000_READ_REG(hw, E1000_TCTL);
3575	tctl &= ~E1000_TCTL_CT;
3576	tctl |= (E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN |
3577		   (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT));
3578
3579	/* This write will effectively turn on the transmit unit. */
3580	E1000_WRITE_REG(hw, E1000_TCTL, tctl);
3581}
3582
3583/*********************************************************************
3584 *
3585 *  Free all transmit rings.
3586 *
3587 **********************************************************************/
3588static void
3589igb_free_transmit_structures(struct adapter *adapter)
3590{
3591	struct tx_ring *txr = adapter->tx_rings;
3592
3593	for (int i = 0; i < adapter->num_queues; i++, txr++) {
3594		IGB_TX_LOCK(txr);
3595		igb_free_transmit_buffers(txr);
3596		igb_dma_free(adapter, &txr->txdma);
3597		IGB_TX_UNLOCK(txr);
3598		IGB_TX_LOCK_DESTROY(txr);
3599	}
3600	free(adapter->tx_rings, M_DEVBUF);
3601}
3602
3603/*********************************************************************
3604 *
3605 *  Free transmit ring related data structures.
3606 *
3607 **********************************************************************/
3608static void
3609igb_free_transmit_buffers(struct tx_ring *txr)
3610{
3611	struct adapter *adapter = txr->adapter;
3612	struct igb_tx_buffer *tx_buffer;
3613	int             i;
3614
3615	INIT_DEBUGOUT("free_transmit_ring: begin");
3616
3617	if (txr->tx_buffers == NULL)
3618		return;
3619
3620	tx_buffer = txr->tx_buffers;
3621	for (i = 0; i < adapter->num_tx_desc; i++, tx_buffer++) {
3622		if (tx_buffer->m_head != NULL) {
3623			bus_dmamap_sync(txr->txtag, tx_buffer->map,
3624			    BUS_DMASYNC_POSTWRITE);
3625			bus_dmamap_unload(txr->txtag,
3626			    tx_buffer->map);
3627			m_freem(tx_buffer->m_head);
3628			tx_buffer->m_head = NULL;
3629			if (tx_buffer->map != NULL) {
3630				bus_dmamap_destroy(txr->txtag,
3631				    tx_buffer->map);
3632				tx_buffer->map = NULL;
3633			}
3634		} else if (tx_buffer->map != NULL) {
3635			bus_dmamap_unload(txr->txtag,
3636			    tx_buffer->map);
3637			bus_dmamap_destroy(txr->txtag,
3638			    tx_buffer->map);
3639			tx_buffer->map = NULL;
3640		}
3641	}
3642#if __FreeBSD_version >= 800000
3643	if (txr->br != NULL)
3644		buf_ring_free(txr->br, M_DEVBUF);
3645#endif
3646	if (txr->tx_buffers != NULL) {
3647		free(txr->tx_buffers, M_DEVBUF);
3648		txr->tx_buffers = NULL;
3649	}
3650	if (txr->txtag != NULL) {
3651		bus_dma_tag_destroy(txr->txtag);
3652		txr->txtag = NULL;
3653	}
3654	return;
3655}
3656
3657/**********************************************************************
3658 *
3659 *  Setup work for hardware segmentation offload (TSO)
3660 *
3661 **********************************************************************/
3662static bool
3663igb_tso_setup(struct tx_ring *txr, struct mbuf *mp, int ehdrlen,
3664	struct ip *ip, struct tcphdr *th)
3665{
3666	struct adapter *adapter = txr->adapter;
3667	struct e1000_adv_tx_context_desc *TXD;
3668	struct igb_tx_buffer        *tx_buffer;
3669	u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0;
3670	u32 mss_l4len_idx = 0;
3671	u16 vtag = 0;
3672	int ctxd, ip_hlen, tcp_hlen;
3673
3674	ctxd = txr->next_avail_desc;
3675	tx_buffer = &txr->tx_buffers[ctxd];
3676	TXD = (struct e1000_adv_tx_context_desc *) &txr->tx_base[ctxd];
3677
3678	ip->ip_sum = 0;
3679	ip_hlen = ip->ip_hl << 2;
3680	tcp_hlen = th->th_off << 2;
3681
3682	/* VLAN MACLEN IPLEN */
3683	if (mp->m_flags & M_VLANTAG) {
3684		vtag = htole16(mp->m_pkthdr.ether_vtag);
3685		vlan_macip_lens |= (vtag << E1000_ADVTXD_VLAN_SHIFT);
3686	}
3687
3688	vlan_macip_lens |= (ehdrlen << E1000_ADVTXD_MACLEN_SHIFT);
3689	vlan_macip_lens |= ip_hlen;
3690	TXD->vlan_macip_lens |= htole32(vlan_macip_lens);
3691
3692	/* ADV DTYPE TUCMD */
3693	type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
3694	type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP;
3695	type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4;
3696	TXD->type_tucmd_mlhl |= htole32(type_tucmd_mlhl);
3697
3698	/* MSS L4LEN IDX */
3699	mss_l4len_idx |= (mp->m_pkthdr.tso_segsz << E1000_ADVTXD_MSS_SHIFT);
3700	mss_l4len_idx |= (tcp_hlen << E1000_ADVTXD_L4LEN_SHIFT);
3701	/* 82575 needs the queue index added */
3702	if (adapter->hw.mac.type == e1000_82575)
3703		mss_l4len_idx |= txr->me << 4;
3704	TXD->mss_l4len_idx = htole32(mss_l4len_idx);
3705
3706	TXD->seqnum_seed = htole32(0);
3707	tx_buffer->m_head = NULL;
3708	tx_buffer->next_eop = -1;
3709
3710	if (++ctxd == adapter->num_tx_desc)
3711		ctxd = 0;
3712
3713	txr->tx_avail--;
3714	txr->next_avail_desc = ctxd;
3715	return TRUE;
3716}
3717
3718
3719/*********************************************************************
3720 *
3721 *  Context Descriptor setup for VLAN or CSUM
3722 *
3723 **********************************************************************/
3724
3725static bool
3726igb_tx_ctx_setup(struct tx_ring *txr, struct mbuf *mp)
3727{
3728	struct adapter *adapter = txr->adapter;
3729	struct e1000_adv_tx_context_desc *TXD;
3730	struct igb_tx_buffer        *tx_buffer;
3731	u32 vlan_macip_lens, type_tucmd_mlhl, mss_l4len_idx;
3732	struct ether_vlan_header *eh;
3733	struct ip *ip = NULL;
3734	struct ip6_hdr *ip6;
3735	int  ehdrlen, ctxd, ip_hlen = 0;
3736	u16	etype, vtag = 0;
3737	u8	ipproto = 0;
3738	bool	offload = TRUE;
3739
3740	if ((mp->m_pkthdr.csum_flags & CSUM_OFFLOAD) == 0)
3741		offload = FALSE;
3742
3743	vlan_macip_lens = type_tucmd_mlhl = mss_l4len_idx = 0;
3744	ctxd = txr->next_avail_desc;
3745	tx_buffer = &txr->tx_buffers[ctxd];
3746	TXD = (struct e1000_adv_tx_context_desc *) &txr->tx_base[ctxd];
3747
3748	/*
3749	** In advanced descriptors the vlan tag must
3750	** be placed into the context descriptor, thus
3751	** we need to be here just for that setup.
3752	*/
3753	if (mp->m_flags & M_VLANTAG) {
3754		vtag = htole16(mp->m_pkthdr.ether_vtag);
3755		vlan_macip_lens |= (vtag << E1000_ADVTXD_VLAN_SHIFT);
3756	} else if (offload == FALSE)
3757		return FALSE;
3758
3759	/*
3760	 * Determine where frame payload starts.
3761	 * Jump over vlan headers if already present,
3762	 * helpful for QinQ too.
3763	 */
3764	eh = mtod(mp, struct ether_vlan_header *);
3765	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
3766		etype = ntohs(eh->evl_proto);
3767		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3768	} else {
3769		etype = ntohs(eh->evl_encap_proto);
3770		ehdrlen = ETHER_HDR_LEN;
3771	}
3772
3773	/* Set the ether header length */
3774	vlan_macip_lens |= ehdrlen << E1000_ADVTXD_MACLEN_SHIFT;
3775
3776	switch (etype) {
3777		case ETHERTYPE_IP:
3778			ip = (struct ip *)(mp->m_data + ehdrlen);
3779			ip_hlen = ip->ip_hl << 2;
3780			if (mp->m_len < ehdrlen + ip_hlen) {
3781				offload = FALSE;
3782				break;
3783			}
3784			ipproto = ip->ip_p;
3785			type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4;
3786			break;
3787		case ETHERTYPE_IPV6:
3788			ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
3789			ip_hlen = sizeof(struct ip6_hdr);
3790			ipproto = ip6->ip6_nxt;
3791			type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV6;
3792			break;
3793		default:
3794			offload = FALSE;
3795			break;
3796	}
3797
3798	vlan_macip_lens |= ip_hlen;
3799	type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
3800
3801	switch (ipproto) {
3802		case IPPROTO_TCP:
3803			if (mp->m_pkthdr.csum_flags & CSUM_TCP)
3804				type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP;
3805			break;
3806		case IPPROTO_UDP:
3807			if (mp->m_pkthdr.csum_flags & CSUM_UDP)
3808				type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_UDP;
3809			break;
3810#if __FreeBSD_version >= 800000
3811		case IPPROTO_SCTP:
3812			if (mp->m_pkthdr.csum_flags & CSUM_SCTP)
3813				type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_SCTP;
3814			break;
3815#endif
3816		default:
3817			offload = FALSE;
3818			break;
3819	}
3820
3821	/* 82575 needs the queue index added */
3822	if (adapter->hw.mac.type == e1000_82575)
3823		mss_l4len_idx = txr->me << 4;
3824
3825	/* Now copy bits into descriptor */
3826	TXD->vlan_macip_lens |= htole32(vlan_macip_lens);
3827	TXD->type_tucmd_mlhl |= htole32(type_tucmd_mlhl);
3828	TXD->seqnum_seed = htole32(0);
3829	TXD->mss_l4len_idx = htole32(mss_l4len_idx);
3830
3831	tx_buffer->m_head = NULL;
3832	tx_buffer->next_eop = -1;
3833
3834	/* We've consumed the first desc, adjust counters */
3835	if (++ctxd == adapter->num_tx_desc)
3836		ctxd = 0;
3837	txr->next_avail_desc = ctxd;
3838	--txr->tx_avail;
3839
3840        return (offload);
3841}
3842
3843
3844/**********************************************************************
3845 *
3846 *  Examine each tx_buffer in the used queue. If the hardware is done
3847 *  processing the packet then free associated resources. The
3848 *  tx_buffer is put back on the free queue.
3849 *
3850 *  A TRUE return means there's work in the ring to clean; FALSE means it's empty.
3851 **********************************************************************/
3852static bool
3853igb_txeof(struct tx_ring *txr)
3854{
3855	struct adapter	*adapter = txr->adapter;
3856        int first, last, done, processed;
3857        struct igb_tx_buffer *tx_buffer;
3858        struct e1000_tx_desc   *tx_desc, *eop_desc;
3859	struct ifnet   *ifp = adapter->ifp;
3860
3861	IGB_TX_LOCK_ASSERT(txr);
3862
3863#ifdef DEV_NETMAP
3864	if (ifp->if_capenable & IFCAP_NETMAP) {
3865		struct netmap_adapter *na = NA(ifp);
3866
3867		selwakeuppri(&na->tx_rings[txr->me].si, PI_NET);
3868		IGB_TX_UNLOCK(txr);
3869		IGB_CORE_LOCK(adapter);
3870		selwakeuppri(&na->tx_si, PI_NET);
3871		IGB_CORE_UNLOCK(adapter);
3872		IGB_TX_LOCK(txr);
3873		return FALSE;
3874	}
3875#endif /* DEV_NETMAP */
3876        if (txr->tx_avail == adapter->num_tx_desc) {
3877		txr->queue_status = IGB_QUEUE_IDLE;
3878                return FALSE;
3879	}
3880
3881	processed = 0;
3882        first = txr->next_to_clean;
3883        tx_desc = &txr->tx_base[first];
3884        tx_buffer = &txr->tx_buffers[first];
3885	last = tx_buffer->next_eop;
3886        eop_desc = &txr->tx_base[last];
3887
3888	/*
3889	 * Get the index of the first descriptor
3890	 * AFTER the EOP of the first packet; that
3891	 * way we can use a simple comparison in
3892	 * the inner while loop.
3893	 */
3894	if (++last == adapter->num_tx_desc)
3895 		last = 0;
3896	done = last;
3897
3898        bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3899            BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
3900
3901        while (eop_desc->upper.fields.status & E1000_TXD_STAT_DD) {
3902		/* We clean the range of the packet */
3903		while (first != done) {
3904                	tx_desc->upper.data = 0;
3905                	tx_desc->lower.data = 0;
3906                	tx_desc->buffer_addr = 0;
3907                	++txr->tx_avail;
3908			++processed;
3909
3910			if (tx_buffer->m_head) {
3911				txr->bytes +=
3912				    tx_buffer->m_head->m_pkthdr.len;
3913				bus_dmamap_sync(txr->txtag,
3914				    tx_buffer->map,
3915				    BUS_DMASYNC_POSTWRITE);
3916				bus_dmamap_unload(txr->txtag,
3917				    tx_buffer->map);
3918
3919                        	m_freem(tx_buffer->m_head);
3920                        	tx_buffer->m_head = NULL;
3921                	}
3922			tx_buffer->next_eop = -1;
3923			txr->watchdog_time = ticks;
3924
3925	                if (++first == adapter->num_tx_desc)
3926				first = 0;
3927
3928	                tx_buffer = &txr->tx_buffers[first];
3929			tx_desc = &txr->tx_base[first];
3930		}
3931		++txr->packets;
3932		++ifp->if_opackets;
3933		/* See if we can continue to the next packet */
3934		last = tx_buffer->next_eop;
3935		if (last != -1) {
3936        		eop_desc = &txr->tx_base[last];
3937			/* Get new done point */
3938			if (++last == adapter->num_tx_desc) last = 0;
3939			done = last;
3940		} else
3941			break;
3942        }
3943        bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3944            BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3945
3946        txr->next_to_clean = first;
3947
3948	/*
3949	** Watchdog calculation: we know there's work
3950	** outstanding, or the first return above would
3951	** have been taken, so nothing processed for
3952	** too long indicates a hang.
3953	*/
3954	if ((!processed) && ((ticks - txr->watchdog_time) > IGB_WATCHDOG))
3955		txr->queue_status |= IGB_QUEUE_HUNG;
3956        /*
3957         * If we have a minimum free,
3958         * clear depleted state bit
3959         */
3960        if (txr->tx_avail >= IGB_QUEUE_THRESHOLD)
3961                txr->queue_status &= ~IGB_QUEUE_DEPLETED;
3962
3963	/* All clean, turn off the watchdog */
3964	if (txr->tx_avail == adapter->num_tx_desc) {
3965		txr->queue_status = IGB_QUEUE_IDLE;
3966		return (FALSE);
3967        }
3968
3969	return (TRUE);
3970}
3971
3972/*********************************************************************
3973 *
3974 *  Refresh mbuf buffers for RX descriptor rings
3975 *   - keeps its own state, so discards due to resource
3976 *     exhaustion are unnecessary; if an mbuf cannot be obtained
3977 *     it just returns, keeping its placeholder, and can simply
3978 *     be called again later to retry.
3979 *
3980 **********************************************************************/
3981static void
3982igb_refresh_mbufs(struct rx_ring *rxr, int limit)
3983{
3984	struct adapter		*adapter = rxr->adapter;
3985	bus_dma_segment_t	hseg[1];
3986	bus_dma_segment_t	pseg[1];
3987	struct igb_rx_buf	*rxbuf;
3988	struct mbuf		*mh, *mp;
3989	int			i, j, nsegs, error;
3990	bool			refreshed = FALSE;
3991
3992	i = j = rxr->next_to_refresh;
3993	/*
3994	** Get one descriptor beyond
3995	** our work mark to control
3996	** the loop.
3997        */
3998	if (++j == adapter->num_rx_desc)
3999		j = 0;
4000
4001	while (j != limit) {
4002		rxbuf = &rxr->rx_buffers[i];
4003		/* No hdr mbuf used with header split off */
4004		if (rxr->hdr_split == FALSE)
4005			goto no_split;
4006		if (rxbuf->m_head == NULL) {
4007			mh = m_gethdr(M_NOWAIT, MT_DATA);
4008			if (mh == NULL)
4009				goto update;
4010		} else
4011			mh = rxbuf->m_head;
4012
4013		mh->m_pkthdr.len = mh->m_len = MHLEN;
4014		mh->m_len = MHLEN;
4015		mh->m_flags |= M_PKTHDR;
4016		/* Get the memory mapping */
4017		error = bus_dmamap_load_mbuf_sg(rxr->htag,
4018		    rxbuf->hmap, mh, hseg, &nsegs, BUS_DMA_NOWAIT);
4019		if (error != 0) {
4020			printf("Refresh mbufs: hdr dmamap load"
4021			    " failure - %d\n", error);
4022			m_free(mh);
4023			rxbuf->m_head = NULL;
4024			goto update;
4025		}
4026		rxbuf->m_head = mh;
4027		bus_dmamap_sync(rxr->htag, rxbuf->hmap,
4028		    BUS_DMASYNC_PREREAD);
4029		rxr->rx_base[i].read.hdr_addr =
4030		    htole64(hseg[0].ds_addr);
4031no_split:
4032		if (rxbuf->m_pack == NULL) {
4033			mp = m_getjcl(M_NOWAIT, MT_DATA,
4034			    M_PKTHDR, adapter->rx_mbuf_sz);
4035			if (mp == NULL)
4036				goto update;
4037		} else
4038			mp = rxbuf->m_pack;
4039
4040		mp->m_pkthdr.len = mp->m_len = adapter->rx_mbuf_sz;
4041		/* Get the memory mapping */
4042		error = bus_dmamap_load_mbuf_sg(rxr->ptag,
4043		    rxbuf->pmap, mp, pseg, &nsegs, BUS_DMA_NOWAIT);
4044		if (error != 0) {
4045			printf("Refresh mbufs: payload dmamap load"
4046			    " failure - %d\n", error);
4047			m_free(mp);
4048			rxbuf->m_pack = NULL;
4049			goto update;
4050		}
4051		rxbuf->m_pack = mp;
4052		bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
4053		    BUS_DMASYNC_PREREAD);
4054		rxr->rx_base[i].read.pkt_addr =
4055		    htole64(pseg[0].ds_addr);
4056		refreshed = TRUE; /* I feel wefreshed :) */
4057
4058		i = j; /* our next is precalculated */
4059		rxr->next_to_refresh = i;
4060		if (++j == adapter->num_rx_desc)
4061			j = 0;
4062	}
4063update:
4064	if (refreshed) /* update tail */
4065		E1000_WRITE_REG(&adapter->hw,
4066		    E1000_RDT(rxr->me), rxr->next_to_refresh);
4067	return;
4068}
4069
4070
4071/*********************************************************************
4072 *
4073 *  Allocate memory for rx_buffer structures. Since we use one
4074 *  rx_buffer per received packet, the maximum number of rx_buffer's
4075 *  that we'll need is equal to the number of receive descriptors
4076 *  that we've allocated.
4077 *
4078 **********************************************************************/
4079static int
4080igb_allocate_receive_buffers(struct rx_ring *rxr)
4081{
4082	struct	adapter 	*adapter = rxr->adapter;
4083	device_t 		dev = adapter->dev;
4084	struct igb_rx_buf	*rxbuf;
4085	int             	i, bsize, error;
4086
4087	bsize = sizeof(struct igb_rx_buf) * adapter->num_rx_desc;
4088	if (!(rxr->rx_buffers =
4089	    (struct igb_rx_buf *) malloc(bsize,
4090	    M_DEVBUF, M_NOWAIT | M_ZERO))) {
4091		device_printf(dev, "Unable to allocate rx_buffer memory\n");
4092		error = ENOMEM;
4093		goto fail;
4094	}
4095
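	/*
	** Two DMA tags are used per ring: a small one (MSIZE) for the
	** header split mbufs and a larger one (up to MJUM9BYTES) for
	** the payload clusters.
	*/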
4096	if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
4097				   1, 0,		/* alignment, bounds */
4098				   BUS_SPACE_MAXADDR,	/* lowaddr */
4099				   BUS_SPACE_MAXADDR,	/* highaddr */
4100				   NULL, NULL,		/* filter, filterarg */
4101				   MSIZE,		/* maxsize */
4102				   1,			/* nsegments */
4103				   MSIZE,		/* maxsegsize */
4104				   0,			/* flags */
4105				   NULL,		/* lockfunc */
4106				   NULL,		/* lockfuncarg */
4107				   &rxr->htag))) {
4108		device_printf(dev, "Unable to create RX DMA tag\n");
4109		goto fail;
4110	}
4111
4112	if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
4113				   1, 0,		/* alignment, bounds */
4114				   BUS_SPACE_MAXADDR,	/* lowaddr */
4115				   BUS_SPACE_MAXADDR,	/* highaddr */
4116				   NULL, NULL,		/* filter, filterarg */
4117				   MJUM9BYTES,		/* maxsize */
4118				   1,			/* nsegments */
4119				   MJUM9BYTES,		/* maxsegsize */
4120				   0,			/* flags */
4121				   NULL,		/* lockfunc */
4122				   NULL,		/* lockfuncarg */
4123				   &rxr->ptag))) {
4124		device_printf(dev, "Unable to create RX payload DMA tag\n");
4125		goto fail;
4126	}
4127
4128	for (i = 0; i < adapter->num_rx_desc; i++) {
4129		rxbuf = &rxr->rx_buffers[i];
4130		error = bus_dmamap_create(rxr->htag,
4131		    BUS_DMA_NOWAIT, &rxbuf->hmap);
4132		if (error) {
4133			device_printf(dev,
4134			    "Unable to create RX head DMA maps\n");
4135			goto fail;
4136		}
4137		error = bus_dmamap_create(rxr->ptag,
4138		    BUS_DMA_NOWAIT, &rxbuf->pmap);
4139		if (error) {
4140			device_printf(dev,
4141			    "Unable to create RX packet DMA maps\n");
4142			goto fail;
4143		}
4144	}
4145
4146	return (0);
4147
4148fail:
4149	/* Frees all, but can handle partial completion */
4150	igb_free_receive_structures(adapter);
4151	return (error);
4152}
4153
4154
4155static void
4156igb_free_receive_ring(struct rx_ring *rxr)
4157{
4158	struct	adapter		*adapter = rxr->adapter;
4159	struct igb_rx_buf	*rxbuf;
4160
4161
4162	for (int i = 0; i < adapter->num_rx_desc; i++) {
4163		rxbuf = &rxr->rx_buffers[i];
4164		if (rxbuf->m_head != NULL) {
4165			bus_dmamap_sync(rxr->htag, rxbuf->hmap,
4166			    BUS_DMASYNC_POSTREAD);
4167			bus_dmamap_unload(rxr->htag, rxbuf->hmap);
4168			rxbuf->m_head->m_flags |= M_PKTHDR;
4169			m_freem(rxbuf->m_head);
4170		}
4171		if (rxbuf->m_pack != NULL) {
4172			bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
4173			    BUS_DMASYNC_POSTREAD);
4174			bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
4175			rxbuf->m_pack->m_flags |= M_PKTHDR;
4176			m_freem(rxbuf->m_pack);
4177		}
4178		rxbuf->m_head = NULL;
4179		rxbuf->m_pack = NULL;
4180	}
4181}
4182
4183
4184/*********************************************************************
4185 *
4186 *  Initialize a receive ring and its buffers.
4187 *
4188 **********************************************************************/
4189static int
4190igb_setup_receive_ring(struct rx_ring *rxr)
4191{
4192	struct	adapter		*adapter;
4193	struct  ifnet		*ifp;
4194	device_t		dev;
4195	struct igb_rx_buf	*rxbuf;
4196	bus_dma_segment_t	pseg[1], hseg[1];
4197	struct lro_ctrl		*lro = &rxr->lro;
4198	int			rsize, nsegs, error = 0;
4199#ifdef DEV_NETMAP
4200	struct netmap_adapter *na = NA(rxr->adapter->ifp);
4201	struct netmap_slot *slot;
4202#endif /* DEV_NETMAP */
4203
4204	adapter = rxr->adapter;
4205	dev = adapter->dev;
4206	ifp = adapter->ifp;
4207
4208	/* Clear the ring contents */
4209	IGB_RX_LOCK(rxr);
4210#ifdef DEV_NETMAP
4211	slot = netmap_reset(na, NR_RX, rxr->me, 0);
4212#endif /* DEV_NETMAP */
4213	rsize = roundup2(adapter->num_rx_desc *
4214	    sizeof(union e1000_adv_rx_desc), IGB_DBA_ALIGN);
4215	bzero((void *)rxr->rx_base, rsize);
4216
4217	/*
4218	** Free current RX buffer structures and their mbufs
4219	*/
4220	igb_free_receive_ring(rxr);
4221
4222	/* Configure for header split? */
4223	if (igb_header_split)
4224		rxr->hdr_split = TRUE;
4225
4226        /* Now replenish the ring mbufs */
4227	for (int j = 0; j < adapter->num_rx_desc; ++j) {
4228		struct mbuf	*mh, *mp;
4229
4230		rxbuf = &rxr->rx_buffers[j];
4231#ifdef DEV_NETMAP
4232		if (slot) {
4233			/* slot sj is mapped to the i-th NIC-ring entry */
4234			int sj = netmap_idx_n2k(&na->rx_rings[rxr->me], j);
4235			uint64_t paddr;
4236			void *addr;
4237
4238			addr = PNMB(slot + sj, &paddr);
4239			netmap_load_map(rxr->ptag, rxbuf->pmap, addr);
4240			/* Update descriptor */
4241			rxr->rx_base[j].read.pkt_addr = htole64(paddr);
4242			continue;
4243		}
4244#endif /* DEV_NETMAP */
4245		if (rxr->hdr_split == FALSE)
4246			goto skip_head;
4247
4248		/* First the header */
4249		rxbuf->m_head = m_gethdr(M_NOWAIT, MT_DATA);
4250		if (rxbuf->m_head == NULL) {
4251			error = ENOBUFS;
4252                        goto fail;
4253		}
4254		m_adj(rxbuf->m_head, ETHER_ALIGN);
4255		mh = rxbuf->m_head;
4256		mh->m_len = mh->m_pkthdr.len = MHLEN;
4257		mh->m_flags |= M_PKTHDR;
4258		/* Get the memory mapping */
4259		error = bus_dmamap_load_mbuf_sg(rxr->htag,
4260		    rxbuf->hmap, rxbuf->m_head, hseg,
4261		    &nsegs, BUS_DMA_NOWAIT);
4262		if (error != 0) /* Nothing elegant to do here */
4263                        goto fail;
4264		bus_dmamap_sync(rxr->htag,
4265		    rxbuf->hmap, BUS_DMASYNC_PREREAD);
4266		/* Update descriptor */
4267		rxr->rx_base[j].read.hdr_addr = htole64(hseg[0].ds_addr);
4268
4269skip_head:
4270		/* Now the payload cluster */
4271		rxbuf->m_pack = m_getjcl(M_NOWAIT, MT_DATA,
4272		    M_PKTHDR, adapter->rx_mbuf_sz);
4273		if (rxbuf->m_pack == NULL) {
4274			error = ENOBUFS;
4275                        goto fail;
4276		}
4277		mp = rxbuf->m_pack;
4278		mp->m_pkthdr.len = mp->m_len = adapter->rx_mbuf_sz;
4279		/* Get the memory mapping */
4280		error = bus_dmamap_load_mbuf_sg(rxr->ptag,
4281		    rxbuf->pmap, mp, pseg,
4282		    &nsegs, BUS_DMA_NOWAIT);
4283		if (error != 0)
4284                        goto fail;
4285		bus_dmamap_sync(rxr->ptag,
4286		    rxbuf->pmap, BUS_DMASYNC_PREREAD);
4287		/* Update descriptor */
4288		rxr->rx_base[j].read.pkt_addr = htole64(pseg[0].ds_addr);
4289        }
4290
4291	/* Setup our descriptor indices */
4292	rxr->next_to_check = 0;
4293	rxr->next_to_refresh = adapter->num_rx_desc - 1;
4294	rxr->lro_enabled = FALSE;
4295	rxr->rx_split_packets = 0;
4296	rxr->rx_bytes = 0;
4297
4298	rxr->fmp = NULL;
4299	rxr->lmp = NULL;
4300	rxr->discard = FALSE;
4301
4302	bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4303	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4304
4305	/*
4306	** Now set up the LRO interface; we also
4307	** only do header split when LRO is
4308	** enabled, since it is often undesirable
4309	** in other setups.
4310	*/
4311	if (ifp->if_capenable & IFCAP_LRO) {
4312		error = tcp_lro_init(lro);
4313		if (error) {
4314			device_printf(dev, "LRO Initialization failed!\n");
4315			goto fail;
4316		}
4317		INIT_DEBUGOUT("RX LRO Initialized\n");
4318		rxr->lro_enabled = TRUE;
4319		lro->ifp = adapter->ifp;
4320	}
4321
4322	IGB_RX_UNLOCK(rxr);
4323	return (0);
4324
4325fail:
4326	igb_free_receive_ring(rxr);
4327	IGB_RX_UNLOCK(rxr);
4328	return (error);
4329}
4330
4331
4332/*********************************************************************
4333 *
4334 *  Initialize all receive rings.
4335 *
4336 **********************************************************************/
4337static int
4338igb_setup_receive_structures(struct adapter *adapter)
4339{
4340	struct rx_ring *rxr = adapter->rx_rings;
4341	int i;
4342
4343	for (i = 0; i < adapter->num_queues; i++, rxr++)
4344		if (igb_setup_receive_ring(rxr))
4345			goto fail;
4346
4347	return (0);
4348fail:
4349	/*
4350	 * Free the RX buffers allocated so far; we only handle
4351	 * the rings that completed, since the failing ring has
4352	 * cleaned up after itself. 'i' is the endpoint.
4353	 */
4354	for (int j = 0; j < i; ++j) {
4355		rxr = &adapter->rx_rings[j];
4356		IGB_RX_LOCK(rxr);
4357		igb_free_receive_ring(rxr);
4358		IGB_RX_UNLOCK(rxr);
4359	}
4360
4361	return (ENOBUFS);
4362}
4363
4364/*********************************************************************
4365 *
4366 *  Enable receive unit.
4367 *
4368 **********************************************************************/
4369static void
4370igb_initialize_receive_units(struct adapter *adapter)
4371{
4372	struct rx_ring	*rxr = adapter->rx_rings;
4373	struct ifnet	*ifp = adapter->ifp;
4374	struct e1000_hw *hw = &adapter->hw;
4375	u32		rctl, rxcsum, psize, srrctl = 0;
4376
4377	INIT_DEBUGOUT("igb_initialize_receive_units: begin");
4378
4379	/*
4380	 * Make sure receives are disabled while setting
4381	 * up the descriptor ring
4382	 */
4383	rctl = E1000_READ_REG(hw, E1000_RCTL);
4384	E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
4385
4386	/*
4387	** Set up for header split
4388	*/
4389	if (igb_header_split) {
4390		/* Use a standard mbuf for the header */
4391		srrctl |= IGB_HDR_BUF << E1000_SRRCTL_BSIZEHDRSIZE_SHIFT;
4392		srrctl |= E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
4393	} else
4394		srrctl |= E1000_SRRCTL_DESCTYPE_ADV_ONEBUF;
4395
4396	/*
4397	** Set up for jumbo frames
4398	*/
4399	if (ifp->if_mtu > ETHERMTU) {
4400		rctl |= E1000_RCTL_LPE;
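		/*
		** Pick the receive buffer size: SRRCTL takes the size
		** via the BSIZEPKT shift while RCTL carries a matching
		** coarse size plus the buffer size extension (BSEX) bit.
		*/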
4401		if (adapter->rx_mbuf_sz == MJUMPAGESIZE) {
4402			srrctl |= 4096 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
4403			rctl |= E1000_RCTL_SZ_4096 | E1000_RCTL_BSEX;
4404		} else if (adapter->rx_mbuf_sz > MJUMPAGESIZE) {
4405			srrctl |= 8192 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
4406			rctl |= E1000_RCTL_SZ_8192 | E1000_RCTL_BSEX;
4407		}
4408		/* Set maximum packet len */
4409		psize = adapter->max_frame_size;
4410		/* are we on a vlan? */
4411		if (adapter->ifp->if_vlantrunk != NULL)
4412			psize += VLAN_TAG_SIZE;
4413		E1000_WRITE_REG(&adapter->hw, E1000_RLPML, psize);
4414	} else {
4415		rctl &= ~E1000_RCTL_LPE;
4416		srrctl |= 2048 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
4417		rctl |= E1000_RCTL_SZ_2048;
4418	}
4419
4420	/* Setup the Base and Length of the Rx Descriptor Rings */
4421	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4422		u64 bus_addr = rxr->rxdma.dma_paddr;
4423		u32 rxdctl;
4424
4425		E1000_WRITE_REG(hw, E1000_RDLEN(i),
4426		    adapter->num_rx_desc * sizeof(struct e1000_rx_desc));
4427		E1000_WRITE_REG(hw, E1000_RDBAH(i),
4428		    (uint32_t)(bus_addr >> 32));
4429		E1000_WRITE_REG(hw, E1000_RDBAL(i),
4430		    (uint32_t)bus_addr);
4431		E1000_WRITE_REG(hw, E1000_SRRCTL(i), srrctl);
4432		/* Enable this Queue */
4433		rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(i));
4434		rxdctl |= E1000_RXDCTL_QUEUE_ENABLE;
4435		rxdctl &= 0xFFF00000;
4436		rxdctl |= IGB_RX_PTHRESH;
4437		rxdctl |= IGB_RX_HTHRESH << 8;
4438		rxdctl |= IGB_RX_WTHRESH << 16;
4439		E1000_WRITE_REG(hw, E1000_RXDCTL(i), rxdctl);
4440	}
4441
4442	/*
4443	** Setup for RX MultiQueue
4444	*/
4445	rxcsum = E1000_READ_REG(hw, E1000_RXCSUM);
4446	if (adapter->num_queues > 1) {
4447		u32 random[10], mrqc, shift = 0;
4448		union igb_reta {
4449			u32 dword;
4450			u8  bytes[4];
4451		} reta;
4452
4453		arc4rand(&random, sizeof(random), 0);
4454		if (adapter->hw.mac.type == e1000_82575)
4455			shift = 6;
4456		/* Warning FM follows */
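		/*
		** Fill the 128-entry redirection table; each byte
		** maps a hash bucket to a queue (shifted left by 6
		** on 82575) and entries are written to the RETA
		** registers one dword (4 entries) at a time.
		*/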
4457		for (int i = 0; i < 128; i++) {
4458			reta.bytes[i & 3] =
4459			    (i % adapter->num_queues) << shift;
4460			if ((i & 3) == 3)
4461				E1000_WRITE_REG(hw,
4462				    E1000_RETA(i >> 2), reta.dword);
4463		}
4464		/* Now fill in hash table */
4465		mrqc = E1000_MRQC_ENABLE_RSS_4Q;
4466		for (int i = 0; i < 10; i++)
4467			E1000_WRITE_REG_ARRAY(hw,
4468			    E1000_RSSRK(0), i, random[i]);
4469
4470		mrqc |= (E1000_MRQC_RSS_FIELD_IPV4 |
4471		    E1000_MRQC_RSS_FIELD_IPV4_TCP);
4472		mrqc |= (E1000_MRQC_RSS_FIELD_IPV6 |
4473		    E1000_MRQC_RSS_FIELD_IPV6_TCP);
4474		mrqc |= (E1000_MRQC_RSS_FIELD_IPV4_UDP |
4475		    E1000_MRQC_RSS_FIELD_IPV6_UDP);
4476		mrqc |= (E1000_MRQC_RSS_FIELD_IPV6_UDP_EX |
4477		    E1000_MRQC_RSS_FIELD_IPV6_TCP_EX);
4478
4479		E1000_WRITE_REG(hw, E1000_MRQC, mrqc);
4480
4481		/*
4482		** NOTE: Receive Full-Packet Checksum Offload
4483		** is mutually exclusive with Multiqueue. However,
4484		** this is not the same as TCP/IP checksums, which
4485		** still work.
4486		*/
4487		rxcsum |= E1000_RXCSUM_PCSD;
4488#if __FreeBSD_version >= 800000
4489		/* For SCTP Offload */
4490		if ((hw->mac.type == e1000_82576)
4491		    && (ifp->if_capenable & IFCAP_RXCSUM))
4492			rxcsum |= E1000_RXCSUM_CRCOFL;
4493#endif
4494	} else {
4495		/* Non RSS setup */
4496		if (ifp->if_capenable & IFCAP_RXCSUM) {
4497			rxcsum |= E1000_RXCSUM_IPPCSE;
4498#if __FreeBSD_version >= 800000
4499			if (adapter->hw.mac.type == e1000_82576)
4500				rxcsum |= E1000_RXCSUM_CRCOFL;
4501#endif
4502		} else
4503			rxcsum &= ~E1000_RXCSUM_TUOFL;
4504	}
4505	E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum);
4506
4507	/* Setup the Receive Control Register */
4508	rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
4509	rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_LBM_NO |
4510		   E1000_RCTL_RDMTS_HALF |
4511		   (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
4512	/* Strip CRC bytes. */
4513	rctl |= E1000_RCTL_SECRC;
4514	/* Make sure VLAN Filters are off */
4515	rctl &= ~E1000_RCTL_VFE;
4516	/* Don't store bad packets */
4517	rctl &= ~E1000_RCTL_SBP;
4518
4519	/* Enable Receives */
4520	E1000_WRITE_REG(hw, E1000_RCTL, rctl);
4521
4522	/*
4523	 * Setup the HW Rx Head and Tail Descriptor Pointers
4524	 *   - needs to be after enable
4525	 */
4526	for (int i = 0; i < adapter->num_queues; i++) {
4527		rxr = &adapter->rx_rings[i];
4528		E1000_WRITE_REG(hw, E1000_RDH(i), rxr->next_to_check);
4529#ifdef DEV_NETMAP
4530		/*
4531		 * an init() while a netmap client is active must
4532		 * preserve the rx buffers passed to userspace.
4533		 * In this driver it means we adjust RDT to
4534		 * something different from next_to_refresh
4535		 * (which is not used in netmap mode).
4536		 */
4537		if (ifp->if_capenable & IFCAP_NETMAP) {
4538			struct netmap_adapter *na = NA(adapter->ifp);
4539			struct netmap_kring *kring = &na->rx_rings[i];
4540			int t = rxr->next_to_refresh - kring->nr_hwavail;
4541
4542			if (t >= adapter->num_rx_desc)
4543				t -= adapter->num_rx_desc;
4544			else if (t < 0)
4545				t += adapter->num_rx_desc;
4546			E1000_WRITE_REG(hw, E1000_RDT(i), t);
4547		} else
4548#endif /* DEV_NETMAP */
4549		E1000_WRITE_REG(hw, E1000_RDT(i), rxr->next_to_refresh);
4550	}
4551	return;
4552}
4553
4554/*********************************************************************
4555 *
4556 *  Free receive rings.
4557 *
4558 **********************************************************************/
4559static void
4560igb_free_receive_structures(struct adapter *adapter)
4561{
4562	struct rx_ring *rxr = adapter->rx_rings;
4563
4564	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4565		struct lro_ctrl	*lro = &rxr->lro;
4566		igb_free_receive_buffers(rxr);
4567		tcp_lro_free(lro);
4568		igb_dma_free(adapter, &rxr->rxdma);
4569	}
4570
4571	free(adapter->rx_rings, M_DEVBUF);
4572}
4573
4574/*********************************************************************
4575 *
4576 *  Free receive ring data structures.
4577 *
4578 **********************************************************************/
4579static void
4580igb_free_receive_buffers(struct rx_ring *rxr)
4581{
4582	struct adapter		*adapter = rxr->adapter;
4583	struct igb_rx_buf	*rxbuf;
4584	int i;
4585
4586	INIT_DEBUGOUT("free_receive_structures: begin");
4587
4588	/* Cleanup any existing buffers */
4589	if (rxr->rx_buffers != NULL) {
4590		for (i = 0; i < adapter->num_rx_desc; i++) {
4591			rxbuf = &rxr->rx_buffers[i];
4592			if (rxbuf->m_head != NULL) {
4593				bus_dmamap_sync(rxr->htag, rxbuf->hmap,
4594				    BUS_DMASYNC_POSTREAD);
4595				bus_dmamap_unload(rxr->htag, rxbuf->hmap);
4596				rxbuf->m_head->m_flags |= M_PKTHDR;
4597				m_freem(rxbuf->m_head);
4598			}
4599			if (rxbuf->m_pack != NULL) {
4600				bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
4601				    BUS_DMASYNC_POSTREAD);
4602				bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
4603				rxbuf->m_pack->m_flags |= M_PKTHDR;
4604				m_freem(rxbuf->m_pack);
4605			}
4606			rxbuf->m_head = NULL;
4607			rxbuf->m_pack = NULL;
4608			if (rxbuf->hmap != NULL) {
4609				bus_dmamap_destroy(rxr->htag, rxbuf->hmap);
4610				rxbuf->hmap = NULL;
4611			}
4612			if (rxbuf->pmap != NULL) {
4613				bus_dmamap_destroy(rxr->ptag, rxbuf->pmap);
4614				rxbuf->pmap = NULL;
4615			}
4616		}
4617		if (rxr->rx_buffers != NULL) {
4618			free(rxr->rx_buffers, M_DEVBUF);
4619			rxr->rx_buffers = NULL;
4620		}
4621	}
4622
4623	if (rxr->htag != NULL) {
4624		bus_dma_tag_destroy(rxr->htag);
4625		rxr->htag = NULL;
4626	}
4627	if (rxr->ptag != NULL) {
4628		bus_dma_tag_destroy(rxr->ptag);
4629		rxr->ptag = NULL;
4630	}
4631}
4632
4633static __inline void
4634igb_rx_discard(struct rx_ring *rxr, int i)
4635{
4636	struct igb_rx_buf	*rbuf;
4637
4638	rbuf = &rxr->rx_buffers[i];
4639
4640	/* Partially received? Free the chain */
4641	if (rxr->fmp != NULL) {
4642		rxr->fmp->m_flags |= M_PKTHDR;
4643		m_freem(rxr->fmp);
4644		rxr->fmp = NULL;
4645		rxr->lmp = NULL;
4646	}
4647
4648	/*
4649	** With advanced descriptors the writeback
4650	** clobbers the buffer addresses, so it's easier
4651	** to just free the existing mbufs and take
4652	** the normal refresh path to get new buffers
4653	** and mapping.
4654	*/
4655	if (rbuf->m_head) {
4656		m_free(rbuf->m_head);
4657		rbuf->m_head = NULL;
4658	}
4659
4660	if (rbuf->m_pack) {
4661		m_free(rbuf->m_pack);
4662		rbuf->m_pack = NULL;
4663	}
4664
4665	return;
4666}
4667
4668static __inline void
4669igb_rx_input(struct rx_ring *rxr, struct ifnet *ifp, struct mbuf *m, u32 ptype)
4670{
4671
4672	/*
4673	 * At the moment LRO is only for IPv4/TCP packets whose TCP checksum
4674	 * has been computed by hardware. The packet also must not have a
4675	 * VLAN tag in its ethernet header.
4676	 */
4677	if (rxr->lro_enabled &&
4678	    (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 &&
4679	    (ptype & E1000_RXDADV_PKTTYPE_ETQF) == 0 &&
4680	    (ptype & (E1000_RXDADV_PKTTYPE_IPV4 | E1000_RXDADV_PKTTYPE_TCP)) ==
4681	    (E1000_RXDADV_PKTTYPE_IPV4 | E1000_RXDADV_PKTTYPE_TCP) &&
4682	    (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) ==
4683	    (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) {
4684		/*
4685		 * Send to the stack if:
4686		 *  - LRO not enabled, or
4687		 *  - no LRO resources, or
4688		 *  - lro enqueue fails
4689		 */
4690		if (rxr->lro.lro_cnt != 0)
4691			if (tcp_lro_rx(&rxr->lro, m, 0) == 0)
4692				return;
4693	}
4694	IGB_RX_UNLOCK(rxr);
4695	(*ifp->if_input)(ifp, m);
4696	IGB_RX_LOCK(rxr);
4697}
4698
4699/*********************************************************************
4700 *
4701 *  This routine executes in interrupt context. It replenishes
4702 *  the mbufs in the descriptor ring and passes data which has
4703 *  been DMA'd into host memory up to the upper layer.
4704 *
4705 *  We loop at most count times if count is > 0, or until done if
4706 *  count < 0.
4707 *
4708 *  Return TRUE if more to clean, FALSE otherwise
4709 *********************************************************************/
4710static bool
4711igb_rxeof(struct igb_queue *que, int count, int *done)
4712{
4713	struct adapter		*adapter = que->adapter;
4714	struct rx_ring		*rxr = que->rxr;
4715	struct ifnet		*ifp = adapter->ifp;
4716	struct lro_ctrl		*lro = &rxr->lro;
4717	struct lro_entry	*queued;
4718	int			i, processed = 0, rxdone = 0;
4719	u32			ptype, staterr = 0;
4720	union e1000_adv_rx_desc	*cur;
4721
4722	IGB_RX_LOCK(rxr);
4723	/* Sync the ring. */
4724	bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4725	    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
4726
4727#ifdef DEV_NETMAP
4728	if (ifp->if_capenable & IFCAP_NETMAP) {
4729		struct netmap_adapter *na = NA(ifp);
4730
4731		na->rx_rings[rxr->me].nr_kflags |= NKR_PENDINTR;
4732		selwakeuppri(&na->rx_rings[rxr->me].si, PI_NET);
4733		IGB_RX_UNLOCK(rxr);
4734		IGB_CORE_LOCK(adapter);
4735		selwakeuppri(&na->rx_si, PI_NET);
4736		IGB_CORE_UNLOCK(adapter);
4737		return (0);
4738	}
4739#endif /* DEV_NETMAP */
4740
4741	/* Main clean loop */
4742	for (i = rxr->next_to_check; count != 0;) {
4743		struct mbuf		*sendmp, *mh, *mp;
4744		struct igb_rx_buf	*rxbuf;
4745		u16			hlen, plen, hdr, vtag;
4746		bool			eop = FALSE;
4747
4748		cur = &rxr->rx_base[i];
4749		staterr = le32toh(cur->wb.upper.status_error);
4750		if ((staterr & E1000_RXD_STAT_DD) == 0)
4751			break;
4752		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
4753			break;
4754		count--;
4755		sendmp = mh = mp = NULL;
4756		cur->wb.upper.status_error = 0;
4757		rxbuf = &rxr->rx_buffers[i];
4758		plen = le16toh(cur->wb.upper.length);
4759		ptype = le32toh(cur->wb.lower.lo_dword.data) & IGB_PKTTYPE_MASK;
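		/*
		** On the i350, VLAN tags of local (loopback, LB) packets
		** arrive byte swapped, so those are converted with
		** be16toh below.
		*/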
4760		if ((adapter->hw.mac.type == e1000_i350) &&
4761		    (staterr & E1000_RXDEXT_STATERR_LB))
4762			vtag = be16toh(cur->wb.upper.vlan);
4763		else
4764			vtag = le16toh(cur->wb.upper.vlan);
4765		hdr = le16toh(cur->wb.lower.lo_dword.hs_rss.hdr_info);
4766		eop = ((staterr & E1000_RXD_STAT_EOP) == E1000_RXD_STAT_EOP);
4767
4768		/* Make sure all segments of a bad packet are discarded */
4769		if (((staterr & E1000_RXDEXT_ERR_FRAME_ERR_MASK) != 0) ||
4770		    (rxr->discard)) {
4771			adapter->dropped_pkts++;
4772			++rxr->rx_discarded;
4773			if (!eop) /* Catch subsequent segs */
4774				rxr->discard = TRUE;
4775			else
4776				rxr->discard = FALSE;
4777			igb_rx_discard(rxr, i);
4778			goto next_desc;
4779		}
4780
4781		/*
4782		** The way the hardware is configured to
4783		** split, it will ONLY use the header buffer
4784		** when header split is enabled; otherwise we
4785		** get normal behavior, i.e., both header and
4786		** payload are DMA'd into the payload buffer.
4787		**
4788		** The fmp test is to catch the case where a
4789		** packet spans multiple descriptors, in that
4790		** case only the first header is valid.
4791		*/
4792		if (rxr->hdr_split && rxr->fmp == NULL) {
4793			hlen = (hdr & E1000_RXDADV_HDRBUFLEN_MASK) >>
4794			    E1000_RXDADV_HDRBUFLEN_SHIFT;
4795			if (hlen > IGB_HDR_BUF)
4796				hlen = IGB_HDR_BUF;
4797			mh = rxr->rx_buffers[i].m_head;
4798			mh->m_len = hlen;
4799			/* clear buf pointer for refresh */
4800			rxbuf->m_head = NULL;
4801			/*
4802			** Get the payload length, this
4803			** could be zero if it's a small
4804			** packet.
4805			*/
4806			if (plen > 0) {
4807				mp = rxr->rx_buffers[i].m_pack;
4808				mp->m_len = plen;
4809				mh->m_next = mp;
4810				/* clear buf pointer */
4811				rxbuf->m_pack = NULL;
4812				rxr->rx_split_packets++;
4813			}
4814		} else {
4815			/*
4816			** Either no header split, or a
4817			** secondary piece of a fragmented
4818			** split packet.
4819			*/
4820			mh = rxr->rx_buffers[i].m_pack;
4821			mh->m_len = plen;
4822			/* clear buf info for refresh */
4823			rxbuf->m_pack = NULL;
4824		}
4825
4826		++processed; /* So we know when to refresh */
4827
4828		/* Initial frame - setup */
4829		if (rxr->fmp == NULL) {
4830			mh->m_pkthdr.len = mh->m_len;
4831			/* Save the head of the chain */
4832			rxr->fmp = mh;
4833			rxr->lmp = mh;
4834			if (mp != NULL) {
4835				/* Add payload if split */
4836				mh->m_pkthdr.len += mp->m_len;
4837				rxr->lmp = mh->m_next;
4838			}
4839		} else {
4840			/* Chain mbuf's together */
4841			rxr->lmp->m_next = mh;
4842			rxr->lmp = rxr->lmp->m_next;
4843			rxr->fmp->m_pkthdr.len += mh->m_len;
4844		}
4845
4846		if (eop) {
4847			rxr->fmp->m_pkthdr.rcvif = ifp;
4848			ifp->if_ipackets++;
4849			rxr->rx_packets++;
4850			/* capture data for AIM */
4851			rxr->packets++;
4852			rxr->bytes += rxr->fmp->m_pkthdr.len;
4853			rxr->rx_bytes += rxr->fmp->m_pkthdr.len;
4854
4855			if ((ifp->if_capenable & IFCAP_RXCSUM) != 0)
4856				igb_rx_checksum(staterr, rxr->fmp, ptype);
4857
4858			if ((ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 &&
4859			    (staterr & E1000_RXD_STAT_VP) != 0) {
4860				rxr->fmp->m_pkthdr.ether_vtag = vtag;
4861				rxr->fmp->m_flags |= M_VLANTAG;
4862			}
4863#if __FreeBSD_version >= 800000
4864			rxr->fmp->m_pkthdr.flowid = que->msix;
4865			rxr->fmp->m_flags |= M_FLOWID;
4866#endif
4867			sendmp = rxr->fmp;
4868			/* Make sure to set M_PKTHDR. */
4869			sendmp->m_flags |= M_PKTHDR;
4870			rxr->fmp = NULL;
4871			rxr->lmp = NULL;
4872		}
4873
4874next_desc:
4875		bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4876		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4877
4878		/* Advance our pointers to the next descriptor. */
4879		if (++i == adapter->num_rx_desc)
4880			i = 0;
4881		/*
4882		** Send to the stack or LRO
4883		*/
4884		if (sendmp != NULL) {
4885			rxr->next_to_check = i;
4886			igb_rx_input(rxr, ifp, sendmp, ptype);
4887			i = rxr->next_to_check;
4888			rxdone++;
4889		}
4890
4891		/* Every 8 descriptors we go to refresh mbufs */
4892		if (processed == 8) {
4893                        igb_refresh_mbufs(rxr, i);
4894                        processed = 0;
4895		}
4896	}
4897
4898	/* Catch any remainders */
4899	if (igb_rx_unrefreshed(rxr))
4900		igb_refresh_mbufs(rxr, i);
4901
4902	rxr->next_to_check = i;
4903
4904	/*
4905	 * Flush any outstanding LRO work
4906	 */
4907	while ((queued = SLIST_FIRST(&lro->lro_active)) != NULL) {
4908		SLIST_REMOVE_HEAD(&lro->lro_active, next);
4909		tcp_lro_flush(lro, queued);
4910	}
4911
4912	if (done != NULL)
4913		*done += rxdone;
4914
4915	IGB_RX_UNLOCK(rxr);
4916	return ((staterr & E1000_RXD_STAT_DD) ? TRUE : FALSE);
4917}
4918
4919/*********************************************************************
4920 *
4921 *  Verify that the hardware indicated that the checksum is valid.
4922 *  Inform the stack about the status of the checksum so that the
4923 *  stack doesn't spend time verifying it again.
4924 *
4925 *********************************************************************/
4926static void
4927igb_rx_checksum(u32 staterr, struct mbuf *mp, u32 ptype)
4928{
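	/*
	** The low word of the writeback status carries the status
	** bits and the top byte carries the error bits.
	*/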
4929	u16 status = (u16)staterr;
4930	u8  errors = (u8) (staterr >> 24);
4931	int sctp;
4932
4933	/* The Ignore Checksum bit is set; report nothing */
4934	if (status & E1000_RXD_STAT_IXSM) {
4935		mp->m_pkthdr.csum_flags = 0;
4936		return;
4937	}
4938
4939	if ((ptype & E1000_RXDADV_PKTTYPE_ETQF) == 0 &&
4940	    (ptype & E1000_RXDADV_PKTTYPE_SCTP) != 0)
4941		sctp = 1;
4942	else
4943		sctp = 0;
4944	if (status & E1000_RXD_STAT_IPCS) {
4945		/* Did it pass? */
4946		if (!(errors & E1000_RXD_ERR_IPE)) {
4947			/* IP Checksum Good */
4948			mp->m_pkthdr.csum_flags = CSUM_IP_CHECKED;
4949			mp->m_pkthdr.csum_flags |= CSUM_IP_VALID;
4950		} else
4951			mp->m_pkthdr.csum_flags = 0;
4952	}
4953
4954	if (status & (E1000_RXD_STAT_TCPCS | E1000_RXD_STAT_UDPCS)) {
4955		u16 type = (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
4956#if __FreeBSD_version >= 800000
4957		if (sctp) /* reassign */
4958			type = CSUM_SCTP_VALID;
4959#endif
4960		/* Did it pass? */
4961		if (!(errors & E1000_RXD_ERR_TCPE)) {
4962			mp->m_pkthdr.csum_flags |= type;
4963			if (sctp == 0)
4964				mp->m_pkthdr.csum_data = htons(0xffff);
4965		}
4966	}
4967	return;
4968}
4969
4970/*
4971 * This routine is run via a vlan
4972 * config EVENT
4973 */
4974static void
4975igb_register_vlan(void *arg, struct ifnet *ifp, u16 vtag)
4976{
4977	struct adapter	*adapter = ifp->if_softc;
4978	u32		index, bit;
4979
4980	if (ifp->if_softc !=  arg)   /* Not our event */
4981		return;
4982
4983	if ((vtag == 0) || (vtag > 4095))       /* Invalid */
4984                return;
4985
4986	IGB_CORE_LOCK(adapter);
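	/*
	** The shadow VFTA is an array of 32-bit words; the upper
	** bits of the tag select the word and the low 5 bits
	** select the bit within it.
	*/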
4987	index = (vtag >> 5) & 0x7F;
4988	bit = vtag & 0x1F;
4989	adapter->shadow_vfta[index] |= (1 << bit);
4990	++adapter->num_vlans;
4991	/* Change hw filter setting */
4992	if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
4993		igb_setup_vlan_hw_support(adapter);
4994	IGB_CORE_UNLOCK(adapter);
4995}
4996
4997/*
4998 * This routine is run via a vlan
4999 * unconfig EVENT
5000 */
5001static void
5002igb_unregister_vlan(void *arg, struct ifnet *ifp, u16 vtag)
5003{
5004	struct adapter	*adapter = ifp->if_softc;
5005	u32		index, bit;
5006
5007	if (ifp->if_softc !=  arg)
5008		return;
5009
5010	if ((vtag == 0) || (vtag > 4095))       /* Invalid */
5011                return;
5012
5013	IGB_CORE_LOCK(adapter);
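	/* Clear this tag's bit in the shadow VFTA (same indexing as in igb_register_vlan) */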
5014	index = (vtag >> 5) & 0x7F;
5015	bit = vtag & 0x1F;
5016	adapter->shadow_vfta[index] &= ~(1 << bit);
5017	--adapter->num_vlans;
5018	/* Change hw filter setting */
5019	if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
5020		igb_setup_vlan_hw_support(adapter);
5021	IGB_CORE_UNLOCK(adapter);
5022}
5023
5024static void
5025igb_setup_vlan_hw_support(struct adapter *adapter)
5026{
5027	struct e1000_hw *hw = &adapter->hw;
5028	struct ifnet	*ifp = adapter->ifp;
5029	u32             reg;
5030
5031	if (adapter->vf_ifp) {
5032		e1000_rlpml_set_vf(hw,
5033		    adapter->max_frame_size + VLAN_TAG_SIZE);
5034		return;
5035	}
5036
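	/* Enable VLAN tag handling (VME bit) in the device control register */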
5037	reg = E1000_READ_REG(hw, E1000_CTRL);
5038	reg |= E1000_CTRL_VME;
5039	E1000_WRITE_REG(hw, E1000_CTRL, reg);
5040
5041	/* Enable the Filter Table */
5042	if (ifp->if_capenable & IFCAP_VLAN_HWFILTER) {
5043		reg = E1000_READ_REG(hw, E1000_RCTL);
5044		reg &= ~E1000_RCTL_CFIEN;
5045		reg |= E1000_RCTL_VFE;
5046		E1000_WRITE_REG(hw, E1000_RCTL, reg);
5047	}
5048
5049	/* Update the frame size */
5050	E1000_WRITE_REG(&adapter->hw, E1000_RLPML,
5051	    adapter->max_frame_size + VLAN_TAG_SIZE);
5052
5053	/* Don't bother with table if no vlans */
5054	if ((adapter->num_vlans == 0) ||
5055	    ((ifp->if_capenable & IFCAP_VLAN_HWFILTER) == 0))
5056                return;
5057	/*
5058	** A soft reset zeroes out the VFTA, so
5059	** we need to repopulate it now.
5060	*/
5061	for (int i = 0; i < IGB_VFTA_SIZE; i++)
5062                if (adapter->shadow_vfta[i] != 0) {
5063			if (adapter->vf_ifp)
5064				e1000_vfta_set_vf(hw,
5065				    adapter->shadow_vfta[i], TRUE);
5066			else
5067				e1000_write_vfta(hw,
5068				    i, adapter->shadow_vfta[i]);
5069		}
5070}
5071
5072static void
5073igb_enable_intr(struct adapter *adapter)
5074{
5075	/* With RSS set up what to auto clear */
5076	if (adapter->msix_mem) {
5077		u32 mask = (adapter->que_mask | adapter->link_mask);
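		/*
		** Program the queue/link vector mask into the
		** auto-clear (EIAC), auto-mask (EIAM) and set (EIMS)
		** registers, and keep link status change interrupts
		** enabled via the legacy mask.
		*/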
5078		E1000_WRITE_REG(&adapter->hw, E1000_EIAC, mask);
5079		E1000_WRITE_REG(&adapter->hw, E1000_EIAM, mask);
5080		E1000_WRITE_REG(&adapter->hw, E1000_EIMS, mask);
5081		E1000_WRITE_REG(&adapter->hw, E1000_IMS,
5082		    E1000_IMS_LSC);
5083	} else {
5084		E1000_WRITE_REG(&adapter->hw, E1000_IMS,
5085		    IMS_ENABLE_MASK);
5086	}
5087	E1000_WRITE_FLUSH(&adapter->hw);
5088
5089	return;
5090}
5091
5092static void
5093igb_disable_intr(struct adapter *adapter)
5094{
5095	if (adapter->msix_mem) {
5096		E1000_WRITE_REG(&adapter->hw, E1000_EIMC, ~0);
5097		E1000_WRITE_REG(&adapter->hw, E1000_EIAC, 0);
5098	}
5099	E1000_WRITE_REG(&adapter->hw, E1000_IMC, ~0);
5100	E1000_WRITE_FLUSH(&adapter->hw);
5101	return;
5102}
5103
5104/*
5105 * Bit of a misnomer: what this really means is
5106 * to enable OS management of the system, i.e.,
5107 * to disable the special hardware management features.
5108 */
5109static void
5110igb_init_manageability(struct adapter *adapter)
5111{
5112	if (adapter->has_manage) {
5113		int manc2h = E1000_READ_REG(&adapter->hw, E1000_MANC2H);
5114		int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
5115
5116		/* disable hardware interception of ARP */
5117		manc &= ~(E1000_MANC_ARP_EN);
5118
5119                /* enable receiving management packets to the host */
5120		manc |= E1000_MANC_EN_MNG2HOST;
5121		manc2h |= 1 << 5;  /* Mng Port 623 */
5122		manc2h |= 1 << 6;  /* Mng Port 664 */
5123		E1000_WRITE_REG(&adapter->hw, E1000_MANC2H, manc2h);
5124		E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
5125	}
5126}
5127
5128/*
5129 * Give control back to hardware management
5130 * controller if there is one.
5131 */
5132static void
5133igb_release_manageability(struct adapter *adapter)
5134{
5135	if (adapter->has_manage) {
5136		int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
5137
5138		/* re-enable hardware interception of ARP */
5139		manc |= E1000_MANC_ARP_EN;
5140		manc &= ~E1000_MANC_EN_MNG2HOST;
5141
5142		E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
5143	}
5144}
5145
5146/*
5147 * igb_get_hw_control sets CTRL_EXT:DRV_LOAD bit.
5148 * For ASF and Pass Through versions of f/w this means that
5149 * the driver is loaded.
5150 *
5151 */
5152static void
5153igb_get_hw_control(struct adapter *adapter)
5154{
5155	u32 ctrl_ext;
5156
5157	if (adapter->vf_ifp)
5158		return;
5159
5160	/* Let firmware know the driver has taken over */
5161	ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
5162	E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
5163	    ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
5164}
5165
5166/*
5167 * igb_release_hw_control resets CTRL_EXT:DRV_LOAD bit.
5168 * For ASF and Pass Through versions of f/w this means that the
5169 * driver is no longer loaded.
5170 *
5171 */
5172static void
5173igb_release_hw_control(struct adapter *adapter)
5174{
5175	u32 ctrl_ext;
5176
5177	if (adapter->vf_ifp)
5178		return;
5179
5180	/* Let firmware take over control of h/w */
5181	ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
5182	E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
5183	    ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
5184}
5185
5186static int
5187igb_is_valid_ether_addr(uint8_t *addr)
5188{
5189	char zero_addr[6] = { 0, 0, 0, 0, 0, 0 };
5190
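	/* Reject multicast addresses (I/G bit set) and the all-zero address */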
5191	if ((addr[0] & 1) || (!bcmp(addr, zero_addr, ETHER_ADDR_LEN))) {
5192		return (FALSE);
5193	}
5194
5195	return (TRUE);
5196}
5197
5198
5199/*
5200 * Enable PCI Wake On Lan capability
5201 */
5202static void
5203igb_enable_wakeup(device_t dev)
5204{
5205	u16     cap, status;
5206	u8      id;
5207
5208	/* First find the capabilities pointer */
5209	cap = pci_read_config(dev, PCIR_CAP_PTR, 2);
5210	/* Read the PM Capabilities */
5211	id = pci_read_config(dev, cap, 1);
5212	if (id != PCIY_PMG)     /* Something wrong */
5213		return;
5214	/* OK, we have the power capabilities, so
5215	   now get the status register */
5216	cap += PCIR_POWER_STATUS;
5217	status = pci_read_config(dev, cap, 2);
5218	status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE;
5219	pci_write_config(dev, cap, status, 2);
5220	return;
5221}
5222
5223static void
5224igb_led_func(void *arg, int onoff)
5225{
5226	struct adapter	*adapter = arg;
5227
5228	IGB_CORE_LOCK(adapter);
5229	if (onoff) {
5230		e1000_setup_led(&adapter->hw);
5231		e1000_led_on(&adapter->hw);
5232	} else {
5233		e1000_led_off(&adapter->hw);
5234		e1000_cleanup_led(&adapter->hw);
5235	}
5236	IGB_CORE_UNLOCK(adapter);
5237}
5238
5239/**********************************************************************
5240 *
5241 *  Update the board statistics counters.
5242 *
5243 **********************************************************************/
5244static void
5245igb_update_stats_counters(struct adapter *adapter)
5246{
5247	struct ifnet		*ifp;
5248        struct e1000_hw		*hw = &adapter->hw;
5249	struct e1000_hw_stats	*stats;
5250
5251	/*
5252	** The virtual function adapter has only a
5253	** small controlled set of stats; do only
5254	** those and return.
5255	*/
5256	if (adapter->vf_ifp) {
5257		igb_update_vf_stats_counters(adapter);
5258		return;
5259	}
5260
5261	stats = (struct e1000_hw_stats	*)adapter->stats;
5262
5263	if (adapter->hw.phy.media_type == e1000_media_type_copper ||
5264	   (E1000_READ_REG(hw, E1000_STATUS) & E1000_STATUS_LU)) {
5265		stats->symerrs +=
5266		    E1000_READ_REG(hw, E1000_SYMERRS);
5267		stats->sec += E1000_READ_REG(hw, E1000_SEC);
5268	}
5269
5270	stats->crcerrs += E1000_READ_REG(hw, E1000_CRCERRS);
5271	stats->mpc += E1000_READ_REG(hw, E1000_MPC);
5272	stats->scc += E1000_READ_REG(hw, E1000_SCC);
5273	stats->ecol += E1000_READ_REG(hw, E1000_ECOL);
5274
5275	stats->mcc += E1000_READ_REG(hw, E1000_MCC);
5276	stats->latecol += E1000_READ_REG(hw, E1000_LATECOL);
5277	stats->colc += E1000_READ_REG(hw, E1000_COLC);
5278	stats->dc += E1000_READ_REG(hw, E1000_DC);
5279	stats->rlec += E1000_READ_REG(hw, E1000_RLEC);
5280	stats->xonrxc += E1000_READ_REG(hw, E1000_XONRXC);
5281	stats->xontxc += E1000_READ_REG(hw, E1000_XONTXC);
5282	/*
5283	** For watchdog management we need to know if we have been
5284	** paused during the last interval, so capture that here.
5285	*/
5286        adapter->pause_frames = E1000_READ_REG(&adapter->hw, E1000_XOFFRXC);
5287        stats->xoffrxc += adapter->pause_frames;
5288	stats->xofftxc += E1000_READ_REG(hw, E1000_XOFFTXC);
5289	stats->fcruc += E1000_READ_REG(hw, E1000_FCRUC);
5290	stats->prc64 += E1000_READ_REG(hw, E1000_PRC64);
5291	stats->prc127 += E1000_READ_REG(hw, E1000_PRC127);
5292	stats->prc255 += E1000_READ_REG(hw, E1000_PRC255);
5293	stats->prc511 += E1000_READ_REG(hw, E1000_PRC511);
5294	stats->prc1023 += E1000_READ_REG(hw, E1000_PRC1023);
5295	stats->prc1522 += E1000_READ_REG(hw, E1000_PRC1522);
5296	stats->gprc += E1000_READ_REG(hw, E1000_GPRC);
5297	stats->bprc += E1000_READ_REG(hw, E1000_BPRC);
5298	stats->mprc += E1000_READ_REG(hw, E1000_MPRC);
5299	stats->gptc += E1000_READ_REG(hw, E1000_GPTC);
5300
5301	/* For the 64-bit byte counters the low dword must be read first. */
5302	/* Both registers clear on the read of the high dword */
5303
5304	stats->gorc += E1000_READ_REG(hw, E1000_GORCL) +
5305	    ((u64)E1000_READ_REG(hw, E1000_GORCH) << 32);
5306	stats->gotc += E1000_READ_REG(hw, E1000_GOTCL) +
5307	    ((u64)E1000_READ_REG(hw, E1000_GOTCH) << 32);
5308
5309	stats->rnbc += E1000_READ_REG(hw, E1000_RNBC);
5310	stats->ruc += E1000_READ_REG(hw, E1000_RUC);
5311	stats->rfc += E1000_READ_REG(hw, E1000_RFC);
5312	stats->roc += E1000_READ_REG(hw, E1000_ROC);
5313	stats->rjc += E1000_READ_REG(hw, E1000_RJC);
5314
5315	stats->tor += E1000_READ_REG(hw, E1000_TORH);
5316	stats->tot += E1000_READ_REG(hw, E1000_TOTH);
5317
5318	stats->tpr += E1000_READ_REG(hw, E1000_TPR);
5319	stats->tpt += E1000_READ_REG(hw, E1000_TPT);
5320	stats->ptc64 += E1000_READ_REG(hw, E1000_PTC64);
5321	stats->ptc127 += E1000_READ_REG(hw, E1000_PTC127);
5322	stats->ptc255 += E1000_READ_REG(hw, E1000_PTC255);
5323	stats->ptc511 += E1000_READ_REG(hw, E1000_PTC511);
5324	stats->ptc1023 += E1000_READ_REG(hw, E1000_PTC1023);
5325	stats->ptc1522 += E1000_READ_REG(hw, E1000_PTC1522);
5326	stats->mptc += E1000_READ_REG(hw, E1000_MPTC);
5327	stats->bptc += E1000_READ_REG(hw, E1000_BPTC);
5328
5329	/* Interrupt Counts */
5330
5331	stats->iac += E1000_READ_REG(hw, E1000_IAC);
5332	stats->icrxptc += E1000_READ_REG(hw, E1000_ICRXPTC);
5333	stats->icrxatc += E1000_READ_REG(hw, E1000_ICRXATC);
5334	stats->ictxptc += E1000_READ_REG(hw, E1000_ICTXPTC);
5335	stats->ictxatc += E1000_READ_REG(hw, E1000_ICTXATC);
5336	stats->ictxqec += E1000_READ_REG(hw, E1000_ICTXQEC);
5337	stats->ictxqmtc += E1000_READ_REG(hw, E1000_ICTXQMTC);
5338	stats->icrxdmtc += E1000_READ_REG(hw, E1000_ICRXDMTC);
5339	stats->icrxoc += E1000_READ_REG(hw, E1000_ICRXOC);
5340
5341	/* Host to Card Statistics */
5342
5343	stats->cbtmpc += E1000_READ_REG(hw, E1000_CBTMPC);
5344	stats->htdpmc += E1000_READ_REG(hw, E1000_HTDPMC);
5345	stats->cbrdpc += E1000_READ_REG(hw, E1000_CBRDPC);
5346	stats->cbrmpc += E1000_READ_REG(hw, E1000_CBRMPC);
5347	stats->rpthc += E1000_READ_REG(hw, E1000_RPTHC);
5348	stats->hgptc += E1000_READ_REG(hw, E1000_HGPTC);
5349	stats->htcbdpc += E1000_READ_REG(hw, E1000_HTCBDPC);
5350	stats->hgorc += (E1000_READ_REG(hw, E1000_HGORCL) +
5351	    ((u64)E1000_READ_REG(hw, E1000_HGORCH) << 32));
5352	stats->hgotc += (E1000_READ_REG(hw, E1000_HGOTCL) +
5353	    ((u64)E1000_READ_REG(hw, E1000_HGOTCH) << 32));
5354	stats->lenerrs += E1000_READ_REG(hw, E1000_LENERRS);
5355	stats->scvpc += E1000_READ_REG(hw, E1000_SCVPC);
5356	stats->hrmpc += E1000_READ_REG(hw, E1000_HRMPC);
5357
5358	stats->algnerrc += E1000_READ_REG(hw, E1000_ALGNERRC);
5359	stats->rxerrc += E1000_READ_REG(hw, E1000_RXERRC);
5360	stats->tncrs += E1000_READ_REG(hw, E1000_TNCRS);
5361	stats->cexterr += E1000_READ_REG(hw, E1000_CEXTERR);
5362	stats->tsctc += E1000_READ_REG(hw, E1000_TSCTC);
5363	stats->tsctfc += E1000_READ_REG(hw, E1000_TSCTFC);
5364
5365	ifp = adapter->ifp;
5366	ifp->if_collisions = stats->colc;
5367
5368	/* Rx Errors */
5369	ifp->if_ierrors = adapter->dropped_pkts + stats->rxerrc +
5370	    stats->crcerrs + stats->algnerrc +
5371	    stats->ruc + stats->roc + stats->mpc + stats->cexterr;
5372
5373	/* Tx Errors */
5374	ifp->if_oerrors = stats->ecol +
5375	    stats->latecol + adapter->watchdog_events;
5376
5377	/* Driver specific counters */
5378	adapter->device_control = E1000_READ_REG(hw, E1000_CTRL);
5379	adapter->rx_control = E1000_READ_REG(hw, E1000_RCTL);
5380	adapter->int_mask = E1000_READ_REG(hw, E1000_IMS);
5381	adapter->eint_mask = E1000_READ_REG(hw, E1000_EIMS);
5382	adapter->packet_buf_alloc_tx =
5383	    ((E1000_READ_REG(hw, E1000_PBA) & 0xffff0000) >> 16);
5384	adapter->packet_buf_alloc_rx =
5385	    (E1000_READ_REG(hw, E1000_PBA) & 0xffff);
5386}
5387
5388
5389/**********************************************************************
5390 *
5391 *  Initialize the VF board statistics counters.
5392 *
5393 **********************************************************************/
5394static void
5395igb_vf_init_stats(struct adapter *adapter)
5396{
5397	struct e1000_hw *hw = &adapter->hw;
5398	struct e1000_vf_stats	*stats;
5399
5400	stats = (struct e1000_vf_stats	*)adapter->stats;
5401	if (stats == NULL)
5402		return;
5403	stats->last_gprc = E1000_READ_REG(hw, E1000_VFGPRC);
5404	stats->last_gorc = E1000_READ_REG(hw, E1000_VFGORC);
5405	stats->last_gptc = E1000_READ_REG(hw, E1000_VFGPTC);
5406	stats->last_gotc = E1000_READ_REG(hw, E1000_VFGOTC);
5407	stats->last_mprc = E1000_READ_REG(hw, E1000_VFMPRC);
5408}
5409
5410/**********************************************************************
5411 *
5412 *  Update the VF board statistics counters.
5413 *
5414 **********************************************************************/
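/*
** UPDATE_VF_REG() is defined elsewhere in the driver (presumably
** if_igb.h).  The sketch below is an assumption about its behaviour,
** not the verbatim macro: read the 32-bit VF register, account for
** wraparound since the previous reading, and fold the result into the
** 64-bit software counter.
**
**	#define UPDATE_VF_REG(reg, last, cur)			\
**	{							\
**		u32 new = E1000_READ_REG(hw, reg);		\
**		if (new < (last))				\
**			(cur) += 0x100000000LL;			\
**		(last) = new;					\
**		(cur) &= 0xFFFFFFFF00000000LL;			\
**		(cur) |= new;					\
**	}
*/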
5415static void
5416igb_update_vf_stats_counters(struct adapter *adapter)
5417{
5418	struct e1000_hw *hw = &adapter->hw;
5419	struct e1000_vf_stats	*stats;
5420
5421	if (adapter->link_speed == 0)
5422		return;
5423
5424	stats = (struct e1000_vf_stats	*)adapter->stats;
5425
5426	UPDATE_VF_REG(E1000_VFGPRC,
5427	    stats->last_gprc, stats->gprc);
5428	UPDATE_VF_REG(E1000_VFGORC,
5429	    stats->last_gorc, stats->gorc);
5430	UPDATE_VF_REG(E1000_VFGPTC,
5431	    stats->last_gptc, stats->gptc);
5432	UPDATE_VF_REG(E1000_VFGOTC,
5433	    stats->last_gotc, stats->gotc);
5434	UPDATE_VF_REG(E1000_VFMPRC,
5435	    stats->last_mprc, stats->mprc);
5436}
5437
5438/* Export a single 32-bit register via a read-only sysctl. */
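/*
** oid_arg1 is expected to point at the adapter and oid_arg2 to hold
** the register offset; the SYSCTL_ADD_PROC() calls in
** igb_add_hw_stats() below (txd_head, txd_tail, rxd_head, rxd_tail)
** set them up that way.
*/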
5439static int
5440igb_sysctl_reg_handler(SYSCTL_HANDLER_ARGS)
5441{
5442	struct adapter *adapter;
5443	u_int val;
5444
5445	adapter = oidp->oid_arg1;
5446	val = E1000_READ_REG(&adapter->hw, oidp->oid_arg2);
5447	return (sysctl_handle_int(oidp, &val, 0, req));
5448}
5449
5450/*
5451**  Tuneable interrupt rate handler
5452*/
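/*
** Example (per-queue nodes are created in igb_add_hw_stats() below;
** unit and value shown are illustrative):
**
**	# sysctl dev.igb.0.queue0.interrupt_rate
**	dev.igb.0.queue0.interrupt_rate: 8000
**
** The rate is derived from the queue's EITR register; a write is
** accepted by sysctl_handle_int() but the handler does not program
** the new rate back into EITR.
*/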
5453static int
5454igb_sysctl_interrupt_rate_handler(SYSCTL_HANDLER_ARGS)
5455{
5456	struct igb_queue	*que = ((struct igb_queue *)oidp->oid_arg1);
5457	int			error;
5458	u32			reg, usec, rate;
5459
5460	reg = E1000_READ_REG(&que->adapter->hw, E1000_EITR(que->msix));
5461	usec = ((reg & 0x7FFC) >> 2);
5462	if (usec > 0)
5463		rate = 1000000 / usec;
5464	else
5465		rate = 0;
5466	error = sysctl_handle_int(oidp, &rate, 0, req);
5467	if (error || !req->newptr)
5468		return (error);
5469	return (0);
5470}
5471
5472/*
5473 * Add sysctl variables, one per statistic, to the system.
5474 */
5475static void
5476igb_add_hw_stats(struct adapter *adapter)
5477{
5478	device_t dev = adapter->dev;
5479
5480	struct tx_ring *txr = adapter->tx_rings;
5481	struct rx_ring *rxr = adapter->rx_rings;
5482
5483	struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev);
5484	struct sysctl_oid *tree = device_get_sysctl_tree(dev);
5485	struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree);
5486	struct e1000_hw_stats *stats = adapter->stats;
5487
5488	struct sysctl_oid *stat_node, *queue_node, *int_node, *host_node;
5489	struct sysctl_oid_list *stat_list, *queue_list, *int_list, *host_list;
5490
5491#define QUEUE_NAME_LEN 32
5492	char namebuf[QUEUE_NAME_LEN];
5493
5494	/* Driver Statistics */
5495	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "link_irq",
5496			CTLFLAG_RD, &adapter->link_irq, 0,
5497			"Link MSIX IRQ Handled");
5498	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "dropped",
5499			CTLFLAG_RD, &adapter->dropped_pkts,
5500			"Driver dropped packets");
5501	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_dma_fail",
5502			CTLFLAG_RD, &adapter->no_tx_dma_setup,
5503			"Driver tx dma failure in xmit");
5504	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_overruns",
5505			CTLFLAG_RD, &adapter->rx_overruns,
5506			"RX overruns");
5507	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "watchdog_timeouts",
5508			CTLFLAG_RD, &adapter->watchdog_events,
5509			"Watchdog timeouts");
5510
5511	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "device_control",
5512			CTLFLAG_RD, &adapter->device_control,
5513			"Device Control Register");
5514	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_control",
5515			CTLFLAG_RD, &adapter->rx_control,
5516			"Receiver Control Register");
5517	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "interrupt_mask",
5518			CTLFLAG_RD, &adapter->int_mask,
5519			"Interrupt Mask");
5520	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "extended_int_mask",
5521			CTLFLAG_RD, &adapter->eint_mask,
5522			"Extended Interrupt Mask");
5523	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_buf_alloc",
5524			CTLFLAG_RD, &adapter->packet_buf_alloc_tx,
5525			"Transmit Buffer Packet Allocation");
5526	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_buf_alloc",
5527			CTLFLAG_RD, &adapter->packet_buf_alloc_rx,
5528			"Receive Buffer Packet Allocation");
5529	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_high_water",
5530			CTLFLAG_RD, &adapter->hw.fc.high_water, 0,
5531			"Flow Control High Watermark");
5532	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_low_water",
5533			CTLFLAG_RD, &adapter->hw.fc.low_water, 0,
5534			"Flow Control Low Watermark");
5535
5536	for (int i = 0; i < adapter->num_queues; i++, rxr++, txr++) {
5537		struct lro_ctrl *lro = &rxr->lro;
5538
5539		snprintf(namebuf, QUEUE_NAME_LEN, "queue%d", i);
5540		queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
5541					    CTLFLAG_RD, NULL, "Queue Name");
5542		queue_list = SYSCTL_CHILDREN(queue_node);
5543
5544		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "interrupt_rate",
5545				CTLFLAG_RD, &adapter->queues[i],
5546				sizeof(adapter->queues[i]),
5547				igb_sysctl_interrupt_rate_handler,
5548				"IU", "Interrupt Rate");
5549
5550		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_head",
5551				CTLFLAG_RD, adapter, E1000_TDH(txr->me),
5552				igb_sysctl_reg_handler, "IU",
5553 				"Transmit Descriptor Head");
5554		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_tail",
5555				CTLFLAG_RD, adapter, E1000_TDT(txr->me),
5556				igb_sysctl_reg_handler, "IU",
5557 				"Transmit Descriptor Tail");
5558		SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "no_desc_avail",
5559				CTLFLAG_RD, &txr->no_desc_avail,
5560				"Queue No Descriptor Available");
5561		SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "tx_packets",
5562				CTLFLAG_RD, &txr->tx_packets,
5563				"Queue Packets Transmitted");
5564
5565		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_head",
5566				CTLFLAG_RD, adapter, E1000_RDH(rxr->me),
5567				igb_sysctl_reg_handler, "IU",
5568				"Receive Descriptor Head");
5569		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_tail",
5570				CTLFLAG_RD, adapter, E1000_RDT(rxr->me),
5571				igb_sysctl_reg_handler, "IU",
5572				"Receive Descriptor Tail");
5573		SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "rx_packets",
5574				CTLFLAG_RD, &rxr->rx_packets,
5575				"Queue Packets Received");
5576		SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "rx_bytes",
5577				CTLFLAG_RD, &rxr->rx_bytes,
5578				"Queue Bytes Received");
5579		SYSCTL_ADD_UINT(ctx, queue_list, OID_AUTO, "lro_queued",
5580				CTLFLAG_RD, &lro->lro_queued, 0,
5581				"LRO Queued");
5582		SYSCTL_ADD_UINT(ctx, queue_list, OID_AUTO, "lro_flushed",
5583				CTLFLAG_RD, &lro->lro_flushed, 0,
5584				"LRO Flushed");
5585	}
5586
5587	/* MAC stats get their own sub node */
5588
5589	stat_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "mac_stats",
5590				    CTLFLAG_RD, NULL, "MAC Statistics");
5591	stat_list = SYSCTL_CHILDREN(stat_node);
5592
5593	/*
5594	** VF adapter has a very limited set of stats
5595	** since its not managing the metal, so to speak.
5596	*/
5597	if (adapter->vf_ifp) {
5598		SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd",
5599				CTLFLAG_RD, &stats->gprc,
5600				"Good Packets Received");
5601		SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
5602				CTLFLAG_RD, &stats->gptc,
5603				"Good Packets Transmitted");
5604		SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd",
5605				CTLFLAG_RD, &stats->gorc,
5606				"Good Octets Received");
5607		SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_txd",
5608				CTLFLAG_RD, &stats->gotc,
5609				"Good Octets Transmitted");
5610		SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd",
5611				CTLFLAG_RD, &stats->mprc,
5612				"Multicast Packets Received");
5613		return;
5614	}
5615
5616	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "excess_coll",
5617			CTLFLAG_RD, &stats->ecol,
5618			"Excessive collisions");
5619	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "single_coll",
5620			CTLFLAG_RD, &stats->scc,
5621			"Single collisions");
5622	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "multiple_coll",
5623			CTLFLAG_RD, &stats->mcc,
5624			"Multiple collisions");
5625	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "late_coll",
5626			CTLFLAG_RD, &stats->latecol,
5627			"Late collisions");
5628	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "collision_count",
5629			CTLFLAG_RD, &stats->colc,
5630			"Collision Count");
5631	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "symbol_errors",
5632			CTLFLAG_RD, &stats->symerrs,
5633			"Symbol Errors");
5634	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "sequence_errors",
5635			CTLFLAG_RD, &stats->sec,
5636			"Sequence Errors");
5637	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "defer_count",
5638			CTLFLAG_RD, &stats->dc,
5639			"Defer Count");
5640	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "missed_packets",
5641			CTLFLAG_RD, &stats->mpc,
5642			"Missed Packets");
5643	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_no_buff",
5644			CTLFLAG_RD, &stats->rnbc,
5645			"Receive No Buffers");
5646	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_undersize",
5647			CTLFLAG_RD, &stats->ruc,
5648			"Receive Undersize");
5649	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_fragmented",
5650			CTLFLAG_RD, &stats->rfc,
5651			"Fragmented Packets Received");
5652	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_oversize",
5653			CTLFLAG_RD, &stats->roc,
5654			"Oversized Packets Received");
5655	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_jabber",
5656			CTLFLAG_RD, &stats->rjc,
5657			"Received Jabber");
5658	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_errs",
5659			CTLFLAG_RD, &stats->rxerrc,
5660			"Receive Errors");
5661	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "crc_errs",
5662			CTLFLAG_RD, &stats->crcerrs,
5663			"CRC errors");
5664	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "alignment_errs",
5665			CTLFLAG_RD, &stats->algnerrc,
5666			"Alignment Errors");
5667	/* On 82575 these are collision counts */
5668	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "coll_ext_errs",
5669			CTLFLAG_RD, &stats->cexterr,
5670			"Collision/Carrier extension errors");
5671	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xon_recvd",
5672			CTLFLAG_RD, &stats->xonrxc,
5673			"XON Received");
5674	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xon_txd",
5675			CTLFLAG_RD, &stats->xontxc,
5676			"XON Transmitted");
5677	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xoff_recvd",
5678			CTLFLAG_RD, &stats->xoffrxc,
5679			"XOFF Received");
5680	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xoff_txd",
5681			CTLFLAG_RD, &stats->xofftxc,
5682			"XOFF Transmitted");
5683	/* Packet Reception Stats */
5684	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "total_pkts_recvd",
5685			CTLFLAG_RD, &stats->tpr,
5686			"Total Packets Received");
5687	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd",
5688			CTLFLAG_RD, &stats->gprc,
5689			"Good Packets Received");
5690	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_recvd",
5691			CTLFLAG_RD, &stats->bprc,
5692			"Broadcast Packets Received");
5693	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd",
5694			CTLFLAG_RD, &stats->mprc,
5695			"Multicast Packets Received");
5696	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_64",
5697			CTLFLAG_RD, &stats->prc64,
5698			"64 byte frames received");
5699	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_65_127",
5700			CTLFLAG_RD, &stats->prc127,
5701			"65-127 byte frames received");
5702	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_128_255",
5703			CTLFLAG_RD, &stats->prc255,
5704			"128-255 byte frames received");
5705	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_256_511",
5706			CTLFLAG_RD, &stats->prc511,
5707			"256-511 byte frames received");
5708	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_512_1023",
5709			CTLFLAG_RD, &stats->prc1023,
5710			"512-1023 byte frames received");
5711	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_1024_1522",
5712			CTLFLAG_RD, &stats->prc1522,
5713			"1024-1522 byte frames received");
5714 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd",
5715 			CTLFLAG_RD, &stats->gorc,
5716 			"Good Octets Received");
5717
5718	/* Packet Transmission Stats */
5719 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_txd",
5720 			CTLFLAG_RD, &stats->gotc,
5721 			"Good Octets Transmitted");
5722	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "total_pkts_txd",
5723			CTLFLAG_RD, &stats->tpt,
5724			"Total Packets Transmitted");
5725	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
5726			CTLFLAG_RD, &stats->gptc,
5727			"Good Packets Transmitted");
5728	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_txd",
5729			CTLFLAG_RD, &stats->bptc,
5730			"Broadcast Packets Transmitted");
5731	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_txd",
5732			CTLFLAG_RD, &stats->mptc,
5733			"Multicast Packets Transmitted");
5734	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_64",
5735			CTLFLAG_RD, &stats->ptc64,
5736			"64 byte frames transmitted");
5737	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_65_127",
5738			CTLFLAG_RD, &stats->ptc127,
5739			"65-127 byte frames transmitted");
5740	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_128_255",
5741			CTLFLAG_RD, &stats->ptc255,
5742			"128-255 byte frames transmitted");
5743	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_256_511",
5744			CTLFLAG_RD, &stats->ptc511,
5745			"256-511 byte frames transmitted");
5746	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_512_1023",
5747			CTLFLAG_RD, &stats->ptc1023,
5748			"512-1023 byte frames transmitted");
5749	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_1024_1522",
5750			CTLFLAG_RD, &stats->ptc1522,
5751			"1024-1522 byte frames transmitted");
5752	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tso_txd",
5753			CTLFLAG_RD, &stats->tsctc,
5754			"TSO Contexts Transmitted");
5755	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tso_ctx_fail",
5756			CTLFLAG_RD, &stats->tsctfc,
5757			"TSO Contexts Failed");
5758
5759
5760	/* Interrupt Stats */
5761
5762	int_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "interrupts",
5763				    CTLFLAG_RD, NULL, "Interrupt Statistics");
5764	int_list = SYSCTL_CHILDREN(int_node);
5765
5766	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "asserts",
5767			CTLFLAG_RD, &stats->iac,
5768			"Interrupt Assertion Count");
5769
5770	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_pkt_timer",
5771			CTLFLAG_RD, &stats->icrxptc,
5772			"Interrupt Cause Rx Pkt Timer Expire Count");
5773
5774	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_abs_timer",
5775			CTLFLAG_RD, &stats->icrxatc,
5776			"Interrupt Cause Rx Abs Timer Expire Count");
5777
5778	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_pkt_timer",
5779			CTLFLAG_RD, &stats->ictxptc,
5780			"Interrupt Cause Tx Pkt Timer Expire Count");
5781
5782	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_abs_timer",
5783			CTLFLAG_RD, &stats->ictxatc,
5784			"Interrupt Cause Tx Abs Timer Expire Count");
5785
5786	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_queue_empty",
5787			CTLFLAG_RD, &stats->ictxqec,
5788			"Interrupt Cause Tx Queue Empty Count");
5789
5790	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_queue_min_thresh",
5791			CTLFLAG_RD, &stats->ictxqmtc,
5792			"Interrupt Cause Tx Queue Min Thresh Count");
5793
5794	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_desc_min_thresh",
5795			CTLFLAG_RD, &stats->icrxdmtc,
5796			"Interrupt Cause Rx Desc Min Thresh Count");
5797
5798	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_overrun",
5799			CTLFLAG_RD, &stats->icrxoc,
5800			"Interrupt Cause Receiver Overrun Count");
5801
5802	/* Host to Card Stats */
5803
5804	host_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "host",
5805				    CTLFLAG_RD, NULL,
5806				    "Host to Card Statistics");
5807
5808	host_list = SYSCTL_CHILDREN(host_node);
5809
5810	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_tx_pkt",
5811			CTLFLAG_RD, &stats->cbtmpc,
5812			"Circuit Breaker Tx Packet Count");
5813
5814	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "host_tx_pkt_discard",
5815			CTLFLAG_RD, &stats->htdpmc,
5816			"Host Transmit Discarded Packets");
5817
5818	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "rx_pkt",
5819			CTLFLAG_RD, &stats->rpthc,
5820			"Rx Packets To Host");
5821
5822	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_rx_pkts",
5823			CTLFLAG_RD, &stats->cbrmpc,
5824			"Circuit Breaker Rx Packet Count");
5825
5826	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_rx_pkt_drop",
5827			CTLFLAG_RD, &stats->cbrdpc,
5828			"Circuit Breaker Rx Dropped Count");
5829
5830	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "tx_good_pkt",
5831			CTLFLAG_RD, &stats->hgptc,
5832			"Host Good Packets Tx Count");
5833
5834	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_tx_pkt_drop",
5835			CTLFLAG_RD, &stats->htcbdpc,
5836			"Host Tx Circuit Breaker Dropped Count");
5837
5838	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "rx_good_bytes",
5839			CTLFLAG_RD, &stats->hgorc,
5840			"Host Good Octets Received Count");
5841
5842	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "tx_good_bytes",
5843			CTLFLAG_RD, &stats->hgotc,
5844			"Host Good Octets Transmit Count");
5845
5846	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "length_errors",
5847			CTLFLAG_RD, &stats->lenerrs,
5848			"Length Errors");
5849
5850	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "serdes_violation_pkt",
5851			CTLFLAG_RD, &stats->scvpc,
5852			"SerDes/SGMII Code Violation Pkt Count");
5853
5854	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "header_redir_missed",
5855			CTLFLAG_RD, &stats->hrmpc,
5856			"Header Redirection Missed Packet Count");
5857}
5858
5859
5860/**********************************************************************
5861 *
5862 *  This routine provides a way to dump out the adapter eeprom,
5863 *  often a useful debug/service tool. Only the first 32 words are
5864 *  dumped, as the fields of interest fall within that range.
5865 *
5866 **********************************************************************/
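/*
 * Example (the node name is assumed; the handler is attached to a
 * read/write integer sysctl elsewhere in the driver): writing the
 * value 1 dumps the first 32 EEPROM words to the console via printf():
 *
 *	# sysctl dev.igb.0.nvm=1
 */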
5867static int
5868igb_sysctl_nvm_info(SYSCTL_HANDLER_ARGS)
5869{
5870	struct adapter *adapter;
5871	int error;
5872	int result;
5873
5874	result = -1;
5875	error = sysctl_handle_int(oidp, &result, 0, req);
5876
5877	if (error || !req->newptr)
5878		return (error);
5879
5880	/*
5881	 * This value will cause a hex dump of the
5882	 * first 32 16-bit words of the EEPROM to
5883	 * the screen.
5884	 */
5885	if (result == 1) {
5886		adapter = (struct adapter *)arg1;
5887		igb_print_nvm_info(adapter);
5888	}
5889
5890	return (error);
5891}
5892
5893static void
5894igb_print_nvm_info(struct adapter *adapter)
5895{
5896	u16	eeprom_data;
5897	int	i, j, row = 0;
5898
5899	/* It's a bit crude, but it gets the job done */
5900	printf("\nInterface EEPROM Dump:\n");
5901	printf("Offset\n0x0000  ");
5902	for (i = 0, j = 0; i < 32; i++, j++) {
5903		if (j == 8) { /* Make the offset block */
5904			j = 0; ++row;
5905			printf("\n0x00%x0  ", row);
5906		}
5907		e1000_read_nvm(&adapter->hw, i, 1, &eeprom_data);
5908		printf("%04x ", eeprom_data);
5909	}
5910	printf("\n");
5911}
5912
5913static void
5914igb_set_sysctl_value(struct adapter *adapter, const char *name,
5915	const char *description, int *limit, int value)
5916{
5917	*limit = value;
5918	SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
5919	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
5920	    OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW, limit, value, description);
5921}
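/*
** Example of how the helper above might be used (the node name, text
** and backing field below are illustrative, not taken from this file):
**
**	igb_set_sysctl_value(adapter, "rx_processing_limit",
**	    "max number of rx packets to process",
**	    &adapter->rx_process_limit, 100);
**
** This publishes a read/write integer node under dev.igb.<unit> backed
** by the given limit variable.
*/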
5922
5923/*
5924** Set flow control using sysctl:
5925** Flow control values:
5926** 	0 - off
5927**	1 - rx pause
5928**	2 - tx pause
5929**	3 - full
5930*/
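/*
** Example (the node name is assumed; the handler is attached to a
** read/write integer sysctl elsewhere in the driver):
**
**	# sysctl dev.igb.0.fc=3
**
** A value of 0-3 selects one of the e1000_fc_* modes above; the new
** mode is applied immediately via e1000_force_mac_fc().
*/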
5931static int
5932igb_set_flowcntl(SYSCTL_HANDLER_ARGS)
5933{
5934	int		error;
5935	static int	input = 3; /* default is full */
5936	struct adapter	*adapter = (struct adapter *) arg1;
5937
5938	error = sysctl_handle_int(oidp, &input, 0, req);
5939
5940	if ((error) || (req->newptr == NULL))
5941		return (error);
5942
5943	switch (input) {
5944		case e1000_fc_rx_pause:
5945		case e1000_fc_tx_pause:
5946		case e1000_fc_full:
5947		case e1000_fc_none:
5948			adapter->hw.fc.requested_mode = input;
5949			adapter->fc = input;
5950			break;
5951		default:
5952			/* Do nothing */
5953			return (error);
5954	}
5955
5956	adapter->hw.fc.current_mode = adapter->hw.fc.requested_mode;
5957	e1000_force_mac_fc(&adapter->hw);
5958	return (error);
5959}
5960
5961/*
5962** Manage DMA Coalesce:
5963** Control values:
5964** 	0/1 - off/on
5965	**	Legal timer values are:
5966	**	250, 500, or 1000-10000 in steps of 1000
5967*/
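/*
** Example (the node name is assumed): writing a legal timer value
** enables DMA coalescing and reinitializes the interface, writing 0
** turns it back off:
**
**	# sysctl dev.igb.0.dmac=1000
*/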
5968static int
5969igb_sysctl_dmac(SYSCTL_HANDLER_ARGS)
5970{
5971	struct adapter *adapter = (struct adapter *) arg1;
5972	int		error;
5973
5974	error = sysctl_handle_int(oidp, &adapter->dmac, 0, req);
5975
5976	if ((error) || (req->newptr == NULL))
5977		return (error);
5978
5979	switch (adapter->dmac) {
5980		case 0:
5981			/* Disabling */
5982			break;
5983		case 1: /* Just enable and use default */
5984			adapter->dmac = 1000;
5985			break;
5986		case 250:
5987		case 500:
5988		case 1000:
5989		case 2000:
5990		case 3000:
5991		case 4000:
5992		case 5000:
5993		case 6000:
5994		case 7000:
5995		case 8000:
5996		case 9000:
5997		case 10000:
5998			/* Legal values - allow */
5999			break;
6000		default:
6001			/* Do nothing, illegal value */
6002			adapter->dmac = 0;
6003			return (error);
6004	}
6005	/* Reinit the interface */
6006	igb_init(adapter);
6007	return (error);
6008}
6009
6010/*
6011** Manage Energy Efficient Ethernet:
6012** Control values:
6013	**	0 - EEE enabled, 1 - EEE disabled
6014*/
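/*
** Example (the node name is assumed): writing a non-zero value sets
** eee_disable and reinitializes the interface under the core lock:
**
**	# sysctl dev.igb.0.eee_disabled=1
*/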
6015static int
6016igb_sysctl_eee(SYSCTL_HANDLER_ARGS)
6017{
6018	struct adapter	*adapter = (struct adapter *) arg1;
6019	int		error, value;
6020
6021	value = adapter->hw.dev_spec._82575.eee_disable;
6022	error = sysctl_handle_int(oidp, &value, 0, req);
6023	if (error || req->newptr == NULL)
6024		return (error);
6025	IGB_CORE_LOCK(adapter);
6026	adapter->hw.dev_spec._82575.eee_disable = (value != 0);
6027	igb_init_locked(adapter);
6028	IGB_CORE_UNLOCK(adapter);
6029	return (0);
6030}
6031