if_igb.c revision 254264
1/******************************************************************************
2
3  Copyright (c) 2001-2013, Intel Corporation
4  All rights reserved.
5
6  Redistribution and use in source and binary forms, with or without
7  modification, are permitted provided that the following conditions are met:
8
9   1. Redistributions of source code must retain the above copyright notice,
10      this list of conditions and the following disclaimer.
11
12   2. Redistributions in binary form must reproduce the above copyright
13      notice, this list of conditions and the following disclaimer in the
14      documentation and/or other materials provided with the distribution.
15
16   3. Neither the name of the Intel Corporation nor the names of its
17      contributors may be used to endorse or promote products derived from
18      this software without specific prior written permission.
19
20  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
24  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30  POSSIBILITY OF SUCH DAMAGE.
31
32******************************************************************************/
33/*$FreeBSD: head/sys/dev/e1000/if_igb.c 254264 2013-08-13 00:25:39Z jfv $*/
34
35
36#include "opt_inet.h"
37#include "opt_inet6.h"
38
39#ifdef HAVE_KERNEL_OPTION_HEADERS
40#include "opt_device_polling.h"
41#include "opt_altq.h"
42#endif
43
44#include <sys/param.h>
45#include <sys/systm.h>
46#ifndef IGB_LEGACY_TX
47#include <sys/buf_ring.h>
48#endif
49#include <sys/bus.h>
50#include <sys/endian.h>
51#include <sys/kernel.h>
52#include <sys/kthread.h>
53#include <sys/malloc.h>
54#include <sys/mbuf.h>
55#include <sys/module.h>
56#include <sys/rman.h>
57#include <sys/socket.h>
58#include <sys/sockio.h>
59#include <sys/sysctl.h>
60#include <sys/taskqueue.h>
61#include <sys/eventhandler.h>
62#include <sys/pcpu.h>
63#include <sys/smp.h>
64#include <machine/smp.h>
65#include <machine/bus.h>
66#include <machine/resource.h>
67
68#include <net/bpf.h>
69#include <net/ethernet.h>
70#include <net/if.h>
71#include <net/if_arp.h>
72#include <net/if_dl.h>
73#include <net/if_media.h>
74
75#include <net/if_types.h>
76#include <net/if_vlan_var.h>
77
78#include <netinet/in_systm.h>
79#include <netinet/in.h>
80#include <netinet/if_ether.h>
81#include <netinet/ip.h>
82#include <netinet/ip6.h>
83#include <netinet/tcp.h>
84#include <netinet/tcp_lro.h>
85#include <netinet/udp.h>
86
87#include <machine/in_cksum.h>
88#include <dev/led/led.h>
89#include <dev/pci/pcivar.h>
90#include <dev/pci/pcireg.h>
91
92#include "e1000_api.h"
93#include "e1000_82575.h"
94#include "if_igb.h"
95
96/*********************************************************************
97 *  Set this to one to display debug statistics
98 *********************************************************************/
99int	igb_display_debug_stats = 0;
100
101/*********************************************************************
102 *  Driver version:
103 *********************************************************************/
104char igb_driver_version[] = "version - 2.3.10";
105
106
107/*********************************************************************
108 *  PCI Device ID Table
109 *
110 *  Used by probe to select devices to load on
111  Last field stores an index into igb_strings
112 *  Last entry must be all 0s
113 *
114 *  { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
115 *********************************************************************/
116
117static igb_vendor_info_t igb_vendor_info_array[] =
118{
119	{ 0x8086, E1000_DEV_ID_82575EB_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
120	{ 0x8086, E1000_DEV_ID_82575EB_FIBER_SERDES,
121						PCI_ANY_ID, PCI_ANY_ID, 0},
122	{ 0x8086, E1000_DEV_ID_82575GB_QUAD_COPPER,
123						PCI_ANY_ID, PCI_ANY_ID, 0},
124	{ 0x8086, E1000_DEV_ID_82576,		PCI_ANY_ID, PCI_ANY_ID, 0},
125	{ 0x8086, E1000_DEV_ID_82576_NS,	PCI_ANY_ID, PCI_ANY_ID, 0},
126	{ 0x8086, E1000_DEV_ID_82576_NS_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
127	{ 0x8086, E1000_DEV_ID_82576_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
128	{ 0x8086, E1000_DEV_ID_82576_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
129	{ 0x8086, E1000_DEV_ID_82576_SERDES_QUAD,
130						PCI_ANY_ID, PCI_ANY_ID, 0},
131	{ 0x8086, E1000_DEV_ID_82576_QUAD_COPPER,
132						PCI_ANY_ID, PCI_ANY_ID, 0},
133	{ 0x8086, E1000_DEV_ID_82576_QUAD_COPPER_ET2,
134						PCI_ANY_ID, PCI_ANY_ID, 0},
135	{ 0x8086, E1000_DEV_ID_82576_VF,	PCI_ANY_ID, PCI_ANY_ID, 0},
136	{ 0x8086, E1000_DEV_ID_82580_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
137	{ 0x8086, E1000_DEV_ID_82580_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
138	{ 0x8086, E1000_DEV_ID_82580_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
139	{ 0x8086, E1000_DEV_ID_82580_SGMII,	PCI_ANY_ID, PCI_ANY_ID, 0},
140	{ 0x8086, E1000_DEV_ID_82580_COPPER_DUAL,
141						PCI_ANY_ID, PCI_ANY_ID, 0},
142	{ 0x8086, E1000_DEV_ID_82580_QUAD_FIBER,
143						PCI_ANY_ID, PCI_ANY_ID, 0},
144	{ 0x8086, E1000_DEV_ID_DH89XXCC_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
145	{ 0x8086, E1000_DEV_ID_DH89XXCC_SGMII,	PCI_ANY_ID, PCI_ANY_ID, 0},
146	{ 0x8086, E1000_DEV_ID_DH89XXCC_SFP,	PCI_ANY_ID, PCI_ANY_ID, 0},
147	{ 0x8086, E1000_DEV_ID_DH89XXCC_BACKPLANE,
148						PCI_ANY_ID, PCI_ANY_ID, 0},
149	{ 0x8086, E1000_DEV_ID_I350_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
150	{ 0x8086, E1000_DEV_ID_I350_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
151	{ 0x8086, E1000_DEV_ID_I350_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
152	{ 0x8086, E1000_DEV_ID_I350_SGMII,	PCI_ANY_ID, PCI_ANY_ID, 0},
153	{ 0x8086, E1000_DEV_ID_I350_VF,		PCI_ANY_ID, PCI_ANY_ID, 0},
154	{ 0x8086, E1000_DEV_ID_I210_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
155	{ 0x8086, E1000_DEV_ID_I210_COPPER_IT,	PCI_ANY_ID, PCI_ANY_ID, 0},
156	{ 0x8086, E1000_DEV_ID_I210_COPPER_OEM1,
157						PCI_ANY_ID, PCI_ANY_ID, 0},
158	{ 0x8086, E1000_DEV_ID_I210_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
159	{ 0x8086, E1000_DEV_ID_I210_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
160	{ 0x8086, E1000_DEV_ID_I210_SGMII,	PCI_ANY_ID, PCI_ANY_ID, 0},
161	{ 0x8086, E1000_DEV_ID_I211_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
162	/* required last entry */
163	{ 0, 0, 0, 0, 0}
164};
165
166/*********************************************************************
167 *  Table of branding strings for all supported NICs.
168 *********************************************************************/
169
170static char *igb_strings[] = {
171	"Intel(R) PRO/1000 Network Connection"
172};
173
174/*********************************************************************
175 *  Function prototypes
176 *********************************************************************/
177static int	igb_probe(device_t);
178static int	igb_attach(device_t);
179static int	igb_detach(device_t);
180static int	igb_shutdown(device_t);
181static int	igb_suspend(device_t);
182static int	igb_resume(device_t);
183#ifndef IGB_LEGACY_TX
184static int	igb_mq_start(struct ifnet *, struct mbuf *);
185static int	igb_mq_start_locked(struct ifnet *, struct tx_ring *);
186static void	igb_qflush(struct ifnet *);
187static void	igb_deferred_mq_start(void *, int);
188#else
189static void	igb_start(struct ifnet *);
190static void	igb_start_locked(struct tx_ring *, struct ifnet *ifp);
191#endif
192static int	igb_ioctl(struct ifnet *, u_long, caddr_t);
193static void	igb_init(void *);
194static void	igb_init_locked(struct adapter *);
195static void	igb_stop(void *);
196static void	igb_media_status(struct ifnet *, struct ifmediareq *);
197static int	igb_media_change(struct ifnet *);
198static void	igb_identify_hardware(struct adapter *);
199static int	igb_allocate_pci_resources(struct adapter *);
200static int	igb_allocate_msix(struct adapter *);
201static int	igb_allocate_legacy(struct adapter *);
202static int	igb_setup_msix(struct adapter *);
203static void	igb_free_pci_resources(struct adapter *);
204static void	igb_local_timer(void *);
205static void	igb_reset(struct adapter *);
206static int	igb_setup_interface(device_t, struct adapter *);
207static int	igb_allocate_queues(struct adapter *);
208static void	igb_configure_queues(struct adapter *);
209
210static int	igb_allocate_transmit_buffers(struct tx_ring *);
211static void	igb_setup_transmit_structures(struct adapter *);
212static void	igb_setup_transmit_ring(struct tx_ring *);
213static void	igb_initialize_transmit_units(struct adapter *);
214static void	igb_free_transmit_structures(struct adapter *);
215static void	igb_free_transmit_buffers(struct tx_ring *);
216
217static int	igb_allocate_receive_buffers(struct rx_ring *);
218static int	igb_setup_receive_structures(struct adapter *);
219static int	igb_setup_receive_ring(struct rx_ring *);
220static void	igb_initialize_receive_units(struct adapter *);
221static void	igb_free_receive_structures(struct adapter *);
222static void	igb_free_receive_buffers(struct rx_ring *);
223static void	igb_free_receive_ring(struct rx_ring *);
224
225static void	igb_enable_intr(struct adapter *);
226static void	igb_disable_intr(struct adapter *);
227static void	igb_update_stats_counters(struct adapter *);
228static bool	igb_txeof(struct tx_ring *);
229
230static __inline	void igb_rx_discard(struct rx_ring *, int);
231static __inline void igb_rx_input(struct rx_ring *,
232		    struct ifnet *, struct mbuf *, u32);
233
234static bool	igb_rxeof(struct igb_queue *, int, int *);
235static void	igb_rx_checksum(u32, struct mbuf *, u32);
236static bool	igb_tx_ctx_setup(struct tx_ring *, struct mbuf *);
237static bool	igb_tso_setup(struct tx_ring *, struct mbuf *, int,
238		    struct ip *, struct tcphdr *);
239static void	igb_set_promisc(struct adapter *);
240static void	igb_disable_promisc(struct adapter *);
241static void	igb_set_multi(struct adapter *);
242static void	igb_update_link_status(struct adapter *);
243static void	igb_refresh_mbufs(struct rx_ring *, int);
244
245static void	igb_register_vlan(void *, struct ifnet *, u16);
246static void	igb_unregister_vlan(void *, struct ifnet *, u16);
247static void	igb_setup_vlan_hw_support(struct adapter *);
248
249static int	igb_xmit(struct tx_ring *, struct mbuf **);
250static int	igb_dma_malloc(struct adapter *, bus_size_t,
251		    struct igb_dma_alloc *, int);
252static void	igb_dma_free(struct adapter *, struct igb_dma_alloc *);
253static int	igb_sysctl_nvm_info(SYSCTL_HANDLER_ARGS);
254static void	igb_print_nvm_info(struct adapter *);
255static int 	igb_is_valid_ether_addr(u8 *);
256static void     igb_add_hw_stats(struct adapter *);
257
258static void	igb_vf_init_stats(struct adapter *);
259static void	igb_update_vf_stats_counters(struct adapter *);
260
261/* Management and WOL Support */
262static void	igb_init_manageability(struct adapter *);
263static void	igb_release_manageability(struct adapter *);
264static void     igb_get_hw_control(struct adapter *);
265static void     igb_release_hw_control(struct adapter *);
266static void     igb_enable_wakeup(device_t);
267static void     igb_led_func(void *, int);
268
269static int	igb_irq_fast(void *);
270static void	igb_msix_que(void *);
271static void	igb_msix_link(void *);
272static void	igb_handle_que(void *context, int pending);
273static void	igb_handle_link(void *context, int pending);
274static void	igb_handle_link_locked(struct adapter *);
275
276static void	igb_set_sysctl_value(struct adapter *, const char *,
277		    const char *, int *, int);
278static int	igb_set_flowcntl(SYSCTL_HANDLER_ARGS);
279static int	igb_sysctl_dmac(SYSCTL_HANDLER_ARGS);
280static int	igb_sysctl_eee(SYSCTL_HANDLER_ARGS);
281
282#ifdef DEVICE_POLLING
283static poll_handler_t igb_poll;
284#endif /* DEVICE_POLLING */
285
286/*********************************************************************
287 *  FreeBSD Device Interface Entry Points
288 *********************************************************************/
289
290static device_method_t igb_methods[] = {
291	/* Device interface */
292	DEVMETHOD(device_probe, igb_probe),
293	DEVMETHOD(device_attach, igb_attach),
294	DEVMETHOD(device_detach, igb_detach),
295	DEVMETHOD(device_shutdown, igb_shutdown),
296	DEVMETHOD(device_suspend, igb_suspend),
297	DEVMETHOD(device_resume, igb_resume),
298	DEVMETHOD_END
299};
300
301static driver_t igb_driver = {
302	"igb", igb_methods, sizeof(struct adapter),
303};
304
305static devclass_t igb_devclass;
306DRIVER_MODULE(igb, pci, igb_driver, igb_devclass, 0, 0);
307MODULE_DEPEND(igb, pci, 1, 1, 1);
308MODULE_DEPEND(igb, ether, 1, 1, 1);
309
310/*********************************************************************
311 *  Tunable default values.
312 *********************************************************************/
313
314static SYSCTL_NODE(_hw, OID_AUTO, igb, CTLFLAG_RD, 0, "IGB driver parameters");
315
316/* Descriptor defaults */
317static int igb_rxd = IGB_DEFAULT_RXD;
318static int igb_txd = IGB_DEFAULT_TXD;
319TUNABLE_INT("hw.igb.rxd", &igb_rxd);
320TUNABLE_INT("hw.igb.txd", &igb_txd);
321SYSCTL_INT(_hw_igb, OID_AUTO, rxd, CTLFLAG_RDTUN, &igb_rxd, 0,
322    "Number of receive descriptors per queue");
323SYSCTL_INT(_hw_igb, OID_AUTO, txd, CTLFLAG_RDTUN, &igb_txd, 0,
324    "Number of transmit descriptors per queue");
325
326/*
327** AIM: Adaptive Interrupt Moderation,
328** which means that the interrupt rate
329** is varied over time based on the
330** traffic for that interrupt vector.
331*/
332static int igb_enable_aim = TRUE;
333TUNABLE_INT("hw.igb.enable_aim", &igb_enable_aim);
334SYSCTL_INT(_hw_igb, OID_AUTO, enable_aim, CTLFLAG_RW, &igb_enable_aim, 0,
335    "Enable adaptive interrupt moderation");
336
337/*
338 * MSIX should be the default for best performance,
339 * but this allows it to be forced off for testing.
340 */
341static int igb_enable_msix = 1;
342TUNABLE_INT("hw.igb.enable_msix", &igb_enable_msix);
343SYSCTL_INT(_hw_igb, OID_AUTO, enable_msix, CTLFLAG_RDTUN, &igb_enable_msix, 0,
344    "Enable MSI-X interrupts");
345
346/*
347** Tuneable Interrupt rate
348*/
349static int igb_max_interrupt_rate = 8000;
350TUNABLE_INT("hw.igb.max_interrupt_rate", &igb_max_interrupt_rate);
351SYSCTL_INT(_hw_igb, OID_AUTO, max_interrupt_rate, CTLFLAG_RDTUN,
352    &igb_max_interrupt_rate, 0, "Maximum interrupts per second");
353
354#if __FreeBSD_version >= 800000
355/*
356** Tuneable number of buffers in the buf-ring (drbr_xxx)
357*/
358static int igb_buf_ring_size = IGB_BR_SIZE;
359TUNABLE_INT("hw.igb.buf_ring_size", &igb_buf_ring_size);
360SYSCTL_INT(_hw_igb, OID_AUTO, buf_ring_size, CTLFLAG_RDTUN,
361    &igb_buf_ring_size, 0, "Size of the bufring");
362#endif
363
364/*
365** Header split causes the packet header to
366** be DMA'd to a separate mbuf from the payload.
367** This can have memory alignment benefits, and
368** another plus is that small packets often fit
369** into the header and thus use no cluster. It is
370** a very workload-dependent feature.
371*/
372static int igb_header_split = FALSE;
373TUNABLE_INT("hw.igb.hdr_split", &igb_header_split);
374SYSCTL_INT(_hw_igb, OID_AUTO, header_split, CTLFLAG_RDTUN, &igb_header_split, 0,
375    "Enable receive mbuf header split");
376
377/*
378** This will autoconfigure based on the
379** number of CPUs and max supported
380** MSIX messages if left at 0.
381*/
382static int igb_num_queues = 0;
383TUNABLE_INT("hw.igb.num_queues", &igb_num_queues);
384SYSCTL_INT(_hw_igb, OID_AUTO, num_queues, CTLFLAG_RDTUN, &igb_num_queues, 0,
385    "Number of queues to configure, 0 indicates autoconfigure");
386
387/*
388** Global variable to store last used CPU when binding queues
389** to CPUs in igb_allocate_msix.  Starts at CPU_FIRST and increments when a
390** queue is bound to a cpu.
391*/
392static int igb_last_bind_cpu = -1;
393
394/* How many packets rxeof tries to clean at a time */
395static int igb_rx_process_limit = 100;
396TUNABLE_INT("hw.igb.rx_process_limit", &igb_rx_process_limit);
397SYSCTL_INT(_hw_igb, OID_AUTO, rx_process_limit, CTLFLAG_RDTUN,
398    &igb_rx_process_limit, 0,
399    "Maximum number of received packets to process at a time, -1 means unlimited");
400
401#ifdef DEV_NETMAP	/* see ixgbe.c for details */
402#include <dev/netmap/if_igb_netmap.h>
403#endif /* DEV_NETMAP */
404/*********************************************************************
405 *  Device identification routine
406 *
407 *  igb_probe determines if the driver should be loaded on
408 *  an adapter based on the PCI vendor/device ID of the adapter.
409 *
410 *  return BUS_PROBE_DEFAULT on success, positive on failure
411 *********************************************************************/
412
413static int
414igb_probe(device_t dev)
415{
416	char		adapter_name[60];
417	uint16_t	pci_vendor_id = 0;
418	uint16_t	pci_device_id = 0;
419	uint16_t	pci_subvendor_id = 0;
420	uint16_t	pci_subdevice_id = 0;
421	igb_vendor_info_t *ent;
422
423	INIT_DEBUGOUT("igb_probe: begin");
424
425	pci_vendor_id = pci_get_vendor(dev);
426	if (pci_vendor_id != IGB_VENDOR_ID)
427		return (ENXIO);
428
429	pci_device_id = pci_get_device(dev);
430	pci_subvendor_id = pci_get_subvendor(dev);
431	pci_subdevice_id = pci_get_subdevice(dev);
432
433	ent = igb_vendor_info_array;
434	while (ent->vendor_id != 0) {
435		if ((pci_vendor_id == ent->vendor_id) &&
436		    (pci_device_id == ent->device_id) &&
437
438		    ((pci_subvendor_id == ent->subvendor_id) ||
439		    (ent->subvendor_id == PCI_ANY_ID)) &&
440
441		    ((pci_subdevice_id == ent->subdevice_id) ||
442		    (ent->subdevice_id == PCI_ANY_ID))) {
443			sprintf(adapter_name, "%s %s",
444				igb_strings[ent->index],
445				igb_driver_version);
446			device_set_desc_copy(dev, adapter_name);
447			return (BUS_PROBE_DEFAULT);
448		}
449		ent++;
450	}
451
452	return (ENXIO);
453}
454
455/*********************************************************************
456 *  Device initialization routine
457 *
458 *  The attach entry point is called when the driver is being loaded.
459 *  This routine identifies the type of hardware, allocates all resources
460 *  and initializes the hardware.
461 *
462 *  return 0 on success, positive on failure
463 *********************************************************************/
464
465static int
466igb_attach(device_t dev)
467{
468	struct adapter	*adapter;
469	int		error = 0;
470	u16		eeprom_data;
471
472	INIT_DEBUGOUT("igb_attach: begin");
473
474	if (resource_disabled("igb", device_get_unit(dev))) {
475		device_printf(dev, "Disabled by device hint\n");
476		return (ENXIO);
477	}
478
479	adapter = device_get_softc(dev);
480	adapter->dev = adapter->osdep.dev = dev;
481	IGB_CORE_LOCK_INIT(adapter, device_get_nameunit(dev));
482
483	/* SYSCTL stuff */
484	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
485	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
486	    OID_AUTO, "nvm", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
487	    igb_sysctl_nvm_info, "I", "NVM Information");
488
489	igb_set_sysctl_value(adapter, "enable_aim",
490	    "Interrupt Moderation", &adapter->enable_aim,
491	    igb_enable_aim);
492
493	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
494	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
495	    OID_AUTO, "fc", CTLTYPE_INT|CTLFLAG_RW,
496	    adapter, 0, igb_set_flowcntl, "I", "Flow Control");
497
498	callout_init_mtx(&adapter->timer, &adapter->core_mtx, 0);
499
500	/* Determine hardware and mac info */
501	igb_identify_hardware(adapter);
502
503	/* Setup PCI resources */
504	if (igb_allocate_pci_resources(adapter)) {
505		device_printf(dev, "Allocation of PCI resources failed\n");
506		error = ENXIO;
507		goto err_pci;
508	}
509
510	/* Do Shared Code initialization */
511	if (e1000_setup_init_funcs(&adapter->hw, TRUE)) {
512		device_printf(dev, "Setup of Shared code failed\n");
513		error = ENXIO;
514		goto err_pci;
515	}
516
517	e1000_get_bus_info(&adapter->hw);
518
519	/* Sysctl for limiting the amount of work done in the taskqueue */
520	igb_set_sysctl_value(adapter, "rx_processing_limit",
521	    "max number of rx packets to process",
522	    &adapter->rx_process_limit, igb_rx_process_limit);
523
524	/*
525	 * Validate the number of transmit and receive descriptors. It
526	 * must not exceed the hardware maximum, and the ring size in
527	 * bytes must be a multiple of IGB_DBA_ALIGN.
528	 */
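	/*
	 * For instance, assuming the usual 16-byte descriptor and the
	 * 128-byte IGB_DBA_ALIGN, any multiple of 8 descriptors satisfies
	 * the alignment check (1024 passes, 1000 does not).
	 */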
529	if (((igb_txd * sizeof(struct e1000_tx_desc)) % IGB_DBA_ALIGN) != 0 ||
530	    (igb_txd > IGB_MAX_TXD) || (igb_txd < IGB_MIN_TXD)) {
531		device_printf(dev, "Using %d TX descriptors instead of %d!\n",
532		    IGB_DEFAULT_TXD, igb_txd);
533		adapter->num_tx_desc = IGB_DEFAULT_TXD;
534	} else
535		adapter->num_tx_desc = igb_txd;
536	if (((igb_rxd * sizeof(struct e1000_rx_desc)) % IGB_DBA_ALIGN) != 0 ||
537	    (igb_rxd > IGB_MAX_RXD) || (igb_rxd < IGB_MIN_RXD)) {
538		device_printf(dev, "Using %d RX descriptors instead of %d!\n",
539		    IGB_DEFAULT_RXD, igb_rxd);
540		adapter->num_rx_desc = IGB_DEFAULT_RXD;
541	} else
542		adapter->num_rx_desc = igb_rxd;
543
544	adapter->hw.mac.autoneg = DO_AUTO_NEG;
545	adapter->hw.phy.autoneg_wait_to_complete = FALSE;
546	adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
547
548	/* Copper options */
549	if (adapter->hw.phy.media_type == e1000_media_type_copper) {
550		adapter->hw.phy.mdix = AUTO_ALL_MODES;
551		adapter->hw.phy.disable_polarity_correction = FALSE;
552		adapter->hw.phy.ms_type = IGB_MASTER_SLAVE;
553	}
554
555	/*
556	 * Set the frame limits assuming
557	 * standard ethernet sized frames.
558	 */
559	adapter->max_frame_size = ETHERMTU + ETHER_HDR_LEN + ETHERNET_FCS_SIZE;
560	adapter->min_frame_size = ETH_ZLEN + ETHERNET_FCS_SIZE;
561
562	/*
563	** Allocate and Setup Queues
564	*/
565	if (igb_allocate_queues(adapter)) {
566		error = ENOMEM;
567		goto err_pci;
568	}
569
570	/* Allocate the appropriate stats memory */
571	if (adapter->vf_ifp) {
572		adapter->stats =
573		    (struct e1000_vf_stats *)malloc(sizeof \
574		    (struct e1000_vf_stats), M_DEVBUF, M_NOWAIT | M_ZERO);
575		igb_vf_init_stats(adapter);
576	} else
577		adapter->stats =
578		    (struct e1000_hw_stats *)malloc(sizeof \
579		    (struct e1000_hw_stats), M_DEVBUF, M_NOWAIT | M_ZERO);
580	if (adapter->stats == NULL) {
581		device_printf(dev, "Can not allocate stats memory\n");
582		error = ENOMEM;
583		goto err_late;
584	}
585
586	/* Allocate multicast array memory. */
587	adapter->mta = malloc(sizeof(u8) * ETH_ADDR_LEN *
588	    MAX_NUM_MULTICAST_ADDRESSES, M_DEVBUF, M_NOWAIT);
589	if (adapter->mta == NULL) {
590		device_printf(dev, "Can not allocate multicast setup array\n");
591		error = ENOMEM;
592		goto err_late;
593	}
594
595	/* Some adapter-specific advanced features */
596	if (adapter->hw.mac.type >= e1000_i350) {
597		SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
598		    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
599		    OID_AUTO, "dmac", CTLTYPE_INT|CTLFLAG_RW,
600		    adapter, 0, igb_sysctl_dmac, "I", "DMA Coalesce");
601		SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
602		    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
603		    OID_AUTO, "eee_disabled", CTLTYPE_INT|CTLFLAG_RW,
604		    adapter, 0, igb_sysctl_eee, "I",
605		    "Disable Energy Efficient Ethernet");
606		if (adapter->hw.phy.media_type == e1000_media_type_copper)
607			e1000_set_eee_i350(&adapter->hw);
608	}
609
610	/*
611	** Start from a known state; this is
612	** important for reading the NVM and
613	** MAC address from it.
614	*/
615	e1000_reset_hw(&adapter->hw);
616
617	/* Make sure we have a good EEPROM before we read from it */
618	if (((adapter->hw.mac.type != e1000_i210) &&
619	    (adapter->hw.mac.type != e1000_i211)) &&
620	    (e1000_validate_nvm_checksum(&adapter->hw) < 0)) {
621		/*
622		** Some PCI-E parts fail the first check due to
623		** the link being in a sleep state; call it again,
624		** and if it fails a second time it's a real issue.
625		*/
626		if (e1000_validate_nvm_checksum(&adapter->hw) < 0) {
627			device_printf(dev,
628			    "The EEPROM Checksum Is Not Valid\n");
629			error = EIO;
630			goto err_late;
631		}
632	}
633
634	/*
635	** Copy the permanent MAC address out of the EEPROM
636	*/
637	if (e1000_read_mac_addr(&adapter->hw) < 0) {
638		device_printf(dev, "EEPROM read error while reading MAC"
639		    " address\n");
640		error = EIO;
641		goto err_late;
642	}
643	/* Check its sanity */
644	if (!igb_is_valid_ether_addr(adapter->hw.mac.addr)) {
645		device_printf(dev, "Invalid MAC address\n");
646		error = EIO;
647		goto err_late;
648	}
649
650	/* Setup OS specific network interface */
651	if (igb_setup_interface(dev, adapter) != 0)
652		goto err_late;
653
654	/* Now get a good starting state */
655	igb_reset(adapter);
656
657	/* Initialize statistics */
658	igb_update_stats_counters(adapter);
659
660	adapter->hw.mac.get_link_status = 1;
661	igb_update_link_status(adapter);
662
663	/* Indicate SOL/IDER usage */
664	if (e1000_check_reset_block(&adapter->hw))
665		device_printf(dev,
666		    "PHY reset is blocked due to SOL/IDER session.\n");
667
668	/* Determine if we have to control management hardware */
669	adapter->has_manage = e1000_enable_mng_pass_thru(&adapter->hw);
670
671	/*
672	 * Setup Wake-on-Lan
673	 */
674	/* APME bit in EEPROM is mapped to WUC.APME */
675	eeprom_data = E1000_READ_REG(&adapter->hw, E1000_WUC) & E1000_WUC_APME;
676	if (eeprom_data)
677		adapter->wol = E1000_WUFC_MAG;
678
679	/* Register for VLAN events */
680	adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
681	     igb_register_vlan, adapter, EVENTHANDLER_PRI_FIRST);
682	adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
683	     igb_unregister_vlan, adapter, EVENTHANDLER_PRI_FIRST);
684
685	igb_add_hw_stats(adapter);
686
687	/* Tell the stack that the interface is not active */
688	adapter->ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
689	adapter->ifp->if_drv_flags |=  IFF_DRV_OACTIVE;
690
691	adapter->led_dev = led_create(igb_led_func, adapter,
692	    device_get_nameunit(dev));
693
694	/*
695	** Configure Interrupts
696	*/
697	if ((adapter->msix > 1) && (igb_enable_msix))
698		error = igb_allocate_msix(adapter);
699	else /* MSI or Legacy */
700		error = igb_allocate_legacy(adapter);
701	if (error)
702		goto err_late;
703
704#ifdef DEV_NETMAP
705	igb_netmap_attach(adapter);
706#endif /* DEV_NETMAP */
707	INIT_DEBUGOUT("igb_attach: end");
708
709	return (0);
710
711err_late:
712	igb_detach(dev);
713	igb_free_transmit_structures(adapter);
714	igb_free_receive_structures(adapter);
715	igb_release_hw_control(adapter);
716err_pci:
717	igb_free_pci_resources(adapter);
718	if (adapter->ifp != NULL)
719		if_free(adapter->ifp);
720	free(adapter->mta, M_DEVBUF);
721	IGB_CORE_LOCK_DESTROY(adapter);
722
723	return (error);
724}
725
726/*********************************************************************
727 *  Device removal routine
728 *
729 *  The detach entry point is called when the driver is being removed.
730 *  This routine stops the adapter and deallocates all the resources
731 *  that were allocated for driver operation.
732 *
733 *  return 0 on success, positive on failure
734 *********************************************************************/
735
736static int
737igb_detach(device_t dev)
738{
739	struct adapter	*adapter = device_get_softc(dev);
740	struct ifnet	*ifp = adapter->ifp;
741
742	INIT_DEBUGOUT("igb_detach: begin");
743
744	/* Make sure VLANS are not using driver */
745	if (adapter->ifp->if_vlantrunk != NULL) {
746		device_printf(dev,"Vlan in use, detach first\n");
747		return (EBUSY);
748	}
749
750	ether_ifdetach(adapter->ifp);
751
752	if (adapter->led_dev != NULL)
753		led_destroy(adapter->led_dev);
754
755#ifdef DEVICE_POLLING
756	if (ifp->if_capenable & IFCAP_POLLING)
757		ether_poll_deregister(ifp);
758#endif
759
760	IGB_CORE_LOCK(adapter);
761	adapter->in_detach = 1;
762	igb_stop(adapter);
763	IGB_CORE_UNLOCK(adapter);
764
765	e1000_phy_hw_reset(&adapter->hw);
766
767	/* Give control back to firmware */
768	igb_release_manageability(adapter);
769	igb_release_hw_control(adapter);
770
771	if (adapter->wol) {
772		E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
773		E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
774		igb_enable_wakeup(dev);
775	}
776
777	/* Unregister VLAN events */
778	if (adapter->vlan_attach != NULL)
779		EVENTHANDLER_DEREGISTER(vlan_config, adapter->vlan_attach);
780	if (adapter->vlan_detach != NULL)
781		EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach);
782
783	callout_drain(&adapter->timer);
784
785#ifdef DEV_NETMAP
786	netmap_detach(adapter->ifp);
787#endif /* DEV_NETMAP */
788	igb_free_pci_resources(adapter);
789	bus_generic_detach(dev);
790	if_free(ifp);
791
792	igb_free_transmit_structures(adapter);
793	igb_free_receive_structures(adapter);
794	if (adapter->mta != NULL)
795		free(adapter->mta, M_DEVBUF);
796
797	IGB_CORE_LOCK_DESTROY(adapter);
798
799	return (0);
800}
801
802/*********************************************************************
803 *
804 *  Shutdown entry point
805 *
806 **********************************************************************/
807
808static int
809igb_shutdown(device_t dev)
810{
811	return igb_suspend(dev);
812}
813
814/*
815 * Suspend/resume device methods.
816 */
817static int
818igb_suspend(device_t dev)
819{
820	struct adapter *adapter = device_get_softc(dev);
821
822	IGB_CORE_LOCK(adapter);
823
824	igb_stop(adapter);
825
826	igb_release_manageability(adapter);
827	igb_release_hw_control(adapter);
828
829	if (adapter->wol) {
830		E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
831		E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
832		igb_enable_wakeup(dev);
833	}
834
835	IGB_CORE_UNLOCK(adapter);
836
837	return bus_generic_suspend(dev);
838}
839
840static int
841igb_resume(device_t dev)
842{
843	struct adapter *adapter = device_get_softc(dev);
844	struct tx_ring	*txr = adapter->tx_rings;
845	struct ifnet *ifp = adapter->ifp;
846
847	IGB_CORE_LOCK(adapter);
848	igb_init_locked(adapter);
849	igb_init_manageability(adapter);
850
851	if ((ifp->if_flags & IFF_UP) &&
852	    (ifp->if_drv_flags & IFF_DRV_RUNNING) && adapter->link_active) {
853		for (int i = 0; i < adapter->num_queues; i++, txr++) {
854			IGB_TX_LOCK(txr);
855#ifndef IGB_LEGACY_TX
856			/* Process the stack queue only if not depleted */
857			if (((txr->queue_status & IGB_QUEUE_DEPLETED) == 0) &&
858			    !drbr_empty(ifp, txr->br))
859				igb_mq_start_locked(ifp, txr);
860#else
861			if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
862				igb_start_locked(txr, ifp);
863#endif
864			IGB_TX_UNLOCK(txr);
865		}
866	}
867	IGB_CORE_UNLOCK(adapter);
868
869	return bus_generic_resume(dev);
870}
871
872
873#ifdef IGB_LEGACY_TX
874
875/*********************************************************************
876 *  Transmit entry point
877 *
878 *  igb_start is called by the stack to initiate a transmit.
879 *  The driver will remain in this routine as long as there are
880 *  packets to transmit and transmit resources are available.
881 *  In case resources are not available stack is notified and
882 *  the packet is requeued.
883 **********************************************************************/
884
885static void
886igb_start_locked(struct tx_ring *txr, struct ifnet *ifp)
887{
888	struct adapter	*adapter = ifp->if_softc;
889	struct mbuf	*m_head;
890
891	IGB_TX_LOCK_ASSERT(txr);
892
893	if ((ifp->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
894	    IFF_DRV_RUNNING)
895		return;
896	if (!adapter->link_active)
897		return;
898
899	/* Call cleanup if number of TX descriptors low */
900	if (txr->tx_avail <= IGB_TX_CLEANUP_THRESHOLD)
901		igb_txeof(txr);
902
903	while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
904		if (txr->tx_avail <= IGB_MAX_SCATTER) {
905			txr->queue_status |= IGB_QUEUE_DEPLETED;
906			break;
907		}
908		IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
909		if (m_head == NULL)
910			break;
911		/*
912		 *  Encapsulation can modify our pointer, and/or make it
913		 *  NULL on failure.  In that event, we can't requeue.
914		 */
915		if (igb_xmit(txr, &m_head)) {
916			if (m_head != NULL)
917				IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
918			if (txr->tx_avail <= IGB_MAX_SCATTER)
919				txr->queue_status |= IGB_QUEUE_DEPLETED;
920			break;
921		}
922
923		/* Send a copy of the frame to the BPF listener */
924		ETHER_BPF_MTAP(ifp, m_head);
925
926		/* Set watchdog on */
927		txr->watchdog_time = ticks;
928		txr->queue_status |= IGB_QUEUE_WORKING;
929	}
930}
931
932/*
933 * Legacy TX driver routine, called from the
934 * stack, always uses tx[0], and spins for it.
935 * Should not be used with multiqueue tx
936 */
937static void
938igb_start(struct ifnet *ifp)
939{
940	struct adapter	*adapter = ifp->if_softc;
941	struct tx_ring	*txr = adapter->tx_rings;
942
943	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
944		IGB_TX_LOCK(txr);
945		igb_start_locked(txr, ifp);
946		IGB_TX_UNLOCK(txr);
947	}
948	return;
949}
950
951#else /* ~IGB_LEGACY_TX */
952
953/*
954** Multiqueue Transmit Entry:
955**  quick turnaround to the stack
956**
957*/
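/*
** For example, if the stack hands us an mbuf with M_FLOWID set and
** flowid 7 on a 4-queue adapter, the frame is enqueued on tx ring
** 7 % 4 = 3; without a flowid the current CPU number is used instead.
*/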
958static int
959igb_mq_start(struct ifnet *ifp, struct mbuf *m)
960{
961	struct adapter		*adapter = ifp->if_softc;
962	struct igb_queue	*que;
963	struct tx_ring		*txr;
964	int 			i, err = 0;
965
966	/* Which queue to use */
967	if ((m->m_flags & M_FLOWID) != 0)
968		i = m->m_pkthdr.flowid % adapter->num_queues;
969	else
970		i = curcpu % adapter->num_queues;
971	txr = &adapter->tx_rings[i];
972	que = &adapter->queues[i];
973
974	err = drbr_enqueue(ifp, txr->br, m);
975	if (err)
976		return (err);
977	if (IGB_TX_TRYLOCK(txr)) {
978		err = igb_mq_start_locked(ifp, txr);
979		IGB_TX_UNLOCK(txr);
980	} else
981		taskqueue_enqueue(que->tq, &txr->txq_task);
982
983	return (err);
984}
985
986static int
987igb_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr)
988{
989	struct adapter  *adapter = txr->adapter;
990        struct mbuf     *next;
991        int             err = 0, enq;
992
993	IGB_TX_LOCK_ASSERT(txr);
994
995	if (((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) ||
996	    adapter->link_active == 0)
997		return (ENETDOWN);
998
999	enq = 0;
1000
1001	/* Process the queue */
1002	while ((next = drbr_peek(ifp, txr->br)) != NULL) {
1003		if ((err = igb_xmit(txr, &next)) != 0) {
1004			if (next == NULL) {
1005				/* It was freed, move forward */
1006				drbr_advance(ifp, txr->br);
1007			} else {
1008				/*
1009				 * Still have one left, it may not be
1010				 * the same since the transmit function
1011				 * may have changed it.
1012				 */
1013				drbr_putback(ifp, txr->br, next);
1014			}
1015			break;
1016		}
1017		drbr_advance(ifp, txr->br);
1018		enq++;
1019		ifp->if_obytes += next->m_pkthdr.len;
1020		if (next->m_flags & M_MCAST)
1021			ifp->if_omcasts++;
1022		ETHER_BPF_MTAP(ifp, next);
1023		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
1024			break;
1025	}
1026	if (enq > 0) {
1027		/* Set the watchdog */
1028		txr->queue_status |= IGB_QUEUE_WORKING;
1029		txr->watchdog_time = ticks;
1030	}
1031	if (txr->tx_avail <= IGB_TX_CLEANUP_THRESHOLD)
1032		igb_txeof(txr);
1033	if (txr->tx_avail <= IGB_MAX_SCATTER)
1034		txr->queue_status |= IGB_QUEUE_DEPLETED;
1035	return (err);
1036}
1037
1038/*
1039 * Called from a taskqueue to drain queued transmit packets.
1040 */
1041static void
1042igb_deferred_mq_start(void *arg, int pending)
1043{
1044	struct tx_ring *txr = arg;
1045	struct adapter *adapter = txr->adapter;
1046	struct ifnet *ifp = adapter->ifp;
1047
1048	IGB_TX_LOCK(txr);
1049	if (!drbr_empty(ifp, txr->br))
1050		igb_mq_start_locked(ifp, txr);
1051	IGB_TX_UNLOCK(txr);
1052}
1053
1054/*
1055** Flush all ring buffers
1056*/
1057static void
1058igb_qflush(struct ifnet *ifp)
1059{
1060	struct adapter	*adapter = ifp->if_softc;
1061	struct tx_ring	*txr = adapter->tx_rings;
1062	struct mbuf	*m;
1063
1064	for (int i = 0; i < adapter->num_queues; i++, txr++) {
1065		IGB_TX_LOCK(txr);
1066		while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
1067			m_freem(m);
1068		IGB_TX_UNLOCK(txr);
1069	}
1070	if_qflush(ifp);
1071}
1072#endif /* ~IGB_LEGACY_TX */
1073
1074/*********************************************************************
1075 *  Ioctl entry point
1076 *
1077 *  igb_ioctl is called when the user wants to configure the
1078 *  interface.
1079 *
1080 *  return 0 on success, positive on failure
1081 **********************************************************************/
1082
1083static int
1084igb_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
1085{
1086	struct adapter	*adapter = ifp->if_softc;
1087	struct ifreq	*ifr = (struct ifreq *)data;
1088#if defined(INET) || defined(INET6)
1089	struct ifaddr	*ifa = (struct ifaddr *)data;
1090#endif
1091	bool		avoid_reset = FALSE;
1092	int		error = 0;
1093
1094	if (adapter->in_detach)
1095		return (error);
1096
1097	switch (command) {
1098	case SIOCSIFADDR:
1099#ifdef INET
1100		if (ifa->ifa_addr->sa_family == AF_INET)
1101			avoid_reset = TRUE;
1102#endif
1103#ifdef INET6
1104		if (ifa->ifa_addr->sa_family == AF_INET6)
1105			avoid_reset = TRUE;
1106#endif
1107		/*
1108		** Calling init results in link renegotiation,
1109		** so we avoid doing it when possible.
1110		*/
1111		if (avoid_reset) {
1112			ifp->if_flags |= IFF_UP;
1113			if (!(ifp->if_drv_flags & IFF_DRV_RUNNING))
1114				igb_init(adapter);
1115#ifdef INET
1116			if (!(ifp->if_flags & IFF_NOARP))
1117				arp_ifinit(ifp, ifa);
1118#endif
1119		} else
1120			error = ether_ioctl(ifp, command, data);
1121		break;
1122	case SIOCSIFMTU:
1123	    {
1124		int max_frame_size;
1125
1126		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)");
1127
1128		IGB_CORE_LOCK(adapter);
1129		max_frame_size = 9234;
1130		if (ifr->ifr_mtu > max_frame_size - ETHER_HDR_LEN -
1131		    ETHER_CRC_LEN) {
1132			IGB_CORE_UNLOCK(adapter);
1133			error = EINVAL;
1134			break;
1135		}
1136
1137		ifp->if_mtu = ifr->ifr_mtu;
1138		adapter->max_frame_size =
1139		    ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
1140		igb_init_locked(adapter);
1141		IGB_CORE_UNLOCK(adapter);
1142		break;
1143	    }
1144	case SIOCSIFFLAGS:
1145		IOCTL_DEBUGOUT("ioctl rcv'd:\
1146		    SIOCSIFFLAGS (Set Interface Flags)");
1147		IGB_CORE_LOCK(adapter);
1148		if (ifp->if_flags & IFF_UP) {
1149			if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) {
1150				if ((ifp->if_flags ^ adapter->if_flags) &
1151				    (IFF_PROMISC | IFF_ALLMULTI)) {
1152					igb_disable_promisc(adapter);
1153					igb_set_promisc(adapter);
1154				}
1155			} else
1156				igb_init_locked(adapter);
1157		} else
1158			if (ifp->if_drv_flags & IFF_DRV_RUNNING)
1159				igb_stop(adapter);
1160		adapter->if_flags = ifp->if_flags;
1161		IGB_CORE_UNLOCK(adapter);
1162		break;
1163	case SIOCADDMULTI:
1164	case SIOCDELMULTI:
1165		IOCTL_DEBUGOUT("ioctl rcv'd: SIOC(ADD|DEL)MULTI");
1166		if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1167			IGB_CORE_LOCK(adapter);
1168			igb_disable_intr(adapter);
1169			igb_set_multi(adapter);
1170#ifdef DEVICE_POLLING
1171			if (!(ifp->if_capenable & IFCAP_POLLING))
1172#endif
1173				igb_enable_intr(adapter);
1174			IGB_CORE_UNLOCK(adapter);
1175		}
1176		break;
1177	case SIOCSIFMEDIA:
1178		/* Check SOL/IDER usage */
1179		IGB_CORE_LOCK(adapter);
1180		if (e1000_check_reset_block(&adapter->hw)) {
1181			IGB_CORE_UNLOCK(adapter);
1182			device_printf(adapter->dev, "Media change is"
1183			    " blocked due to SOL/IDER session.\n");
1184			break;
1185		}
1186		IGB_CORE_UNLOCK(adapter);
1187	case SIOCGIFMEDIA:
1188		IOCTL_DEBUGOUT("ioctl rcv'd: \
1189		    SIOCxIFMEDIA (Get/Set Interface Media)");
1190		error = ifmedia_ioctl(ifp, ifr, &adapter->media, command);
1191		break;
1192	case SIOCSIFCAP:
1193	    {
1194		int mask, reinit;
1195
1196		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFCAP (Set Capabilities)");
1197		reinit = 0;
1198		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
1199#ifdef DEVICE_POLLING
1200		if (mask & IFCAP_POLLING) {
1201			if (ifr->ifr_reqcap & IFCAP_POLLING) {
1202				error = ether_poll_register(igb_poll, ifp);
1203				if (error)
1204					return (error);
1205				IGB_CORE_LOCK(adapter);
1206				igb_disable_intr(adapter);
1207				ifp->if_capenable |= IFCAP_POLLING;
1208				IGB_CORE_UNLOCK(adapter);
1209			} else {
1210				error = ether_poll_deregister(ifp);
1211				/* Enable interrupt even in error case */
1212				IGB_CORE_LOCK(adapter);
1213				igb_enable_intr(adapter);
1214				ifp->if_capenable &= ~IFCAP_POLLING;
1215				IGB_CORE_UNLOCK(adapter);
1216			}
1217		}
1218#endif
1219		if (mask & IFCAP_HWCSUM) {
1220			ifp->if_capenable ^= IFCAP_HWCSUM;
1221			reinit = 1;
1222		}
1223		if (mask & IFCAP_TSO4) {
1224			ifp->if_capenable ^= IFCAP_TSO4;
1225			reinit = 1;
1226		}
1227		if (mask & IFCAP_VLAN_HWTAGGING) {
1228			ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
1229			reinit = 1;
1230		}
1231		if (mask & IFCAP_VLAN_HWFILTER) {
1232			ifp->if_capenable ^= IFCAP_VLAN_HWFILTER;
1233			reinit = 1;
1234		}
1235		if (mask & IFCAP_VLAN_HWTSO) {
1236			ifp->if_capenable ^= IFCAP_VLAN_HWTSO;
1237			reinit = 1;
1238		}
1239		if (mask & IFCAP_LRO) {
1240			ifp->if_capenable ^= IFCAP_LRO;
1241			reinit = 1;
1242		}
1243		if (reinit && (ifp->if_drv_flags & IFF_DRV_RUNNING))
1244			igb_init(adapter);
1245		VLAN_CAPABILITIES(ifp);
1246		break;
1247	    }
1248
1249	default:
1250		error = ether_ioctl(ifp, command, data);
1251		break;
1252	}
1253
1254	return (error);
1255}
1256
1257
1258/*********************************************************************
1259 *  Init entry point
1260 *
1261 *  This routine is used in two ways. It is used by the stack as
1262 *  init entry point in network interface structure. It is also used
1263 *  by the driver as a hw/sw initialization routine to get to a
1264 *  consistent state.
1265 *
1266 *  return 0 on success, positive on failure
1267 **********************************************************************/
1268
1269static void
1270igb_init_locked(struct adapter *adapter)
1271{
1272	struct ifnet	*ifp = adapter->ifp;
1273	device_t	dev = adapter->dev;
1274
1275	INIT_DEBUGOUT("igb_init: begin");
1276
1277	IGB_CORE_LOCK_ASSERT(adapter);
1278
1279	igb_disable_intr(adapter);
1280	callout_stop(&adapter->timer);
1281
1282	/* Get the latest mac address, User can use a LAA */
1283        bcopy(IF_LLADDR(adapter->ifp), adapter->hw.mac.addr,
1284              ETHER_ADDR_LEN);
1285
1286	/* Put the address into the Receive Address Array */
1287	e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);
1288
1289	igb_reset(adapter);
1290	igb_update_link_status(adapter);
1291
1292	E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);
1293
1294	/* Set hardware offload abilities */
1295	ifp->if_hwassist = 0;
1296	if (ifp->if_capenable & IFCAP_TXCSUM) {
1297		ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
1298#if __FreeBSD_version >= 800000
1299		if (adapter->hw.mac.type == e1000_82576)
1300			ifp->if_hwassist |= CSUM_SCTP;
1301#endif
1302	}
1303
1304	if (ifp->if_capenable & IFCAP_TSO4)
1305		ifp->if_hwassist |= CSUM_TSO;
1306
1307	/* Configure for OS presence */
1308	igb_init_manageability(adapter);
1309
1310	/* Prepare transmit descriptors and buffers */
1311	igb_setup_transmit_structures(adapter);
1312	igb_initialize_transmit_units(adapter);
1313
1314	/* Setup Multicast table */
1315	igb_set_multi(adapter);
1316
1317	/*
1318	** Figure out the desired mbuf pool
1319	** for doing jumbo/packetsplit
1320	*/
1321	if (adapter->max_frame_size <= 2048)
1322		adapter->rx_mbuf_sz = MCLBYTES;
1323	else if (adapter->max_frame_size <= 4096)
1324		adapter->rx_mbuf_sz = MJUMPAGESIZE;
1325	else
1326		adapter->rx_mbuf_sz = MJUM9BYTES;
1327
1328	/* Prepare receive descriptors and buffers */
1329	if (igb_setup_receive_structures(adapter)) {
1330		device_printf(dev, "Could not setup receive structures\n");
1331		return;
1332	}
1333	igb_initialize_receive_units(adapter);
1334
1335        /* Enable VLAN support */
1336	if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING)
1337		igb_setup_vlan_hw_support(adapter);
1338
1339	/* Don't lose promiscuous settings */
1340	igb_set_promisc(adapter);
1341
1342	ifp->if_drv_flags |= IFF_DRV_RUNNING;
1343	ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
1344
1345	callout_reset(&adapter->timer, hz, igb_local_timer, adapter);
1346	e1000_clear_hw_cntrs_base_generic(&adapter->hw);
1347
1348	if (adapter->msix > 1) /* Set up queue routing */
1349		igb_configure_queues(adapter);
1350
1351	/* this clears any pending interrupts */
1352	E1000_READ_REG(&adapter->hw, E1000_ICR);
1353#ifdef DEVICE_POLLING
1354	/*
1355	 * Only enable interrupts if we are not polling, make sure
1356	 * they are off otherwise.
1357	 */
1358	if (ifp->if_capenable & IFCAP_POLLING)
1359		igb_disable_intr(adapter);
1360	else
1361#endif /* DEVICE_POLLING */
1362	{
1363		igb_enable_intr(adapter);
1364		E1000_WRITE_REG(&adapter->hw, E1000_ICS, E1000_ICS_LSC);
1365	}
1366
1367	/* Set Energy Efficient Ethernet */
1368	if (adapter->hw.phy.media_type == e1000_media_type_copper)
1369		e1000_set_eee_i350(&adapter->hw);
1370}
1371
1372static void
1373igb_init(void *arg)
1374{
1375	struct adapter *adapter = arg;
1376
1377	IGB_CORE_LOCK(adapter);
1378	igb_init_locked(adapter);
1379	IGB_CORE_UNLOCK(adapter);
1380}
1381
1382
1383static void
1384igb_handle_que(void *context, int pending)
1385{
1386	struct igb_queue *que = context;
1387	struct adapter *adapter = que->adapter;
1388	struct tx_ring *txr = que->txr;
1389	struct ifnet	*ifp = adapter->ifp;
1390
1391	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1392		bool	more;
1393
1394		more = igb_rxeof(que, adapter->rx_process_limit, NULL);
1395
1396		IGB_TX_LOCK(txr);
1397		igb_txeof(txr);
1398#ifndef IGB_LEGACY_TX
1399		/* Process the stack queue only if not depleted */
1400		if (((txr->queue_status & IGB_QUEUE_DEPLETED) == 0) &&
1401		    !drbr_empty(ifp, txr->br))
1402			igb_mq_start_locked(ifp, txr);
1403#else
1404		if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1405			igb_start_locked(txr, ifp);
1406#endif
1407		IGB_TX_UNLOCK(txr);
1408		/* Do we need another? */
1409		if (more) {
1410			taskqueue_enqueue(que->tq, &que->que_task);
1411			return;
1412		}
1413	}
1414
1415#ifdef DEVICE_POLLING
1416	if (ifp->if_capenable & IFCAP_POLLING)
1417		return;
1418#endif
1419	/* Reenable this interrupt */
1420	if (que->eims)
1421		E1000_WRITE_REG(&adapter->hw, E1000_EIMS, que->eims);
1422	else
1423		igb_enable_intr(adapter);
1424}
1425
1426/* Deal with link in a sleepable context */
1427static void
1428igb_handle_link(void *context, int pending)
1429{
1430	struct adapter *adapter = context;
1431
1432	IGB_CORE_LOCK(adapter);
1433	igb_handle_link_locked(adapter);
1434	IGB_CORE_UNLOCK(adapter);
1435}
1436
1437static void
1438igb_handle_link_locked(struct adapter *adapter)
1439{
1440	struct tx_ring	*txr = adapter->tx_rings;
1441	struct ifnet *ifp = adapter->ifp;
1442
1443	IGB_CORE_LOCK_ASSERT(adapter);
1444	adapter->hw.mac.get_link_status = 1;
1445	igb_update_link_status(adapter);
1446	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) && adapter->link_active) {
1447		for (int i = 0; i < adapter->num_queues; i++, txr++) {
1448			IGB_TX_LOCK(txr);
1449#ifndef IGB_LEGACY_TX
1450			/* Process the stack queue only if not depleted */
1451			if (((txr->queue_status & IGB_QUEUE_DEPLETED) == 0) &&
1452			    !drbr_empty(ifp, txr->br))
1453				igb_mq_start_locked(ifp, txr);
1454#else
1455			if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1456				igb_start_locked(txr, ifp);
1457#endif
1458			IGB_TX_UNLOCK(txr);
1459		}
1460	}
1461}
1462
1463/*********************************************************************
1464 *
1465 *  MSI/Legacy Deferred
1466 *  Interrupt Service routine
1467 *
1468 *********************************************************************/
1469static int
1470igb_irq_fast(void *arg)
1471{
1472	struct adapter		*adapter = arg;
1473	struct igb_queue	*que = adapter->queues;
1474	u32			reg_icr;
1475
1476
1477	reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1478
1479	/* Hot eject?  */
1480	if (reg_icr == 0xffffffff)
1481		return FILTER_STRAY;
1482
1483	/* Definitely not our interrupt.  */
1484	if (reg_icr == 0x0)
1485		return FILTER_STRAY;
1486
1487	if ((reg_icr & E1000_ICR_INT_ASSERTED) == 0)
1488		return FILTER_STRAY;
1489
1490	/*
1491	 * Mask interrupts until the taskqueue is finished running.  This is
1492	 * cheap, just assume that it is needed.  This also works around the
1493	 * MSI message reordering errata on certain systems.
1494	 */
1495	igb_disable_intr(adapter);
1496	taskqueue_enqueue(que->tq, &que->que_task);
1497
1498	/* Link status change */
1499	if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC))
1500		taskqueue_enqueue(que->tq, &adapter->link_task);
1501
1502	if (reg_icr & E1000_ICR_RXO)
1503		adapter->rx_overruns++;
1504	return FILTER_HANDLED;
1505}
1506
1507#ifdef DEVICE_POLLING
1508#if __FreeBSD_version >= 800000
1509#define POLL_RETURN_COUNT(a) (a)
1510static int
1511#else
1512#define POLL_RETURN_COUNT(a)
1513static void
1514#endif
1515igb_poll(struct ifnet *ifp, enum poll_cmd cmd, int count)
1516{
1517	struct adapter		*adapter = ifp->if_softc;
1518	struct igb_queue	*que;
1519	struct tx_ring		*txr;
1520	u32			reg_icr, rx_done = 0;
1521	u32			loop = IGB_MAX_LOOP;
1522	bool			more;
1523
1524	IGB_CORE_LOCK(adapter);
1525	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
1526		IGB_CORE_UNLOCK(adapter);
1527		return POLL_RETURN_COUNT(rx_done);
1528	}
1529
1530	if (cmd == POLL_AND_CHECK_STATUS) {
1531		reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1532		/* Link status change */
1533		if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC))
1534			igb_handle_link_locked(adapter);
1535
1536		if (reg_icr & E1000_ICR_RXO)
1537			adapter->rx_overruns++;
1538	}
1539	IGB_CORE_UNLOCK(adapter);
1540
1541	for (int i = 0; i < adapter->num_queues; i++) {
1542		que = &adapter->queues[i];
1543		txr = que->txr;
1544
1545		igb_rxeof(que, count, &rx_done);
1546
1547		IGB_TX_LOCK(txr);
1548		do {
1549			more = igb_txeof(txr);
1550		} while (loop-- && more);
1551#ifndef IGB_LEGACY_TX
1552		if (!drbr_empty(ifp, txr->br))
1553			igb_mq_start_locked(ifp, txr);
1554#else
1555		if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1556			igb_start_locked(txr, ifp);
1557#endif
1558		IGB_TX_UNLOCK(txr);
1559	}
1560
1561	return POLL_RETURN_COUNT(rx_done);
1562}
1563#endif /* DEVICE_POLLING */
1564
1565/*********************************************************************
1566 *
1567 *  MSIX Que Interrupt Service routine
1568 *
1569 **********************************************************************/
1570static void
1571igb_msix_que(void *arg)
1572{
1573	struct igb_queue *que = arg;
1574	struct adapter *adapter = que->adapter;
1575	struct ifnet   *ifp = adapter->ifp;
1576	struct tx_ring *txr = que->txr;
1577	struct rx_ring *rxr = que->rxr;
1578	u32		newitr = 0;
1579	bool		more_rx;
1580
1581	/* Ignore spurious interrupts */
1582	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
1583		return;
1584
1585	E1000_WRITE_REG(&adapter->hw, E1000_EIMC, que->eims);
1586	++que->irqs;
1587
1588	IGB_TX_LOCK(txr);
1589	igb_txeof(txr);
1590#ifndef IGB_LEGACY_TX
1591	/* Process the stack queue only if not depleted */
1592	if (((txr->queue_status & IGB_QUEUE_DEPLETED) == 0) &&
1593	    !drbr_empty(ifp, txr->br))
1594		igb_mq_start_locked(ifp, txr);
1595#else
1596	if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1597		igb_start_locked(txr, ifp);
1598#endif
1599	IGB_TX_UNLOCK(txr);
1600
1601	more_rx = igb_rxeof(que, adapter->rx_process_limit, NULL);
1602
1603	if (adapter->enable_aim == FALSE)
1604		goto no_calc;
1605	/*
1606	** Do Adaptive Interrupt Moderation:
1607        **  - Write out last calculated setting
1608	**  - Calculate based on average size over
1609	**    the last interval.
1610	*/
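	/*
	 * Worked example of the calculation below (illustrative only):
	 * with an average frame of 1500 bytes on a 1Gb link, newitr is
	 * 1500 + 24 = 1524, which stays under the 3000 cap, falls outside
	 * the 300-1200 mid range and is therefore halved to 762, then
	 * masked to 760 before being written to EITR on the next interrupt.
	 */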
1611        if (que->eitr_setting)
1612                E1000_WRITE_REG(&adapter->hw,
1613                    E1000_EITR(que->msix), que->eitr_setting);
1614
1615        que->eitr_setting = 0;
1616
1617        /* Idle, do nothing */
1618        if ((txr->bytes == 0) && (rxr->bytes == 0))
1619                goto no_calc;
1620
1621        /* Use half the default if sub-gig */
1622        if (adapter->link_speed != 1000)
1623                newitr = IGB_DEFAULT_ITR / 2;
1624        else {
1625		if ((txr->bytes) && (txr->packets))
1626                	newitr = txr->bytes/txr->packets;
1627		if ((rxr->bytes) && (rxr->packets))
1628			newitr = max(newitr,
1629			    (rxr->bytes / rxr->packets));
1630                newitr += 24; /* account for hardware frame, crc */
1631		/* set an upper boundary */
1632		newitr = min(newitr, 3000);
1633		/* Be nice to the mid range */
1634                if ((newitr > 300) && (newitr < 1200))
1635                        newitr = (newitr / 3);
1636                else
1637                        newitr = (newitr / 2);
1638        }
1639        newitr &= 0x7FFC;  /* Mask invalid bits */
1640        if (adapter->hw.mac.type == e1000_82575)
1641                newitr |= newitr << 16;
1642        else
1643                newitr |= E1000_EITR_CNT_IGNR;
1644
1645        /* save for next interrupt */
1646        que->eitr_setting = newitr;
1647
1648        /* Reset state */
1649        txr->bytes = 0;
1650        txr->packets = 0;
1651        rxr->bytes = 0;
1652        rxr->packets = 0;
1653
1654no_calc:
1655	/* Schedule a clean task if needed */
1656	if (more_rx)
1657		taskqueue_enqueue(que->tq, &que->que_task);
1658	else
1659		/* Reenable this interrupt */
1660		E1000_WRITE_REG(&adapter->hw, E1000_EIMS, que->eims);
1661	return;
1662}
1663
1664
1665/*********************************************************************
1666 *
1667 *  MSIX Link Interrupt Service routine
1668 *
1669 **********************************************************************/
1670
1671static void
1672igb_msix_link(void *arg)
1673{
1674	struct adapter	*adapter = arg;
1675	u32       	icr;
1676
1677	++adapter->link_irq;
1678	icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1679	if (!(icr & E1000_ICR_LSC))
1680		goto spurious;
1681	igb_handle_link(adapter, 0);
1682
1683spurious:
1684	/* Rearm */
1685	E1000_WRITE_REG(&adapter->hw, E1000_IMS, E1000_IMS_LSC);
1686	E1000_WRITE_REG(&adapter->hw, E1000_EIMS, adapter->link_mask);
1687	return;
1688}
1689
1690
1691/*********************************************************************
1692 *
1693 *  Media Ioctl callback
1694 *
1695 *  This routine is called whenever the user queries the status of
1696 *  the interface using ifconfig.
1697 *
1698 **********************************************************************/
1699static void
1700igb_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
1701{
1702	struct adapter *adapter = ifp->if_softc;
1703
1704	INIT_DEBUGOUT("igb_media_status: begin");
1705
1706	IGB_CORE_LOCK(adapter);
1707	igb_update_link_status(adapter);
1708
1709	ifmr->ifm_status = IFM_AVALID;
1710	ifmr->ifm_active = IFM_ETHER;
1711
1712	if (!adapter->link_active) {
1713		IGB_CORE_UNLOCK(adapter);
1714		return;
1715	}
1716
1717	ifmr->ifm_status |= IFM_ACTIVE;
1718
1719	switch (adapter->link_speed) {
1720	case 10:
1721		ifmr->ifm_active |= IFM_10_T;
1722		break;
1723	case 100:
1724		/*
1725		** Support for 100Mb SFP - these are Fiber
1726		** but the media type appears as serdes
1727		*/
1728		if (adapter->hw.phy.media_type ==
1729		    e1000_media_type_internal_serdes)
1730			ifmr->ifm_active |= IFM_100_FX;
1731		else
1732			ifmr->ifm_active |= IFM_100_TX;
1733		break;
1734	case 1000:
1735		ifmr->ifm_active |= IFM_1000_T;
1736		break;
1737	}
1738
1739	if (adapter->link_duplex == FULL_DUPLEX)
1740		ifmr->ifm_active |= IFM_FDX;
1741	else
1742		ifmr->ifm_active |= IFM_HDX;
1743
1744	IGB_CORE_UNLOCK(adapter);
1745}
1746
1747/*********************************************************************
1748 *
1749 *  Media Ioctl callback
1750 *
1751 *  This routine is called when the user changes speed/duplex using
1752 *  media/mediaopt option with ifconfig.
1753 *
1754 **********************************************************************/
1755static int
1756igb_media_change(struct ifnet *ifp)
1757{
1758	struct adapter *adapter = ifp->if_softc;
1759	struct ifmedia  *ifm = &adapter->media;
1760
1761	INIT_DEBUGOUT("igb_media_change: begin");
1762
1763	if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
1764		return (EINVAL);
1765
1766	IGB_CORE_LOCK(adapter);
1767	switch (IFM_SUBTYPE(ifm->ifm_media)) {
1768	case IFM_AUTO:
1769		adapter->hw.mac.autoneg = DO_AUTO_NEG;
1770		adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
1771		break;
1772	case IFM_1000_LX:
1773	case IFM_1000_SX:
1774	case IFM_1000_T:
1775		adapter->hw.mac.autoneg = DO_AUTO_NEG;
1776		adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
1777		break;
1778	case IFM_100_TX:
1779		adapter->hw.mac.autoneg = FALSE;
1780		adapter->hw.phy.autoneg_advertised = 0;
1781		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1782			adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_FULL;
1783		else
1784			adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_HALF;
1785		break;
1786	case IFM_10_T:
1787		adapter->hw.mac.autoneg = FALSE;
1788		adapter->hw.phy.autoneg_advertised = 0;
1789		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1790			adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_FULL;
1791		else
1792			adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_HALF;
1793		break;
1794	default:
1795		device_printf(adapter->dev, "Unsupported media type\n");
1796	}
1797
1798	igb_init_locked(adapter);
1799	IGB_CORE_UNLOCK(adapter);
1800
1801	return (0);
1802}
1803
1804
1805/*********************************************************************
1806 *
1807 *  This routine maps the mbufs to Advanced TX descriptors.
1808 *
1809 **********************************************************************/
1810static int
1811igb_xmit(struct tx_ring *txr, struct mbuf **m_headp)
1812{
1813	struct adapter		*adapter = txr->adapter;
1814	bus_dma_segment_t	segs[IGB_MAX_SCATTER];
1815	bus_dmamap_t		map;
1816	struct igb_tx_buffer	*tx_buffer, *tx_buffer_mapped;
1817	union e1000_adv_tx_desc	*txd = NULL;
1818	struct mbuf		*m_head = *m_headp;
1819	struct ether_vlan_header *eh = NULL;
1820	struct ip		*ip = NULL;
1821	struct tcphdr		*th = NULL;
1822	u32			hdrlen, cmd_type_len, olinfo_status = 0;
1823	int			ehdrlen, poff;
1824	int			nsegs, i, first, last = 0;
1825	int			error, do_tso, remap = 1;
1826
1827	/* Set basic descriptor constants */
1828	cmd_type_len = E1000_ADVTXD_DTYP_DATA;
1829	cmd_type_len |= E1000_ADVTXD_DCMD_IFCS | E1000_ADVTXD_DCMD_DEXT;
1830	if (m_head->m_flags & M_VLANTAG)
1831		cmd_type_len |= E1000_ADVTXD_DCMD_VLE;
1832
1833retry:
1834	m_head = *m_headp;
1835	do_tso = ((m_head->m_pkthdr.csum_flags & CSUM_TSO) != 0);
1836	hdrlen = ehdrlen = poff = 0;
1837
1838	/*
1839	 * Intel recommends entire IP/TCP header length reside in a single
1840	 * buffer. If multiple descriptors are used to describe the IP and
1841	 * TCP header, each descriptor should describe one or more
1842	 * complete headers; descriptors referencing only parts of headers
1843	 * are not supported. If all layer headers are not coalesced into
1844	 * a single buffer, each buffer should not cross a 4KB boundary,
1845	 * or be larger than the maximum read request size.
1846	 * The controller also requires modifying the IP/TCP header to make
1847	 * TSO work, so we first get a writable mbuf chain and then coalesce
1848	 * the ethernet/IP/TCP headers into a single buffer to meet the
1849	 * controller's requirement. This also simplifies IP/TCP/UDP checksum
1850	 * offloading, which has similar restrictions.
1851	 */
1852	if (do_tso || m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD) {
1853		if (do_tso || (m_head->m_next != NULL &&
1854		    m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD)) {
1855			if (M_WRITABLE(*m_headp) == 0) {
1856				m_head = m_dup(*m_headp, M_NOWAIT);
1857				m_freem(*m_headp);
1858				if (m_head == NULL) {
1859					*m_headp = NULL;
1860					return (ENOBUFS);
1861				}
1862				*m_headp = m_head;
1863			}
1864		}
1865		/*
1866		 * Assume IPv4, we don't have TSO/checksum offload support
1867		 * for IPv6 yet.
1868		 */
1869		ehdrlen = sizeof(struct ether_header);
1870		m_head = m_pullup(m_head, ehdrlen);
1871		if (m_head == NULL) {
1872			*m_headp = NULL;
1873			return (ENOBUFS);
1874		}
1875		eh = mtod(m_head, struct ether_vlan_header *);
1876		if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
1877			ehdrlen = sizeof(struct ether_vlan_header);
1878			m_head = m_pullup(m_head, ehdrlen);
1879			if (m_head == NULL) {
1880				*m_headp = NULL;
1881				return (ENOBUFS);
1882			}
1883		}
1884		m_head = m_pullup(m_head, ehdrlen + sizeof(struct ip));
1885		if (m_head == NULL) {
1886			*m_headp = NULL;
1887			return (ENOBUFS);
1888		}
1889		ip = (struct ip *)(mtod(m_head, char *) + ehdrlen);
1890		poff = ehdrlen + (ip->ip_hl << 2);
1891		if (do_tso) {
1892			m_head = m_pullup(m_head, poff + sizeof(struct tcphdr));
1893			if (m_head == NULL) {
1894				*m_headp = NULL;
1895				return (ENOBUFS);
1896			}
1897			/*
1898			 * The TCP pseudo-header checksum must not include the
1899			 * TCP payload length, so the driver recomputes it here
1900			 * to match what the hardware expects, as required by
1901			 * Microsoft's Large Send specification.
1902			 */
1903			th = (struct tcphdr *)(mtod(m_head, char *) + poff);
1904			th->th_sum = in_pseudo(ip->ip_src.s_addr,
1905			    ip->ip_dst.s_addr, htons(IPPROTO_TCP));
1906			/* Keep track of the full header length */
1907			hdrlen = poff + (th->th_off << 2);
1908		} else if (m_head->m_pkthdr.csum_flags & CSUM_TCP) {
1909			m_head = m_pullup(m_head, poff + sizeof(struct tcphdr));
1910			if (m_head == NULL) {
1911				*m_headp = NULL;
1912				return (ENOBUFS);
1913			}
1914			th = (struct tcphdr *)(mtod(m_head, char *) + poff);
1915			m_head = m_pullup(m_head, poff + (th->th_off << 2));
1916			if (m_head == NULL) {
1917				*m_headp = NULL;
1918				return (ENOBUFS);
1919			}
1920			ip = (struct ip *)(mtod(m_head, char *) + ehdrlen);
1921			th = (struct tcphdr *)(mtod(m_head, char *) + poff);
1922		} else if (m_head->m_pkthdr.csum_flags & CSUM_UDP) {
1923			m_head = m_pullup(m_head, poff + sizeof(struct udphdr));
1924			if (m_head == NULL) {
1925				*m_headp = NULL;
1926				return (ENOBUFS);
1927			}
1928			ip = (struct ip *)(mtod(m_head, char *) + ehdrlen);
1929		}
1930		*m_headp = m_head;
1931	}
1932
1933	/*
1934	 * Map the packet for DMA
1935	 *
1936	 * Capture the first descriptor index;
1937	 * this descriptor will later record the
1938	 * index of the EOP, which is the only one
1939	 * that gets a DONE bit writeback.
1940	 */
1941	first = txr->next_avail_desc;
1942	tx_buffer = &txr->tx_buffers[first];
1943	tx_buffer_mapped = tx_buffer;
1944	map = tx_buffer->map;
1945
1946	error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
1947	    *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
1948
1949	/*
1950	 * There are two types of errors we can (try) to handle:
1951	 * - EFBIG means the mbuf chain was too long and bus_dma ran
1952	 *   out of segments.  Defragment the mbuf chain and try again.
1953	 * - ENOMEM means bus_dma could not obtain enough bounce buffers
1954	 *   at this point in time.  Defer sending and try again later.
1955	 * All other errors, in particular EINVAL, are fatal and prevent the
1956	 * mbuf chain from ever going through.  Drop it and report error.
1957	 */
1958	if (error == EFBIG && remap) {
1959		struct mbuf *m;
1960
1961		m = m_defrag(*m_headp, M_NOWAIT);
1962		if (m == NULL) {
1963			adapter->mbuf_defrag_failed++;
1964			m_freem(*m_headp);
1965			*m_headp = NULL;
1966			return (ENOBUFS);
1967		}
1968		*m_headp = m;
1969
1970		/* Try it again, but only once */
1971		remap = 0;
1972		goto retry;
1973	} else if (error == ENOMEM) {
1974		adapter->no_tx_dma_setup++;
1975		return (error);
1976	} else if (error != 0) {
1977		adapter->no_tx_dma_setup++;
1978		m_freem(*m_headp);
1979		*m_headp = NULL;
1980		return (error);
1981	}
1982
1983	/*
1984	** Make sure we don't overrun the ring,
1985	** we need nsegs descriptors and one for
1986	** the context descriptor used for the
1987	** offloads.
1988	*/
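	/*
	** Note: the "- 2" below reserves two extra descriptors beyond
	** nsegs + 1, presumably as a small cushion so the ring is never
	** driven completely full.
	*/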
1989	if ((nsegs + 1) > (txr->tx_avail - 2)) {
1990		txr->no_desc_avail++;
1991		bus_dmamap_unload(txr->txtag, map);
1992		return (ENOBUFS);
1993	}
1994	m_head = *m_headp;
1995
1996	/* Do hardware assists:
1997         * Set up the context descriptor, used
1998         * when any hardware offload is done.
1999         * This includes CSUM, VLAN, and TSO.
2000         * It will use the first descriptor.
2001         */
2002
2003	if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
2004		if (igb_tso_setup(txr, m_head, ehdrlen, ip, th)) {
2005			cmd_type_len |= E1000_ADVTXD_DCMD_TSE;
2006			olinfo_status |= E1000_TXD_POPTS_IXSM << 8;
2007			olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
2008		} else
2009			return (ENXIO);
2010	} else if (igb_tx_ctx_setup(txr, m_head))
2011			olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
2012
2013	/* Calculate payload length */
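	/* (for TSO, hdrlen excludes the headers from PAYLEN; otherwise it is 0) */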
2014	olinfo_status |= ((m_head->m_pkthdr.len - hdrlen)
2015	    << E1000_ADVTXD_PAYLEN_SHIFT);
2016
2017	/* 82575 needs the queue index added */
2018	if (adapter->hw.mac.type == e1000_82575)
2019		olinfo_status |= txr->me << 4;
2020
2021	/* Set up our transmit descriptors */
2022	i = txr->next_avail_desc;
2023	for (int j = 0; j < nsegs; j++) {
2024		bus_size_t seg_len;
2025		bus_addr_t seg_addr;
2026
2027		tx_buffer = &txr->tx_buffers[i];
2028		txd = (union e1000_adv_tx_desc *)&txr->tx_base[i];
2029		seg_addr = segs[j].ds_addr;
2030		seg_len  = segs[j].ds_len;
2031
2032		txd->read.buffer_addr = htole64(seg_addr);
2033		txd->read.cmd_type_len = htole32(cmd_type_len | seg_len);
2034		txd->read.olinfo_status = htole32(olinfo_status);
2035		last = i;
2036		if (++i == adapter->num_tx_desc)
2037			i = 0;
2038		tx_buffer->m_head = NULL;
2039		tx_buffer->next_eop = -1;
2040	}
2041
2042	txr->next_avail_desc = i;
2043	txr->tx_avail -= nsegs;
2044        tx_buffer->m_head = m_head;
2045
2046	/*
2047	** Here we swap the map so the last descriptor,
2048	** which gets the completion interrupt, has the
2049	** real map, and the first descriptor gets the
2050	** unused map from this descriptor.
2051	*/
2052	tx_buffer_mapped->map = tx_buffer->map;
2053	tx_buffer->map = map;
2054        bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);
2055
2056        /*
2057         * Last Descriptor of Packet
2058	 * needs End Of Packet (EOP)
2059	 * and Report Status (RS)
2060         */
2061        txd->read.cmd_type_len |=
2062	    htole32(E1000_ADVTXD_DCMD_EOP | E1000_ADVTXD_DCMD_RS);
2063	/*
2064	 * Keep track in the first buffer which
2065	 * descriptor will be written back
2066	 */
2067	tx_buffer = &txr->tx_buffers[first];
2068	tx_buffer->next_eop = last;
2069	/* Update the watchdog time early and often */
2070	txr->watchdog_time = ticks;
2071
2072	/*
2073	 * Advance the Transmit Descriptor Tail (TDT), this tells the E1000
2074	 * that this frame is available to transmit.
2075	 */
2076	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
2077	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
2078	E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), i);
2079	++txr->tx_packets;
2080
2081	return (0);
2082}

2083static void
2084igb_set_promisc(struct adapter *adapter)
2085{
2086	struct ifnet	*ifp = adapter->ifp;
2087	struct e1000_hw *hw = &adapter->hw;
2088	u32		reg;
2089
2090	if (adapter->vf_ifp) {
2091		e1000_promisc_set_vf(hw, e1000_promisc_enabled);
2092		return;
2093	}
2094
2095	reg = E1000_READ_REG(hw, E1000_RCTL);
2096	if (ifp->if_flags & IFF_PROMISC) {
2097		reg |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
2098		E1000_WRITE_REG(hw, E1000_RCTL, reg);
2099	} else if (ifp->if_flags & IFF_ALLMULTI) {
2100		reg |= E1000_RCTL_MPE;
2101		reg &= ~E1000_RCTL_UPE;
2102		E1000_WRITE_REG(hw, E1000_RCTL, reg);
2103	}
2104}
2105
2106static void
2107igb_disable_promisc(struct adapter *adapter)
2108{
2109	struct e1000_hw *hw = &adapter->hw;
2110	struct ifnet	*ifp = adapter->ifp;
2111	u32		reg;
2112	int		mcnt = 0;
2113
2114	if (adapter->vf_ifp) {
2115		e1000_promisc_set_vf(hw, e1000_promisc_disabled);
2116		return;
2117	}
2118	reg = E1000_READ_REG(hw, E1000_RCTL);
2119	reg &=  (~E1000_RCTL_UPE);
2120	if (ifp->if_flags & IFF_ALLMULTI)
2121		mcnt = MAX_NUM_MULTICAST_ADDRESSES;
2122	else {
2123		struct  ifmultiaddr *ifma;
2124#if __FreeBSD_version < 800000
2125		IF_ADDR_LOCK(ifp);
2126#else
2127		if_maddr_rlock(ifp);
2128#endif
2129		TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
2130			if (ifma->ifma_addr->sa_family != AF_LINK)
2131				continue;
2132			if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
2133				break;
2134			mcnt++;
2135		}
2136#if __FreeBSD_version < 800000
2137		IF_ADDR_UNLOCK(ifp);
2138#else
2139		if_maddr_runlock(ifp);
2140#endif
2141	}
2142	/* Don't disable if in MAX groups */
2143	if (mcnt < MAX_NUM_MULTICAST_ADDRESSES)
2144		reg &=  (~E1000_RCTL_MPE);
2145	E1000_WRITE_REG(hw, E1000_RCTL, reg);
2146}
2147
2148
2149/*********************************************************************
2150 *  Multicast Update
2151 *
2152 *  This routine is called whenever multicast address list is updated.
2153 *
2154 **********************************************************************/
2155
2156static void
2157igb_set_multi(struct adapter *adapter)
2158{
2159	struct ifnet	*ifp = adapter->ifp;
2160	struct ifmultiaddr *ifma;
2161	u32 reg_rctl = 0;
2162	u8  *mta;
2163
2164	int mcnt = 0;
2165
2166	IOCTL_DEBUGOUT("igb_set_multi: begin");
2167
2168	mta = adapter->mta;
2169	bzero(mta, sizeof(uint8_t) * ETH_ADDR_LEN *
2170	    MAX_NUM_MULTICAST_ADDRESSES);
2171
2172#if __FreeBSD_version < 800000
2173	IF_ADDR_LOCK(ifp);
2174#else
2175	if_maddr_rlock(ifp);
2176#endif
2177	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
2178		if (ifma->ifma_addr->sa_family != AF_LINK)
2179			continue;
2180
2181		if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
2182			break;
2183
2184		bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
2185		    &mta[mcnt * ETH_ADDR_LEN], ETH_ADDR_LEN);
2186		mcnt++;
2187	}
2188#if __FreeBSD_version < 800000
2189	IF_ADDR_UNLOCK(ifp);
2190#else
2191	if_maddr_runlock(ifp);
2192#endif
2193
2194	if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) {
2195		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2196		reg_rctl |= E1000_RCTL_MPE;
2197		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2198	} else
2199		e1000_update_mc_addr_list(&adapter->hw, mta, mcnt);
2200}
2201
2202
2203/*********************************************************************
2204 *  Timer routine:
2205 *  	This routine checks for link status,
2206 *	updates statistics, and does the watchdog.
2207 *
2208 **********************************************************************/
2209
2210static void
2211igb_local_timer(void *arg)
2212{
2213	struct adapter		*adapter = arg;
2214	device_t		dev = adapter->dev;
2215	struct ifnet		*ifp = adapter->ifp;
2216	struct tx_ring		*txr = adapter->tx_rings;
2217	struct igb_queue	*que = adapter->queues;
2218	int			hung = 0, busy = 0;
2219
2220
2221	IGB_CORE_LOCK_ASSERT(adapter);
2222
2223	igb_update_link_status(adapter);
2224	igb_update_stats_counters(adapter);
2225
2226        /*
2227        ** Check the TX queues status
2228	**	- central locked handling of OACTIVE
2229	**	- watchdog only if all queues show hung
2230        */
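	/*
	** A queue only counts as hung when no pause frames were seen;
	** a flow-control stall is not treated as a watchdog event.
	*/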
2231	for (int i = 0; i < adapter->num_queues; i++, que++, txr++) {
2232		if ((txr->queue_status & IGB_QUEUE_HUNG) &&
2233		    (adapter->pause_frames == 0))
2234			++hung;
2235		if (txr->queue_status & IGB_QUEUE_DEPLETED)
2236			++busy;
2237		if ((txr->queue_status & IGB_QUEUE_IDLE) == 0)
2238			taskqueue_enqueue(que->tq, &que->que_task);
2239	}
2240	if (hung == adapter->num_queues)
2241		goto timeout;
2242	if (busy == adapter->num_queues)
2243		ifp->if_drv_flags |= IFF_DRV_OACTIVE;
2244	else if ((ifp->if_drv_flags & IFF_DRV_OACTIVE) &&
2245	    (busy < adapter->num_queues))
2246		ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
2247
2248	adapter->pause_frames = 0;
2249	callout_reset(&adapter->timer, hz, igb_local_timer, adapter);
2250#ifndef DEVICE_POLLING
2251	/* Schedule all queue interrupts - deadlock protection */
2252	E1000_WRITE_REG(&adapter->hw, E1000_EICS, adapter->que_mask);
2253#endif
2254	return;
2255
2256timeout:
2257	device_printf(adapter->dev, "Watchdog timeout -- resetting\n");
2258	device_printf(dev, "Queue(%d) tdh = %d, hw tdt = %d\n", txr->me,
2259	    E1000_READ_REG(&adapter->hw, E1000_TDH(txr->me)),
2260	    E1000_READ_REG(&adapter->hw, E1000_TDT(txr->me)));
2261	device_printf(dev, "TX(%d) desc avail = %d, "
2262	    "Next TX to Clean = %d\n",
2263	    txr->me, txr->tx_avail, txr->next_to_clean);
2264	adapter->ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2265	adapter->watchdog_events++;
2266	igb_init_locked(adapter);
2267}
2268
2269static void
2270igb_update_link_status(struct adapter *adapter)
2271{
2272	struct e1000_hw		*hw = &adapter->hw;
2273	struct e1000_fc_info	*fc = &hw->fc;
2274	struct ifnet		*ifp = adapter->ifp;
2275	device_t		dev = adapter->dev;
2276	struct tx_ring		*txr = adapter->tx_rings;
2277	u32			link_check, thstat, ctrl;
2278	char			*flowctl = NULL;
2279
2280	link_check = thstat = ctrl = 0;
2281
2282	/* Get the cached link value or read for real */
2283        switch (hw->phy.media_type) {
2284        case e1000_media_type_copper:
2285                if (hw->mac.get_link_status) {
2286			/* Do the work to read phy */
2287                        e1000_check_for_link(hw);
2288                        link_check = !hw->mac.get_link_status;
2289                } else
2290                        link_check = TRUE;
2291                break;
2292        case e1000_media_type_fiber:
2293                e1000_check_for_link(hw);
2294                link_check = (E1000_READ_REG(hw, E1000_STATUS) &
2295                                 E1000_STATUS_LU);
2296                break;
2297        case e1000_media_type_internal_serdes:
2298                e1000_check_for_link(hw);
2299                link_check = adapter->hw.mac.serdes_has_link;
2300                break;
2301	/* VF device is type_unknown */
2302        case e1000_media_type_unknown:
2303                e1000_check_for_link(hw);
2304		link_check = !hw->mac.get_link_status;
2305		/* Fall thru */
2306        default:
2307                break;
2308        }
2309
2310	/* Check for thermal downshift or shutdown */
2311	if (hw->mac.type == e1000_i350) {
2312		thstat = E1000_READ_REG(hw, E1000_THSTAT);
2313		ctrl = E1000_READ_REG(hw, E1000_CTRL_EXT);
2314	}
2315
2316	/* Get the flow control for display */
2317	switch (fc->current_mode) {
2318	case e1000_fc_rx_pause:
2319		flowctl = "RX";
2320		break;
2321	case e1000_fc_tx_pause:
2322		flowctl = "TX";
2323		break;
2324	case e1000_fc_full:
2325		flowctl = "Full";
2326		break;
2327	case e1000_fc_none:
2328	default:
2329		flowctl = "None";
2330		break;
2331	}
2332
2333	/* Now we check if a transition has happened */
2334	if (link_check && (adapter->link_active == 0)) {
2335		e1000_get_speed_and_duplex(&adapter->hw,
2336		    &adapter->link_speed, &adapter->link_duplex);
2337		if (bootverbose)
2338			device_printf(dev, "Link is up %d Mbps %s,"
2339			    " Flow Control: %s\n",
2340			    adapter->link_speed,
2341			    ((adapter->link_duplex == FULL_DUPLEX) ?
2342			    "Full Duplex" : "Half Duplex"), flowctl);
2343		adapter->link_active = 1;
2344		ifp->if_baudrate = adapter->link_speed * 1000000;
2345		if ((ctrl & E1000_CTRL_EXT_LINK_MODE_GMII) &&
2346		    (thstat & E1000_THSTAT_LINK_THROTTLE))
2347			device_printf(dev, "Link: thermal downshift\n");
2348		/* This can sleep */
2349		if_link_state_change(ifp, LINK_STATE_UP);
2350	} else if (!link_check && (adapter->link_active == 1)) {
2351		ifp->if_baudrate = adapter->link_speed = 0;
2352		adapter->link_duplex = 0;
2353		if (bootverbose)
2354			device_printf(dev, "Link is Down\n");
2355		if ((ctrl & E1000_CTRL_EXT_LINK_MODE_GMII) &&
2356		    (thstat & E1000_THSTAT_PWR_DOWN))
2357			device_printf(dev, "Link: thermal shutdown\n");
2358		adapter->link_active = 0;
2359		/* This can sleep */
2360		if_link_state_change(ifp, LINK_STATE_DOWN);
2361		/* Reset queue state */
2362		for (int i = 0; i < adapter->num_queues; i++, txr++)
2363			txr->queue_status = IGB_QUEUE_IDLE;
2364	}
2365}
2366
2367/*********************************************************************
2368 *
2369 *  This routine disables all traffic on the adapter by issuing a
2370 *  global reset on the MAC and deallocates TX/RX buffers.
2371 *
2372 **********************************************************************/
2373
2374static void
2375igb_stop(void *arg)
2376{
2377	struct adapter	*adapter = arg;
2378	struct ifnet	*ifp = adapter->ifp;
2379	struct tx_ring *txr = adapter->tx_rings;
2380
2381	IGB_CORE_LOCK_ASSERT(adapter);
2382
2383	INIT_DEBUGOUT("igb_stop: begin");
2384
2385	igb_disable_intr(adapter);
2386
2387	callout_stop(&adapter->timer);
2388
2389	/* Tell the stack that the interface is no longer active */
2390	ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2391	ifp->if_drv_flags |= IFF_DRV_OACTIVE;
2392
2393	/* Disarm watchdog timer. */
2394	for (int i = 0; i < adapter->num_queues; i++, txr++) {
2395		IGB_TX_LOCK(txr);
2396		txr->queue_status = IGB_QUEUE_IDLE;
2397		IGB_TX_UNLOCK(txr);
2398	}
2399
2400	e1000_reset_hw(&adapter->hw);
2401	E1000_WRITE_REG(&adapter->hw, E1000_WUC, 0);
2402
2403	e1000_led_off(&adapter->hw);
2404	e1000_cleanup_led(&adapter->hw);
2405}
2406
2407
2408/*********************************************************************
2409 *
2410 *  Determine hardware revision.
2411 *
2412 **********************************************************************/
2413static void
2414igb_identify_hardware(struct adapter *adapter)
2415{
2416	device_t dev = adapter->dev;
2417
2418	/* Make sure bus mastering is enabled in our PCI config space */
2419	pci_enable_busmaster(dev);
2420	adapter->hw.bus.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2);
2421
2422	/* Save off the information about this board */
2423	adapter->hw.vendor_id = pci_get_vendor(dev);
2424	adapter->hw.device_id = pci_get_device(dev);
2425	adapter->hw.revision_id = pci_read_config(dev, PCIR_REVID, 1);
2426	adapter->hw.subsystem_vendor_id =
2427	    pci_read_config(dev, PCIR_SUBVEND_0, 2);
2428	adapter->hw.subsystem_device_id =
2429	    pci_read_config(dev, PCIR_SUBDEV_0, 2);
2430
2431	/* Set MAC type early for PCI setup */
2432	e1000_set_mac_type(&adapter->hw);
2433
2434	/* Are we a VF device? */
2435	if ((adapter->hw.mac.type == e1000_vfadapt) ||
2436	    (adapter->hw.mac.type == e1000_vfadapt_i350))
2437		adapter->vf_ifp = 1;
2438	else
2439		adapter->vf_ifp = 0;
2440}
2441
2442static int
2443igb_allocate_pci_resources(struct adapter *adapter)
2444{
2445	device_t	dev = adapter->dev;
2446	int		rid;
2447
2448	rid = PCIR_BAR(0);
2449	adapter->pci_mem = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
2450	    &rid, RF_ACTIVE);
2451	if (adapter->pci_mem == NULL) {
2452		device_printf(dev, "Unable to allocate bus resource: memory\n");
2453		return (ENXIO);
2454	}
2455	adapter->osdep.mem_bus_space_tag =
2456	    rman_get_bustag(adapter->pci_mem);
2457	adapter->osdep.mem_bus_space_handle =
2458	    rman_get_bushandle(adapter->pci_mem);
2459	adapter->hw.hw_addr = (u8 *)&adapter->osdep.mem_bus_space_handle;
2460
2461	adapter->num_queues = 1; /* Defaults for Legacy or MSI */
2462
2463	/* This will setup either MSI/X or MSI */
2464	adapter->msix = igb_setup_msix(adapter);
2465	adapter->hw.back = &adapter->osdep;
2466
2467	return (0);
2468}
2469
2470/*********************************************************************
2471 *
2472 *  Setup the Legacy or MSI Interrupt handler
2473 *
2474 **********************************************************************/
2475static int
2476igb_allocate_legacy(struct adapter *adapter)
2477{
2478	device_t		dev = adapter->dev;
2479	struct igb_queue	*que = adapter->queues;
2480	int			error, rid = 0;
2481
2482	/* Turn off all interrupts */
2483	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2484
2485	/* MSI RID is 1 */
2486	if (adapter->msix == 1)
2487		rid = 1;
2488
2489	/* We allocate a single interrupt resource */
2490	adapter->res = bus_alloc_resource_any(dev,
2491	    SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2492	if (adapter->res == NULL) {
2493		device_printf(dev, "Unable to allocate bus resource: "
2494		    "interrupt\n");
2495		return (ENXIO);
2496	}
2497
2498#ifndef IGB_LEGACY_TX
2499	TASK_INIT(&que->txr->txq_task, 0, igb_deferred_mq_start, que->txr);
2500#endif
2501
2502	/*
2503	 * Try allocating a fast interrupt and the associated deferred
2504	 * processing contexts.
2505	 */
2506	TASK_INIT(&que->que_task, 0, igb_handle_que, que);
2507	/* Make tasklet for deferred link handling */
2508	TASK_INIT(&adapter->link_task, 0, igb_handle_link, adapter);
2509	que->tq = taskqueue_create_fast("igb_taskq", M_NOWAIT,
2510	    taskqueue_thread_enqueue, &que->tq);
2511	taskqueue_start_threads(&que->tq, 1, PI_NET, "%s taskq",
2512	    device_get_nameunit(adapter->dev));
2513	if ((error = bus_setup_intr(dev, adapter->res,
2514	    INTR_TYPE_NET | INTR_MPSAFE, igb_irq_fast, NULL,
2515	    adapter, &adapter->tag)) != 0) {
2516		device_printf(dev, "Failed to register fast interrupt "
2517			    "handler: %d\n", error);
2518		taskqueue_free(que->tq);
2519		que->tq = NULL;
2520		return (error);
2521	}
2522
2523	return (0);
2524}
2525
2526
2527/*********************************************************************
2528 *
2529 *  Setup the MSIX Queue Interrupt handlers:
2530 *
2531 **********************************************************************/
2532static int
2533igb_allocate_msix(struct adapter *adapter)
2534{
2535	device_t		dev = adapter->dev;
2536	struct igb_queue	*que = adapter->queues;
2537	int			error, rid, vector = 0;
2538
2539	/* Be sure to start with all interrupts disabled */
2540	E1000_WRITE_REG(&adapter->hw, E1000_IMC, ~0);
2541	E1000_WRITE_FLUSH(&adapter->hw);
2542
2543	for (int i = 0; i < adapter->num_queues; i++, vector++, que++) {
2544		rid = vector + 1;
2545		que->res = bus_alloc_resource_any(dev,
2546		    SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2547		if (que->res == NULL) {
2548			device_printf(dev,
2549			    "Unable to allocate bus resource: "
2550			    "MSIX Queue Interrupt\n");
2551			return (ENXIO);
2552		}
2553		error = bus_setup_intr(dev, que->res,
2554	    	    INTR_TYPE_NET | INTR_MPSAFE, NULL,
2555		    igb_msix_que, que, &que->tag);
2556		if (error) {
2557			que->res = NULL;
2558			device_printf(dev, "Failed to register Queue handler\n");
2559			return (error);
2560		}
2561#if __FreeBSD_version >= 800504
2562		bus_describe_intr(dev, que->res, que->tag, "que %d", i);
2563#endif
2564		que->msix = vector;
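		/*
		** 82575 uses fixed per-queue bits in EICR, while later
		** MACs use one EIMS bit per MSI-X vector.
		*/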
2565		if (adapter->hw.mac.type == e1000_82575)
2566			que->eims = E1000_EICR_TX_QUEUE0 << i;
2567		else
2568			que->eims = 1 << vector;
2569		/*
2570		** Bind the msix vector, and thus the
2571		** rings to the corresponding cpu.
2572		*/
2573		if (adapter->num_queues > 1) {
2574			if (igb_last_bind_cpu < 0)
2575				igb_last_bind_cpu = CPU_FIRST();
2576			bus_bind_intr(dev, que->res, igb_last_bind_cpu);
2577			device_printf(dev,
2578				"Bound queue %d to cpu %d\n",
2579				i, igb_last_bind_cpu);
2580			igb_last_bind_cpu = CPU_NEXT(igb_last_bind_cpu);
2581		}
2582#ifndef IGB_LEGACY_TX
2583		TASK_INIT(&que->txr->txq_task, 0, igb_deferred_mq_start,
2584		    que->txr);
2585#endif
2586		/* Make tasklet for deferred handling */
2587		TASK_INIT(&que->que_task, 0, igb_handle_que, que);
2588		que->tq = taskqueue_create("igb_que", M_NOWAIT,
2589		    taskqueue_thread_enqueue, &que->tq);
2590		taskqueue_start_threads(&que->tq, 1, PI_NET, "%s que",
2591		    device_get_nameunit(adapter->dev));
2592	}
2593
2594	/* And Link */
2595	rid = vector + 1;
2596	adapter->res = bus_alloc_resource_any(dev,
2597	    SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2598	if (adapter->res == NULL) {
2599		device_printf(dev,
2600		    "Unable to allocate bus resource: "
2601		    "MSIX Link Interrupt\n");
2602		return (ENXIO);
2603	}
2604	if ((error = bus_setup_intr(dev, adapter->res,
2605	    INTR_TYPE_NET | INTR_MPSAFE, NULL,
2606	    igb_msix_link, adapter, &adapter->tag)) != 0) {
2607		device_printf(dev, "Failed to register Link handler\n");
2608		return (error);
2609	}
2610#if __FreeBSD_version >= 800504
2611	bus_describe_intr(dev, adapter->res, adapter->tag, "link");
2612#endif
2613	adapter->linkvec = vector;
2614
2615	return (0);
2616}
2617
2618
2619static void
2620igb_configure_queues(struct adapter *adapter)
2621{
2622	struct	e1000_hw	*hw = &adapter->hw;
2623	struct	igb_queue	*que;
2624	u32			tmp, ivar = 0, newitr = 0;
2625
2626	/* First turn on RSS capability */
2627	if (adapter->hw.mac.type != e1000_82575)
2628		E1000_WRITE_REG(hw, E1000_GPIE,
2629		    E1000_GPIE_MSIX_MODE | E1000_GPIE_EIAME |
2630		    E1000_GPIE_PBA | E1000_GPIE_NSICR);
2631
2632	/* Turn on MSIX */
2633	switch (adapter->hw.mac.type) {
2634	case e1000_82580:
2635	case e1000_i350:
2636	case e1000_i210:
2637	case e1000_i211:
2638	case e1000_vfadapt:
2639	case e1000_vfadapt_i350:
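		/*
		** Each IVAR register maps two queues: RX of queue 2n in
		** byte 0, TX of queue 2n in byte 1, RX of queue 2n+1 in
		** byte 2 and TX of queue 2n+1 in byte 3, hence the (i >> 1)
		** index and the shifts below.
		*/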
2640		/* RX entries */
2641		for (int i = 0; i < adapter->num_queues; i++) {
2642			u32 index = i >> 1;
2643			ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2644			que = &adapter->queues[i];
2645			if (i & 1) {
2646				ivar &= 0xFF00FFFF;
2647				ivar |= (que->msix | E1000_IVAR_VALID) << 16;
2648			} else {
2649				ivar &= 0xFFFFFF00;
2650				ivar |= que->msix | E1000_IVAR_VALID;
2651			}
2652			E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2653		}
2654		/* TX entries */
2655		for (int i = 0; i < adapter->num_queues; i++) {
2656			u32 index = i >> 1;
2657			ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2658			que = &adapter->queues[i];
2659			if (i & 1) {
2660				ivar &= 0x00FFFFFF;
2661				ivar |= (que->msix | E1000_IVAR_VALID) << 24;
2662			} else {
2663				ivar &= 0xFFFF00FF;
2664				ivar |= (que->msix | E1000_IVAR_VALID) << 8;
2665			}
2666			E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2667			adapter->que_mask |= que->eims;
2668		}
2669
2670		/* And for the link interrupt */
2671		ivar = (adapter->linkvec | E1000_IVAR_VALID) << 8;
2672		adapter->link_mask = 1 << adapter->linkvec;
2673		E1000_WRITE_REG(hw, E1000_IVAR_MISC, ivar);
2674		break;
2675	case e1000_82576:
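		/*
		** 82576: queues 0-7 use the low halves of IVAR0-7 (i & 0x7)
		** and queues 8-15 the high halves; RX entries go in bytes
		** 0/2 and TX entries in bytes 1/3.
		*/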
2676		/* RX entries */
2677		for (int i = 0; i < adapter->num_queues; i++) {
2678			u32 index = i & 0x7; /* Each IVAR has two entries */
2679			ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2680			que = &adapter->queues[i];
2681			if (i < 8) {
2682				ivar &= 0xFFFFFF00;
2683				ivar |= que->msix | E1000_IVAR_VALID;
2684			} else {
2685				ivar &= 0xFF00FFFF;
2686				ivar |= (que->msix | E1000_IVAR_VALID) << 16;
2687			}
2688			E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2689			adapter->que_mask |= que->eims;
2690		}
2691		/* TX entries */
2692		for (int i = 0; i < adapter->num_queues; i++) {
2693			u32 index = i & 0x7; /* Each IVAR has two entries */
2694			ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2695			que = &adapter->queues[i];
2696			if (i < 8) {
2697				ivar &= 0xFFFF00FF;
2698				ivar |= (que->msix | E1000_IVAR_VALID) << 8;
2699			} else {
2700				ivar &= 0x00FFFFFF;
2701				ivar |= (que->msix | E1000_IVAR_VALID) << 24;
2702			}
2703			E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2704			adapter->que_mask |= que->eims;
2705		}
2706
2707		/* And for the link interrupt */
2708		ivar = (adapter->linkvec | E1000_IVAR_VALID) << 8;
2709		adapter->link_mask = 1 << adapter->linkvec;
2710		E1000_WRITE_REG(hw, E1000_IVAR_MISC, ivar);
2711		break;
2712
2713	case e1000_82575:
2714		/* Enable MSI-X support */
2715		tmp = E1000_READ_REG(hw, E1000_CTRL_EXT);
2716		tmp |= E1000_CTRL_EXT_PBA_CLR;
2717		/* Auto-Mask interrupts upon ICR read. */
2718		tmp |= E1000_CTRL_EXT_EIAME;
2719		tmp |= E1000_CTRL_EXT_IRCA;
2720		E1000_WRITE_REG(hw, E1000_CTRL_EXT, tmp);
2721
2722		/* Queues */
2723		for (int i = 0; i < adapter->num_queues; i++) {
2724			que = &adapter->queues[i];
2725			tmp = E1000_EICR_RX_QUEUE0 << i;
2726			tmp |= E1000_EICR_TX_QUEUE0 << i;
2727			que->eims = tmp;
2728			E1000_WRITE_REG_ARRAY(hw, E1000_MSIXBM(0),
2729			    i, que->eims);
2730			adapter->que_mask |= que->eims;
2731		}
2732
2733		/* Link */
2734		E1000_WRITE_REG(hw, E1000_MSIXBM(adapter->linkvec),
2735		    E1000_EIMS_OTHER);
2736		adapter->link_mask |= E1000_EIMS_OTHER;
		break;
2737	default:
2738		break;
2739	}
2740
2741	/* Set the starting interrupt rate */
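	/*
	** 4000000/rate approximates the EITR interval for the requested
	** rate (the interval field counts in ~256 ns units); the 0x7FFC
	** mask keeps only the writable interval bits.
	*/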
2742	if (igb_max_interrupt_rate > 0)
2743		newitr = (4000000 / igb_max_interrupt_rate) & 0x7FFC;
2744
2745        if (hw->mac.type == e1000_82575)
2746                newitr |= newitr << 16;
2747        else
2748                newitr |= E1000_EITR_CNT_IGNR;
2749
2750	for (int i = 0; i < adapter->num_queues; i++) {
2751		que = &adapter->queues[i];
2752		E1000_WRITE_REG(hw, E1000_EITR(que->msix), newitr);
2753	}
2754
2755	return;
2756}
2757
2758
2759static void
2760igb_free_pci_resources(struct adapter *adapter)
2761{
2762	struct		igb_queue *que = adapter->queues;
2763	device_t	dev = adapter->dev;
2764	int		rid;
2765
2766	/*
2767	** There is a slight possibility of a failure mode
2768	** in attach that will result in entering this function
2769	** before interrupt resources have been initialized, and
2770	** in that case we do not want to execute the loops below.
2771	** We can detect this reliably by the state of the adapter's
2772	** res pointer.
2773	*/
2774	if (adapter->res == NULL)
2775		goto mem;
2776
2777	/*
2778	 * First release all the interrupt resources:
2779	 */
2780	for (int i = 0; i < adapter->num_queues; i++, que++) {
2781		rid = que->msix + 1;
2782		if (que->tag != NULL) {
2783			bus_teardown_intr(dev, que->res, que->tag);
2784			que->tag = NULL;
2785		}
2786		if (que->res != NULL)
2787			bus_release_resource(dev,
2788			    SYS_RES_IRQ, rid, que->res);
2789	}
2790
2791	/* Clean the Legacy or Link interrupt last */
2792	if (adapter->linkvec) /* we are doing MSIX */
2793		rid = adapter->linkvec + 1;
2794	else
2795		rid = (adapter->msix != 0) ? 1 : 0;
2796
2797	que = adapter->queues;
2798	if (adapter->tag != NULL) {
2799		taskqueue_drain(que->tq, &adapter->link_task);
2800		bus_teardown_intr(dev, adapter->res, adapter->tag);
2801		adapter->tag = NULL;
2802	}
2803	if (adapter->res != NULL)
2804		bus_release_resource(dev, SYS_RES_IRQ, rid, adapter->res);
2805
2806	for (int i = 0; i < adapter->num_queues; i++, que++) {
2807		if (que->tq != NULL) {
2808#ifndef IGB_LEGACY_TX
2809			taskqueue_drain(que->tq, &que->txr->txq_task);
2810#endif
2811			taskqueue_drain(que->tq, &que->que_task);
2812			taskqueue_free(que->tq);
2813		}
2814	}
2815mem:
2816	if (adapter->msix)
2817		pci_release_msi(dev);
2818
2819	if (adapter->msix_mem != NULL)
2820		bus_release_resource(dev, SYS_RES_MEMORY,
2821		    PCIR_BAR(IGB_MSIX_BAR), adapter->msix_mem);
2822
2823	if (adapter->pci_mem != NULL)
2824		bus_release_resource(dev, SYS_RES_MEMORY,
2825		    PCIR_BAR(0), adapter->pci_mem);
2826
2827}
2828
2829/*
2830 * Setup Either MSI/X or MSI
2831 */
2832static int
2833igb_setup_msix(struct adapter *adapter)
2834{
2835	device_t dev = adapter->dev;
2836	int rid, want, queues, msgs, maxqueues;
2837
2838	/* tuneable override */
2839	if (igb_enable_msix == 0)
2840		goto msi;
2841
2842	/* First try MSI/X */
2843	msgs = pci_msix_count(dev);
2844	if (msgs == 0)
2845		goto msi;
2846	rid = PCIR_BAR(IGB_MSIX_BAR);
2847	adapter->msix_mem = bus_alloc_resource_any(dev,
2848	    SYS_RES_MEMORY, &rid, RF_ACTIVE);
2849       	if (adapter->msix_mem == NULL) {
2850		/* May not be enabled */
2851		device_printf(adapter->dev,
2852		    "Unable to map MSIX table\n");
2853		goto msi;
2854	}
2855
2856	/* Figure out a reasonable auto config value */
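	/* (one MSI-X vector is reserved for the link interrupt, hence msgs - 1) */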
2857	queues = (mp_ncpus > (msgs-1)) ? (msgs-1) : mp_ncpus;
2858
2859	/* Manual override */
2860	if (igb_num_queues != 0)
2861		queues = igb_num_queues;
2862
2863	/* Sanity check based on HW */
2864	switch (adapter->hw.mac.type) {
2865		case e1000_82575:
2866			maxqueues = 4;
2867			break;
2868		case e1000_82576:
2869		case e1000_82580:
2870		case e1000_i350:
2871			maxqueues = 8;
2872			break;
2873		case e1000_i210:
2874			maxqueues = 4;
2875			break;
2876		case e1000_i211:
2877			maxqueues = 2;
2878			break;
2879		default:  /* VF interfaces */
2880			maxqueues = 1;
2881			break;
2882	}
2883	if (queues > maxqueues)
2884		queues = maxqueues;
2885
2886	/*
2887	** One vector (RX/TX pair) per queue
2888	** plus an additional for Link interrupt
2889	*/
2890	want = queues + 1;
2891	if (msgs >= want)
2892		msgs = want;
2893	else {
2894               	device_printf(adapter->dev,
2895		    "MSIX Configuration Problem, "
2896		    "%d vectors available, but %d wanted!\n",
2897		    msgs, want);
2898		goto msi;
2899	}
2900	if ((pci_alloc_msix(dev, &msgs) == 0) && (msgs == want)) {
2901               	device_printf(adapter->dev,
2902		    "Using MSIX interrupts with %d vectors\n", msgs);
2903		adapter->num_queues = queues;
2904		return (msgs);
2905	}
2906	/*
2907	** If MSIX alloc failed or provided us with
2908	** less than needed, free and fall through to MSI
2909	*/
2910	pci_release_msi(dev);
2911
2912msi:
2913       	if (adapter->msix_mem != NULL) {
2914		bus_release_resource(dev, SYS_RES_MEMORY,
2915		    PCIR_BAR(IGB_MSIX_BAR), adapter->msix_mem);
2916		adapter->msix_mem = NULL;
2917	}
2918       	msgs = 1;
2919	if (pci_alloc_msi(dev, &msgs) == 0) {
2920		device_printf(adapter->dev, "Using an MSI interrupt\n");
2921		return (msgs);
2922	}
2923	device_printf(adapter->dev, "Using a Legacy interrupt\n");
2924	return (0);
2925}
2926
2927/*********************************************************************
2928 *
2929 *  Set up a fresh starting state
2930 *
2931 **********************************************************************/
2932static void
2933igb_reset(struct adapter *adapter)
2934{
2935	device_t	dev = adapter->dev;
2936	struct e1000_hw *hw = &adapter->hw;
2937	struct e1000_fc_info *fc = &hw->fc;
2938	struct ifnet	*ifp = adapter->ifp;
2939	u32		pba = 0;
2940	u16		hwm;
2941
2942	INIT_DEBUGOUT("igb_reset: begin");
2943
2944	/* Let the firmware know the OS is in control */
2945	igb_get_hw_control(adapter);
2946
2947	/*
2948	 * Packet Buffer Allocation (PBA)
2949	 * Writing PBA sets the receive portion of the buffer;
2950	 * the remainder is used for the transmit buffer.
2951	 */
2952	switch (hw->mac.type) {
2953	case e1000_82575:
2954		pba = E1000_PBA_32K;
2955		break;
2956	case e1000_82576:
2957	case e1000_vfadapt:
2958		pba = E1000_READ_REG(hw, E1000_RXPBS);
2959		pba &= E1000_RXPBS_SIZE_MASK_82576;
2960		break;
2961	case e1000_82580:
2962	case e1000_i350:
2963	case e1000_vfadapt_i350:
2964		pba = E1000_READ_REG(hw, E1000_RXPBS);
2965		pba = e1000_rxpbs_adjust_82580(pba);
2966		break;
2967	case e1000_i210:
2968	case e1000_i211:
2969		pba = E1000_PBA_34K;
		break;
2970	default:
2971		break;
2972	}
2973
2974	/* Special needs in case of Jumbo frames */
2975	if ((hw->mac.type == e1000_82575) && (ifp->if_mtu > ETHERMTU)) {
2976		u32 tx_space, min_tx, min_rx;
2977		pba = E1000_READ_REG(hw, E1000_PBA);
2978		tx_space = pba >> 16;
2979		pba &= 0xffff;
2980		min_tx = (adapter->max_frame_size +
2981		    sizeof(struct e1000_tx_desc) - ETHERNET_FCS_SIZE) * 2;
2982		min_tx = roundup2(min_tx, 1024);
2983		min_tx >>= 10;
2984                min_rx = adapter->max_frame_size;
2985                min_rx = roundup2(min_rx, 1024);
2986                min_rx >>= 10;
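		/* min_tx and min_rx are now in KB, matching the PBA units */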
2987		if (tx_space < min_tx &&
2988		    ((min_tx - tx_space) < pba)) {
2989			pba = pba - (min_tx - tx_space);
2990			/*
2991                         * if short on rx space, rx wins
2992                         * and must trump tx adjustment
2993			 */
2994                        if (pba < min_rx)
2995                                pba = min_rx;
2996		}
2997		E1000_WRITE_REG(hw, E1000_PBA, pba);
2998	}
2999
3000	INIT_DEBUGOUT1("igb_reset: pba=%dK", pba);
3001
3002	/*
3003	 * These parameters control the automatic generation (Tx) and
3004	 * response (Rx) to Ethernet PAUSE frames.
3005	 * - High water mark should allow for at least two frames to be
3006	 *   received after sending an XOFF.
3007	 * - Low water mark works best when it is very near the high water mark.
3008	 *   This allows the receiver to restart by sending XON when it has
3009	 *   drained a bit.
3010	 */
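	/* The PBA value is in KB, so (pba << 10) is the Rx buffer size in bytes. */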
3011	hwm = min(((pba << 10) * 9 / 10),
3012	    ((pba << 10) - 2 * adapter->max_frame_size));
3013
3014	if (hw->mac.type < e1000_82576) {
3015		fc->high_water = hwm & 0xFFF8;  /* 8-byte granularity */
3016		fc->low_water = fc->high_water - 8;
3017	} else {
3018		fc->high_water = hwm & 0xFFF0;  /* 16-byte granularity */
3019		fc->low_water = fc->high_water - 16;
3020	}
3021
3022	fc->pause_time = IGB_FC_PAUSE_TIME;
3023	fc->send_xon = TRUE;
3024	if (adapter->fc)
3025		fc->requested_mode = adapter->fc;
3026	else
3027		fc->requested_mode = e1000_fc_default;
3028
3029	/* Issue a global reset */
3030	e1000_reset_hw(hw);
3031	E1000_WRITE_REG(hw, E1000_WUC, 0);
3032
3033	if (e1000_init_hw(hw) < 0)
3034		device_printf(dev, "Hardware Initialization Failed\n");
3035
3036	/* Setup DMA Coalescing */
3037	if ((hw->mac.type > e1000_82580) &&
3038	    (hw->mac.type != e1000_i211)) {
3039		u32 dmac;
3040		u32 reg = ~E1000_DMACR_DMAC_EN;
3041
3042		if (adapter->dmac == 0) { /* Disabling it */
3043			E1000_WRITE_REG(hw, E1000_DMACR, reg);
3044			goto reset_out;
3045		}
3046
3047		/* Set starting thresholds */
3048		E1000_WRITE_REG(hw, E1000_DMCTXTH, 0);
3049		E1000_WRITE_REG(hw, E1000_DMCRTRH, 0);
3050
3051		hwm = 64 * pba - adapter->max_frame_size / 16;
3052		if (hwm < 64 * (pba - 6))
3053			hwm = 64 * (pba - 6);
3054		reg = E1000_READ_REG(hw, E1000_FCRTC);
3055		reg &= ~E1000_FCRTC_RTH_COAL_MASK;
3056		reg |= ((hwm << E1000_FCRTC_RTH_COAL_SHIFT)
3057		    & E1000_FCRTC_RTH_COAL_MASK);
3058		E1000_WRITE_REG(hw, E1000_FCRTC, reg);
3059
3060
3061		dmac = pba - adapter->max_frame_size / 512;
3062		if (dmac < pba - 10)
3063			dmac = pba - 10;
3064		reg = E1000_READ_REG(hw, E1000_DMACR);
3065		reg &= ~E1000_DMACR_DMACTHR_MASK;
3066		reg = ((dmac << E1000_DMACR_DMACTHR_SHIFT)
3067		    & E1000_DMACR_DMACTHR_MASK);
3068		/* transition to L0s or L1 if available */
3069		reg |= (E1000_DMACR_DMAC_EN | E1000_DMACR_DMAC_LX_MASK);
3070		/* timer = value in adapter->dmac in 32usec intervals */
3071		reg |= (adapter->dmac >> 5);
3072		E1000_WRITE_REG(hw, E1000_DMACR, reg);
3073
3074		/* Set the interval before transition */
3075		reg = E1000_READ_REG(hw, E1000_DMCTLX);
3076		reg |= 0x80000004;
3077		E1000_WRITE_REG(hw, E1000_DMCTLX, reg);
3078
3079		/* free space in tx packet buffer to wake from DMA coal */
3080		E1000_WRITE_REG(hw, E1000_DMCTXTH,
3081		    (20480 - (2 * adapter->max_frame_size)) >> 6);
3082
3083		/* make low power state decision controlled by DMA coal */
3084		reg = E1000_READ_REG(hw, E1000_PCIEMISC);
3085		reg &= ~E1000_PCIEMISC_LX_DECISION;
3086		E1000_WRITE_REG(hw, E1000_PCIEMISC, reg);
3087		device_printf(dev, "DMA Coalescing enabled\n");
3088
3089	} else if (hw->mac.type == e1000_82580) {
3090		u32 reg = E1000_READ_REG(hw, E1000_PCIEMISC);
3091		E1000_WRITE_REG(hw, E1000_DMACR, 0);
3092		E1000_WRITE_REG(hw, E1000_PCIEMISC,
3093		    reg & ~E1000_PCIEMISC_LX_DECISION);
3094	}
3095
3096reset_out:
3097	E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);
3098	e1000_get_phy_info(hw);
3099	e1000_check_for_link(hw);
3100	return;
3101}
3102
3103/*********************************************************************
3104 *
3105 *  Setup networking device structure and register an interface.
3106 *
3107 **********************************************************************/
3108static int
3109igb_setup_interface(device_t dev, struct adapter *adapter)
3110{
3111	struct ifnet   *ifp;
3112
3113	INIT_DEBUGOUT("igb_setup_interface: begin");
3114
3115	ifp = adapter->ifp = if_alloc(IFT_ETHER);
3116	if (ifp == NULL) {
3117		device_printf(dev, "can not allocate ifnet structure\n");
3118		return (-1);
3119	}
3120	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
3121	ifp->if_init =  igb_init;
3122	ifp->if_softc = adapter;
3123	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
3124	ifp->if_ioctl = igb_ioctl;
3125#ifndef IGB_LEGACY_TX
3126	ifp->if_transmit = igb_mq_start;
3127	ifp->if_qflush = igb_qflush;
3128#else
3129	ifp->if_start = igb_start;
3130	IFQ_SET_MAXLEN(&ifp->if_snd, adapter->num_tx_desc - 1);
3131	ifp->if_snd.ifq_drv_maxlen = adapter->num_tx_desc - 1;
3132	IFQ_SET_READY(&ifp->if_snd);
3133#endif
3134
3135	ether_ifattach(ifp, adapter->hw.mac.addr);
3136
3137	ifp->if_capabilities = ifp->if_capenable = 0;
3138
3139	ifp->if_capabilities = IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM;
3140	ifp->if_capabilities |= IFCAP_TSO4;
3141	ifp->if_capabilities |= IFCAP_JUMBO_MTU;
3142	ifp->if_capenable = ifp->if_capabilities;
3143
3144	/* Don't enable LRO by default */
3145	ifp->if_capabilities |= IFCAP_LRO;
3146
3147#ifdef DEVICE_POLLING
3148	ifp->if_capabilities |= IFCAP_POLLING;
3149#endif
3150
3151	/*
3152	 * Tell the upper layer(s) we
3153	 * support full VLAN capability.
3154	 */
3155	ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);
3156	ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING
3157			     |  IFCAP_VLAN_HWTSO
3158			     |  IFCAP_VLAN_MTU;
3159	ifp->if_capenable |= IFCAP_VLAN_HWTAGGING
3160			  |  IFCAP_VLAN_HWTSO
3161			  |  IFCAP_VLAN_MTU;
3162
3163	/*
3164	** Don't turn this on by default: if vlans are
3165	** created on another pseudo device (e.g. lagg),
3166	** vlan events are not passed thru and operation
3167	** breaks, but with HW FILTER off it works. If
3168	** using vlans directly on the igb driver you can
3169	** enable this and get full hardware tag filtering.
3170	*/
3171	ifp->if_capabilities |= IFCAP_VLAN_HWFILTER;
3172
3173	/*
3174	 * Specify the media types supported by this adapter and register
3175	 * callbacks to update media and link information
3176	 */
3177	ifmedia_init(&adapter->media, IFM_IMASK,
3178	    igb_media_change, igb_media_status);
3179	if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
3180	    (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
3181		ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_SX | IFM_FDX,
3182			    0, NULL);
3183		ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_SX, 0, NULL);
3184	} else {
3185		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T, 0, NULL);
3186		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T | IFM_FDX,
3187			    0, NULL);
3188		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX,
3189			    0, NULL);
3190		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
3191			    0, NULL);
3192		if (adapter->hw.phy.type != e1000_phy_ife) {
3193			ifmedia_add(&adapter->media,
3194				IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
3195			ifmedia_add(&adapter->media,
3196				IFM_ETHER | IFM_1000_T, 0, NULL);
3197		}
3198	}
3199	ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
3200	ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
3201	return (0);
3202}
3203
3204
3205/*
3206 * Manage DMA'able memory.
3207 */
3208static void
3209igb_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
3210{
3211	if (error)
3212		return;
3213	*(bus_addr_t *) arg = segs[0].ds_addr;
3214}
3215
3216static int
3217igb_dma_malloc(struct adapter *adapter, bus_size_t size,
3218        struct igb_dma_alloc *dma, int mapflags)
3219{
3220	int error;
3221
3222	error = bus_dma_tag_create(bus_get_dma_tag(adapter->dev), /* parent */
3223				IGB_DBA_ALIGN, 0,	/* alignment, bounds */
3224				BUS_SPACE_MAXADDR,	/* lowaddr */
3225				BUS_SPACE_MAXADDR,	/* highaddr */
3226				NULL, NULL,		/* filter, filterarg */
3227				size,			/* maxsize */
3228				1,			/* nsegments */
3229				size,			/* maxsegsize */
3230				0,			/* flags */
3231				NULL,			/* lockfunc */
3232				NULL,			/* lockarg */
3233				&dma->dma_tag);
3234	if (error) {
3235		device_printf(adapter->dev,
3236		    "%s: bus_dma_tag_create failed: %d\n",
3237		    __func__, error);
3238		goto fail_0;
3239	}
3240
3241	error = bus_dmamem_alloc(dma->dma_tag, (void**) &dma->dma_vaddr,
3242	    BUS_DMA_NOWAIT | BUS_DMA_COHERENT, &dma->dma_map);
3243	if (error) {
3244		device_printf(adapter->dev,
3245		    "%s: bus_dmamem_alloc(%ju) failed: %d\n",
3246		    __func__, (uintmax_t)size, error);
3247		goto fail_2;
3248	}
3249
3250	dma->dma_paddr = 0;
3251	error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
3252	    size, igb_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT);
3253	if (error || dma->dma_paddr == 0) {
3254		device_printf(adapter->dev,
3255		    "%s: bus_dmamap_load failed: %d\n",
3256		    __func__, error);
3257		goto fail_3;
3258	}
3259
3260	return (0);
3261
3262fail_3:
3263	bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3264fail_2:
3265	bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
3266	bus_dma_tag_destroy(dma->dma_tag);
3267fail_0:
3268	dma->dma_map = NULL;
3269	dma->dma_tag = NULL;
3270
3271	return (error);
3272}
3273
3274static void
3275igb_dma_free(struct adapter *adapter, struct igb_dma_alloc *dma)
3276{
3277	if (dma->dma_tag == NULL)
3278		return;
3279	if (dma->dma_map != NULL) {
3280		bus_dmamap_sync(dma->dma_tag, dma->dma_map,
3281		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
3282		bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3283		bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
3284		dma->dma_map = NULL;
3285	}
3286	bus_dma_tag_destroy(dma->dma_tag);
3287	dma->dma_tag = NULL;
3288}
3289
3290
3291/*********************************************************************
3292 *
3293 *  Allocate memory for the transmit and receive rings, and then
3294 *  the descriptors associated with each, called only once at attach.
3295 *
3296 **********************************************************************/
3297static int
3298igb_allocate_queues(struct adapter *adapter)
3299{
3300	device_t dev = adapter->dev;
3301	struct igb_queue	*que = NULL;
3302	struct tx_ring		*txr = NULL;
3303	struct rx_ring		*rxr = NULL;
3304	int rsize, tsize, error = E1000_SUCCESS;
3305	int txconf = 0, rxconf = 0;
3306
3307	/* First allocate the top level queue structs */
3308	if (!(adapter->queues =
3309	    (struct igb_queue *) malloc(sizeof(struct igb_queue) *
3310	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3311		device_printf(dev, "Unable to allocate queue memory\n");
3312		error = ENOMEM;
3313		goto fail;
3314	}
3315
3316	/* Next allocate the TX ring struct memory */
3317	if (!(adapter->tx_rings =
3318	    (struct tx_ring *) malloc(sizeof(struct tx_ring) *
3319	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3320		device_printf(dev, "Unable to allocate TX ring memory\n");
3321		error = ENOMEM;
3322		goto tx_fail;
3323	}
3324
3325	/* Now allocate the RX */
3326	if (!(adapter->rx_rings =
3327	    (struct rx_ring *) malloc(sizeof(struct rx_ring) *
3328	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3329		device_printf(dev, "Unable to allocate RX ring memory\n");
3330		error = ENOMEM;
3331		goto rx_fail;
3332	}
3333
3334	tsize = roundup2(adapter->num_tx_desc *
3335	    sizeof(union e1000_adv_tx_desc), IGB_DBA_ALIGN);
3336	/*
3337	 * Now set up the TX queues, txconf is needed to handle the
3338	 * possibility that things fail midcourse and we need to
3339	 * undo memory gracefully
3340	 */
3341	for (int i = 0; i < adapter->num_queues; i++, txconf++) {
3342		/* Set up some basics */
3343		txr = &adapter->tx_rings[i];
3344		txr->adapter = adapter;
3345		txr->me = i;
3346
3347		/* Initialize the TX lock */
3348		snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
3349		    device_get_nameunit(dev), txr->me);
3350		mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF);
3351
3352		if (igb_dma_malloc(adapter, tsize,
3353			&txr->txdma, BUS_DMA_NOWAIT)) {
3354			device_printf(dev,
3355			    "Unable to allocate TX Descriptor memory\n");
3356			error = ENOMEM;
3357			goto err_tx_desc;
3358		}
3359		txr->tx_base = (struct e1000_tx_desc *)txr->txdma.dma_vaddr;
3360		bzero((void *)txr->tx_base, tsize);
3361
3362        	/* Now allocate transmit buffers for the ring */
3363        	if (igb_allocate_transmit_buffers(txr)) {
3364			device_printf(dev,
3365			    "Critical Failure setting up transmit buffers\n");
3366			error = ENOMEM;
3367			goto err_tx_desc;
3368        	}
3369#ifndef IGB_LEGACY_TX
3370		/* Allocate a buf ring */
3371		txr->br = buf_ring_alloc(igb_buf_ring_size, M_DEVBUF,
3372		    M_WAITOK, &txr->tx_mtx);
3373#endif
3374	}
3375
3376	/*
3377	 * Next the RX queues...
3378	 */
3379	rsize = roundup2(adapter->num_rx_desc *
3380	    sizeof(union e1000_adv_rx_desc), IGB_DBA_ALIGN);
3381	for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
3382		rxr = &adapter->rx_rings[i];
3383		rxr->adapter = adapter;
3384		rxr->me = i;
3385
3386		/* Initialize the RX lock */
3387		snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
3388		    device_get_nameunit(dev), rxr->me);
3389		mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF);
3390
3391		if (igb_dma_malloc(adapter, rsize,
3392			&rxr->rxdma, BUS_DMA_NOWAIT)) {
3393			device_printf(dev,
3394			    "Unable to allocate RxDescriptor memory\n");
3395			error = ENOMEM;
3396			goto err_rx_desc;
3397		}
3398		rxr->rx_base = (union e1000_adv_rx_desc *)rxr->rxdma.dma_vaddr;
3399		bzero((void *)rxr->rx_base, rsize);
3400
3401        	/* Allocate receive buffers for the ring */
3402		if (igb_allocate_receive_buffers(rxr)) {
3403			device_printf(dev,
3404			    "Critical Failure setting up receive buffers\n");
3405			error = ENOMEM;
3406			goto err_rx_desc;
3407		}
3408	}
3409
3410	/*
3411	** Finally set up the queue holding structs
3412	*/
3413	for (int i = 0; i < adapter->num_queues; i++) {
3414		que = &adapter->queues[i];
3415		que->adapter = adapter;
3416		que->txr = &adapter->tx_rings[i];
3417		que->rxr = &adapter->rx_rings[i];
3418	}
3419
3420	return (0);
3421
3422err_rx_desc:
3423	for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
3424		igb_dma_free(adapter, &rxr->rxdma);
3425err_tx_desc:
3426	for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
3427		igb_dma_free(adapter, &txr->txdma);
3428	free(adapter->rx_rings, M_DEVBUF);
3429rx_fail:
3430#ifndef IGB_LEGACY_TX
3431	buf_ring_free(txr->br, M_DEVBUF);
3432#endif
3433	free(adapter->tx_rings, M_DEVBUF);
3434tx_fail:
3435	free(adapter->queues, M_DEVBUF);
3436fail:
3437	return (error);
3438}
3439
3440/*********************************************************************
3441 *
3442 *  Allocate memory for tx_buffer structures. The tx_buffer stores all
3443 *  the information needed to transmit a packet on the wire. This is
3444 *  called only once at attach; setup is done on every reset.
3445 *
3446 **********************************************************************/
3447static int
3448igb_allocate_transmit_buffers(struct tx_ring *txr)
3449{
3450	struct adapter *adapter = txr->adapter;
3451	device_t dev = adapter->dev;
3452	struct igb_tx_buffer *txbuf;
3453	int error, i;
3454
3455	/*
3456	 * Setup DMA descriptor areas.
3457	 */
3458	if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
3459			       1, 0,			/* alignment, bounds */
3460			       BUS_SPACE_MAXADDR,	/* lowaddr */
3461			       BUS_SPACE_MAXADDR,	/* highaddr */
3462			       NULL, NULL,		/* filter, filterarg */
3463			       IGB_TSO_SIZE,		/* maxsize */
3464			       IGB_MAX_SCATTER,		/* nsegments */
3465			       PAGE_SIZE,		/* maxsegsize */
3466			       0,			/* flags */
3467			       NULL,			/* lockfunc */
3468			       NULL,			/* lockfuncarg */
3469			       &txr->txtag))) {
3470		device_printf(dev,"Unable to allocate TX DMA tag\n");
3471		goto fail;
3472	}
3473
3474	if (!(txr->tx_buffers =
3475	    (struct igb_tx_buffer *) malloc(sizeof(struct igb_tx_buffer) *
3476	    adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3477		device_printf(dev, "Unable to allocate tx_buffer memory\n");
3478		error = ENOMEM;
3479		goto fail;
3480	}
3481
3482        /* Create the descriptor buffer dma maps */
3483	txbuf = txr->tx_buffers;
3484	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3485		error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
3486		if (error != 0) {
3487			device_printf(dev, "Unable to create TX DMA map\n");
3488			goto fail;
3489		}
3490	}
3491
3492	return 0;
3493fail:
3494	/* We free everything; this handles the case where we failed partway */
3495	igb_free_transmit_structures(adapter);
3496	return (error);
3497}
3498
3499/*********************************************************************
3500 *
3501 *  Initialize a transmit ring.
3502 *
3503 **********************************************************************/
3504static void
3505igb_setup_transmit_ring(struct tx_ring *txr)
3506{
3507	struct adapter *adapter = txr->adapter;
3508	struct igb_tx_buffer *txbuf;
3509	int i;
3510#ifdef DEV_NETMAP
3511	struct netmap_adapter *na = NA(adapter->ifp);
3512	struct netmap_slot *slot;
3513#endif /* DEV_NETMAP */
3514
3515	/* Clear the old descriptor contents */
3516	IGB_TX_LOCK(txr);
3517#ifdef DEV_NETMAP
3518	slot = netmap_reset(na, NR_TX, txr->me, 0);
3519#endif /* DEV_NETMAP */
3520	bzero((void *)txr->tx_base,
3521	      (sizeof(union e1000_adv_tx_desc)) * adapter->num_tx_desc);
3522	/* Reset indices */
3523	txr->next_avail_desc = 0;
3524	txr->next_to_clean = 0;
3525
3526	/* Free any existing tx buffers. */
3527        txbuf = txr->tx_buffers;
3528	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3529		if (txbuf->m_head != NULL) {
3530			bus_dmamap_sync(txr->txtag, txbuf->map,
3531			    BUS_DMASYNC_POSTWRITE);
3532			bus_dmamap_unload(txr->txtag, txbuf->map);
3533			m_freem(txbuf->m_head);
3534			txbuf->m_head = NULL;
3535		}
3536#ifdef DEV_NETMAP
3537		if (slot) {
3538			int si = netmap_idx_n2k(&na->tx_rings[txr->me], i);
3539			/* no need to set the address */
3540			netmap_load_map(txr->txtag, txbuf->map, NMB(slot + si));
3541		}
3542#endif /* DEV_NETMAP */
3543		/* clear the watch index */
3544		txbuf->next_eop = -1;
3545        }
3546
3547	/* Set number of descriptors available */
3548	txr->tx_avail = adapter->num_tx_desc;
3549
3550	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3551	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3552	IGB_TX_UNLOCK(txr);
3553}
3554
3555/*********************************************************************
3556 *
3557 *  Initialize all transmit rings.
3558 *
3559 **********************************************************************/
3560static void
3561igb_setup_transmit_structures(struct adapter *adapter)
3562{
3563	struct tx_ring *txr = adapter->tx_rings;
3564
3565	for (int i = 0; i < adapter->num_queues; i++, txr++)
3566		igb_setup_transmit_ring(txr);
3567
3568	return;
3569}
3570
3571/*********************************************************************
3572 *
3573 *  Enable transmit unit.
3574 *
3575 **********************************************************************/
3576static void
3577igb_initialize_transmit_units(struct adapter *adapter)
3578{
3579	struct tx_ring	*txr = adapter->tx_rings;
3580	struct e1000_hw *hw = &adapter->hw;
3581	u32		tctl, txdctl;
3582
3583	INIT_DEBUGOUT("igb_initialize_transmit_units: begin");
3584	tctl = txdctl = 0;
3585
3586	/* Setup the Tx Descriptor Rings */
3587	for (int i = 0; i < adapter->num_queues; i++, txr++) {
3588		u64 bus_addr = txr->txdma.dma_paddr;
3589
3590		E1000_WRITE_REG(hw, E1000_TDLEN(i),
3591		    adapter->num_tx_desc * sizeof(struct e1000_tx_desc));
3592		E1000_WRITE_REG(hw, E1000_TDBAH(i),
3593		    (uint32_t)(bus_addr >> 32));
3594		E1000_WRITE_REG(hw, E1000_TDBAL(i),
3595		    (uint32_t)bus_addr);
3596
3597		/* Setup the HW Tx Head and Tail descriptor pointers */
3598		E1000_WRITE_REG(hw, E1000_TDT(i), 0);
3599		E1000_WRITE_REG(hw, E1000_TDH(i), 0);
3600
3601		HW_DEBUGOUT2("Base = %x, Length = %x\n",
3602		    E1000_READ_REG(hw, E1000_TDBAL(i)),
3603		    E1000_READ_REG(hw, E1000_TDLEN(i)));
3604
3605		txr->queue_status = IGB_QUEUE_IDLE;
3606
3607		txdctl |= IGB_TX_PTHRESH;
3608		txdctl |= IGB_TX_HTHRESH << 8;
3609		txdctl |= IGB_TX_WTHRESH << 16;
3610		txdctl |= E1000_TXDCTL_QUEUE_ENABLE;
3611		E1000_WRITE_REG(hw, E1000_TXDCTL(i), txdctl);
3612	}
3613
3614	if (adapter->vf_ifp)
3615		return;
3616
3617	e1000_config_collision_dist(hw);
3618
3619	/* Program the Transmit Control Register */
3620	tctl = E1000_READ_REG(hw, E1000_TCTL);
3621	tctl &= ~E1000_TCTL_CT;
3622	tctl |= (E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN |
3623		   (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT));
3624
3625	/* This write will effectively turn on the transmit unit. */
3626	E1000_WRITE_REG(hw, E1000_TCTL, tctl);
3627}
3628
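/*
 * Illustrative sketch (not part of the driver, compiled out): the 64-bit
 * ring base address programmed by igb_initialize_transmit_units() above is
 * split across two 32-bit registers.  For a hypothetical bus_addr of
 * 0x123456000 the writes would be TDBAH = 0x1 and TDBAL = 0x23456000.
 */
#if 0
static inline void
igb_example_split_ring_addr(u64 bus_addr, u32 *hi, u32 *lo)
{
	*hi = (u32)(bus_addr >> 32);	/* written to TDBAH (or RDBAH) */
	*lo = (u32)bus_addr;		/* written to TDBAL (or RDBAL) */
}
#endif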
3629/*********************************************************************
3630 *
3631 *  Free all transmit rings.
3632 *
3633 **********************************************************************/
3634static void
3635igb_free_transmit_structures(struct adapter *adapter)
3636{
3637	struct tx_ring *txr = adapter->tx_rings;
3638
3639	for (int i = 0; i < adapter->num_queues; i++, txr++) {
3640		IGB_TX_LOCK(txr);
3641		igb_free_transmit_buffers(txr);
3642		igb_dma_free(adapter, &txr->txdma);
3643		IGB_TX_UNLOCK(txr);
3644		IGB_TX_LOCK_DESTROY(txr);
3645	}
3646	free(adapter->tx_rings, M_DEVBUF);
3647}
3648
3649/*********************************************************************
3650 *
3651 *  Free transmit ring related data structures.
3652 *
3653 **********************************************************************/
3654static void
3655igb_free_transmit_buffers(struct tx_ring *txr)
3656{
3657	struct adapter *adapter = txr->adapter;
3658	struct igb_tx_buffer *tx_buffer;
3659	int             i;
3660
3661	INIT_DEBUGOUT("igb_free_transmit_buffers: begin");
3662
3663	if (txr->tx_buffers == NULL)
3664		return;
3665
3666	tx_buffer = txr->tx_buffers;
3667	for (i = 0; i < adapter->num_tx_desc; i++, tx_buffer++) {
3668		if (tx_buffer->m_head != NULL) {
3669			bus_dmamap_sync(txr->txtag, tx_buffer->map,
3670			    BUS_DMASYNC_POSTWRITE);
3671			bus_dmamap_unload(txr->txtag,
3672			    tx_buffer->map);
3673			m_freem(tx_buffer->m_head);
3674			tx_buffer->m_head = NULL;
3675			if (tx_buffer->map != NULL) {
3676				bus_dmamap_destroy(txr->txtag,
3677				    tx_buffer->map);
3678				tx_buffer->map = NULL;
3679			}
3680		} else if (tx_buffer->map != NULL) {
3681			bus_dmamap_unload(txr->txtag,
3682			    tx_buffer->map);
3683			bus_dmamap_destroy(txr->txtag,
3684			    tx_buffer->map);
3685			tx_buffer->map = NULL;
3686		}
3687	}
3688#ifndef IGB_LEGACY_TX
3689	if (txr->br != NULL)
3690		buf_ring_free(txr->br, M_DEVBUF);
3691#endif
3692	if (txr->tx_buffers != NULL) {
3693		free(txr->tx_buffers, M_DEVBUF);
3694		txr->tx_buffers = NULL;
3695	}
3696	if (txr->txtag != NULL) {
3697		bus_dma_tag_destroy(txr->txtag);
3698		txr->txtag = NULL;
3699	}
3700	return;
3701}
3702
3703/**********************************************************************
3704 *
3705 *  Setup work for hardware segmentation offload (TSO)
3706 *
3707 **********************************************************************/
3708static bool
3709igb_tso_setup(struct tx_ring *txr, struct mbuf *mp, int ehdrlen,
3710	struct ip *ip, struct tcphdr *th)
3711{
3712	struct adapter *adapter = txr->adapter;
3713	struct e1000_adv_tx_context_desc *TXD;
3714	struct igb_tx_buffer        *tx_buffer;
3715	u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0;
3716	u32 mss_l4len_idx = 0;
3717	u16 vtag = 0;
3718	int ctxd, ip_hlen, tcp_hlen;
3719
3720	ctxd = txr->next_avail_desc;
3721	tx_buffer = &txr->tx_buffers[ctxd];
3722	TXD = (struct e1000_adv_tx_context_desc *) &txr->tx_base[ctxd];
3723
3724	ip->ip_sum = 0;
3725	ip_hlen = ip->ip_hl << 2;
3726	tcp_hlen = th->th_off << 2;
3727
3728	/* VLAN MACLEN IPLEN */
3729	if (mp->m_flags & M_VLANTAG) {
3730		vtag = htole16(mp->m_pkthdr.ether_vtag);
3731		vlan_macip_lens |= (vtag << E1000_ADVTXD_VLAN_SHIFT);
3732	}
3733
3734	vlan_macip_lens |= (ehdrlen << E1000_ADVTXD_MACLEN_SHIFT);
3735	vlan_macip_lens |= ip_hlen;
3736	TXD->vlan_macip_lens |= htole32(vlan_macip_lens);
3737
3738	/* ADV DTYPE TUCMD */
3739	type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
3740	type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP;
3741	type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4;
3742	TXD->type_tucmd_mlhl |= htole32(type_tucmd_mlhl);
3743
3744	/* MSS L4LEN IDX */
3745	mss_l4len_idx |= (mp->m_pkthdr.tso_segsz << E1000_ADVTXD_MSS_SHIFT);
3746	mss_l4len_idx |= (tcp_hlen << E1000_ADVTXD_L4LEN_SHIFT);
3747	/* 82575 needs the queue index added */
3748	if (adapter->hw.mac.type == e1000_82575)
3749		mss_l4len_idx |= txr->me << 4;
3750	TXD->mss_l4len_idx = htole32(mss_l4len_idx);
3751
3752	TXD->seqnum_seed = htole32(0);
3753	tx_buffer->m_head = NULL;
3754	tx_buffer->next_eop = -1;
3755
3756	if (++ctxd == adapter->num_tx_desc)
3757		ctxd = 0;
3758
3759	txr->tx_avail--;
3760	txr->next_avail_desc = ctxd;
3761	return TRUE;
3762}
3763
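/*
 * Illustrative sketch (not part of the driver, compiled out): how the TSO
 * context fields built by igb_tso_setup() above pack together.  For a plain
 * Ethernet + IPv4 + TCP frame with ehdrlen = 14, ip_hlen = 20, tcp_hlen = 20
 * and an MSS of 1448, and assuming the usual shift values (MACLEN = 9,
 * L4LEN = 8, MSS = 16), the packed words come out to:
 *
 *	vlan_macip_lens = (14 << 9) | 20           = 0x00001c14
 *	mss_l4len_idx   = (1448 << 16) | (20 << 8) = 0x05a81400
 */
#if 0
static inline void
igb_example_pack_tso_fields(u32 *vlan_macip_lens, u32 *mss_l4len_idx)
{
	const int ehdrlen = ETHER_HDR_LEN;	/* 14, no VLAN */
	const int ip_hlen = 20;			/* IPv4, no options */
	const int tcp_hlen = 20;		/* TCP, no options */
	const int mss = 1448;

	*vlan_macip_lens = (ehdrlen << E1000_ADVTXD_MACLEN_SHIFT) | ip_hlen;
	*mss_l4len_idx = (mss << E1000_ADVTXD_MSS_SHIFT) |
	    (tcp_hlen << E1000_ADVTXD_L4LEN_SHIFT);
}
#endif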
3764
3765/*********************************************************************
3766 *
3767 *  Context Descriptor setup for VLAN or CSUM
3768 *
3769 **********************************************************************/
3770
3771static bool
3772igb_tx_ctx_setup(struct tx_ring *txr, struct mbuf *mp)
3773{
3774	struct adapter *adapter = txr->adapter;
3775	struct e1000_adv_tx_context_desc *TXD;
3776	struct igb_tx_buffer        *tx_buffer;
3777	u32 vlan_macip_lens, type_tucmd_mlhl, mss_l4len_idx;
3778	struct ether_vlan_header *eh;
3779	struct ip *ip = NULL;
3780	struct ip6_hdr *ip6;
3781	int  ehdrlen, ctxd, ip_hlen = 0;
3782	u16	etype, vtag = 0;
3783	u8	ipproto = 0;
3784	bool	offload = TRUE;
3785
3786	if ((mp->m_pkthdr.csum_flags & CSUM_OFFLOAD) == 0)
3787		offload = FALSE;
3788
3789	vlan_macip_lens = type_tucmd_mlhl = mss_l4len_idx = 0;
3790	ctxd = txr->next_avail_desc;
3791	tx_buffer = &txr->tx_buffers[ctxd];
3792	TXD = (struct e1000_adv_tx_context_desc *) &txr->tx_base[ctxd];
3793
3794	/*
3795	** In advanced descriptors the vlan tag must
3796	** be placed into the context descriptor, thus
3797	** we need to be here just for that setup.
3798	*/
3799	if (mp->m_flags & M_VLANTAG) {
3800		vtag = htole16(mp->m_pkthdr.ether_vtag);
3801		vlan_macip_lens |= (vtag << E1000_ADVTXD_VLAN_SHIFT);
3802	} else if (offload == FALSE)
3803		return FALSE;
3804
3805	/*
3806	 * Determine where frame payload starts.
3807	 * Jump over vlan headers if already present,
3808	 * helpful for QinQ too.
3809	 */
3810	eh = mtod(mp, struct ether_vlan_header *);
3811	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
3812		etype = ntohs(eh->evl_proto);
3813		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3814	} else {
3815		etype = ntohs(eh->evl_encap_proto);
3816		ehdrlen = ETHER_HDR_LEN;
3817	}
3818
3819	/* Set the ether header length */
3820	vlan_macip_lens |= ehdrlen << E1000_ADVTXD_MACLEN_SHIFT;
3821
3822	switch (etype) {
3823		case ETHERTYPE_IP:
3824			ip = (struct ip *)(mp->m_data + ehdrlen);
3825			ip_hlen = ip->ip_hl << 2;
3826			if (mp->m_len < ehdrlen + ip_hlen) {
3827				offload = FALSE;
3828				break;
3829			}
3830			ipproto = ip->ip_p;
3831			type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4;
3832			break;
3833		case ETHERTYPE_IPV6:
3834			ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
3835			ip_hlen = sizeof(struct ip6_hdr);
3836			ipproto = ip6->ip6_nxt;
3837			type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV6;
3838			break;
3839		default:
3840			offload = FALSE;
3841			break;
3842	}
3843
3844	vlan_macip_lens |= ip_hlen;
3845	type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
3846
3847	switch (ipproto) {
3848		case IPPROTO_TCP:
3849			if (mp->m_pkthdr.csum_flags & CSUM_TCP)
3850				type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP;
3851			break;
3852		case IPPROTO_UDP:
3853			if (mp->m_pkthdr.csum_flags & CSUM_UDP)
3854				type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_UDP;
3855			break;
3856#if __FreeBSD_version >= 800000
3857		case IPPROTO_SCTP:
3858			if (mp->m_pkthdr.csum_flags & CSUM_SCTP)
3859				type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_SCTP;
3860			break;
3861#endif
3862		default:
3863			offload = FALSE;
3864			break;
3865	}
3866
3867	/* 82575 needs the queue index added */
3868	if (adapter->hw.mac.type == e1000_82575)
3869		mss_l4len_idx = txr->me << 4;
3870
3871	/* Now copy bits into descriptor */
3872	TXD->vlan_macip_lens |= htole32(vlan_macip_lens);
3873	TXD->type_tucmd_mlhl |= htole32(type_tucmd_mlhl);
3874	TXD->seqnum_seed = htole32(0);
3875	TXD->mss_l4len_idx = htole32(mss_l4len_idx);
3876
3877	tx_buffer->m_head = NULL;
3878	tx_buffer->next_eop = -1;
3879
3880	/* We've consumed the first desc, adjust counters */
3881	if (++ctxd == adapter->num_tx_desc)
3882		ctxd = 0;
3883	txr->next_avail_desc = ctxd;
3884	--txr->tx_avail;
3885
3886        return (offload);
3887}
3888
3889
3890/**********************************************************************
3891 *
3892 *  Examine each tx_buffer in the used queue. If the hardware is done
3893 *  processing the packet then free associated resources. The
3894 *  tx_buffer is put back on the free queue.
3895 *
3896 *  A TRUE return means there is still work in the ring to clean; FALSE means it's empty.
3897 **********************************************************************/
3898static bool
3899igb_txeof(struct tx_ring *txr)
3900{
3901	struct adapter	*adapter = txr->adapter;
3902        int first, last, done, processed;
3903        struct igb_tx_buffer *tx_buffer;
3904        struct e1000_tx_desc   *tx_desc, *eop_desc;
3905	struct ifnet   *ifp = adapter->ifp;
3906
3907	IGB_TX_LOCK_ASSERT(txr);
3908
3909#ifdef DEV_NETMAP
3910	if (netmap_tx_irq(ifp, txr->me |
3911	    (NETMAP_LOCKED_ENTER|NETMAP_LOCKED_EXIT)))
3912		return (FALSE);
3913#endif /* DEV_NETMAP */
3914        if (txr->tx_avail == adapter->num_tx_desc) {
3915		txr->queue_status = IGB_QUEUE_IDLE;
3916                return FALSE;
3917	}
3918
3919	processed = 0;
3920        first = txr->next_to_clean;
3921        tx_desc = &txr->tx_base[first];
3922        tx_buffer = &txr->tx_buffers[first];
3923	last = tx_buffer->next_eop;
3924        eop_desc = &txr->tx_base[last];
3925
3926	/*
3927	 * Advance 'last' to the index of the first
3928	 * descriptor AFTER the EOP of the first packet,
3929	 * so that the inner while loop can use a simple
3930	 * inequality comparison against 'done'.
3931	 */
3932	if (++last == adapter->num_tx_desc)
3933 		last = 0;
3934	done = last;
3935
3936        bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3937            BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
3938
3939        while (eop_desc->upper.fields.status & E1000_TXD_STAT_DD) {
3940		/* We clean the range of the packet */
3941		while (first != done) {
3942                	tx_desc->upper.data = 0;
3943                	tx_desc->lower.data = 0;
3944                	tx_desc->buffer_addr = 0;
3945                	++txr->tx_avail;
3946			++processed;
3947
3948			if (tx_buffer->m_head) {
3949				txr->bytes +=
3950				    tx_buffer->m_head->m_pkthdr.len;
3951				bus_dmamap_sync(txr->txtag,
3952				    tx_buffer->map,
3953				    BUS_DMASYNC_POSTWRITE);
3954				bus_dmamap_unload(txr->txtag,
3955				    tx_buffer->map);
3956
3957                        	m_freem(tx_buffer->m_head);
3958                        	tx_buffer->m_head = NULL;
3959                	}
3960			tx_buffer->next_eop = -1;
3961			txr->watchdog_time = ticks;
3962
3963	                if (++first == adapter->num_tx_desc)
3964				first = 0;
3965
3966	                tx_buffer = &txr->tx_buffers[first];
3967			tx_desc = &txr->tx_base[first];
3968		}
3969		++txr->packets;
3970		++ifp->if_opackets;
3971		/* See if we can continue to the next packet */
3972		last = tx_buffer->next_eop;
3973		if (last != -1) {
3974        		eop_desc = &txr->tx_base[last];
3975			/* Get new done point */
3976			if (++last == adapter->num_tx_desc) last = 0;
3977			done = last;
3978		} else
3979			break;
3980        }
3981        bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3982            BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3983
3984        txr->next_to_clean = first;
3985
3986	/*
3987	** Watchdog calculation: we know there is work
3988	** outstanding, or the early return above would
3989	** have been taken, so nothing processed for too
3990	** long indicates a hang.
3991	*/
3992	if ((!processed) && ((ticks - txr->watchdog_time) > IGB_WATCHDOG))
3993		txr->queue_status |= IGB_QUEUE_HUNG;
3994        /*
3995         * If we have a minimum free,
3996         * clear depleted state bit
3997         */
3998        if (txr->tx_avail >= IGB_QUEUE_THRESHOLD)
3999                txr->queue_status &= ~IGB_QUEUE_DEPLETED;
4000
4001	/* All clean, turn off the watchdog */
4002	if (txr->tx_avail == adapter->num_tx_desc) {
4003		txr->queue_status = IGB_QUEUE_IDLE;
4004		return (FALSE);
4005        }
4006
4007	return (TRUE);
4008}
4009
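/*
 * Illustrative sketch (not part of the driver, compiled out): the ring-index
 * arithmetic used throughout igb_txeof() above.  'done' is the slot just
 * past the EOP descriptor, so the inner loop can run while (first != done)
 * with nothing more than this wrap-around increment.
 */
#if 0
static inline int
igb_example_next_idx(int idx, int ring_size)
{
	/* Equivalent to (idx + 1) % ring_size, without the division */
	if (++idx == ring_size)
		idx = 0;
	return (idx);
}
#endif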
4010/*********************************************************************
4011 *
4012 *  Refresh mbuf buffers for RX descriptor rings
4013 *   - now keeps its own state so discards due to resource
4014 *     exhaustion are unnecessary; if an mbuf cannot be obtained
4015 *     the routine just returns, keeping its placeholder, so it can
4016 *     simply be called again later to retry.
4017 *
4018 **********************************************************************/
4019static void
4020igb_refresh_mbufs(struct rx_ring *rxr, int limit)
4021{
4022	struct adapter		*adapter = rxr->adapter;
4023	bus_dma_segment_t	hseg[1];
4024	bus_dma_segment_t	pseg[1];
4025	struct igb_rx_buf	*rxbuf;
4026	struct mbuf		*mh, *mp;
4027	int			i, j, nsegs, error;
4028	bool			refreshed = FALSE;
4029
4030	i = j = rxr->next_to_refresh;
4031	/*
4032	** Get one descriptor beyond
4033	** our work mark to control
4034	** the loop.
4035	*/
4036	if (++j == adapter->num_rx_desc)
4037		j = 0;
4038
4039	while (j != limit) {
4040		rxbuf = &rxr->rx_buffers[i];
4041		/* No hdr mbuf used with header split off */
4042		if (rxr->hdr_split == FALSE)
4043			goto no_split;
4044		if (rxbuf->m_head == NULL) {
4045			mh = m_gethdr(M_NOWAIT, MT_DATA);
4046			if (mh == NULL)
4047				goto update;
4048		} else
4049			mh = rxbuf->m_head;
4050
4051		mh->m_pkthdr.len = mh->m_len = MHLEN;
4053		mh->m_flags |= M_PKTHDR;
4054		/* Get the memory mapping */
4055		error = bus_dmamap_load_mbuf_sg(rxr->htag,
4056		    rxbuf->hmap, mh, hseg, &nsegs, BUS_DMA_NOWAIT);
4057		if (error != 0) {
4058			printf("Refresh mbufs: hdr dmamap load"
4059			    " failure - %d\n", error);
4060			m_free(mh);
4061			rxbuf->m_head = NULL;
4062			goto update;
4063		}
4064		rxbuf->m_head = mh;
4065		bus_dmamap_sync(rxr->htag, rxbuf->hmap,
4066		    BUS_DMASYNC_PREREAD);
4067		rxr->rx_base[i].read.hdr_addr =
4068		    htole64(hseg[0].ds_addr);
4069no_split:
4070		if (rxbuf->m_pack == NULL) {
4071			mp = m_getjcl(M_NOWAIT, MT_DATA,
4072			    M_PKTHDR, adapter->rx_mbuf_sz);
4073			if (mp == NULL)
4074				goto update;
4075		} else
4076			mp = rxbuf->m_pack;
4077
4078		mp->m_pkthdr.len = mp->m_len = adapter->rx_mbuf_sz;
4079		/* Get the memory mapping */
4080		error = bus_dmamap_load_mbuf_sg(rxr->ptag,
4081		    rxbuf->pmap, mp, pseg, &nsegs, BUS_DMA_NOWAIT);
4082		if (error != 0) {
4083			printf("Refresh mbufs: payload dmamap load"
4084			    " failure - %d\n", error);
4085			m_free(mp);
4086			rxbuf->m_pack = NULL;
4087			goto update;
4088		}
4089		rxbuf->m_pack = mp;
4090		bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
4091		    BUS_DMASYNC_PREREAD);
4092		rxr->rx_base[i].read.pkt_addr =
4093		    htole64(pseg[0].ds_addr);
4094		refreshed = TRUE; /* I feel wefreshed :) */
4095
4096		i = j; /* our next is precalculated */
4097		rxr->next_to_refresh = i;
4098		if (++j == adapter->num_rx_desc)
4099			j = 0;
4100	}
4101update:
4102	if (refreshed) /* update tail */
4103		E1000_WRITE_REG(&adapter->hw,
4104		    E1000_RDT(rxr->me), rxr->next_to_refresh);
4105	return;
4106}
4107
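/*
 * Illustrative sketch (not part of the driver, compiled out): the window
 * walked by igb_refresh_mbufs() above.  With next_to_refresh = 1020,
 * limit = 4 and a 1024-entry ring, the loop rearms slots 1020..1023 and
 * 0..2, leaving next_to_refresh at 3, one short of 'limit' so the hardware
 * never owns a descriptor we have not finished rearming.  A hypothetical
 * helper for the count:
 */
#if 0
static inline int
igb_example_refresh_count(int next_to_refresh, int limit, int ring_size)
{
	/* Number of slots the refresh loop will rearm before it stops */
	return ((limit - 1 - next_to_refresh + ring_size) % ring_size);
}
#endif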
4108
4109/*********************************************************************
4110 *
4111 *  Allocate memory for rx_buffer structures. Since we use one
4112 *  rx_buffer per received packet, the maximum number of rx_buffers
4113 *  that we'll need is equal to the number of receive descriptors
4114 *  that we've allocated.
4115 *
4116 **********************************************************************/
4117static int
4118igb_allocate_receive_buffers(struct rx_ring *rxr)
4119{
4120	struct	adapter 	*adapter = rxr->adapter;
4121	device_t 		dev = adapter->dev;
4122	struct igb_rx_buf	*rxbuf;
4123	int             	i, bsize, error;
4124
4125	bsize = sizeof(struct igb_rx_buf) * adapter->num_rx_desc;
4126	if (!(rxr->rx_buffers =
4127	    (struct igb_rx_buf *) malloc(bsize,
4128	    M_DEVBUF, M_NOWAIT | M_ZERO))) {
4129		device_printf(dev, "Unable to allocate rx_buffer memory\n");
4130		error = ENOMEM;
4131		goto fail;
4132	}
4133
4134	if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
4135				   1, 0,		/* alignment, bounds */
4136				   BUS_SPACE_MAXADDR,	/* lowaddr */
4137				   BUS_SPACE_MAXADDR,	/* highaddr */
4138				   NULL, NULL,		/* filter, filterarg */
4139				   MSIZE,		/* maxsize */
4140				   1,			/* nsegments */
4141				   MSIZE,		/* maxsegsize */
4142				   0,			/* flags */
4143				   NULL,		/* lockfunc */
4144				   NULL,		/* lockfuncarg */
4145				   &rxr->htag))) {
4146		device_printf(dev, "Unable to create RX DMA tag\n");
4147		goto fail;
4148	}
4149
4150	if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
4151				   1, 0,		/* alignment, bounds */
4152				   BUS_SPACE_MAXADDR,	/* lowaddr */
4153				   BUS_SPACE_MAXADDR,	/* highaddr */
4154				   NULL, NULL,		/* filter, filterarg */
4155				   MJUM9BYTES,		/* maxsize */
4156				   1,			/* nsegments */
4157				   MJUM9BYTES,		/* maxsegsize */
4158				   0,			/* flags */
4159				   NULL,		/* lockfunc */
4160				   NULL,		/* lockfuncarg */
4161				   &rxr->ptag))) {
4162		device_printf(dev, "Unable to create RX payload DMA tag\n");
4163		goto fail;
4164	}
4165
4166	for (i = 0; i < adapter->num_rx_desc; i++) {
4167		rxbuf = &rxr->rx_buffers[i];
4168		error = bus_dmamap_create(rxr->htag,
4169		    BUS_DMA_NOWAIT, &rxbuf->hmap);
4170		if (error) {
4171			device_printf(dev,
4172			    "Unable to create RX head DMA maps\n");
4173			goto fail;
4174		}
4175		error = bus_dmamap_create(rxr->ptag,
4176		    BUS_DMA_NOWAIT, &rxbuf->pmap);
4177		if (error) {
4178			device_printf(dev,
4179			    "Unable to create RX packet DMA maps\n");
4180			goto fail;
4181		}
4182	}
4183
4184	return (0);
4185
4186fail:
4187	/* Frees all, but can handle partial completion */
4188	igb_free_receive_structures(adapter);
4189	return (error);
4190}
4191
4192
4193static void
4194igb_free_receive_ring(struct rx_ring *rxr)
4195{
4196	struct	adapter		*adapter = rxr->adapter;
4197	struct igb_rx_buf	*rxbuf;
4198
4199
4200	for (int i = 0; i < adapter->num_rx_desc; i++) {
4201		rxbuf = &rxr->rx_buffers[i];
4202		if (rxbuf->m_head != NULL) {
4203			bus_dmamap_sync(rxr->htag, rxbuf->hmap,
4204			    BUS_DMASYNC_POSTREAD);
4205			bus_dmamap_unload(rxr->htag, rxbuf->hmap);
4206			rxbuf->m_head->m_flags |= M_PKTHDR;
4207			m_freem(rxbuf->m_head);
4208		}
4209		if (rxbuf->m_pack != NULL) {
4210			bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
4211			    BUS_DMASYNC_POSTREAD);
4212			bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
4213			rxbuf->m_pack->m_flags |= M_PKTHDR;
4214			m_freem(rxbuf->m_pack);
4215		}
4216		rxbuf->m_head = NULL;
4217		rxbuf->m_pack = NULL;
4218	}
4219}
4220
4221
4222/*********************************************************************
4223 *
4224 *  Initialize a receive ring and its buffers.
4225 *
4226 **********************************************************************/
4227static int
4228igb_setup_receive_ring(struct rx_ring *rxr)
4229{
4230	struct	adapter		*adapter;
4231	struct  ifnet		*ifp;
4232	device_t		dev;
4233	struct igb_rx_buf	*rxbuf;
4234	bus_dma_segment_t	pseg[1], hseg[1];
4235	struct lro_ctrl		*lro = &rxr->lro;
4236	int			rsize, nsegs, error = 0;
4237#ifdef DEV_NETMAP
4238	struct netmap_adapter *na = NA(rxr->adapter->ifp);
4239	struct netmap_slot *slot;
4240#endif /* DEV_NETMAP */
4241
4242	adapter = rxr->adapter;
4243	dev = adapter->dev;
4244	ifp = adapter->ifp;
4245
4246	/* Clear the ring contents */
4247	IGB_RX_LOCK(rxr);
4248#ifdef DEV_NETMAP
4249	slot = netmap_reset(na, NR_RX, rxr->me, 0);
4250#endif /* DEV_NETMAP */
4251	rsize = roundup2(adapter->num_rx_desc *
4252	    sizeof(union e1000_adv_rx_desc), IGB_DBA_ALIGN);
4253	bzero((void *)rxr->rx_base, rsize);
4254
4255	/*
4256	** Free current RX buffer structures and their mbufs
4257	*/
4258	igb_free_receive_ring(rxr);
4259
4260	/* Configure for header split? */
4261	if (igb_header_split)
4262		rxr->hdr_split = TRUE;
4263
4264        /* Now replenish the ring mbufs */
4265	for (int j = 0; j < adapter->num_rx_desc; ++j) {
4266		struct mbuf	*mh, *mp;
4267
4268		rxbuf = &rxr->rx_buffers[j];
4269#ifdef DEV_NETMAP
4270		if (slot) {
4271			/* slot sj is mapped to the i-th NIC-ring entry */
4272			int sj = netmap_idx_n2k(&na->rx_rings[rxr->me], j);
4273			uint64_t paddr;
4274			void *addr;
4275
4276			addr = PNMB(slot + sj, &paddr);
4277			netmap_load_map(rxr->ptag, rxbuf->pmap, addr);
4278			/* Update descriptor */
4279			rxr->rx_base[j].read.pkt_addr = htole64(paddr);
4280			continue;
4281		}
4282#endif /* DEV_NETMAP */
4283		if (rxr->hdr_split == FALSE)
4284			goto skip_head;
4285
4286		/* First the header */
4287		rxbuf->m_head = m_gethdr(M_NOWAIT, MT_DATA);
4288		if (rxbuf->m_head == NULL) {
4289			error = ENOBUFS;
4290                        goto fail;
4291		}
4292		m_adj(rxbuf->m_head, ETHER_ALIGN);
4293		mh = rxbuf->m_head;
4294		mh->m_len = mh->m_pkthdr.len = MHLEN;
4295		mh->m_flags |= M_PKTHDR;
4296		/* Get the memory mapping */
4297		error = bus_dmamap_load_mbuf_sg(rxr->htag,
4298		    rxbuf->hmap, rxbuf->m_head, hseg,
4299		    &nsegs, BUS_DMA_NOWAIT);
4300		if (error != 0) /* Nothing elegant to do here */
4301                        goto fail;
4302		bus_dmamap_sync(rxr->htag,
4303		    rxbuf->hmap, BUS_DMASYNC_PREREAD);
4304		/* Update descriptor */
4305		rxr->rx_base[j].read.hdr_addr = htole64(hseg[0].ds_addr);
4306
4307skip_head:
4308		/* Now the payload cluster */
4309		rxbuf->m_pack = m_getjcl(M_NOWAIT, MT_DATA,
4310		    M_PKTHDR, adapter->rx_mbuf_sz);
4311		if (rxbuf->m_pack == NULL) {
4312			error = ENOBUFS;
4313                        goto fail;
4314		}
4315		mp = rxbuf->m_pack;
4316		mp->m_pkthdr.len = mp->m_len = adapter->rx_mbuf_sz;
4317		/* Get the memory mapping */
4318		error = bus_dmamap_load_mbuf_sg(rxr->ptag,
4319		    rxbuf->pmap, mp, pseg,
4320		    &nsegs, BUS_DMA_NOWAIT);
4321		if (error != 0)
4322                        goto fail;
4323		bus_dmamap_sync(rxr->ptag,
4324		    rxbuf->pmap, BUS_DMASYNC_PREREAD);
4325		/* Update descriptor */
4326		rxr->rx_base[j].read.pkt_addr = htole64(pseg[0].ds_addr);
4327        }
4328
4329	/* Setup our descriptor indices */
4330	rxr->next_to_check = 0;
4331	rxr->next_to_refresh = adapter->num_rx_desc - 1;
4332	rxr->lro_enabled = FALSE;
4333	rxr->rx_split_packets = 0;
4334	rxr->rx_bytes = 0;
4335
4336	rxr->fmp = NULL;
4337	rxr->lmp = NULL;
4338	rxr->discard = FALSE;
4339
4340	bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4341	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4342
4343	/*
4344	** Now set up the LRO interface; we
4345	** also only do header split when LRO
4346	** is enabled, since the two are so
4347	** often undesirable in similar setups.
4348	*/
4349	if (ifp->if_capenable & IFCAP_LRO) {
4350		error = tcp_lro_init(lro);
4351		if (error) {
4352			device_printf(dev, "LRO Initialization failed!\n");
4353			goto fail;
4354		}
4355		INIT_DEBUGOUT("RX LRO Initialized\n");
4356		rxr->lro_enabled = TRUE;
4357		lro->ifp = adapter->ifp;
4358	}
4359
4360	IGB_RX_UNLOCK(rxr);
4361	return (0);
4362
4363fail:
4364	igb_free_receive_ring(rxr);
4365	IGB_RX_UNLOCK(rxr);
4366	return (error);
4367}
4368
4369
4370/*********************************************************************
4371 *
4372 *  Initialize all receive rings.
4373 *
4374 **********************************************************************/
4375static int
4376igb_setup_receive_structures(struct adapter *adapter)
4377{
4378	struct rx_ring *rxr = adapter->rx_rings;
4379	int i;
4380
4381	for (i = 0; i < adapter->num_queues; i++, rxr++)
4382		if (igb_setup_receive_ring(rxr))
4383			goto fail;
4384
4385	return (0);
4386fail:
4387	/*
4388	 * Free the RX buffers allocated so far; we only handle
4389	 * the rings that completed, since the failing ring will
4390	 * have cleaned up after itself. 'i' is the endpoint.
4391	 */
4392	for (int j = 0; j < i; ++j) {
4393		rxr = &adapter->rx_rings[j];
4394		IGB_RX_LOCK(rxr);
4395		igb_free_receive_ring(rxr);
4396		IGB_RX_UNLOCK(rxr);
4397	}
4398
4399	return (ENOBUFS);
4400}
4401
4402/*********************************************************************
4403 *
4404 *  Enable receive unit.
4405 *
4406 **********************************************************************/
4407static void
4408igb_initialize_receive_units(struct adapter *adapter)
4409{
4410	struct rx_ring	*rxr = adapter->rx_rings;
4411	struct ifnet	*ifp = adapter->ifp;
4412	struct e1000_hw *hw = &adapter->hw;
4413	u32		rctl, rxcsum, psize, srrctl = 0;
4414
4415	INIT_DEBUGOUT("igb_initialize_receive_units: begin");
4416
4417	/*
4418	 * Make sure receives are disabled while setting
4419	 * up the descriptor ring
4420	 */
4421	rctl = E1000_READ_REG(hw, E1000_RCTL);
4422	E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
4423
4424	/*
4425	** Set up for header split
4426	*/
4427	if (igb_header_split) {
4428		/* Use a standard mbuf for the header */
4429		srrctl |= IGB_HDR_BUF << E1000_SRRCTL_BSIZEHDRSIZE_SHIFT;
4430		srrctl |= E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
4431	} else
4432		srrctl |= E1000_SRRCTL_DESCTYPE_ADV_ONEBUF;
4433
4434	/*
4435	** Set up for jumbo frames
4436	*/
4437	if (ifp->if_mtu > ETHERMTU) {
4438		rctl |= E1000_RCTL_LPE;
4439		if (adapter->rx_mbuf_sz == MJUMPAGESIZE) {
4440			srrctl |= 4096 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
4441			rctl |= E1000_RCTL_SZ_4096 | E1000_RCTL_BSEX;
4442		} else if (adapter->rx_mbuf_sz > MJUMPAGESIZE) {
4443			srrctl |= 8192 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
4444			rctl |= E1000_RCTL_SZ_8192 | E1000_RCTL_BSEX;
4445		}
4446		/* Set maximum packet len */
4447		psize = adapter->max_frame_size;
4448		/* are we on a vlan? */
4449		if (adapter->ifp->if_vlantrunk != NULL)
4450			psize += VLAN_TAG_SIZE;
4451		E1000_WRITE_REG(&adapter->hw, E1000_RLPML, psize);
4452	} else {
4453		rctl &= ~E1000_RCTL_LPE;
4454		srrctl |= 2048 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
4455		rctl |= E1000_RCTL_SZ_2048;
4456	}
4457
4458	/* Setup the Base and Length of the Rx Descriptor Rings */
4459	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4460		u64 bus_addr = rxr->rxdma.dma_paddr;
4461		u32 rxdctl;
4462
4463		E1000_WRITE_REG(hw, E1000_RDLEN(i),
4464		    adapter->num_rx_desc * sizeof(struct e1000_rx_desc));
4465		E1000_WRITE_REG(hw, E1000_RDBAH(i),
4466		    (uint32_t)(bus_addr >> 32));
4467		E1000_WRITE_REG(hw, E1000_RDBAL(i),
4468		    (uint32_t)bus_addr);
4469		E1000_WRITE_REG(hw, E1000_SRRCTL(i), srrctl);
4470		/* Enable this Queue */
4471		rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(i));
4472		rxdctl |= E1000_RXDCTL_QUEUE_ENABLE;
4473		rxdctl &= 0xFFF00000;
4474		rxdctl |= IGB_RX_PTHRESH;
4475		rxdctl |= IGB_RX_HTHRESH << 8;
4476		rxdctl |= IGB_RX_WTHRESH << 16;
4477		E1000_WRITE_REG(hw, E1000_RXDCTL(i), rxdctl);
4478	}
4479
4480	/*
4481	** Setup for RX MultiQueue
4482	*/
4483	rxcsum = E1000_READ_REG(hw, E1000_RXCSUM);
4484	if (adapter->num_queues > 1) {
4485		u32 random[10], mrqc, shift = 0;
4486		union igb_reta {
4487			u32 dword;
4488			u8  bytes[4];
4489		} reta;
4490
4491		arc4rand(&random, sizeof(random), 0);
4492		if (adapter->hw.mac.type == e1000_82575)
4493			shift = 6;
4494		/* Populate the 128-entry RSS redirection table, 4 entries per register */
4495		for (int i = 0; i < 128; i++) {
4496			reta.bytes[i & 3] =
4497			    (i % adapter->num_queues) << shift;
4498			if ((i & 3) == 3)
4499				E1000_WRITE_REG(hw,
4500				    E1000_RETA(i >> 2), reta.dword);
4501		}
4502		/* Now fill in hash table */
4503		mrqc = E1000_MRQC_ENABLE_RSS_4Q;
4504		for (int i = 0; i < 10; i++)
4505			E1000_WRITE_REG_ARRAY(hw,
4506			    E1000_RSSRK(0), i, random[i]);
4507
4508		mrqc |= (E1000_MRQC_RSS_FIELD_IPV4 |
4509		    E1000_MRQC_RSS_FIELD_IPV4_TCP);
4510		mrqc |= (E1000_MRQC_RSS_FIELD_IPV6 |
4511		    E1000_MRQC_RSS_FIELD_IPV6_TCP);
4512		mrqc |= (E1000_MRQC_RSS_FIELD_IPV4_UDP |
4513		    E1000_MRQC_RSS_FIELD_IPV6_UDP);
4514		mrqc |= (E1000_MRQC_RSS_FIELD_IPV6_UDP_EX |
4515		    E1000_MRQC_RSS_FIELD_IPV6_TCP_EX);
4516
4517		E1000_WRITE_REG(hw, E1000_MRQC, mrqc);
4518
4519		/*
4520		** NOTE: Receive Full-Packet Checksum Offload
4521		** is mutually exclusive with Multiqueue; this is
4522		** not the same as the TCP/IP checksum offloads,
4523		** which still work.
4524		*/
4525		rxcsum |= E1000_RXCSUM_PCSD;
4526#if __FreeBSD_version >= 800000
4527		/* For SCTP Offload */
4528		if ((hw->mac.type == e1000_82576)
4529		    && (ifp->if_capenable & IFCAP_RXCSUM))
4530			rxcsum |= E1000_RXCSUM_CRCOFL;
4531#endif
4532	} else {
4533		/* Non RSS setup */
4534		if (ifp->if_capenable & IFCAP_RXCSUM) {
4535			rxcsum |= E1000_RXCSUM_IPPCSE;
4536#if __FreeBSD_version >= 800000
4537			if (adapter->hw.mac.type == e1000_82576)
4538				rxcsum |= E1000_RXCSUM_CRCOFL;
4539#endif
4540		} else
4541			rxcsum &= ~E1000_RXCSUM_TUOFL;
4542	}
4543	E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum);
4544
4545	/* Setup the Receive Control Register */
4546	rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
4547	rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_LBM_NO |
4548		   E1000_RCTL_RDMTS_HALF |
4549		   (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
4550	/* Strip CRC bytes. */
4551	rctl |= E1000_RCTL_SECRC;
4552	/* Make sure VLAN Filters are off */
4553	rctl &= ~E1000_RCTL_VFE;
4554	/* Don't store bad packets */
4555	rctl &= ~E1000_RCTL_SBP;
4556
4557	/* Enable Receives */
4558	E1000_WRITE_REG(hw, E1000_RCTL, rctl);
4559
4560	/*
4561	 * Setup the HW Rx Head and Tail Descriptor Pointers
4562	 *   - needs to be after enable
4563	 */
4564	for (int i = 0; i < adapter->num_queues; i++) {
4565		rxr = &adapter->rx_rings[i];
4566		E1000_WRITE_REG(hw, E1000_RDH(i), rxr->next_to_check);
4567#ifdef DEV_NETMAP
4568		/*
4569		 * An init() while a netmap client is active must
4570		 * preserve the rx buffers passed to userspace.
4571		 * In this driver it means we adjust RDT to
4572		 * something different from next_to_refresh
4573		 * (which is not used in netmap mode).
4574		 */
4575		if (ifp->if_capenable & IFCAP_NETMAP) {
4576			struct netmap_adapter *na = NA(adapter->ifp);
4577			struct netmap_kring *kring = &na->rx_rings[i];
4578			int t = rxr->next_to_refresh - kring->nr_hwavail;
4579
4580			if (t >= adapter->num_rx_desc)
4581				t -= adapter->num_rx_desc;
4582			else if (t < 0)
4583				t += adapter->num_rx_desc;
4584			E1000_WRITE_REG(hw, E1000_RDT(i), t);
4585		} else
4586#endif /* DEV_NETMAP */
4587		E1000_WRITE_REG(hw, E1000_RDT(i), rxr->next_to_refresh);
4588	}
4589	return;
4590}
4591
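/*
 * Illustrative sketch (not part of the driver, compiled out): how the RSS
 * redirection table loop above packs four one-byte queue indices into each
 * 32-bit RETA register.  With four queues and no 82575 shift the first
 * register holds the bytes {0, 1, 2, 3}, i.e. the dword 0x03020100 on a
 * little-endian machine, and the pattern repeats across all 32 registers.
 */
#if 0
static inline u32
igb_example_reta_dword(int base_entry, int num_queues, int shift)
{
	union {
		u32 dword;
		u8  bytes[4];
	} reta;

	for (int n = 0; n < 4; n++)
		reta.bytes[n] = ((base_entry + n) % num_queues) << shift;
	return (reta.dword);
}
#endif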
4592/*********************************************************************
4593 *
4594 *  Free receive rings.
4595 *
4596 **********************************************************************/
4597static void
4598igb_free_receive_structures(struct adapter *adapter)
4599{
4600	struct rx_ring *rxr = adapter->rx_rings;
4601
4602	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4603		struct lro_ctrl	*lro = &rxr->lro;
4604		igb_free_receive_buffers(rxr);
4605		tcp_lro_free(lro);
4606		igb_dma_free(adapter, &rxr->rxdma);
4607	}
4608
4609	free(adapter->rx_rings, M_DEVBUF);
4610}
4611
4612/*********************************************************************
4613 *
4614 *  Free receive ring data structures.
4615 *
4616 **********************************************************************/
4617static void
4618igb_free_receive_buffers(struct rx_ring *rxr)
4619{
4620	struct adapter		*adapter = rxr->adapter;
4621	struct igb_rx_buf	*rxbuf;
4622	int i;
4623
4624	INIT_DEBUGOUT("igb_free_receive_buffers: begin");
4625
4626	/* Cleanup any existing buffers */
4627	if (rxr->rx_buffers != NULL) {
4628		for (i = 0; i < adapter->num_rx_desc; i++) {
4629			rxbuf = &rxr->rx_buffers[i];
4630			if (rxbuf->m_head != NULL) {
4631				bus_dmamap_sync(rxr->htag, rxbuf->hmap,
4632				    BUS_DMASYNC_POSTREAD);
4633				bus_dmamap_unload(rxr->htag, rxbuf->hmap);
4634				rxbuf->m_head->m_flags |= M_PKTHDR;
4635				m_freem(rxbuf->m_head);
4636			}
4637			if (rxbuf->m_pack != NULL) {
4638				bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
4639				    BUS_DMASYNC_POSTREAD);
4640				bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
4641				rxbuf->m_pack->m_flags |= M_PKTHDR;
4642				m_freem(rxbuf->m_pack);
4643			}
4644			rxbuf->m_head = NULL;
4645			rxbuf->m_pack = NULL;
4646			if (rxbuf->hmap != NULL) {
4647				bus_dmamap_destroy(rxr->htag, rxbuf->hmap);
4648				rxbuf->hmap = NULL;
4649			}
4650			if (rxbuf->pmap != NULL) {
4651				bus_dmamap_destroy(rxr->ptag, rxbuf->pmap);
4652				rxbuf->pmap = NULL;
4653			}
4654		}
4655		if (rxr->rx_buffers != NULL) {
4656			free(rxr->rx_buffers, M_DEVBUF);
4657			rxr->rx_buffers = NULL;
4658		}
4659	}
4660
4661	if (rxr->htag != NULL) {
4662		bus_dma_tag_destroy(rxr->htag);
4663		rxr->htag = NULL;
4664	}
4665	if (rxr->ptag != NULL) {
4666		bus_dma_tag_destroy(rxr->ptag);
4667		rxr->ptag = NULL;
4668	}
4669}
4670
4671static __inline void
4672igb_rx_discard(struct rx_ring *rxr, int i)
4673{
4674	struct igb_rx_buf	*rbuf;
4675
4676	rbuf = &rxr->rx_buffers[i];
4677
4678	/* Partially received? Free the chain */
4679	if (rxr->fmp != NULL) {
4680		rxr->fmp->m_flags |= M_PKTHDR;
4681		m_freem(rxr->fmp);
4682		rxr->fmp = NULL;
4683		rxr->lmp = NULL;
4684	}
4685
4686	/*
4687	** With advanced descriptors the writeback
4688	** clobbers the buffer addresses, so it's easier
4689	** to just free the existing mbufs and take
4690	** the normal refresh path to get new buffers
4691	** and mapping.
4692	*/
4693	if (rbuf->m_head) {
4694		m_free(rbuf->m_head);
4695		rbuf->m_head = NULL;
4696	}
4697
4698	if (rbuf->m_pack) {
4699		m_free(rbuf->m_pack);
4700		rbuf->m_pack = NULL;
4701	}
4702
4703	return;
4704}
4705
4706static __inline void
4707igb_rx_input(struct rx_ring *rxr, struct ifnet *ifp, struct mbuf *m, u32 ptype)
4708{
4709
4710	/*
4711	 * At the moment LRO is only applied to IPv4/TCP packets whose TCP
4712	 * checksum has been verified by the hardware and which carry no VLAN
4713	 * tag in the ethernet header.
4714	 */
4715	if (rxr->lro_enabled &&
4716	    (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 &&
4717	    (ptype & E1000_RXDADV_PKTTYPE_ETQF) == 0 &&
4718	    (ptype & (E1000_RXDADV_PKTTYPE_IPV4 | E1000_RXDADV_PKTTYPE_TCP)) ==
4719	    (E1000_RXDADV_PKTTYPE_IPV4 | E1000_RXDADV_PKTTYPE_TCP) &&
4720	    (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) ==
4721	    (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) {
4722		/*
4723		 * Send to the stack if:
4724		 *  - LRO not enabled, or
4725		 *  - no LRO resources, or
4726		 *  - lro enqueue fails
4727		 */
4728		if (rxr->lro.lro_cnt != 0)
4729			if (tcp_lro_rx(&rxr->lro, m, 0) == 0)
4730				return;
4731	}
4732	IGB_RX_UNLOCK(rxr);
4733	(*ifp->if_input)(ifp, m);
4734	IGB_RX_LOCK(rxr);
4735}
4736
4737/*********************************************************************
4738 *
4739 *  This routine executes in interrupt context. It replenishes
4740 *  the mbufs in the descriptor and sends data which has been
4741 *  dma'ed into host memory to upper layer.
4742 *
4743 *  We loop at most count times if count is > 0, or until done if
4744 *  count < 0.
4745 *
4746 *  Return TRUE if more to clean, FALSE otherwise
4747 *********************************************************************/
4748static bool
4749igb_rxeof(struct igb_queue *que, int count, int *done)
4750{
4751	struct adapter		*adapter = que->adapter;
4752	struct rx_ring		*rxr = que->rxr;
4753	struct ifnet		*ifp = adapter->ifp;
4754	struct lro_ctrl		*lro = &rxr->lro;
4755	struct lro_entry	*queued;
4756	int			i, processed = 0, rxdone = 0;
4757	u32			ptype, staterr = 0;
4758	union e1000_adv_rx_desc	*cur;
4759
4760	IGB_RX_LOCK(rxr);
4761	/* Sync the ring. */
4762	bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4763	    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
4764
4765#ifdef DEV_NETMAP
4766	if (netmap_rx_irq(ifp, rxr->me | NETMAP_LOCKED_ENTER, &processed))
4767		return (FALSE);
4768#endif /* DEV_NETMAP */
4769
4770	/* Main clean loop */
4771	for (i = rxr->next_to_check; count != 0;) {
4772		struct mbuf		*sendmp, *mh, *mp;
4773		struct igb_rx_buf	*rxbuf;
4774		u16			hlen, plen, hdr, vtag;
4775		bool			eop = FALSE;
4776
4777		cur = &rxr->rx_base[i];
4778		staterr = le32toh(cur->wb.upper.status_error);
4779		if ((staterr & E1000_RXD_STAT_DD) == 0)
4780			break;
4781		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
4782			break;
4783		count--;
4784		sendmp = mh = mp = NULL;
4785		cur->wb.upper.status_error = 0;
4786		rxbuf = &rxr->rx_buffers[i];
4787		plen = le16toh(cur->wb.upper.length);
4788		ptype = le32toh(cur->wb.lower.lo_dword.data) & IGB_PKTTYPE_MASK;
4789		if ((adapter->hw.mac.type == e1000_i350) &&
4790		    (staterr & E1000_RXDEXT_STATERR_LB))
4791			vtag = be16toh(cur->wb.upper.vlan);
4792		else
4793			vtag = le16toh(cur->wb.upper.vlan);
4794		hdr = le16toh(cur->wb.lower.lo_dword.hs_rss.hdr_info);
4795		eop = ((staterr & E1000_RXD_STAT_EOP) == E1000_RXD_STAT_EOP);
4796
4797		/* Make sure all segments of a bad packet are discarded */
4798		if (((staterr & E1000_RXDEXT_ERR_FRAME_ERR_MASK) != 0) ||
4799		    (rxr->discard)) {
4800			adapter->dropped_pkts++;
4801			++rxr->rx_discarded;
4802			if (!eop) /* Catch subsequent segs */
4803				rxr->discard = TRUE;
4804			else
4805				rxr->discard = FALSE;
4806			igb_rx_discard(rxr, i);
4807			goto next_desc;
4808		}
4809
4810		/*
4811		** The way the hardware is configured to
4812		** split, it will ONLY use the header buffer
4813		** when header split is enabled; otherwise we
4814		** get normal behavior, i.e. both header and
4815		** payload are DMA'd into the payload buffer.
4816		**
4817		** The fmp test catches the case where a
4818		** packet spans multiple descriptors; in that
4819		** case only the first header is valid.
4820		*/
4821		if (rxr->hdr_split && rxr->fmp == NULL) {
4822			hlen = (hdr & E1000_RXDADV_HDRBUFLEN_MASK) >>
4823			    E1000_RXDADV_HDRBUFLEN_SHIFT;
4824			if (hlen > IGB_HDR_BUF)
4825				hlen = IGB_HDR_BUF;
4826			mh = rxr->rx_buffers[i].m_head;
4827			mh->m_len = hlen;
4828			/* clear buf pointer for refresh */
4829			rxbuf->m_head = NULL;
4830			/*
4831			** Get the payload length; this
4832			** could be zero if it's a small
4833			** packet.
4834			*/
4835			if (plen > 0) {
4836				mp = rxr->rx_buffers[i].m_pack;
4837				mp->m_len = plen;
4838				mh->m_next = mp;
4839				/* clear buf pointer */
4840				rxbuf->m_pack = NULL;
4841				rxr->rx_split_packets++;
4842			}
4843		} else {
4844			/*
4845			** Either no header split, or a
4846			** secondary piece of a fragmented
4847			** split packet.
4848			*/
4849			mh = rxr->rx_buffers[i].m_pack;
4850			mh->m_len = plen;
4851			/* clear buf info for refresh */
4852			rxbuf->m_pack = NULL;
4853		}
4854
4855		++processed; /* So we know when to refresh */
4856
4857		/* Initial frame - setup */
4858		if (rxr->fmp == NULL) {
4859			mh->m_pkthdr.len = mh->m_len;
4860			/* Save the head of the chain */
4861			rxr->fmp = mh;
4862			rxr->lmp = mh;
4863			if (mp != NULL) {
4864				/* Add payload if split */
4865				mh->m_pkthdr.len += mp->m_len;
4866				rxr->lmp = mh->m_next;
4867			}
4868		} else {
4869			/* Chain mbuf's together */
4870			rxr->lmp->m_next = mh;
4871			rxr->lmp = rxr->lmp->m_next;
4872			rxr->fmp->m_pkthdr.len += mh->m_len;
4873		}
4874
4875		if (eop) {
4876			rxr->fmp->m_pkthdr.rcvif = ifp;
4877			ifp->if_ipackets++;
4878			rxr->rx_packets++;
4879			/* capture data for AIM */
4880			rxr->packets++;
4881			rxr->bytes += rxr->fmp->m_pkthdr.len;
4882			rxr->rx_bytes += rxr->fmp->m_pkthdr.len;
4883
4884			if ((ifp->if_capenable & IFCAP_RXCSUM) != 0)
4885				igb_rx_checksum(staterr, rxr->fmp, ptype);
4886
4887			if ((ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 &&
4888			    (staterr & E1000_RXD_STAT_VP) != 0) {
4889				rxr->fmp->m_pkthdr.ether_vtag = vtag;
4890				rxr->fmp->m_flags |= M_VLANTAG;
4891			}
4892#ifndef IGB_LEGACY_TX
4893			rxr->fmp->m_pkthdr.flowid = que->msix;
4894			rxr->fmp->m_flags |= M_FLOWID;
4895#endif
4896			sendmp = rxr->fmp;
4897			/* Make sure to set M_PKTHDR. */
4898			sendmp->m_flags |= M_PKTHDR;
4899			rxr->fmp = NULL;
4900			rxr->lmp = NULL;
4901		}
4902
4903next_desc:
4904		bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4905		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4906
4907		/* Advance our pointers to the next descriptor. */
4908		if (++i == adapter->num_rx_desc)
4909			i = 0;
4910		/*
4911		** Send to the stack or LRO
4912		*/
4913		if (sendmp != NULL) {
4914			rxr->next_to_check = i;
4915			igb_rx_input(rxr, ifp, sendmp, ptype);
4916			i = rxr->next_to_check;
4917			rxdone++;
4918		}
4919
4920		/* Every 8 descriptors we go to refresh mbufs */
4921		if (processed == 8) {
4922                        igb_refresh_mbufs(rxr, i);
4923                        processed = 0;
4924		}
4925	}
4926
4927	/* Catch any remainders */
4928	if (igb_rx_unrefreshed(rxr))
4929		igb_refresh_mbufs(rxr, i);
4930
4931	rxr->next_to_check = i;
4932
4933	/*
4934	 * Flush any outstanding LRO work
4935	 */
4936	while ((queued = SLIST_FIRST(&lro->lro_active)) != NULL) {
4937		SLIST_REMOVE_HEAD(&lro->lro_active, next);
4938		tcp_lro_flush(lro, queued);
4939	}
4940
4941	if (done != NULL)
4942		*done += rxdone;
4943
4944	IGB_RX_UNLOCK(rxr);
4945	return ((staterr & E1000_RXD_STAT_DD) ? TRUE : FALSE);
4946}
4947
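/*
 * Illustrative sketch (not part of the driver, compiled out): how the
 * header-split length is extracted from the descriptor's hdr_info field in
 * the receive loop above, clamped to the IGB_HDR_BUF sized header buffer.
 */
#if 0
static inline u16
igb_example_split_hdrlen(u16 hdr_info)
{
	u16 hlen = (hdr_info & E1000_RXDADV_HDRBUFLEN_MASK) >>
	    E1000_RXDADV_HDRBUFLEN_SHIFT;

	return (hlen > IGB_HDR_BUF ? IGB_HDR_BUF : hlen);
}
#endif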
4948/*********************************************************************
4949 *
4950 *  Verify that the hardware indicated that the checksum is valid.
4951 *  Inform the stack about the status of the checksum so that the
4952 *  stack doesn't spend time re-verifying it.
4953 *
4954 *********************************************************************/
4955static void
4956igb_rx_checksum(u32 staterr, struct mbuf *mp, u32 ptype)
4957{
4958	u16 status = (u16)staterr;
4959	u8  errors = (u8) (staterr >> 24);
4960	int sctp;
4961
4962	/* Ignore Checksum bit is set */
4963	if (status & E1000_RXD_STAT_IXSM) {
4964		mp->m_pkthdr.csum_flags = 0;
4965		return;
4966	}
4967
4968	if ((ptype & E1000_RXDADV_PKTTYPE_ETQF) == 0 &&
4969	    (ptype & E1000_RXDADV_PKTTYPE_SCTP) != 0)
4970		sctp = 1;
4971	else
4972		sctp = 0;
4973	if (status & E1000_RXD_STAT_IPCS) {
4974		/* Did it pass? */
4975		if (!(errors & E1000_RXD_ERR_IPE)) {
4976			/* IP Checksum Good */
4977			mp->m_pkthdr.csum_flags = CSUM_IP_CHECKED;
4978			mp->m_pkthdr.csum_flags |= CSUM_IP_VALID;
4979		} else
4980			mp->m_pkthdr.csum_flags = 0;
4981	}
4982
4983	if (status & (E1000_RXD_STAT_TCPCS | E1000_RXD_STAT_UDPCS)) {
4984		u16 type = (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
4985#if __FreeBSD_version >= 800000
4986		if (sctp) /* reassign */
4987			type = CSUM_SCTP_VALID;
4988#endif
4989		/* Did it pass? */
4990		if (!(errors & E1000_RXD_ERR_TCPE)) {
4991			mp->m_pkthdr.csum_flags |= type;
4992			if (sctp == 0)
4993				mp->m_pkthdr.csum_data = htons(0xffff);
4994		}
4995	}
4996	return;
4997}
4998
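/*
 * Illustrative sketch (not part of the driver, compiled out): the result of
 * igb_rx_checksum() above for a clean IPv4/TCP frame, i.e. one where the
 * hardware set E1000_RXD_STAT_IPCS and E1000_RXD_STAT_TCPCS with no error
 * bits.  The mbuf is left marked so the stack skips both checksums.
 */
#if 0
static inline void
igb_example_clean_tcp_csum(struct mbuf *mp)
{
	mp->m_pkthdr.csum_flags = CSUM_IP_CHECKED | CSUM_IP_VALID |
	    CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
	mp->m_pkthdr.csum_data = htons(0xffff);
}
#endif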
4999/*
5000 * This routine is run via a vlan
5001 * config EVENT
5002 */
5003static void
5004igb_register_vlan(void *arg, struct ifnet *ifp, u16 vtag)
5005{
5006	struct adapter	*adapter = ifp->if_softc;
5007	u32		index, bit;
5008
5009	if (ifp->if_softc != arg)   /* Not our event */
5010		return;
5011
5012	if ((vtag == 0) || (vtag > 4095))       /* Invalid */
5013                return;
5014
5015	IGB_CORE_LOCK(adapter);
5016	index = (vtag >> 5) & 0x7F;
5017	bit = vtag & 0x1F;
5018	adapter->shadow_vfta[index] |= (1 << bit);
5019	++adapter->num_vlans;
5020	/* Change hw filter setting */
5021	if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
5022		igb_setup_vlan_hw_support(adapter);
5023	IGB_CORE_UNLOCK(adapter);
5024}
5025
5026/*
5027 * This routine is run via a vlan
5028 * unconfig EVENT
5029 */
5030static void
5031igb_unregister_vlan(void *arg, struct ifnet *ifp, u16 vtag)
5032{
5033	struct adapter	*adapter = ifp->if_softc;
5034	u32		index, bit;
5035
5036	if (ifp->if_softc != arg)	/* Not our event */
5037		return;
5038
5039	if ((vtag == 0) || (vtag > 4095))       /* Invalid */
5040                return;
5041
5042	IGB_CORE_LOCK(adapter);
5043	index = (vtag >> 5) & 0x7F;
5044	bit = vtag & 0x1F;
5045	adapter->shadow_vfta[index] &= ~(1 << bit);
5046	--adapter->num_vlans;
5047	/* Change hw filter setting */
5048	if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
5049		igb_setup_vlan_hw_support(adapter);
5050	IGB_CORE_UNLOCK(adapter);
5051}
5052
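/*
 * Illustrative sketch (not part of the driver, compiled out): the VFTA
 * indexing used by the two VLAN event handlers above.  The 4096 possible
 * VLAN IDs map onto 128 32-bit words, so VLAN 100 lands in word
 * 100 >> 5 = 3, bit 100 & 0x1F = 4.
 */
#if 0
static inline void
igb_example_vfta_set(u32 *shadow_vfta, u16 vtag)
{
	u32 index = (vtag >> 5) & 0x7F;	/* which 32-bit word */
	u32 bit = vtag & 0x1F;		/* which bit within that word */

	shadow_vfta[index] |= (1 << bit);
}
#endif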
5053static void
5054igb_setup_vlan_hw_support(struct adapter *adapter)
5055{
5056	struct e1000_hw *hw = &adapter->hw;
5057	struct ifnet	*ifp = adapter->ifp;
5058	u32             reg;
5059
5060	if (adapter->vf_ifp) {
5061		e1000_rlpml_set_vf(hw,
5062		    adapter->max_frame_size + VLAN_TAG_SIZE);
5063		return;
5064	}
5065
5066	reg = E1000_READ_REG(hw, E1000_CTRL);
5067	reg |= E1000_CTRL_VME;
5068	E1000_WRITE_REG(hw, E1000_CTRL, reg);
5069
5070	/* Enable the Filter Table */
5071	if (ifp->if_capenable & IFCAP_VLAN_HWFILTER) {
5072		reg = E1000_READ_REG(hw, E1000_RCTL);
5073		reg &= ~E1000_RCTL_CFIEN;
5074		reg |= E1000_RCTL_VFE;
5075		E1000_WRITE_REG(hw, E1000_RCTL, reg);
5076	}
5077
5078	/* Update the frame size */
5079	E1000_WRITE_REG(&adapter->hw, E1000_RLPML,
5080	    adapter->max_frame_size + VLAN_TAG_SIZE);
5081
5082	/* Don't bother with table if no vlans */
5083	if ((adapter->num_vlans == 0) ||
5084	    ((ifp->if_capenable & IFCAP_VLAN_HWFILTER) == 0))
5085                return;
5086	/*
5087	** A soft reset zeroes out the VFTA, so
5088	** we need to repopulate it now.
5089	*/
5090	for (int i = 0; i < IGB_VFTA_SIZE; i++)
5091                if (adapter->shadow_vfta[i] != 0) {
5092			if (adapter->vf_ifp)
5093				e1000_vfta_set_vf(hw,
5094				    adapter->shadow_vfta[i], TRUE);
5095			else
5096				e1000_write_vfta(hw,
5097				    i, adapter->shadow_vfta[i]);
5098		}
5099}
5100
5101static void
5102igb_enable_intr(struct adapter *adapter)
5103{
5104	/* With MSIX/RSS, set which interrupt causes to auto-clear */
5105	if (adapter->msix_mem) {
5106		u32 mask = (adapter->que_mask | adapter->link_mask);
5107		E1000_WRITE_REG(&adapter->hw, E1000_EIAC, mask);
5108		E1000_WRITE_REG(&adapter->hw, E1000_EIAM, mask);
5109		E1000_WRITE_REG(&adapter->hw, E1000_EIMS, mask);
5110		E1000_WRITE_REG(&adapter->hw, E1000_IMS,
5111		    E1000_IMS_LSC);
5112	} else {
5113		E1000_WRITE_REG(&adapter->hw, E1000_IMS,
5114		    IMS_ENABLE_MASK);
5115	}
5116	E1000_WRITE_FLUSH(&adapter->hw);
5117
5118	return;
5119}
5120
5121static void
5122igb_disable_intr(struct adapter *adapter)
5123{
5124	if (adapter->msix_mem) {
5125		E1000_WRITE_REG(&adapter->hw, E1000_EIMC, ~0);
5126		E1000_WRITE_REG(&adapter->hw, E1000_EIAC, 0);
5127	}
5128	E1000_WRITE_REG(&adapter->hw, E1000_IMC, ~0);
5129	E1000_WRITE_FLUSH(&adapter->hw);
5130	return;
5131}
5132
5133/*
5134 * Bit of a misnomer: what this really means is
5135 * to enable OS management of the system, i.e.
5136 * to disable the special hardware management features.
5137 */
5138static void
5139igb_init_manageability(struct adapter *adapter)
5140{
5141	if (adapter->has_manage) {
5142		int manc2h = E1000_READ_REG(&adapter->hw, E1000_MANC2H);
5143		int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
5144
5145		/* disable hardware interception of ARP */
5146		manc &= ~(E1000_MANC_ARP_EN);
5147
5148                /* enable receiving management packets to the host */
5149		manc |= E1000_MANC_EN_MNG2HOST;
5150		manc2h |= 1 << 5;  /* Mng Port 623 */
5151		manc2h |= 1 << 6;  /* Mng Port 664 */
5152		E1000_WRITE_REG(&adapter->hw, E1000_MANC2H, manc2h);
5153		E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
5154	}
5155}
5156
5157/*
5158 * Give control back to hardware management
5159 * controller if there is one.
5160 */
5161static void
5162igb_release_manageability(struct adapter *adapter)
5163{
5164	if (adapter->has_manage) {
5165		int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
5166
5167		/* re-enable hardware interception of ARP */
5168		manc |= E1000_MANC_ARP_EN;
5169		manc &= ~E1000_MANC_EN_MNG2HOST;
5170
5171		E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
5172	}
5173}
5174
5175/*
5176 * igb_get_hw_control sets CTRL_EXT:DRV_LOAD bit.
5177 * For ASF and Pass Through versions of f/w this means that
5178 * the driver is loaded.
5179 *
5180 */
5181static void
5182igb_get_hw_control(struct adapter *adapter)
5183{
5184	u32 ctrl_ext;
5185
5186	if (adapter->vf_ifp)
5187		return;
5188
5189	/* Let firmware know the driver has taken over */
5190	ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
5191	E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
5192	    ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
5193}
5194
5195/*
5196 * igb_release_hw_control resets CTRL_EXT:DRV_LOAD bit.
5197 * For ASF and Pass Through versions of f/w this means that the
5198 * driver is no longer loaded.
5199 *
5200 */
5201static void
5202igb_release_hw_control(struct adapter *adapter)
5203{
5204	u32 ctrl_ext;
5205
5206	if (adapter->vf_ifp)
5207		return;
5208
5209	/* Let firmware take over control of the h/w */
5210	ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
5211	E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
5212	    ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
5213}
5214
5215static int
5216igb_is_valid_ether_addr(uint8_t *addr)
5217{
5218	char zero_addr[6] = { 0, 0, 0, 0, 0, 0 };
5219
5220	if ((addr[0] & 1) || (!bcmp(addr, zero_addr, ETHER_ADDR_LEN))) {
5221		return (FALSE);
5222	}
5223
5224	return (TRUE);
5225}
5226
5227
5228/*
5229 * Enable PCI Wake On Lan capability
5230 */
5231static void
5232igb_enable_wakeup(device_t dev)
5233{
5234	u16     cap, status;
5235	u8      id;
5236
5237	/* First find the capabilities pointer*/
5238	cap = pci_read_config(dev, PCIR_CAP_PTR, 2);
5239	/* Read the PM Capabilities */
5240	id = pci_read_config(dev, cap, 1);
5241	if (id != PCIY_PMG)     /* Something wrong */
5242		return;
5243	/* OK, we have the power capabilities, so
5244	   now get the status register */
5245	cap += PCIR_POWER_STATUS;
5246	status = pci_read_config(dev, cap, 2);
5247	status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE;
5248	pci_write_config(dev, cap, status, 2);
5249	return;
5250}
5251
5252static void
5253igb_led_func(void *arg, int onoff)
5254{
5255	struct adapter	*adapter = arg;
5256
5257	IGB_CORE_LOCK(adapter);
5258	if (onoff) {
5259		e1000_setup_led(&adapter->hw);
5260		e1000_led_on(&adapter->hw);
5261	} else {
5262		e1000_led_off(&adapter->hw);
5263		e1000_cleanup_led(&adapter->hw);
5264	}
5265	IGB_CORE_UNLOCK(adapter);
5266}
5267
5268/**********************************************************************
5269 *
5270 *  Update the board statistics counters.
5271 *
5272 **********************************************************************/
5273static void
5274igb_update_stats_counters(struct adapter *adapter)
5275{
5276	struct ifnet		*ifp;
5277        struct e1000_hw		*hw = &adapter->hw;
5278	struct e1000_hw_stats	*stats;
5279
5280	/*
5281	** The virtual function adapter has only a
5282	** small, controlled set of stats, so do only
5283	** those and return.
5284	*/
5285	if (adapter->vf_ifp) {
5286		igb_update_vf_stats_counters(adapter);
5287		return;
5288	}
5289
5290	stats = (struct e1000_hw_stats	*)adapter->stats;
5291
5292	if(adapter->hw.phy.media_type == e1000_media_type_copper ||
5293	   (E1000_READ_REG(hw, E1000_STATUS) & E1000_STATUS_LU)) {
5294		stats->symerrs +=
5295		    E1000_READ_REG(hw,E1000_SYMERRS);
5296		stats->sec += E1000_READ_REG(hw, E1000_SEC);
5297	}
5298
5299	stats->crcerrs += E1000_READ_REG(hw, E1000_CRCERRS);
5300	stats->mpc += E1000_READ_REG(hw, E1000_MPC);
5301	stats->scc += E1000_READ_REG(hw, E1000_SCC);
5302	stats->ecol += E1000_READ_REG(hw, E1000_ECOL);
5303
5304	stats->mcc += E1000_READ_REG(hw, E1000_MCC);
5305	stats->latecol += E1000_READ_REG(hw, E1000_LATECOL);
5306	stats->colc += E1000_READ_REG(hw, E1000_COLC);
5307	stats->dc += E1000_READ_REG(hw, E1000_DC);
5308	stats->rlec += E1000_READ_REG(hw, E1000_RLEC);
5309	stats->xonrxc += E1000_READ_REG(hw, E1000_XONRXC);
5310	stats->xontxc += E1000_READ_REG(hw, E1000_XONTXC);
5311	/*
5312	** For watchdog management we need to know if we have been
5313	** paused during the last interval, so capture that here.
5314	*/
5315        adapter->pause_frames = E1000_READ_REG(&adapter->hw, E1000_XOFFRXC);
5316        stats->xoffrxc += adapter->pause_frames;
5317	stats->xofftxc += E1000_READ_REG(hw, E1000_XOFFTXC);
5318	stats->fcruc += E1000_READ_REG(hw, E1000_FCRUC);
5319	stats->prc64 += E1000_READ_REG(hw, E1000_PRC64);
5320	stats->prc127 += E1000_READ_REG(hw, E1000_PRC127);
5321	stats->prc255 += E1000_READ_REG(hw, E1000_PRC255);
5322	stats->prc511 += E1000_READ_REG(hw, E1000_PRC511);
5323	stats->prc1023 += E1000_READ_REG(hw, E1000_PRC1023);
5324	stats->prc1522 += E1000_READ_REG(hw, E1000_PRC1522);
5325	stats->gprc += E1000_READ_REG(hw, E1000_GPRC);
5326	stats->bprc += E1000_READ_REG(hw, E1000_BPRC);
5327	stats->mprc += E1000_READ_REG(hw, E1000_MPRC);
5328	stats->gptc += E1000_READ_REG(hw, E1000_GPTC);
5329
5330	/* For the 64-bit byte counters the low dword must be read first. */
5331	/* Both registers clear on the read of the high dword */
5332
5333	stats->gorc += E1000_READ_REG(hw, E1000_GORCL) +
5334	    ((u64)E1000_READ_REG(hw, E1000_GORCH) << 32);
5335	stats->gotc += E1000_READ_REG(hw, E1000_GOTCL) +
5336	    ((u64)E1000_READ_REG(hw, E1000_GOTCH) << 32);
5337
5338	stats->rnbc += E1000_READ_REG(hw, E1000_RNBC);
5339	stats->ruc += E1000_READ_REG(hw, E1000_RUC);
5340	stats->rfc += E1000_READ_REG(hw, E1000_RFC);
5341	stats->roc += E1000_READ_REG(hw, E1000_ROC);
5342	stats->rjc += E1000_READ_REG(hw, E1000_RJC);
5343
5344	stats->tor += E1000_READ_REG(hw, E1000_TORH);
5345	stats->tot += E1000_READ_REG(hw, E1000_TOTH);
5346
5347	stats->tpr += E1000_READ_REG(hw, E1000_TPR);
5348	stats->tpt += E1000_READ_REG(hw, E1000_TPT);
5349	stats->ptc64 += E1000_READ_REG(hw, E1000_PTC64);
5350	stats->ptc127 += E1000_READ_REG(hw, E1000_PTC127);
5351	stats->ptc255 += E1000_READ_REG(hw, E1000_PTC255);
5352	stats->ptc511 += E1000_READ_REG(hw, E1000_PTC511);
5353	stats->ptc1023 += E1000_READ_REG(hw, E1000_PTC1023);
5354	stats->ptc1522 += E1000_READ_REG(hw, E1000_PTC1522);
5355	stats->mptc += E1000_READ_REG(hw, E1000_MPTC);
5356	stats->bptc += E1000_READ_REG(hw, E1000_BPTC);
5357
5358	/* Interrupt Counts */
5359
5360	stats->iac += E1000_READ_REG(hw, E1000_IAC);
5361	stats->icrxptc += E1000_READ_REG(hw, E1000_ICRXPTC);
5362	stats->icrxatc += E1000_READ_REG(hw, E1000_ICRXATC);
5363	stats->ictxptc += E1000_READ_REG(hw, E1000_ICTXPTC);
5364	stats->ictxatc += E1000_READ_REG(hw, E1000_ICTXATC);
5365	stats->ictxqec += E1000_READ_REG(hw, E1000_ICTXQEC);
5366	stats->ictxqmtc += E1000_READ_REG(hw, E1000_ICTXQMTC);
5367	stats->icrxdmtc += E1000_READ_REG(hw, E1000_ICRXDMTC);
5368	stats->icrxoc += E1000_READ_REG(hw, E1000_ICRXOC);
5369
5370	/* Host to Card Statistics */
5371
5372	stats->cbtmpc += E1000_READ_REG(hw, E1000_CBTMPC);
5373	stats->htdpmc += E1000_READ_REG(hw, E1000_HTDPMC);
5374	stats->cbrdpc += E1000_READ_REG(hw, E1000_CBRDPC);
5375	stats->cbrmpc += E1000_READ_REG(hw, E1000_CBRMPC);
5376	stats->rpthc += E1000_READ_REG(hw, E1000_RPTHC);
5377	stats->hgptc += E1000_READ_REG(hw, E1000_HGPTC);
5378	stats->htcbdpc += E1000_READ_REG(hw, E1000_HTCBDPC);
5379	stats->hgorc += (E1000_READ_REG(hw, E1000_HGORCL) +
5380	    ((u64)E1000_READ_REG(hw, E1000_HGORCH) << 32));
5381	stats->hgotc += (E1000_READ_REG(hw, E1000_HGOTCL) +
5382	    ((u64)E1000_READ_REG(hw, E1000_HGOTCH) << 32));
5383	stats->lenerrs += E1000_READ_REG(hw, E1000_LENERRS);
5384	stats->scvpc += E1000_READ_REG(hw, E1000_SCVPC);
5385	stats->hrmpc += E1000_READ_REG(hw, E1000_HRMPC);
5386
5387	stats->algnerrc += E1000_READ_REG(hw, E1000_ALGNERRC);
5388	stats->rxerrc += E1000_READ_REG(hw, E1000_RXERRC);
5389	stats->tncrs += E1000_READ_REG(hw, E1000_TNCRS);
5390	stats->cexterr += E1000_READ_REG(hw, E1000_CEXTERR);
5391	stats->tsctc += E1000_READ_REG(hw, E1000_TSCTC);
5392	stats->tsctfc += E1000_READ_REG(hw, E1000_TSCTFC);
5393
5394	ifp = adapter->ifp;
5395	ifp->if_collisions = stats->colc;
5396
5397	/* Rx Errors */
5398	ifp->if_ierrors = adapter->dropped_pkts + stats->rxerrc +
5399	    stats->crcerrs + stats->algnerrc +
5400	    stats->ruc + stats->roc + stats->mpc + stats->cexterr;
5401
5402	/* Tx Errors */
5403	ifp->if_oerrors = stats->ecol +
5404	    stats->latecol + adapter->watchdog_events;
5405
5406	/* Driver specific counters */
5407	adapter->device_control = E1000_READ_REG(hw, E1000_CTRL);
5408	adapter->rx_control = E1000_READ_REG(hw, E1000_RCTL);
5409	adapter->int_mask = E1000_READ_REG(hw, E1000_IMS);
5410	adapter->eint_mask = E1000_READ_REG(hw, E1000_EIMS);
5411	adapter->packet_buf_alloc_tx =
5412	    ((E1000_READ_REG(hw, E1000_PBA) & 0xffff0000) >> 16);
5413	adapter->packet_buf_alloc_rx =
5414	    (E1000_READ_REG(hw, E1000_PBA) & 0xffff);
5415}
5416
5417
5418/**********************************************************************
5419 *
5420 *  Initialize the VF board statistics counters.
5421 *
5422 **********************************************************************/
5423static void
5424igb_vf_init_stats(struct adapter *adapter)
5425{
5426	struct e1000_hw *hw = &adapter->hw;
5427	struct e1000_vf_stats	*stats;
5428
5429	stats = (struct e1000_vf_stats	*)adapter->stats;
5430	if (stats == NULL)
5431		return;
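	/*
	** Snapshot the current VF counter values; igb_update_vf_stats_counters()
	** later accumulates the deltas from these baselines.
	*/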
5432	stats->last_gprc = E1000_READ_REG(hw, E1000_VFGPRC);
5433	stats->last_gorc = E1000_READ_REG(hw, E1000_VFGORC);
5434	stats->last_gptc = E1000_READ_REG(hw, E1000_VFGPTC);
5435	stats->last_gotc = E1000_READ_REG(hw, E1000_VFGOTC);
5436	stats->last_mprc = E1000_READ_REG(hw, E1000_VFMPRC);
5437}
5438
5439/**********************************************************************
5440 *
5441 *  Update the VF board statistics counters.
5442 *
5443 **********************************************************************/
5444static void
5445igb_update_vf_stats_counters(struct adapter *adapter)
5446{
5447	struct e1000_hw *hw = &adapter->hw;
5448	struct e1000_vf_stats	*stats;
5449
5450	if (adapter->link_speed == 0)
5451		return;
5452
5453	stats = (struct e1000_vf_stats	*)adapter->stats;
5454
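	/*
	** UPDATE_VF_REG accumulates the change in each VF counter register
	** relative to the last_* snapshot taken in igb_vf_init_stats(),
	** presumably because the VF counters are not clear-on-read.
	*/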
5455	UPDATE_VF_REG(E1000_VFGPRC,
5456	    stats->last_gprc, stats->gprc);
5457	UPDATE_VF_REG(E1000_VFGORC,
5458	    stats->last_gorc, stats->gorc);
5459	UPDATE_VF_REG(E1000_VFGPTC,
5460	    stats->last_gptc, stats->gptc);
5461	UPDATE_VF_REG(E1000_VFGOTC,
5462	    stats->last_gotc, stats->gotc);
5463	UPDATE_VF_REG(E1000_VFMPRC,
5464	    stats->last_mprc, stats->mprc);
5465}
5466
5467/* Export a single 32-bit register via a read-only sysctl. */
5468static int
5469igb_sysctl_reg_handler(SYSCTL_HANDLER_ARGS)
5470{
5471	struct adapter *adapter;
5472	u_int val;
5473
5474	adapter = oidp->oid_arg1;
5475	val = E1000_READ_REG(&adapter->hw, oidp->oid_arg2);
5476	return (sysctl_handle_int(oidp, &val, 0, req));
5477}
5478
5479/*
5480**  Tuneable interrupt rate handler
5481*/
5482static int
5483igb_sysctl_interrupt_rate_handler(SYSCTL_HANDLER_ARGS)
5484{
5485	struct igb_queue	*que = ((struct igb_queue *)oidp->oid_arg1);
5486	int			error;
5487	u32			reg, usec, rate;
5488
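	/*
	** The EITR interval field lives in bits [14:2]; the code below
	** treats it as microseconds and converts it to an approximate
	** interrupts-per-second rate for reporting.
	*/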
5489	reg = E1000_READ_REG(&que->adapter->hw, E1000_EITR(que->msix));
5490	usec = ((reg & 0x7FFC) >> 2);
5491	if (usec > 0)
5492		rate = 1000000 / usec;
5493	else
5494		rate = 0;
5495	error = sysctl_handle_int(oidp, &rate, 0, req);
5496	if (error || !req->newptr)
5497		return (error);
5498	return (0);
5499}
5500
5501/*
5502 * Add sysctl variables, one per statistic, to the system.
5503 */
5504static void
5505igb_add_hw_stats(struct adapter *adapter)
5506{
5507	device_t dev = adapter->dev;
5508
5509	struct tx_ring *txr = adapter->tx_rings;
5510	struct rx_ring *rxr = adapter->rx_rings;
5511
5512	struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev);
5513	struct sysctl_oid *tree = device_get_sysctl_tree(dev);
5514	struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree);
5515	struct e1000_hw_stats *stats = adapter->stats;
5516
5517	struct sysctl_oid *stat_node, *queue_node, *int_node, *host_node;
5518	struct sysctl_oid_list *stat_list, *queue_list, *int_list, *host_list;
5519
5520#define QUEUE_NAME_LEN 32
5521	char namebuf[QUEUE_NAME_LEN];
5522
5523	/* Driver Statistics */
5524	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "link_irq",
5525			CTLFLAG_RD, &adapter->link_irq, 0,
5526			"Link MSIX IRQ Handled");
5527	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "dropped",
5528			CTLFLAG_RD, &adapter->dropped_pkts,
5529			"Driver dropped packets");
5530	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_dma_fail",
5531			CTLFLAG_RD, &adapter->no_tx_dma_setup,
5532			"Driver tx dma failure in xmit");
5533	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_overruns",
5534			CTLFLAG_RD, &adapter->rx_overruns,
5535			"RX overruns");
5536	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "watchdog_timeouts",
5537			CTLFLAG_RD, &adapter->watchdog_events,
5538			"Watchdog timeouts");
5539
5540	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "device_control",
5541			CTLFLAG_RD, &adapter->device_control,
5542			"Device Control Register");
5543	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_control",
5544			CTLFLAG_RD, &adapter->rx_control,
5545			"Receiver Control Register");
5546	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "interrupt_mask",
5547			CTLFLAG_RD, &adapter->int_mask,
5548			"Interrupt Mask");
5549	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "extended_int_mask",
5550			CTLFLAG_RD, &adapter->eint_mask,
5551			"Extended Interrupt Mask");
5552	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_buf_alloc",
5553			CTLFLAG_RD, &adapter->packet_buf_alloc_tx,
5554			"Transmit Buffer Packet Allocation");
5555	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_buf_alloc",
5556			CTLFLAG_RD, &adapter->packet_buf_alloc_rx,
5557			"Receive Buffer Packet Allocation");
5558	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_high_water",
5559			CTLFLAG_RD, &adapter->hw.fc.high_water, 0,
5560			"Flow Control High Watermark");
5561	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_low_water",
5562			CTLFLAG_RD, &adapter->hw.fc.low_water, 0,
5563			"Flow Control Low Watermark");
5564
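	/*
	** Per-queue statistics: one sysctl node per TX/RX ring pair,
	** exposing the descriptor head/tail registers and packet counters.
	*/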
5565	for (int i = 0; i < adapter->num_queues; i++, rxr++, txr++) {
5566		struct lro_ctrl *lro = &rxr->lro;
5567
5568		snprintf(namebuf, QUEUE_NAME_LEN, "queue%d", i);
5569		queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
5570					    CTLFLAG_RD, NULL, "Queue Name");
5571		queue_list = SYSCTL_CHILDREN(queue_node);
5572
5573		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "interrupt_rate",
5574				CTLFLAG_RD, &adapter->queues[i],
5575				sizeof(&adapter->queues[i]),
5576				igb_sysctl_interrupt_rate_handler,
5577				"IU", "Interrupt Rate");
5578
5579		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_head",
5580				CTLFLAG_RD, adapter, E1000_TDH(txr->me),
5581				igb_sysctl_reg_handler, "IU",
5582				"Transmit Descriptor Head");
5583		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_tail",
5584				CTLFLAG_RD, adapter, E1000_TDT(txr->me),
5585				igb_sysctl_reg_handler, "IU",
5586				"Transmit Descriptor Tail");
5587		SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "no_desc_avail",
5588				CTLFLAG_RD, &txr->no_desc_avail,
5589				"Queue No Descriptor Available");
5590		SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "tx_packets",
5591				CTLFLAG_RD, &txr->tx_packets,
5592				"Queue Packets Transmitted");
5593
5594		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_head",
5595				CTLFLAG_RD, adapter, E1000_RDH(rxr->me),
5596				igb_sysctl_reg_handler, "IU",
5597				"Receive Descriptor Head");
5598		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_tail",
5599				CTLFLAG_RD, adapter, E1000_RDT(rxr->me),
5600				igb_sysctl_reg_handler, "IU",
5601				"Receive Descriptor Tail");
5602		SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "rx_packets",
5603				CTLFLAG_RD, &rxr->rx_packets,
5604				"Queue Packets Received");
5605		SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "rx_bytes",
5606				CTLFLAG_RD, &rxr->rx_bytes,
5607				"Queue Bytes Received");
5608		SYSCTL_ADD_UINT(ctx, queue_list, OID_AUTO, "lro_queued",
5609				CTLFLAG_RD, &lro->lro_queued, 0,
5610				"LRO Queued");
5611		SYSCTL_ADD_UINT(ctx, queue_list, OID_AUTO, "lro_flushed",
5612				CTLFLAG_RD, &lro->lro_flushed, 0,
5613				"LRO Flushed");
5614	}
5615
5616	/* MAC stats get their own sub node */
5617
5618	stat_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "mac_stats",
5619				    CTLFLAG_RD, NULL, "MAC Statistics");
5620	stat_list = SYSCTL_CHILDREN(stat_node);
5621
5622	/*
5623	** The VF adapter has a very limited set of stats
5624	** since it's not managing the hardware itself.
5625	*/
5626	if (adapter->vf_ifp) {
5627		SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd",
5628				CTLFLAG_RD, &stats->gprc,
5629				"Good Packets Received");
5630		SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
5631				CTLFLAG_RD, &stats->gptc,
5632				"Good Packets Transmitted");
5633		SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd",
5634				CTLFLAG_RD, &stats->gorc,
5635				"Good Octets Received");
5636		SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_txd",
5637				CTLFLAG_RD, &stats->gotc,
5638				"Good Octets Transmitted");
5639		SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd",
5640				CTLFLAG_RD, &stats->mprc,
5641				"Multicast Packets Received");
5642		return;
5643	}
5644
5645	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "excess_coll",
5646			CTLFLAG_RD, &stats->ecol,
5647			"Excessive collisions");
5648	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "single_coll",
5649			CTLFLAG_RD, &stats->scc,
5650			"Single collisions");
5651	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "multiple_coll",
5652			CTLFLAG_RD, &stats->mcc,
5653			"Multiple collisions");
5654	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "late_coll",
5655			CTLFLAG_RD, &stats->latecol,
5656			"Late collisions");
5657	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "collision_count",
5658			CTLFLAG_RD, &stats->colc,
5659			"Collision Count");
5660	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "symbol_errors",
5661			CTLFLAG_RD, &stats->symerrs,
5662			"Symbol Errors");
5663	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "sequence_errors",
5664			CTLFLAG_RD, &stats->sec,
5665			"Sequence Errors");
5666	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "defer_count",
5667			CTLFLAG_RD, &stats->dc,
5668			"Defer Count");
5669	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "missed_packets",
5670			CTLFLAG_RD, &stats->mpc,
5671			"Missed Packets");
5672	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_no_buff",
5673			CTLFLAG_RD, &stats->rnbc,
5674			"Receive No Buffers");
5675	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_undersize",
5676			CTLFLAG_RD, &stats->ruc,
5677			"Receive Undersize");
5678	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_fragmented",
5679			CTLFLAG_RD, &stats->rfc,
5680			"Fragmented Packets Received");
5681	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_oversize",
5682			CTLFLAG_RD, &stats->roc,
5683			"Oversized Packets Received");
5684	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_jabber",
5685			CTLFLAG_RD, &stats->rjc,
5686			"Received Jabber");
5687	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_errs",
5688			CTLFLAG_RD, &stats->rxerrc,
5689			"Receive Errors");
5690	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "crc_errs",
5691			CTLFLAG_RD, &stats->crcerrs,
5692			"CRC errors");
5693	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "alignment_errs",
5694			CTLFLAG_RD, &stats->algnerrc,
5695			"Alignment Errors");
5696	/* On 82575 these are collision counts */
5697	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "coll_ext_errs",
5698			CTLFLAG_RD, &stats->cexterr,
5699			"Collision/Carrier extension errors");
5700	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xon_recvd",
5701			CTLFLAG_RD, &stats->xonrxc,
5702			"XON Received");
5703	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xon_txd",
5704			CTLFLAG_RD, &stats->xontxc,
5705			"XON Transmitted");
5706	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xoff_recvd",
5707			CTLFLAG_RD, &stats->xoffrxc,
5708			"XOFF Received");
5709	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xoff_txd",
5710			CTLFLAG_RD, &stats->xofftxc,
5711			"XOFF Transmitted");
5712	/* Packet Reception Stats */
5713	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "total_pkts_recvd",
5714			CTLFLAG_RD, &stats->tpr,
5715			"Total Packets Received");
5716	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd",
5717			CTLFLAG_RD, &stats->gprc,
5718			"Good Packets Received");
5719	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_recvd",
5720			CTLFLAG_RD, &stats->bprc,
5721			"Broadcast Packets Received");
5722	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd",
5723			CTLFLAG_RD, &stats->mprc,
5724			"Multicast Packets Received");
5725	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_64",
5726			CTLFLAG_RD, &stats->prc64,
5727			"64 byte frames received");
5728	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_65_127",
5729			CTLFLAG_RD, &stats->prc127,
5730			"65-127 byte frames received");
5731	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_128_255",
5732			CTLFLAG_RD, &stats->prc255,
5733			"128-255 byte frames received");
5734	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_256_511",
5735			CTLFLAG_RD, &stats->prc511,
5736			"256-511 byte frames received");
5737	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_512_1023",
5738			CTLFLAG_RD, &stats->prc1023,
5739			"512-1023 byte frames received");
5740	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_1024_1522",
5741			CTLFLAG_RD, &stats->prc1522,
5742			"1024-1522 byte frames received");
5743	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd",
5744			CTLFLAG_RD, &stats->gorc,
5745			"Good Octets Received");
5746
5747	/* Packet Transmission Stats */
5748	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_txd",
5749			CTLFLAG_RD, &stats->gotc,
5750			"Good Octets Transmitted");
5751	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "total_pkts_txd",
5752			CTLFLAG_RD, &stats->tpt,
5753			"Total Packets Transmitted");
5754	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
5755			CTLFLAG_RD, &stats->gptc,
5756			"Good Packets Transmitted");
5757	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_txd",
5758			CTLFLAG_RD, &stats->bptc,
5759			"Broadcast Packets Transmitted");
5760	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_txd",
5761			CTLFLAG_RD, &stats->mptc,
5762			"Multicast Packets Transmitted");
5763	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_64",
5764			CTLFLAG_RD, &stats->ptc64,
5765			"64 byte frames transmitted");
5766	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_65_127",
5767			CTLFLAG_RD, &stats->ptc127,
5768			"65-127 byte frames transmitted");
5769	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_128_255",
5770			CTLFLAG_RD, &stats->ptc255,
5771			"128-255 byte frames transmitted");
5772	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_256_511",
5773			CTLFLAG_RD, &stats->ptc511,
5774			"256-511 byte frames transmitted");
5775	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_512_1023",
5776			CTLFLAG_RD, &stats->ptc1023,
5777			"512-1023 byte frames transmitted");
5778	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_1024_1522",
5779			CTLFLAG_RD, &stats->ptc1522,
5780			"1024-1522 byte frames transmitted");
5781	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tso_txd",
5782			CTLFLAG_RD, &stats->tsctc,
5783			"TSO Contexts Transmitted");
5784	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tso_ctx_fail",
5785			CTLFLAG_RD, &stats->tsctfc,
5786			"TSO Contexts Failed");
5787
5788
5789	/* Interrupt Stats */
5790
5791	int_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "interrupts",
5792				    CTLFLAG_RD, NULL, "Interrupt Statistics");
5793	int_list = SYSCTL_CHILDREN(int_node);
5794
5795	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "asserts",
5796			CTLFLAG_RD, &stats->iac,
5797			"Interrupt Assertion Count");
5798
5799	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_pkt_timer",
5800			CTLFLAG_RD, &stats->icrxptc,
5801			"Interrupt Cause Rx Pkt Timer Expire Count");
5802
5803	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_abs_timer",
5804			CTLFLAG_RD, &stats->icrxatc,
5805			"Interrupt Cause Rx Abs Timer Expire Count");
5806
5807	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_pkt_timer",
5808			CTLFLAG_RD, &stats->ictxptc,
5809			"Interrupt Cause Tx Pkt Timer Expire Count");
5810
5811	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_abs_timer",
5812			CTLFLAG_RD, &stats->ictxatc,
5813			"Interrupt Cause Tx Abs Timer Expire Count");
5814
5815	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_queue_empty",
5816			CTLFLAG_RD, &stats->ictxqec,
5817			"Interrupt Cause Tx Queue Empty Count");
5818
5819	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_queue_min_thresh",
5820			CTLFLAG_RD, &stats->ictxqmtc,
5821			"Interrupt Cause Tx Queue Min Thresh Count");
5822
5823	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_desc_min_thresh",
5824			CTLFLAG_RD, &stats->icrxdmtc,
5825			"Interrupt Cause Rx Desc Min Thresh Count");
5826
5827	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_overrun",
5828			CTLFLAG_RD, &stats->icrxoc,
5829			"Interrupt Cause Receiver Overrun Count");
5830
5831	/* Host to Card Stats */
5832
5833	host_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "host",
5834				    CTLFLAG_RD, NULL,
5835				    "Host to Card Statistics");
5836
5837	host_list = SYSCTL_CHILDREN(host_node);
5838
5839	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_tx_pkt",
5840			CTLFLAG_RD, &stats->cbtmpc,
5841			"Circuit Breaker Tx Packet Count");
5842
5843	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "host_tx_pkt_discard",
5844			CTLFLAG_RD, &stats->htdpmc,
5845			"Host Transmit Discarded Packets");
5846
5847	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "rx_pkt",
5848			CTLFLAG_RD, &stats->rpthc,
5849			"Rx Packets To Host");
5850
5851	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_rx_pkts",
5852			CTLFLAG_RD, &stats->cbrmpc,
5853			"Circuit Breaker Rx Packet Count");
5854
5855	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_rx_pkt_drop",
5856			CTLFLAG_RD, &stats->cbrdpc,
5857			"Circuit Breaker Rx Dropped Count");
5858
5859	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "tx_good_pkt",
5860			CTLFLAG_RD, &stats->hgptc,
5861			"Host Good Packets Tx Count");
5862
5863	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_tx_pkt_drop",
5864			CTLFLAG_RD, &stats->htcbdpc,
5865			"Host Tx Circuit Breaker Dropped Count");
5866
5867	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "rx_good_bytes",
5868			CTLFLAG_RD, &stats->hgorc,
5869			"Host Good Octets Received Count");
5870
5871	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "tx_good_bytes",
5872			CTLFLAG_RD, &stats->hgotc,
5873			"Host Good Octets Transmit Count");
5874
5875	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "length_errors",
5876			CTLFLAG_RD, &stats->lenerrs,
5877			"Length Errors");
5878
5879	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "serdes_violation_pkt",
5880			CTLFLAG_RD, &stats->scvpc,
5881			"SerDes/SGMII Code Violation Pkt Count");
5882
5883	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "header_redir_missed",
5884			CTLFLAG_RD, &stats->hrmpc,
5885			"Header Redirection Missed Packet Count");
5886}
5887
5888
5889/**********************************************************************
5890 *
5891 *  This routine provides a way to dump out the adapter eeprom,
5892 *  often a useful debug/service tool. This only dumps the first
5893 *  32 words; the data that matters lies within that range.
5894 *
5895 **********************************************************************/
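/*
** Writing 1 to the associated sysctl (attached elsewhere in the driver,
** typically as an "nvm" oid under the device's sysctl tree) triggers the
** dump; any other value is ignored.
*/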
5896static int
5897igb_sysctl_nvm_info(SYSCTL_HANDLER_ARGS)
5898{
5899	struct adapter *adapter;
5900	int error;
5901	int result;
5902
5903	result = -1;
5904	error = sysctl_handle_int(oidp, &result, 0, req);
5905
5906	if (error || !req->newptr)
5907		return (error);
5908
5909	/*
5910	 * This value will cause a hex dump of the
5911	 * first 32 16-bit words of the EEPROM to
5912	 * the screen.
5913	 */
5914	if (result == 1) {
5915		adapter = (struct adapter *)arg1;
5916		igb_print_nvm_info(adapter);
5917	}
5918
5919	return (error);
5920}
5921
5922static void
5923igb_print_nvm_info(struct adapter *adapter)
5924{
5925	u16	eeprom_data;
5926	int	i, j, row = 0;
5927
5928	/* It's a bit crude, but it gets the job done */
5929	printf("\nInterface EEPROM Dump:\n");
5930	printf("Offset\n0x0000  ");
5931	for (i = 0, j = 0; i < 32; i++, j++) {
5932		if (j == 8) { /* Make the offset block */
5933			j = 0; ++row;
5934			printf("\n0x00%x0  ", row);
5935		}
5936		e1000_read_nvm(&adapter->hw, i, 1, &eeprom_data);
5937		printf("%04x ", eeprom_data);
5938	}
5939	printf("\n");
5940}
5941
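/*
** Helper to publish a single integer tunable as a read/write sysctl
** under the device's tree, seeding *limit with the given default value.
*/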
5942static void
5943igb_set_sysctl_value(struct adapter *adapter, const char *name,
5944	const char *description, int *limit, int value)
5945{
5946	*limit = value;
5947	SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
5948	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
5949	    OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW, limit, value, description);
5950}
5951
5952/*
5953** Set flow control using sysctl:
5954** Flow control values:
5955** 	0 - off
5956**	1 - rx pause
5957**	2 - tx pause
5958**	3 - full
5959*/
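/*
** Example (assuming the "fc" oid name this handler is attached to under
** the device's sysctl tree): requesting full flow control on unit 0 might
** look like "sysctl dev.igb.0.fc=3" from userland.
*/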
5960static int
5961igb_set_flowcntl(SYSCTL_HANDLER_ARGS)
5962{
5963	int		error, input;
5964	struct adapter	*adapter = (struct adapter *) arg1;
5965
5966	input = adapter->fc; /* report the last requested setting */
5967	error = sysctl_handle_int(oidp, &input, 0, req);
5968
5969	if ((error) || (req->newptr == NULL))
5970		return (error);
5971
5972	switch (input) {
5973		case e1000_fc_rx_pause:
5974		case e1000_fc_tx_pause:
5975		case e1000_fc_full:
5976		case e1000_fc_none:
5977			adapter->hw.fc.requested_mode = input;
5978			adapter->fc = input;
5979			break;
5980		default:
5981			/* Do nothing */
5982			return (error);
5983	}
5984
5985	adapter->hw.fc.current_mode = adapter->hw.fc.requested_mode;
5986	e1000_force_mac_fc(&adapter->hw);
5987	return (error);
5988}
5989
5990/*
5991** Manage DMA Coalesce:
5992** Control values:
5993** 	0/1 - off/on
5994**	Legal timer values are:
5995**	250, 500, 1000-10000 in thousands
5996*/
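/*
** Example (assuming this handler is attached as the "dmac" oid under the
** device's sysctl tree): "sysctl dev.igb.0.dmac=1000" would enable DMA
** coalescing with a timer value of 1000 and reinitialize the interface.
*/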
5997static int
5998igb_sysctl_dmac(SYSCTL_HANDLER_ARGS)
5999{
6000	struct adapter *adapter = (struct adapter *) arg1;
6001	int		error;
6002
6003	error = sysctl_handle_int(oidp, &adapter->dmac, 0, req);
6004
6005	if ((error) || (req->newptr == NULL))
6006		return (error);
6007
6008	switch (adapter->dmac) {
6009		case 0:
6010			/* Disabling */
6011			break;
6012		case 1: /* Just enable and use default */
6013			adapter->dmac = 1000;
6014			break;
6015		case 250:
6016		case 500:
6017		case 1000:
6018		case 2000:
6019		case 3000:
6020		case 4000:
6021		case 5000:
6022		case 6000:
6023		case 7000:
6024		case 8000:
6025		case 9000:
6026		case 10000:
6027			/* Legal values - allow */
6028			break;
6029		default:
6030			/* Do nothing, illegal value */
6031			adapter->dmac = 0;
6032			return (error);
6033	}
6034	/* Reinit the interface */
6035	igb_init(adapter);
6036	return (error);
6037}
6038
6039/*
6040** Manage Energy Efficient Ethernet:
6041** Control values:
6042**     0/1 - enabled/disabled
6043*/
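/*
** Writing a non-zero value disables EEE; the change takes effect through a
** reinitialization of the interface performed under the core lock.
*/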
6044static int
6045igb_sysctl_eee(SYSCTL_HANDLER_ARGS)
6046{
6047	struct adapter	*adapter = (struct adapter *) arg1;
6048	int		error, value;
6049
6050	value = adapter->hw.dev_spec._82575.eee_disable;
6051	error = sysctl_handle_int(oidp, &value, 0, req);
6052	if (error || req->newptr == NULL)
6053		return (error);
6054	IGB_CORE_LOCK(adapter);
6055	adapter->hw.dev_spec._82575.eee_disable = (value != 0);
6056	igb_init_locked(adapter);
6057	IGB_CORE_UNLOCK(adapter);
6058	return (0);
6059}
6060