1/******************************************************************************
2
3  Copyright (c) 2001-2013, Intel Corporation
4  All rights reserved.
5
6  Redistribution and use in source and binary forms, with or without
7  modification, are permitted provided that the following conditions are met:
8
9   1. Redistributions of source code must retain the above copyright notice,
10      this list of conditions and the following disclaimer.
11
12   2. Redistributions in binary form must reproduce the above copyright
13      notice, this list of conditions and the following disclaimer in the
14      documentation and/or other materials provided with the distribution.
15
16   3. Neither the name of the Intel Corporation nor the names of its
17      contributors may be used to endorse or promote products derived from
18      this software without specific prior written permission.
19
20  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
24  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30  POSSIBILITY OF SUCH DAMAGE.
31
32******************************************************************************/
33/*$FreeBSD$*/
34
35
36#include "opt_inet.h"
37#include "opt_inet6.h"
38
39#ifdef HAVE_KERNEL_OPTION_HEADERS
40#include "opt_device_polling.h"
41#include "opt_altq.h"
42#endif
43
44#include <sys/param.h>
45#include <sys/systm.h>
46#ifndef IGB_LEGACY_TX
47#include <sys/buf_ring.h>
48#endif
49#include <sys/bus.h>
50#include <sys/endian.h>
51#include <sys/kernel.h>
52#include <sys/kthread.h>
53#include <sys/malloc.h>
54#include <sys/mbuf.h>
55#include <sys/module.h>
56#include <sys/rman.h>
57#include <sys/socket.h>
58#include <sys/sockio.h>
59#include <sys/sysctl.h>
60#include <sys/taskqueue.h>
61#include <sys/eventhandler.h>
62#include <sys/pcpu.h>
63#include <sys/smp.h>
64#include <machine/smp.h>
65#include <machine/bus.h>
66#include <machine/resource.h>
67
68#include <net/bpf.h>
69#include <net/ethernet.h>
70#include <net/if.h>
71#include <net/if_arp.h>
72#include <net/if_dl.h>
73#include <net/if_media.h>
74
75#include <net/if_types.h>
76#include <net/if_vlan_var.h>
77
78#include <netinet/in_systm.h>
79#include <netinet/in.h>
80#include <netinet/if_ether.h>
81#include <netinet/ip.h>
82#include <netinet/ip6.h>
83#include <netinet/tcp.h>
84#include <netinet/tcp_lro.h>
85#include <netinet/udp.h>
86
87#include <machine/in_cksum.h>
88#include <dev/led/led.h>
89#include <dev/pci/pcivar.h>
90#include <dev/pci/pcireg.h>
91
92#include "e1000_api.h"
93#include "e1000_82575.h"
94#include "if_igb.h"
95
96/*********************************************************************
97 *  Set this to one to display debug statistics
98 *********************************************************************/
99int	igb_display_debug_stats = 0;
100
101/*********************************************************************
102 *  Driver version:
103 *********************************************************************/
104char igb_driver_version[] = "version - 2.3.10";
105
106
107/*********************************************************************
108 *  PCI Device ID Table
109 *
110 *  Used by probe to select devices to load on
111 *  Last field stores an index into e1000_strings
112 *  Last entry must be all 0s
113 *
114 *  { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
115 *********************************************************************/
116
117static igb_vendor_info_t igb_vendor_info_array[] =
118{
119	{ 0x8086, E1000_DEV_ID_82575EB_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
120	{ 0x8086, E1000_DEV_ID_82575EB_FIBER_SERDES,
121						PCI_ANY_ID, PCI_ANY_ID, 0},
122	{ 0x8086, E1000_DEV_ID_82575GB_QUAD_COPPER,
123						PCI_ANY_ID, PCI_ANY_ID, 0},
124	{ 0x8086, E1000_DEV_ID_82576,		PCI_ANY_ID, PCI_ANY_ID, 0},
125	{ 0x8086, E1000_DEV_ID_82576_NS,	PCI_ANY_ID, PCI_ANY_ID, 0},
126	{ 0x8086, E1000_DEV_ID_82576_NS_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
127	{ 0x8086, E1000_DEV_ID_82576_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
128	{ 0x8086, E1000_DEV_ID_82576_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
129	{ 0x8086, E1000_DEV_ID_82576_SERDES_QUAD,
130						PCI_ANY_ID, PCI_ANY_ID, 0},
131	{ 0x8086, E1000_DEV_ID_82576_QUAD_COPPER,
132						PCI_ANY_ID, PCI_ANY_ID, 0},
133	{ 0x8086, E1000_DEV_ID_82576_QUAD_COPPER_ET2,
134						PCI_ANY_ID, PCI_ANY_ID, 0},
135	{ 0x8086, E1000_DEV_ID_82576_VF,	PCI_ANY_ID, PCI_ANY_ID, 0},
136	{ 0x8086, E1000_DEV_ID_82580_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
137	{ 0x8086, E1000_DEV_ID_82580_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
138	{ 0x8086, E1000_DEV_ID_82580_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
139	{ 0x8086, E1000_DEV_ID_82580_SGMII,	PCI_ANY_ID, PCI_ANY_ID, 0},
140	{ 0x8086, E1000_DEV_ID_82580_COPPER_DUAL,
141						PCI_ANY_ID, PCI_ANY_ID, 0},
142	{ 0x8086, E1000_DEV_ID_82580_QUAD_FIBER,
143						PCI_ANY_ID, PCI_ANY_ID, 0},
144	{ 0x8086, E1000_DEV_ID_DH89XXCC_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
145	{ 0x8086, E1000_DEV_ID_DH89XXCC_SGMII,	PCI_ANY_ID, PCI_ANY_ID, 0},
146	{ 0x8086, E1000_DEV_ID_DH89XXCC_SFP,	PCI_ANY_ID, PCI_ANY_ID, 0},
147	{ 0x8086, E1000_DEV_ID_DH89XXCC_BACKPLANE,
148						PCI_ANY_ID, PCI_ANY_ID, 0},
149	{ 0x8086, E1000_DEV_ID_I350_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
150	{ 0x8086, E1000_DEV_ID_I350_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
151	{ 0x8086, E1000_DEV_ID_I350_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
152	{ 0x8086, E1000_DEV_ID_I350_SGMII,	PCI_ANY_ID, PCI_ANY_ID, 0},
153	{ 0x8086, E1000_DEV_ID_I350_VF,		PCI_ANY_ID, PCI_ANY_ID, 0},
154	{ 0x8086, E1000_DEV_ID_I210_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
155	{ 0x8086, E1000_DEV_ID_I210_COPPER_IT,	PCI_ANY_ID, PCI_ANY_ID, 0},
156	{ 0x8086, E1000_DEV_ID_I210_COPPER_OEM1,
157						PCI_ANY_ID, PCI_ANY_ID, 0},
158	{ 0x8086, E1000_DEV_ID_I210_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
159	{ 0x8086, E1000_DEV_ID_I210_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
160	{ 0x8086, E1000_DEV_ID_I210_SGMII,	PCI_ANY_ID, PCI_ANY_ID, 0},
161	{ 0x8086, E1000_DEV_ID_I211_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
162	/* required last entry */
163	{ 0, 0, 0, 0, 0}
164};
165
166/*********************************************************************
167 *  Table of branding strings for all supported NICs.
168 *********************************************************************/
169
170static char *igb_strings[] = {
171	"Intel(R) PRO/1000 Network Connection"
172};
173
174/*********************************************************************
175 *  Function prototypes
176 *********************************************************************/
177static int	igb_probe(device_t);
178static int	igb_attach(device_t);
179static int	igb_detach(device_t);
180static int	igb_shutdown(device_t);
181static int	igb_suspend(device_t);
182static int	igb_resume(device_t);
183#ifndef IGB_LEGACY_TX
184static int	igb_mq_start(struct ifnet *, struct mbuf *);
185static int	igb_mq_start_locked(struct ifnet *, struct tx_ring *);
186static void	igb_qflush(struct ifnet *);
187static void	igb_deferred_mq_start(void *, int);
188#else
189static void	igb_start(struct ifnet *);
190static void	igb_start_locked(struct tx_ring *, struct ifnet *ifp);
191#endif
192static int	igb_ioctl(struct ifnet *, u_long, caddr_t);
193static void	igb_init(void *);
194static void	igb_init_locked(struct adapter *);
195static void	igb_stop(void *);
196static void	igb_media_status(struct ifnet *, struct ifmediareq *);
197static int	igb_media_change(struct ifnet *);
198static void	igb_identify_hardware(struct adapter *);
199static int	igb_allocate_pci_resources(struct adapter *);
200static int	igb_allocate_msix(struct adapter *);
201static int	igb_allocate_legacy(struct adapter *);
202static int	igb_setup_msix(struct adapter *);
203static void	igb_free_pci_resources(struct adapter *);
204static void	igb_local_timer(void *);
205static void	igb_reset(struct adapter *);
206static int	igb_setup_interface(device_t, struct adapter *);
207static int	igb_allocate_queues(struct adapter *);
208static void	igb_configure_queues(struct adapter *);
209
210static int	igb_allocate_transmit_buffers(struct tx_ring *);
211static void	igb_setup_transmit_structures(struct adapter *);
212static void	igb_setup_transmit_ring(struct tx_ring *);
213static void	igb_initialize_transmit_units(struct adapter *);
214static void	igb_free_transmit_structures(struct adapter *);
215static void	igb_free_transmit_buffers(struct tx_ring *);
216
217static int	igb_allocate_receive_buffers(struct rx_ring *);
218static int	igb_setup_receive_structures(struct adapter *);
219static int	igb_setup_receive_ring(struct rx_ring *);
220static void	igb_initialize_receive_units(struct adapter *);
221static void	igb_free_receive_structures(struct adapter *);
222static void	igb_free_receive_buffers(struct rx_ring *);
223static void	igb_free_receive_ring(struct rx_ring *);
224
225static void	igb_enable_intr(struct adapter *);
226static void	igb_disable_intr(struct adapter *);
227static void	igb_update_stats_counters(struct adapter *);
228static bool	igb_txeof(struct tx_ring *);
229
230static __inline	void igb_rx_discard(struct rx_ring *, int);
231static __inline void igb_rx_input(struct rx_ring *,
232		    struct ifnet *, struct mbuf *, u32);
233
234static bool	igb_rxeof(struct igb_queue *, int, int *);
235static void	igb_rx_checksum(u32, struct mbuf *, u32);
236static bool	igb_tx_ctx_setup(struct tx_ring *, struct mbuf *);
237static bool	igb_tso_setup(struct tx_ring *, struct mbuf *, int,
238		    struct ip *, struct tcphdr *);
239static void	igb_set_promisc(struct adapter *);
240static void	igb_disable_promisc(struct adapter *);
241static void	igb_set_multi(struct adapter *);
242static void	igb_update_link_status(struct adapter *);
243static void	igb_refresh_mbufs(struct rx_ring *, int);
244
245static void	igb_register_vlan(void *, struct ifnet *, u16);
246static void	igb_unregister_vlan(void *, struct ifnet *, u16);
247static void	igb_setup_vlan_hw_support(struct adapter *);
248
249static int	igb_xmit(struct tx_ring *, struct mbuf **);
250static int	igb_dma_malloc(struct adapter *, bus_size_t,
251		    struct igb_dma_alloc *, int);
252static void	igb_dma_free(struct adapter *, struct igb_dma_alloc *);
253static int	igb_sysctl_nvm_info(SYSCTL_HANDLER_ARGS);
254static void	igb_print_nvm_info(struct adapter *);
255static int 	igb_is_valid_ether_addr(u8 *);
256static void     igb_add_hw_stats(struct adapter *);
257
258static void	igb_vf_init_stats(struct adapter *);
259static void	igb_update_vf_stats_counters(struct adapter *);
260
261/* Management and WOL Support */
262static void	igb_init_manageability(struct adapter *);
263static void	igb_release_manageability(struct adapter *);
264static void     igb_get_hw_control(struct adapter *);
265static void     igb_release_hw_control(struct adapter *);
266static void     igb_enable_wakeup(device_t);
267static void     igb_led_func(void *, int);
268
269static int	igb_irq_fast(void *);
270static void	igb_msix_que(void *);
271static void	igb_msix_link(void *);
272static void	igb_handle_que(void *context, int pending);
273static void	igb_handle_link(void *context, int pending);
274static void	igb_handle_link_locked(struct adapter *);
275
276static void	igb_set_sysctl_value(struct adapter *, const char *,
277		    const char *, int *, int);
278static int	igb_set_flowcntl(SYSCTL_HANDLER_ARGS);
279static int	igb_sysctl_dmac(SYSCTL_HANDLER_ARGS);
280static int	igb_sysctl_eee(SYSCTL_HANDLER_ARGS);
281
282#ifdef DEVICE_POLLING
283static poll_handler_t igb_poll;
284#endif /* POLLING */
285
286/*********************************************************************
287 *  FreeBSD Device Interface Entry Points
288 *********************************************************************/
289
290static device_method_t igb_methods[] = {
291	/* Device interface */
292	DEVMETHOD(device_probe, igb_probe),
293	DEVMETHOD(device_attach, igb_attach),
294	DEVMETHOD(device_detach, igb_detach),
295	DEVMETHOD(device_shutdown, igb_shutdown),
296	DEVMETHOD(device_suspend, igb_suspend),
297	DEVMETHOD(device_resume, igb_resume),
298	DEVMETHOD_END
299};
300
301static driver_t igb_driver = {
302	"igb", igb_methods, sizeof(struct adapter),
303};
304
305static devclass_t igb_devclass;
306DRIVER_MODULE(igb, pci, igb_driver, igb_devclass, 0, 0);
307MODULE_DEPEND(igb, pci, 1, 1, 1);
308MODULE_DEPEND(igb, ether, 1, 1, 1);
309
310/*********************************************************************
311 *  Tunable default values.
312 *********************************************************************/
313
314static SYSCTL_NODE(_hw, OID_AUTO, igb, CTLFLAG_RD, 0, "IGB driver parameters");
315
316/* Descriptor defaults */
317static int igb_rxd = IGB_DEFAULT_RXD;
318static int igb_txd = IGB_DEFAULT_TXD;
319TUNABLE_INT("hw.igb.rxd", &igb_rxd);
320TUNABLE_INT("hw.igb.txd", &igb_txd);
321SYSCTL_INT(_hw_igb, OID_AUTO, rxd, CTLFLAG_RDTUN, &igb_rxd, 0,
322    "Number of receive descriptors per queue");
323SYSCTL_INT(_hw_igb, OID_AUTO, txd, CTLFLAG_RDTUN, &igb_txd, 0,
324    "Number of transmit descriptors per queue");
325
326/*
327** AIM: Adaptive Interrupt Moderation
328** which means that the interrupt rate
329** is varied over time based on the
330** traffic for that interrupt vector
331*/
332static int igb_enable_aim = TRUE;
333TUNABLE_INT("hw.igb.enable_aim", &igb_enable_aim);
334SYSCTL_INT(_hw_igb, OID_AUTO, enable_aim, CTLFLAG_RW, &igb_enable_aim, 0,
335    "Enable adaptive interrupt moderation");
336
337/*
338 * MSIX should be the default for best performance,
339 * but this allows it to be forced off for testing.
340 */
341static int igb_enable_msix = 1;
342TUNABLE_INT("hw.igb.enable_msix", &igb_enable_msix);
343SYSCTL_INT(_hw_igb, OID_AUTO, enable_msix, CTLFLAG_RDTUN, &igb_enable_msix, 0,
344    "Enable MSI-X interrupts");
345
346/*
347** Tuneable Interrupt rate
348*/
349static int igb_max_interrupt_rate = 8000;
350TUNABLE_INT("hw.igb.max_interrupt_rate", &igb_max_interrupt_rate);
351SYSCTL_INT(_hw_igb, OID_AUTO, max_interrupt_rate, CTLFLAG_RDTUN,
352    &igb_max_interrupt_rate, 0, "Maximum interrupts per second");
353
354#if __FreeBSD_version >= 800000
355/*
356** Tuneable number of buffers in the buf-ring (drbr_xxx)
357*/
358static int igb_buf_ring_size = IGB_BR_SIZE;
359TUNABLE_INT("hw.igb.buf_ring_size", &igb_buf_ring_size);
360SYSCTL_INT(_hw_igb, OID_AUTO, buf_ring_size, CTLFLAG_RDTUN,
361    &igb_buf_ring_size, 0, "Size of the bufring");
362#endif
363
364/*
365** Header split causes the packet header to
366** be dma'd to a seperate mbuf from the payload.
367** this can have memory alignment benefits. But
368** another plus is that small packets often fit
369** into the header and thus use no cluster. Its
370** a very workload dependent type feature.
371*/
372static int igb_header_split = FALSE;
373TUNABLE_INT("hw.igb.hdr_split", &igb_header_split);
374SYSCTL_INT(_hw_igb, OID_AUTO, header_split, CTLFLAG_RDTUN, &igb_header_split, 0,
375    "Enable receive mbuf header split");
376
377/*
378** This will autoconfigure based on the
379** number of CPUs and max supported
380** MSIX messages if left at 0.
381*/
382static int igb_num_queues = 0;
383TUNABLE_INT("hw.igb.num_queues", &igb_num_queues);
384SYSCTL_INT(_hw_igb, OID_AUTO, num_queues, CTLFLAG_RDTUN, &igb_num_queues, 0,
385    "Number of queues to configure, 0 indicates autoconfigure");
386
387/*
388** Global variable to store last used CPU when binding queues
389** to CPUs in igb_allocate_msix.  Starts at CPU_FIRST and increments when a
390** queue is bound to a cpu.
391*/
392static int igb_last_bind_cpu = -1;
393
394/* How many packets rxeof tries to clean at a time */
395static int igb_rx_process_limit = 100;
396TUNABLE_INT("hw.igb.rx_process_limit", &igb_rx_process_limit);
397SYSCTL_INT(_hw_igb, OID_AUTO, rx_process_limit, CTLFLAG_RDTUN,
398    &igb_rx_process_limit, 0,
399    "Maximum number of received packets to process at a time, -1 means unlimited");
400
401#ifdef DEV_NETMAP	/* see ixgbe.c for details */
402#include <dev/netmap/if_igb_netmap.h>
403#endif /* DEV_NETMAP */
404/*********************************************************************
405 *  Device identification routine
406 *
407 *  igb_probe determines if the driver should be loaded on
408 *  adapter based on PCI vendor/device id of the adapter.
409 *
410 *  return BUS_PROBE_DEFAULT on success, positive on failure
411 *********************************************************************/
412
413static int
414igb_probe(device_t dev)
415{
416	char		adapter_name[60];
417	uint16_t	pci_vendor_id = 0;
418	uint16_t	pci_device_id = 0;
419	uint16_t	pci_subvendor_id = 0;
420	uint16_t	pci_subdevice_id = 0;
421	igb_vendor_info_t *ent;
422
423	INIT_DEBUGOUT("igb_probe: begin");
424
425	pci_vendor_id = pci_get_vendor(dev);
426	if (pci_vendor_id != IGB_VENDOR_ID)
427		return (ENXIO);
428
429	pci_device_id = pci_get_device(dev);
430	pci_subvendor_id = pci_get_subvendor(dev);
431	pci_subdevice_id = pci_get_subdevice(dev);
432
433	ent = igb_vendor_info_array;
434	while (ent->vendor_id != 0) {
435		if ((pci_vendor_id == ent->vendor_id) &&
436		    (pci_device_id == ent->device_id) &&
437
438		    ((pci_subvendor_id == ent->subvendor_id) ||
439		    (ent->subvendor_id == PCI_ANY_ID)) &&
440
441		    ((pci_subdevice_id == ent->subdevice_id) ||
442		    (ent->subdevice_id == PCI_ANY_ID))) {
443			sprintf(adapter_name, "%s %s",
444				igb_strings[ent->index],
445				igb_driver_version);
446			device_set_desc_copy(dev, adapter_name);
447			return (BUS_PROBE_DEFAULT);
448		}
449		ent++;
450	}
451
452	return (ENXIO);
453}
454
455/*********************************************************************
456 *  Device initialization routine
457 *
458 *  The attach entry point is called when the driver is being loaded.
459 *  This routine identifies the type of hardware, allocates all resources
460 *  and initializes the hardware.
461 *
462 *  return 0 on success, positive on failure
463 *********************************************************************/
464
465static int
466igb_attach(device_t dev)
467{
468	struct adapter	*adapter;
469	int		error = 0;
470	u16		eeprom_data;
471
472	INIT_DEBUGOUT("igb_attach: begin");
473
474	if (resource_disabled("igb", device_get_unit(dev))) {
475		device_printf(dev, "Disabled by device hint\n");
476		return (ENXIO);
477	}
478
479	adapter = device_get_softc(dev);
480	adapter->dev = adapter->osdep.dev = dev;
481	IGB_CORE_LOCK_INIT(adapter, device_get_nameunit(dev));
482
483	/* SYSCTL stuff */
484	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
485	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
486	    OID_AUTO, "nvm", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
487	    igb_sysctl_nvm_info, "I", "NVM Information");
488
489	igb_set_sysctl_value(adapter, "enable_aim",
490	    "Interrupt Moderation", &adapter->enable_aim,
491	    igb_enable_aim);
492
493	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
494	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
495	    OID_AUTO, "fc", CTLTYPE_INT|CTLFLAG_RW,
496	    adapter, 0, igb_set_flowcntl, "I", "Flow Control");
497
498	callout_init_mtx(&adapter->timer, &adapter->core_mtx, 0);
499
500	/* Determine hardware and mac info */
501	igb_identify_hardware(adapter);
502
503	/* Setup PCI resources */
504	if (igb_allocate_pci_resources(adapter)) {
505		device_printf(dev, "Allocation of PCI resources failed\n");
506		error = ENXIO;
507		goto err_pci;
508	}
509
510	/* Do Shared Code initialization */
511	if (e1000_setup_init_funcs(&adapter->hw, TRUE)) {
512		device_printf(dev, "Setup of Shared code failed\n");
513		error = ENXIO;
514		goto err_pci;
515	}
516
517	e1000_get_bus_info(&adapter->hw);
518
519	/* Sysctl for limiting the amount of work done in the taskqueue */
520	igb_set_sysctl_value(adapter, "rx_processing_limit",
521	    "max number of rx packets to process",
522	    &adapter->rx_process_limit, igb_rx_process_limit);
523
524	/*
525	 * Validate number of transmit and receive descriptors. It
526	 * must not exceed hardware maximum, and must be multiple
527	 * of E1000_DBA_ALIGN.
528	 */
529	if (((igb_txd * sizeof(struct e1000_tx_desc)) % IGB_DBA_ALIGN) != 0 ||
530	    (igb_txd > IGB_MAX_TXD) || (igb_txd < IGB_MIN_TXD)) {
531		device_printf(dev, "Using %d TX descriptors instead of %d!\n",
532		    IGB_DEFAULT_TXD, igb_txd);
533		adapter->num_tx_desc = IGB_DEFAULT_TXD;
534	} else
535		adapter->num_tx_desc = igb_txd;
536	if (((igb_rxd * sizeof(struct e1000_rx_desc)) % IGB_DBA_ALIGN) != 0 ||
537	    (igb_rxd > IGB_MAX_RXD) || (igb_rxd < IGB_MIN_RXD)) {
538		device_printf(dev, "Using %d RX descriptors instead of %d!\n",
539		    IGB_DEFAULT_RXD, igb_rxd);
540		adapter->num_rx_desc = IGB_DEFAULT_RXD;
541	} else
542		adapter->num_rx_desc = igb_rxd;
543
544	adapter->hw.mac.autoneg = DO_AUTO_NEG;
545	adapter->hw.phy.autoneg_wait_to_complete = FALSE;
546	adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
547
548	/* Copper options */
549	if (adapter->hw.phy.media_type == e1000_media_type_copper) {
550		adapter->hw.phy.mdix = AUTO_ALL_MODES;
551		adapter->hw.phy.disable_polarity_correction = FALSE;
552		adapter->hw.phy.ms_type = IGB_MASTER_SLAVE;
553	}
554
555	/*
556	 * Set the frame limits assuming
557	 * standard ethernet sized frames.
558	 */
559	adapter->max_frame_size = ETHERMTU + ETHER_HDR_LEN + ETHERNET_FCS_SIZE;
560	adapter->min_frame_size = ETH_ZLEN + ETHERNET_FCS_SIZE;
561
562	/*
563	** Allocate and Setup Queues
564	*/
565	if (igb_allocate_queues(adapter)) {
566		error = ENOMEM;
567		goto err_pci;
568	}
569
570	/* Allocate the appropriate stats memory */
571	if (adapter->vf_ifp) {
572		adapter->stats =
573		    (struct e1000_vf_stats *)malloc(sizeof \
574		    (struct e1000_vf_stats), M_DEVBUF, M_NOWAIT | M_ZERO);
575		igb_vf_init_stats(adapter);
576	} else
577		adapter->stats =
578		    (struct e1000_hw_stats *)malloc(sizeof \
579		    (struct e1000_hw_stats), M_DEVBUF, M_NOWAIT | M_ZERO);
580	if (adapter->stats == NULL) {
581		device_printf(dev, "Can not allocate stats memory\n");
582		error = ENOMEM;
583		goto err_late;
584	}
585
586	/* Allocate multicast array memory. */
587	adapter->mta = malloc(sizeof(u8) * ETH_ADDR_LEN *
588	    MAX_NUM_MULTICAST_ADDRESSES, M_DEVBUF, M_NOWAIT);
589	if (adapter->mta == NULL) {
590		device_printf(dev, "Can not allocate multicast setup array\n");
591		error = ENOMEM;
592		goto err_late;
593	}
594
595	/* Some adapter-specific advanced features */
596	if (adapter->hw.mac.type >= e1000_i350) {
597		SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
598		    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
599		    OID_AUTO, "dmac", CTLTYPE_INT|CTLFLAG_RW,
600		    adapter, 0, igb_sysctl_dmac, "I", "DMA Coalesce");
601		SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
602		    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
603		    OID_AUTO, "eee_disabled", CTLTYPE_INT|CTLFLAG_RW,
604		    adapter, 0, igb_sysctl_eee, "I",
605		    "Disable Energy Efficient Ethernet");
606		if (adapter->hw.phy.media_type == e1000_media_type_copper)
607			e1000_set_eee_i350(&adapter->hw);
608	}
609
610	/*
611	** Start from a known state, this is
612	** important in reading the nvm and
613	** mac from that.
614	*/
615	e1000_reset_hw(&adapter->hw);
616
617	/* Make sure we have a good EEPROM before we read from it */
618	if (((adapter->hw.mac.type != e1000_i210) &&
619	    (adapter->hw.mac.type != e1000_i211)) &&
620	    (e1000_validate_nvm_checksum(&adapter->hw) < 0)) {
621		/*
622		** Some PCI-E parts fail the first check due to
623		** the link being in sleep state, call it again,
624		** if it fails a second time its a real issue.
625		*/
626		if (e1000_validate_nvm_checksum(&adapter->hw) < 0) {
627			device_printf(dev,
628			    "The EEPROM Checksum Is Not Valid\n");
629			error = EIO;
630			goto err_late;
631		}
632	}
633
634	/*
635	** Copy the permanent MAC address out of the EEPROM
636	*/
637	if (e1000_read_mac_addr(&adapter->hw) < 0) {
638		device_printf(dev, "EEPROM read error while reading MAC"
639		    " address\n");
640		error = EIO;
641		goto err_late;
642	}
643	/* Check its sanity */
644	if (!igb_is_valid_ether_addr(adapter->hw.mac.addr)) {
645		device_printf(dev, "Invalid MAC address\n");
646		error = EIO;
647		goto err_late;
648	}
649
650	/* Setup OS specific network interface */
651	if (igb_setup_interface(dev, adapter) != 0)
652		goto err_late;
653
654	/* Now get a good starting state */
655	igb_reset(adapter);
656
657	/* Initialize statistics */
658	igb_update_stats_counters(adapter);
659
660	adapter->hw.mac.get_link_status = 1;
661	igb_update_link_status(adapter);
662
663	/* Indicate SOL/IDER usage */
664	if (e1000_check_reset_block(&adapter->hw))
665		device_printf(dev,
666		    "PHY reset is blocked due to SOL/IDER session.\n");
667
668	/* Determine if we have to control management hardware */
669	adapter->has_manage = e1000_enable_mng_pass_thru(&adapter->hw);
670
671	/*
672	 * Setup Wake-on-Lan
673	 */
674	/* APME bit in EEPROM is mapped to WUC.APME */
675	eeprom_data = E1000_READ_REG(&adapter->hw, E1000_WUC) & E1000_WUC_APME;
676	if (eeprom_data)
677		adapter->wol = E1000_WUFC_MAG;
678
679	/* Register for VLAN events */
680	adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
681	     igb_register_vlan, adapter, EVENTHANDLER_PRI_FIRST);
682	adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
683	     igb_unregister_vlan, adapter, EVENTHANDLER_PRI_FIRST);
684
685	igb_add_hw_stats(adapter);
686
687	/* Tell the stack that the interface is not active */
688	adapter->ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
689	adapter->ifp->if_drv_flags |=  IFF_DRV_OACTIVE;
690
691	adapter->led_dev = led_create(igb_led_func, adapter,
692	    device_get_nameunit(dev));
693
694	/*
695	** Configure Interrupts
696	*/
697	if ((adapter->msix > 1) && (igb_enable_msix))
698		error = igb_allocate_msix(adapter);
699	else /* MSI or Legacy */
700		error = igb_allocate_legacy(adapter);
701	if (error)
702		goto err_late;
703
704#ifdef DEV_NETMAP
705	igb_netmap_attach(adapter);
706#endif /* DEV_NETMAP */
707	INIT_DEBUGOUT("igb_attach: end");
708
709	return (0);
710
711err_late:
712	igb_detach(dev);
713	igb_free_transmit_structures(adapter);
714	igb_free_receive_structures(adapter);
715	igb_release_hw_control(adapter);
716err_pci:
717	igb_free_pci_resources(adapter);
718	if (adapter->ifp != NULL)
719		if_free(adapter->ifp);
720	free(adapter->mta, M_DEVBUF);
721	IGB_CORE_LOCK_DESTROY(adapter);
722
723	return (error);
724}
725
726/*********************************************************************
727 *  Device removal routine
728 *
729 *  The detach entry point is called when the driver is being removed.
730 *  This routine stops the adapter and deallocates all the resources
731 *  that were allocated for driver operation.
732 *
733 *  return 0 on success, positive on failure
734 *********************************************************************/
735
736static int
737igb_detach(device_t dev)
738{
739	struct adapter	*adapter = device_get_softc(dev);
740	struct ifnet	*ifp = adapter->ifp;
741
742	INIT_DEBUGOUT("igb_detach: begin");
743
744	/* Make sure VLANS are not using driver */
745	if (adapter->ifp->if_vlantrunk != NULL) {
746		device_printf(dev,"Vlan in use, detach first\n");
747		return (EBUSY);
748	}
749
750	ether_ifdetach(adapter->ifp);
751
752	if (adapter->led_dev != NULL)
753		led_destroy(adapter->led_dev);
754
755#ifdef DEVICE_POLLING
756	if (ifp->if_capenable & IFCAP_POLLING)
757		ether_poll_deregister(ifp);
758#endif
759
760	IGB_CORE_LOCK(adapter);
761	adapter->in_detach = 1;
762	igb_stop(adapter);
763	IGB_CORE_UNLOCK(adapter);
764
765	e1000_phy_hw_reset(&adapter->hw);
766
767	/* Give control back to firmware */
768	igb_release_manageability(adapter);
769	igb_release_hw_control(adapter);
770
771	if (adapter->wol) {
772		E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
773		E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
774		igb_enable_wakeup(dev);
775	}
776
777	/* Unregister VLAN events */
778	if (adapter->vlan_attach != NULL)
779		EVENTHANDLER_DEREGISTER(vlan_config, adapter->vlan_attach);
780	if (adapter->vlan_detach != NULL)
781		EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach);
782
783	callout_drain(&adapter->timer);
784
785#ifdef DEV_NETMAP
786	netmap_detach(adapter->ifp);
787#endif /* DEV_NETMAP */
788	igb_free_pci_resources(adapter);
789	bus_generic_detach(dev);
790	if_free(ifp);
791
792	igb_free_transmit_structures(adapter);
793	igb_free_receive_structures(adapter);
794	if (adapter->mta != NULL)
795		free(adapter->mta, M_DEVBUF);
796
797	IGB_CORE_LOCK_DESTROY(adapter);
798
799	return (0);
800}
801
802/*********************************************************************
803 *
804 *  Shutdown entry point
805 *
806 **********************************************************************/
807
808static int
809igb_shutdown(device_t dev)
810{
811	return igb_suspend(dev);
812}
813
814/*
815 * Suspend/resume device methods.
816 */
817static int
818igb_suspend(device_t dev)
819{
820	struct adapter *adapter = device_get_softc(dev);
821
822	IGB_CORE_LOCK(adapter);
823
824	igb_stop(adapter);
825
826        igb_release_manageability(adapter);
827	igb_release_hw_control(adapter);
828
829        if (adapter->wol) {
830                E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
831                E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
832                igb_enable_wakeup(dev);
833        }
834
835	IGB_CORE_UNLOCK(adapter);
836
837	return bus_generic_suspend(dev);
838}
839
840static int
841igb_resume(device_t dev)
842{
843	struct adapter *adapter = device_get_softc(dev);
844	struct tx_ring	*txr = adapter->tx_rings;
845	struct ifnet *ifp = adapter->ifp;
846
847	IGB_CORE_LOCK(adapter);
848	igb_init_locked(adapter);
849	igb_init_manageability(adapter);
850
851	if ((ifp->if_flags & IFF_UP) &&
852	    (ifp->if_drv_flags & IFF_DRV_RUNNING) && adapter->link_active) {
853		for (int i = 0; i < adapter->num_queues; i++, txr++) {
854			IGB_TX_LOCK(txr);
855#ifndef IGB_LEGACY_TX
856			/* Process the stack queue only if not depleted */
857			if (((txr->queue_status & IGB_QUEUE_DEPLETED) == 0) &&
858			    !drbr_empty(ifp, txr->br))
859				igb_mq_start_locked(ifp, txr);
860#else
861			if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
862				igb_start_locked(txr, ifp);
863#endif
864			IGB_TX_UNLOCK(txr);
865		}
866	}
867	IGB_CORE_UNLOCK(adapter);
868
869	return bus_generic_resume(dev);
870}
871
872
873#ifdef IGB_LEGACY_TX
874
875/*********************************************************************
876 *  Transmit entry point
877 *
878 *  igb_start is called by the stack to initiate a transmit.
879 *  The driver will remain in this routine as long as there are
880 *  packets to transmit and transmit resources are available.
881 *  In case resources are not available stack is notified and
882 *  the packet is requeued.
883 **********************************************************************/
884
885static void
886igb_start_locked(struct tx_ring *txr, struct ifnet *ifp)
887{
888	struct adapter	*adapter = ifp->if_softc;
889	struct mbuf	*m_head;
890
891	IGB_TX_LOCK_ASSERT(txr);
892
893	if ((ifp->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
894	    IFF_DRV_RUNNING)
895		return;
896	if (!adapter->link_active)
897		return;
898
899	/* Call cleanup if number of TX descriptors low */
900	if (txr->tx_avail <= IGB_TX_CLEANUP_THRESHOLD)
901		igb_txeof(txr);
902
903	while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
904		if (txr->tx_avail <= IGB_MAX_SCATTER) {
905			txr->queue_status |= IGB_QUEUE_DEPLETED;
906			break;
907		}
908		IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
909		if (m_head == NULL)
910			break;
911		/*
912		 *  Encapsulation can modify our pointer, and or make it
913		 *  NULL on failure.  In that event, we can't requeue.
914		 */
915		if (igb_xmit(txr, &m_head)) {
916			if (m_head != NULL)
917				IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
918			if (txr->tx_avail <= IGB_MAX_SCATTER)
919				txr->queue_status |= IGB_QUEUE_DEPLETED;
920			break;
921		}
922
923		/* Send a copy of the frame to the BPF listener */
924		ETHER_BPF_MTAP(ifp, m_head);
925
926		/* Set watchdog on */
927		txr->watchdog_time = ticks;
928		txr->queue_status |= IGB_QUEUE_WORKING;
929	}
930}
931
932/*
933 * Legacy TX driver routine, called from the
934 * stack, always uses tx[0], and spins for it.
935 * Should not be used with multiqueue tx
936 */
937static void
938igb_start(struct ifnet *ifp)
939{
940	struct adapter	*adapter = ifp->if_softc;
941	struct tx_ring	*txr = adapter->tx_rings;
942
943	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
944		IGB_TX_LOCK(txr);
945		igb_start_locked(txr, ifp);
946		IGB_TX_UNLOCK(txr);
947	}
948	return;
949}
950
951#else /* ~IGB_LEGACY_TX */
952
953/*
954** Multiqueue Transmit Entry:
955**  quick turnaround to the stack
956**
957*/
958static int
959igb_mq_start(struct ifnet *ifp, struct mbuf *m)
960{
961	struct adapter		*adapter = ifp->if_softc;
962	struct igb_queue	*que;
963	struct tx_ring		*txr;
964	int 			i, err = 0;
965
966	/* Which queue to use */
967	if ((m->m_flags & M_FLOWID) != 0)
968		i = m->m_pkthdr.flowid % adapter->num_queues;
969	else
970		i = curcpu % adapter->num_queues;
971	txr = &adapter->tx_rings[i];
972	que = &adapter->queues[i];
973
974	err = drbr_enqueue(ifp, txr->br, m);
975	if (err)
976		return (err);
977	if (IGB_TX_TRYLOCK(txr)) {
978		err = igb_mq_start_locked(ifp, txr);
979		IGB_TX_UNLOCK(txr);
980	} else
981		taskqueue_enqueue(que->tq, &txr->txq_task);
982
983	return (err);
984}
985
986static int
987igb_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr)
988{
989	struct adapter  *adapter = txr->adapter;
990        struct mbuf     *next;
991        int             err = 0, enq;
992
993	IGB_TX_LOCK_ASSERT(txr);
994
995	if (((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) ||
996	    adapter->link_active == 0)
997		return (ENETDOWN);
998
999	enq = 0;
1000
1001	/* Process the queue */
1002	while ((next = drbr_peek(ifp, txr->br)) != NULL) {
1003		if ((err = igb_xmit(txr, &next)) != 0) {
1004			if (next == NULL) {
1005				/* It was freed, move forward */
1006				drbr_advance(ifp, txr->br);
1007			} else {
1008				/*
1009				 * Still have one left, it may not be
1010				 * the same since the transmit function
1011				 * may have changed it.
1012				 */
1013				drbr_putback(ifp, txr->br, next);
1014			}
1015			break;
1016		}
1017		drbr_advance(ifp, txr->br);
1018		enq++;
1019		ifp->if_obytes += next->m_pkthdr.len;
1020		if (next->m_flags & M_MCAST)
1021			ifp->if_omcasts++;
1022		ETHER_BPF_MTAP(ifp, next);
1023		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
1024			break;
1025	}
1026	if (enq > 0) {
1027		/* Set the watchdog */
1028		txr->queue_status |= IGB_QUEUE_WORKING;
1029		txr->watchdog_time = ticks;
1030	}
1031	if (txr->tx_avail <= IGB_TX_CLEANUP_THRESHOLD)
1032		igb_txeof(txr);
1033	if (txr->tx_avail <= IGB_MAX_SCATTER)
1034		txr->queue_status |= IGB_QUEUE_DEPLETED;
1035	return (err);
1036}
1037
1038/*
1039 * Called from a taskqueue to drain queued transmit packets.
1040 */
1041static void
1042igb_deferred_mq_start(void *arg, int pending)
1043{
1044	struct tx_ring *txr = arg;
1045	struct adapter *adapter = txr->adapter;
1046	struct ifnet *ifp = adapter->ifp;
1047
1048	IGB_TX_LOCK(txr);
1049	if (!drbr_empty(ifp, txr->br))
1050		igb_mq_start_locked(ifp, txr);
1051	IGB_TX_UNLOCK(txr);
1052}
1053
1054/*
1055** Flush all ring buffers
1056*/
1057static void
1058igb_qflush(struct ifnet *ifp)
1059{
1060	struct adapter	*adapter = ifp->if_softc;
1061	struct tx_ring	*txr = adapter->tx_rings;
1062	struct mbuf	*m;
1063
1064	for (int i = 0; i < adapter->num_queues; i++, txr++) {
1065		IGB_TX_LOCK(txr);
1066		while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
1067			m_freem(m);
1068		IGB_TX_UNLOCK(txr);
1069	}
1070	if_qflush(ifp);
1071}
1072#endif /* ~IGB_LEGACY_TX */
1073
1074/*********************************************************************
1075 *  Ioctl entry point
1076 *
1077 *  igb_ioctl is called when the user wants to configure the
1078 *  interface.
1079 *
1080 *  return 0 on success, positive on failure
1081 **********************************************************************/
1082
1083static int
1084igb_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
1085{
1086	struct adapter	*adapter = ifp->if_softc;
1087	struct ifreq	*ifr = (struct ifreq *)data;
1088#if defined(INET) || defined(INET6)
1089	struct ifaddr	*ifa = (struct ifaddr *)data;
1090#endif
1091	bool		avoid_reset = FALSE;
1092	int		error = 0;
1093
1094	if (adapter->in_detach)
1095		return (error);
1096
1097	switch (command) {
1098	case SIOCSIFADDR:
1099#ifdef INET
1100		if (ifa->ifa_addr->sa_family == AF_INET)
1101			avoid_reset = TRUE;
1102#endif
1103#ifdef INET6
1104		if (ifa->ifa_addr->sa_family == AF_INET6)
1105			avoid_reset = TRUE;
1106#endif
1107		/*
1108		** Calling init results in link renegotiation,
1109		** so we avoid doing it when possible.
1110		*/
1111		if (avoid_reset) {
1112			ifp->if_flags |= IFF_UP;
1113			if (!(ifp->if_drv_flags & IFF_DRV_RUNNING))
1114				igb_init(adapter);
1115#ifdef INET
1116			if (!(ifp->if_flags & IFF_NOARP))
1117				arp_ifinit(ifp, ifa);
1118#endif
1119		} else
1120			error = ether_ioctl(ifp, command, data);
1121		break;
1122	case SIOCSIFMTU:
1123	    {
1124		int max_frame_size;
1125
1126		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)");
1127
1128		IGB_CORE_LOCK(adapter);
1129		max_frame_size = 9234;
1130		if (ifr->ifr_mtu > max_frame_size - ETHER_HDR_LEN -
1131		    ETHER_CRC_LEN) {
1132			IGB_CORE_UNLOCK(adapter);
1133			error = EINVAL;
1134			break;
1135		}
1136
1137		ifp->if_mtu = ifr->ifr_mtu;
1138		adapter->max_frame_size =
1139		    ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
1140		igb_init_locked(adapter);
1141		IGB_CORE_UNLOCK(adapter);
1142		break;
1143	    }
1144	case SIOCSIFFLAGS:
1145		IOCTL_DEBUGOUT("ioctl rcv'd:\
1146		    SIOCSIFFLAGS (Set Interface Flags)");
1147		IGB_CORE_LOCK(adapter);
1148		if (ifp->if_flags & IFF_UP) {
1149			if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) {
1150				if ((ifp->if_flags ^ adapter->if_flags) &
1151				    (IFF_PROMISC | IFF_ALLMULTI)) {
1152					igb_disable_promisc(adapter);
1153					igb_set_promisc(adapter);
1154				}
1155			} else
1156				igb_init_locked(adapter);
1157		} else
1158			if (ifp->if_drv_flags & IFF_DRV_RUNNING)
1159				igb_stop(adapter);
1160		adapter->if_flags = ifp->if_flags;
1161		IGB_CORE_UNLOCK(adapter);
1162		break;
1163	case SIOCADDMULTI:
1164	case SIOCDELMULTI:
1165		IOCTL_DEBUGOUT("ioctl rcv'd: SIOC(ADD|DEL)MULTI");
1166		if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1167			IGB_CORE_LOCK(adapter);
1168			igb_disable_intr(adapter);
1169			igb_set_multi(adapter);
1170#ifdef DEVICE_POLLING
1171			if (!(ifp->if_capenable & IFCAP_POLLING))
1172#endif
1173				igb_enable_intr(adapter);
1174			IGB_CORE_UNLOCK(adapter);
1175		}
1176		break;
1177	case SIOCSIFMEDIA:
1178		/* Check SOL/IDER usage */
1179		IGB_CORE_LOCK(adapter);
1180		if (e1000_check_reset_block(&adapter->hw)) {
1181			IGB_CORE_UNLOCK(adapter);
1182			device_printf(adapter->dev, "Media change is"
1183			    " blocked due to SOL/IDER session.\n");
1184			break;
1185		}
1186		IGB_CORE_UNLOCK(adapter);
1187	case SIOCGIFMEDIA:
1188		IOCTL_DEBUGOUT("ioctl rcv'd: \
1189		    SIOCxIFMEDIA (Get/Set Interface Media)");
1190		error = ifmedia_ioctl(ifp, ifr, &adapter->media, command);
1191		break;
1192	case SIOCSIFCAP:
1193	    {
1194		int mask, reinit;
1195
1196		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFCAP (Set Capabilities)");
1197		reinit = 0;
1198		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
1199#ifdef DEVICE_POLLING
1200		if (mask & IFCAP_POLLING) {
1201			if (ifr->ifr_reqcap & IFCAP_POLLING) {
1202				error = ether_poll_register(igb_poll, ifp);
1203				if (error)
1204					return (error);
1205				IGB_CORE_LOCK(adapter);
1206				igb_disable_intr(adapter);
1207				ifp->if_capenable |= IFCAP_POLLING;
1208				IGB_CORE_UNLOCK(adapter);
1209			} else {
1210				error = ether_poll_deregister(ifp);
1211				/* Enable interrupt even in error case */
1212				IGB_CORE_LOCK(adapter);
1213				igb_enable_intr(adapter);
1214				ifp->if_capenable &= ~IFCAP_POLLING;
1215				IGB_CORE_UNLOCK(adapter);
1216			}
1217		}
1218#endif
1219		if (mask & IFCAP_HWCSUM) {
1220			ifp->if_capenable ^= IFCAP_HWCSUM;
1221			reinit = 1;
1222		}
1223		if (mask & IFCAP_TSO4) {
1224			ifp->if_capenable ^= IFCAP_TSO4;
1225			reinit = 1;
1226		}
1227		if (mask & IFCAP_VLAN_HWTAGGING) {
1228			ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
1229			reinit = 1;
1230		}
1231		if (mask & IFCAP_VLAN_HWFILTER) {
1232			ifp->if_capenable ^= IFCAP_VLAN_HWFILTER;
1233			reinit = 1;
1234		}
1235		if (mask & IFCAP_VLAN_HWTSO) {
1236			ifp->if_capenable ^= IFCAP_VLAN_HWTSO;
1237			reinit = 1;
1238		}
1239		if (mask & IFCAP_LRO) {
1240			ifp->if_capenable ^= IFCAP_LRO;
1241			reinit = 1;
1242		}
1243		if (reinit && (ifp->if_drv_flags & IFF_DRV_RUNNING))
1244			igb_init(adapter);
1245		VLAN_CAPABILITIES(ifp);
1246		break;
1247	    }
1248
1249	default:
1250		error = ether_ioctl(ifp, command, data);
1251		break;
1252	}
1253
1254	return (error);
1255}
1256
1257
1258/*********************************************************************
1259 *  Init entry point
1260 *
1261 *  This routine is used in two ways. It is used by the stack as
1262 *  init entry point in network interface structure. It is also used
1263 *  by the driver as a hw/sw initialization routine to get to a
1264 *  consistent state.
1265 *
1266 *  return 0 on success, positive on failure
1267 **********************************************************************/
1268
1269static void
1270igb_init_locked(struct adapter *adapter)
1271{
1272	struct ifnet	*ifp = adapter->ifp;
1273	device_t	dev = adapter->dev;
1274
1275	INIT_DEBUGOUT("igb_init: begin");
1276
1277	IGB_CORE_LOCK_ASSERT(adapter);
1278
1279	igb_disable_intr(adapter);
1280	callout_stop(&adapter->timer);
1281
1282	/* Get the latest mac address, User can use a LAA */
1283        bcopy(IF_LLADDR(adapter->ifp), adapter->hw.mac.addr,
1284              ETHER_ADDR_LEN);
1285
1286	/* Put the address into the Receive Address Array */
1287	e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);
1288
1289	igb_reset(adapter);
1290	igb_update_link_status(adapter);
1291
1292	E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);
1293
1294	/* Set hardware offload abilities */
1295	ifp->if_hwassist = 0;
1296	if (ifp->if_capenable & IFCAP_TXCSUM) {
1297		ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
1298#if __FreeBSD_version >= 800000
1299		if (adapter->hw.mac.type == e1000_82576)
1300			ifp->if_hwassist |= CSUM_SCTP;
1301#endif
1302	}
1303
1304	if (ifp->if_capenable & IFCAP_TSO4)
1305		ifp->if_hwassist |= CSUM_TSO;
1306
1307	/* Configure for OS presence */
1308	igb_init_manageability(adapter);
1309
1310	/* Prepare transmit descriptors and buffers */
1311	igb_setup_transmit_structures(adapter);
1312	igb_initialize_transmit_units(adapter);
1313
1314	/* Setup Multicast table */
1315	igb_set_multi(adapter);
1316
1317	/*
1318	** Figure out the desired mbuf pool
1319	** for doing jumbo/packetsplit
1320	*/
1321	if (adapter->max_frame_size <= 2048)
1322		adapter->rx_mbuf_sz = MCLBYTES;
1323	else if (adapter->max_frame_size <= 4096)
1324		adapter->rx_mbuf_sz = MJUMPAGESIZE;
1325	else
1326		adapter->rx_mbuf_sz = MJUM9BYTES;
1327
1328	/* Prepare receive descriptors and buffers */
1329	if (igb_setup_receive_structures(adapter)) {
1330		device_printf(dev, "Could not setup receive structures\n");
1331		return;
1332	}
1333	igb_initialize_receive_units(adapter);
1334
1335        /* Enable VLAN support */
1336	if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING)
1337		igb_setup_vlan_hw_support(adapter);
1338
1339	/* Don't lose promiscuous settings */
1340	igb_set_promisc(adapter);
1341
1342	ifp->if_drv_flags |= IFF_DRV_RUNNING;
1343	ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
1344
1345	callout_reset(&adapter->timer, hz, igb_local_timer, adapter);
1346	e1000_clear_hw_cntrs_base_generic(&adapter->hw);
1347
1348	if (adapter->msix > 1) /* Set up queue routing */
1349		igb_configure_queues(adapter);
1350
1351	/* this clears any pending interrupts */
1352	E1000_READ_REG(&adapter->hw, E1000_ICR);
1353#ifdef DEVICE_POLLING
1354	/*
1355	 * Only enable interrupts if we are not polling, make sure
1356	 * they are off otherwise.
1357	 */
1358	if (ifp->if_capenable & IFCAP_POLLING)
1359		igb_disable_intr(adapter);
1360	else
1361#endif /* DEVICE_POLLING */
1362	{
1363		igb_enable_intr(adapter);
1364		E1000_WRITE_REG(&adapter->hw, E1000_ICS, E1000_ICS_LSC);
1365	}
1366
1367	/* Set Energy Efficient Ethernet */
1368	if (adapter->hw.phy.media_type == e1000_media_type_copper)
1369		e1000_set_eee_i350(&adapter->hw);
1370}
1371
1372static void
1373igb_init(void *arg)
1374{
1375	struct adapter *adapter = arg;
1376
1377	IGB_CORE_LOCK(adapter);
1378	igb_init_locked(adapter);
1379	IGB_CORE_UNLOCK(adapter);
1380}
1381
1382
1383static void
1384igb_handle_que(void *context, int pending)
1385{
1386	struct igb_queue *que = context;
1387	struct adapter *adapter = que->adapter;
1388	struct tx_ring *txr = que->txr;
1389	struct ifnet	*ifp = adapter->ifp;
1390
1391	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1392		bool	more;
1393
1394		more = igb_rxeof(que, adapter->rx_process_limit, NULL);
1395
1396		IGB_TX_LOCK(txr);
1397		igb_txeof(txr);
1398#ifndef IGB_LEGACY_TX
1399		/* Process the stack queue only if not depleted */
1400		if (((txr->queue_status & IGB_QUEUE_DEPLETED) == 0) &&
1401		    !drbr_empty(ifp, txr->br))
1402			igb_mq_start_locked(ifp, txr);
1403#else
1404		if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1405			igb_start_locked(txr, ifp);
1406#endif
1407		IGB_TX_UNLOCK(txr);
1408		/* Do we need another? */
1409		if (more) {
1410			taskqueue_enqueue(que->tq, &que->que_task);
1411			return;
1412		}
1413	}
1414
1415#ifdef DEVICE_POLLING
1416	if (ifp->if_capenable & IFCAP_POLLING)
1417		return;
1418#endif
1419	/* Reenable this interrupt */
1420	if (que->eims)
1421		E1000_WRITE_REG(&adapter->hw, E1000_EIMS, que->eims);
1422	else
1423		igb_enable_intr(adapter);
1424}
1425
1426/* Deal with link in a sleepable context */
1427static void
1428igb_handle_link(void *context, int pending)
1429{
1430	struct adapter *adapter = context;
1431
1432	IGB_CORE_LOCK(adapter);
1433	igb_handle_link_locked(adapter);
1434	IGB_CORE_UNLOCK(adapter);
1435}
1436
1437static void
1438igb_handle_link_locked(struct adapter *adapter)
1439{
1440	struct tx_ring	*txr = adapter->tx_rings;
1441	struct ifnet *ifp = adapter->ifp;
1442
1443	IGB_CORE_LOCK_ASSERT(adapter);
1444	adapter->hw.mac.get_link_status = 1;
1445	igb_update_link_status(adapter);
1446	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) && adapter->link_active) {
1447		for (int i = 0; i < adapter->num_queues; i++, txr++) {
1448			IGB_TX_LOCK(txr);
1449#ifndef IGB_LEGACY_TX
1450			/* Process the stack queue only if not depleted */
1451			if (((txr->queue_status & IGB_QUEUE_DEPLETED) == 0) &&
1452			    !drbr_empty(ifp, txr->br))
1453				igb_mq_start_locked(ifp, txr);
1454#else
1455			if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1456				igb_start_locked(txr, ifp);
1457#endif
1458			IGB_TX_UNLOCK(txr);
1459		}
1460	}
1461}
1462
1463/*********************************************************************
1464 *
1465 *  MSI/Legacy Deferred
1466 *  Interrupt Service routine
1467 *
1468 *********************************************************************/
1469static int
1470igb_irq_fast(void *arg)
1471{
1472	struct adapter		*adapter = arg;
1473	struct igb_queue	*que = adapter->queues;
1474	u32			reg_icr;
1475
1476
1477	reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1478
1479	/* Hot eject?  */
1480	if (reg_icr == 0xffffffff)
1481		return FILTER_STRAY;
1482
1483	/* Definitely not our interrupt.  */
1484	if (reg_icr == 0x0)
1485		return FILTER_STRAY;
1486
1487	if ((reg_icr & E1000_ICR_INT_ASSERTED) == 0)
1488		return FILTER_STRAY;
1489
1490	/*
1491	 * Mask interrupts until the taskqueue is finished running.  This is
1492	 * cheap, just assume that it is needed.  This also works around the
1493	 * MSI message reordering errata on certain systems.
1494	 */
1495	igb_disable_intr(adapter);
1496	taskqueue_enqueue(que->tq, &que->que_task);
1497
1498	/* Link status change */
1499	if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC))
1500		taskqueue_enqueue(que->tq, &adapter->link_task);
1501
1502	if (reg_icr & E1000_ICR_RXO)
1503		adapter->rx_overruns++;
1504	return FILTER_HANDLED;
1505}
1506
1507#ifdef DEVICE_POLLING
1508#if __FreeBSD_version >= 800000
1509#define POLL_RETURN_COUNT(a) (a)
1510static int
1511#else
1512#define POLL_RETURN_COUNT(a)
1513static void
1514#endif
1515igb_poll(struct ifnet *ifp, enum poll_cmd cmd, int count)
1516{
1517	struct adapter		*adapter = ifp->if_softc;
1518	struct igb_queue	*que;
1519	struct tx_ring		*txr;
1520	u32			reg_icr, rx_done = 0;
1521	u32			loop = IGB_MAX_LOOP;
1522	bool			more;
1523
1524	IGB_CORE_LOCK(adapter);
1525	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
1526		IGB_CORE_UNLOCK(adapter);
1527		return POLL_RETURN_COUNT(rx_done);
1528	}
1529
1530	if (cmd == POLL_AND_CHECK_STATUS) {
1531		reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1532		/* Link status change */
1533		if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC))
1534			igb_handle_link_locked(adapter);
1535
1536		if (reg_icr & E1000_ICR_RXO)
1537			adapter->rx_overruns++;
1538	}
1539	IGB_CORE_UNLOCK(adapter);
1540
1541	for (int i = 0; i < adapter->num_queues; i++) {
1542		que = &adapter->queues[i];
1543		txr = que->txr;
1544
1545		igb_rxeof(que, count, &rx_done);
1546
1547		IGB_TX_LOCK(txr);
1548		do {
1549			more = igb_txeof(txr);
1550		} while (loop-- && more);
1551#ifndef IGB_LEGACY_TX
1552		if (!drbr_empty(ifp, txr->br))
1553			igb_mq_start_locked(ifp, txr);
1554#else
1555		if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1556			igb_start_locked(txr, ifp);
1557#endif
1558		IGB_TX_UNLOCK(txr);
1559	}
1560
1561	return POLL_RETURN_COUNT(rx_done);
1562}
1563#endif /* DEVICE_POLLING */
1564
1565/*********************************************************************
1566 *
1567 *  MSIX Que Interrupt Service routine
1568 *
1569 **********************************************************************/
1570static void
1571igb_msix_que(void *arg)
1572{
1573	struct igb_queue *que = arg;
1574	struct adapter *adapter = que->adapter;
1575	struct ifnet   *ifp = adapter->ifp;
1576	struct tx_ring *txr = que->txr;
1577	struct rx_ring *rxr = que->rxr;
1578	u32		newitr = 0;
1579	bool		more_rx;
1580
1581	/* Ignore spurious interrupts */
1582	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
1583		return;
1584
1585	E1000_WRITE_REG(&adapter->hw, E1000_EIMC, que->eims);
1586	++que->irqs;
1587
1588	IGB_TX_LOCK(txr);
1589	igb_txeof(txr);
1590#ifndef IGB_LEGACY_TX
1591	/* Process the stack queue only if not depleted */
1592	if (((txr->queue_status & IGB_QUEUE_DEPLETED) == 0) &&
1593	    !drbr_empty(ifp, txr->br))
1594		igb_mq_start_locked(ifp, txr);
1595#else
1596	if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1597		igb_start_locked(txr, ifp);
1598#endif
1599	IGB_TX_UNLOCK(txr);
1600
1601	more_rx = igb_rxeof(que, adapter->rx_process_limit, NULL);
1602
1603	if (adapter->enable_aim == FALSE)
1604		goto no_calc;
1605	/*
1606	** Do Adaptive Interrupt Moderation:
1607        **  - Write out last calculated setting
1608	**  - Calculate based on average size over
1609	**    the last interval.
1610	*/
1611        if (que->eitr_setting)
1612                E1000_WRITE_REG(&adapter->hw,
1613                    E1000_EITR(que->msix), que->eitr_setting);
1614
1615        que->eitr_setting = 0;
1616
1617        /* Idle, do nothing */
1618        if ((txr->bytes == 0) && (rxr->bytes == 0))
1619                goto no_calc;
1620
1621        /* Used half Default if sub-gig */
1622        if (adapter->link_speed != 1000)
1623                newitr = IGB_DEFAULT_ITR / 2;
1624        else {
1625		if ((txr->bytes) && (txr->packets))
1626                	newitr = txr->bytes/txr->packets;
1627		if ((rxr->bytes) && (rxr->packets))
1628			newitr = max(newitr,
1629			    (rxr->bytes / rxr->packets));
1630                newitr += 24; /* account for hardware frame, crc */
1631		/* set an upper boundary */
1632		newitr = min(newitr, 3000);
1633		/* Be nice to the mid range */
1634                if ((newitr > 300) && (newitr < 1200))
1635                        newitr = (newitr / 3);
1636                else
1637                        newitr = (newitr / 2);
1638        }
1639        newitr &= 0x7FFC;  /* Mask invalid bits */
1640        if (adapter->hw.mac.type == e1000_82575)
1641                newitr |= newitr << 16;
1642        else
1643                newitr |= E1000_EITR_CNT_IGNR;
1644
1645        /* save for next interrupt */
1646        que->eitr_setting = newitr;
1647
1648        /* Reset state */
1649        txr->bytes = 0;
1650        txr->packets = 0;
1651        rxr->bytes = 0;
1652        rxr->packets = 0;
1653
1654no_calc:
1655	/* Schedule a clean task if needed*/
1656	if (more_rx)
1657		taskqueue_enqueue(que->tq, &que->que_task);
1658	else
1659		/* Reenable this interrupt */
1660		E1000_WRITE_REG(&adapter->hw, E1000_EIMS, que->eims);
1661	return;
1662}
1663
1664
1665/*********************************************************************
1666 *
1667 *  MSIX Link Interrupt Service routine
1668 *
1669 **********************************************************************/
1670
1671static void
1672igb_msix_link(void *arg)
1673{
1674	struct adapter	*adapter = arg;
1675	u32       	icr;
1676
1677	++adapter->link_irq;
1678	icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1679	if (!(icr & E1000_ICR_LSC))
1680		goto spurious;
1681	igb_handle_link(adapter, 0);
1682
1683spurious:
1684	/* Rearm */
1685	E1000_WRITE_REG(&adapter->hw, E1000_IMS, E1000_IMS_LSC);
1686	E1000_WRITE_REG(&adapter->hw, E1000_EIMS, adapter->link_mask);
1687	return;
1688}
1689
1690
1691/*********************************************************************
1692 *
1693 *  Media Ioctl callback
1694 *
1695 *  This routine is called whenever the user queries the status of
1696 *  the interface using ifconfig.
1697 *
1698 **********************************************************************/
1699static void
1700igb_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
1701{
1702	struct adapter *adapter = ifp->if_softc;
1703
1704	INIT_DEBUGOUT("igb_media_status: begin");
1705
1706	IGB_CORE_LOCK(adapter);
1707	igb_update_link_status(adapter);
1708
1709	ifmr->ifm_status = IFM_AVALID;
1710	ifmr->ifm_active = IFM_ETHER;
1711
1712	if (!adapter->link_active) {
1713		IGB_CORE_UNLOCK(adapter);
1714		return;
1715	}
1716
1717	ifmr->ifm_status |= IFM_ACTIVE;
1718
1719	switch (adapter->link_speed) {
1720	case 10:
1721		ifmr->ifm_active |= IFM_10_T;
1722		break;
1723	case 100:
1724		/*
1725		** Support for 100Mb SFP - these are Fiber
1726		** but the media type appears as serdes
1727		*/
1728		if (adapter->hw.phy.media_type ==
1729		    e1000_media_type_internal_serdes)
1730			ifmr->ifm_active |= IFM_100_FX;
1731		else
1732			ifmr->ifm_active |= IFM_100_TX;
1733		break;
1734	case 1000:
1735		ifmr->ifm_active |= IFM_1000_T;
1736		break;
1737	}
1738
1739	if (adapter->link_duplex == FULL_DUPLEX)
1740		ifmr->ifm_active |= IFM_FDX;
1741	else
1742		ifmr->ifm_active |= IFM_HDX;
1743
1744	IGB_CORE_UNLOCK(adapter);
1745}
1746
1747/*********************************************************************
1748 *
1749 *  Media Ioctl callback
1750 *
1751 *  This routine is called when the user changes speed/duplex using
1752 *  media/mediopt option with ifconfig.
1753 *
1754 **********************************************************************/
1755static int
1756igb_media_change(struct ifnet *ifp)
1757{
1758	struct adapter *adapter = ifp->if_softc;
1759	struct ifmedia  *ifm = &adapter->media;
1760
1761	INIT_DEBUGOUT("igb_media_change: begin");
1762
1763	if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
1764		return (EINVAL);
1765
1766	IGB_CORE_LOCK(adapter);
1767	switch (IFM_SUBTYPE(ifm->ifm_media)) {
1768	case IFM_AUTO:
1769		adapter->hw.mac.autoneg = DO_AUTO_NEG;
1770		adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
1771		break;
1772	case IFM_1000_LX:
1773	case IFM_1000_SX:
1774	case IFM_1000_T:
1775		adapter->hw.mac.autoneg = DO_AUTO_NEG;
1776		adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
1777		break;
1778	case IFM_100_TX:
1779		adapter->hw.mac.autoneg = FALSE;
1780		adapter->hw.phy.autoneg_advertised = 0;
1781		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1782			adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_FULL;
1783		else
1784			adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_HALF;
1785		break;
1786	case IFM_10_T:
1787		adapter->hw.mac.autoneg = FALSE;
1788		adapter->hw.phy.autoneg_advertised = 0;
1789		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1790			adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_FULL;
1791		else
1792			adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_HALF;
1793		break;
1794	default:
1795		device_printf(adapter->dev, "Unsupported media type\n");
1796	}
1797
1798	igb_init_locked(adapter);
1799	IGB_CORE_UNLOCK(adapter);
1800
1801	return (0);
1802}
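/*
 * Usage note (illustrative only; the interface name igb0 is just an
 * example): typical ifconfig invocations that end up in
 * igb_media_change() are
 *
 *	ifconfig igb0 media autoselect
 *	ifconfig igb0 media 100baseTX mediaopt full-duplex
 *	ifconfig igb0 media 10baseT/UTP mediaopt half-duplex
 *
 * The autoselect case re-enables autonegotiation with the default
 * advertisement mask, while the fixed-speed cases disable
 * autonegotiation and program forced_speed_duplex before
 * igb_init_locked() reinitializes the adapter.
 */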
1803
1804
1805/*********************************************************************
1806 *
1807 *  This routine maps the mbufs to Advanced TX descriptors.
1808 *
1809 **********************************************************************/
1810static int
1811igb_xmit(struct tx_ring *txr, struct mbuf **m_headp)
1812{
1813	struct adapter		*adapter = txr->adapter;
1814	bus_dma_segment_t	segs[IGB_MAX_SCATTER];
1815	bus_dmamap_t		map;
1816	struct igb_tx_buffer	*tx_buffer, *tx_buffer_mapped;
1817	union e1000_adv_tx_desc	*txd = NULL;
1818	struct mbuf		*m_head = *m_headp;
1819	struct ether_vlan_header *eh = NULL;
1820	struct ip		*ip = NULL;
1821	struct tcphdr		*th = NULL;
1822	u32			hdrlen, cmd_type_len, olinfo_status = 0;
1823	int			ehdrlen, poff;
1824	int			nsegs, i, first, last = 0;
1825	int			error, do_tso, remap = 1;
1826
1827	/* Set basic descriptor constants */
1828	cmd_type_len = E1000_ADVTXD_DTYP_DATA;
1829	cmd_type_len |= E1000_ADVTXD_DCMD_IFCS | E1000_ADVTXD_DCMD_DEXT;
1830	if (m_head->m_flags & M_VLANTAG)
1831		cmd_type_len |= E1000_ADVTXD_DCMD_VLE;
1832
1833retry:
1834	m_head = *m_headp;
1835	do_tso = ((m_head->m_pkthdr.csum_flags & CSUM_TSO) != 0);
1836	hdrlen = ehdrlen = poff = 0;
1837
1838	/*
1839	 * Intel recommends entire IP/TCP header length reside in a single
1840	 * buffer. If multiple descriptors are used to describe the IP and
1841	 * TCP header, each descriptor should describe one or more
1842	 * complete headers; descriptors referencing only parts of headers
1843	 * are not supported. If all layer headers are not coalesced into
1844	 * a single buffer, each buffer should not cross a 4KB boundary,
1845	 * or be larger than the maximum read request size.
1846	 * The controller also requires modifying the IP/TCP header for TSO
1847	 * to work, so we first get a writable mbuf chain and then coalesce
1848	 * the ethernet/IP/TCP headers into a single buffer to meet the
1849	 * controller's requirement. This also simplifies IP/TCP/UDP checksum
1850	 * offloading, which has similar restrictions.
1851	 */
1852	if (do_tso || m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD) {
1853		if (do_tso || (m_head->m_next != NULL &&
1854		    m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD)) {
1855			if (M_WRITABLE(*m_headp) == 0) {
1856				m_head = m_dup(*m_headp, M_NOWAIT);
1857				m_freem(*m_headp);
1858				if (m_head == NULL) {
1859					*m_headp = NULL;
1860					return (ENOBUFS);
1861				}
1862				*m_headp = m_head;
1863			}
1864		}
1865		/*
1866		 * Assume IPv4, we don't have TSO/checksum offload support
1867		 * for IPv6 yet.
1868		 */
1869		ehdrlen = sizeof(struct ether_header);
1870		m_head = m_pullup(m_head, ehdrlen);
1871		if (m_head == NULL) {
1872			*m_headp = NULL;
1873			return (ENOBUFS);
1874		}
1875		eh = mtod(m_head, struct ether_vlan_header *);
1876		if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
1877			ehdrlen = sizeof(struct ether_vlan_header);
1878			m_head = m_pullup(m_head, ehdrlen);
1879			if (m_head == NULL) {
1880				*m_headp = NULL;
1881				return (ENOBUFS);
1882			}
1883		}
1884		m_head = m_pullup(m_head, ehdrlen + sizeof(struct ip));
1885		if (m_head == NULL) {
1886			*m_headp = NULL;
1887			return (ENOBUFS);
1888		}
1889		ip = (struct ip *)(mtod(m_head, char *) + ehdrlen);
1890		poff = ehdrlen + (ip->ip_hl << 2);
1891		if (do_tso) {
1892			m_head = m_pullup(m_head, poff + sizeof(struct tcphdr));
1893			if (m_head == NULL) {
1894				*m_headp = NULL;
1895				return (ENOBUFS);
1896			}
1897			/*
1898			 * The pseudo TCP checksum must not include the TCP
1899			 * payload length, so the driver recomputes it here as
1900			 * the hardware expects to see it, in adherence to
1901			 * Microsoft's Large Send Offload specification.
1902			 */
1903			th = (struct tcphdr *)(mtod(m_head, char *) + poff);
1904			th->th_sum = in_pseudo(ip->ip_src.s_addr,
1905			    ip->ip_dst.s_addr, htons(IPPROTO_TCP));
1906			/* Keep track of the full header length */
1907			hdrlen = poff + (th->th_off << 2);
1908		} else if (m_head->m_pkthdr.csum_flags & CSUM_TCP) {
1909			m_head = m_pullup(m_head, poff + sizeof(struct tcphdr));
1910			if (m_head == NULL) {
1911				*m_headp = NULL;
1912				return (ENOBUFS);
1913			}
1914			th = (struct tcphdr *)(mtod(m_head, char *) + poff);
1915			m_head = m_pullup(m_head, poff + (th->th_off << 2));
1916			if (m_head == NULL) {
1917				*m_headp = NULL;
1918				return (ENOBUFS);
1919			}
1920			ip = (struct ip *)(mtod(m_head, char *) + ehdrlen);
1921			th = (struct tcphdr *)(mtod(m_head, char *) + poff);
1922		} else if (m_head->m_pkthdr.csum_flags & CSUM_UDP) {
1923			m_head = m_pullup(m_head, poff + sizeof(struct udphdr));
1924			if (m_head == NULL) {
1925				*m_headp = NULL;
1926				return (ENOBUFS);
1927			}
1928			ip = (struct ip *)(mtod(m_head, char *) + ehdrlen);
1929		}
1930		*m_headp = m_head;
1931	}
1932
1933	/*
1934	 * Map the packet for DMA
1935	 *
1936	 * Capture the first descriptor index,
1937	 * this descriptor will have the index
1938	 * of the EOP which is the only one that
1939	 * now gets a DONE bit writeback.
1940	 */
1941	first = txr->next_avail_desc;
1942	tx_buffer = &txr->tx_buffers[first];
1943	tx_buffer_mapped = tx_buffer;
1944	map = tx_buffer->map;
1945
1946	error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
1947	    *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
1948
1949	/*
1950	 * There are two types of errors we can (try) to handle:
1951	 * - EFBIG means the mbuf chain was too long and bus_dma ran
1952	 *   out of segments.  Defragment the mbuf chain and try again.
1953	 * - ENOMEM means bus_dma could not obtain enough bounce buffers
1954	 *   at this point in time.  Defer sending and try again later.
1955	 * All other errors, in particular EINVAL, are fatal and prevent the
1956	 * mbuf chain from ever going through.  Drop it and report error.
1957	 */
1958	if (error == EFBIG && remap) {
1959		struct mbuf *m;
1960
1961		m = m_defrag(*m_headp, M_NOWAIT);
1962		if (m == NULL) {
1963			adapter->mbuf_defrag_failed++;
1964			m_freem(*m_headp);
1965			*m_headp = NULL;
1966			return (ENOBUFS);
1967		}
1968		*m_headp = m;
1969
1970		/* Try it again, but only once */
1971		remap = 0;
1972		goto retry;
1973	} else if (error == ENOMEM) {
1974		adapter->no_tx_dma_setup++;
1975		return (error);
1976	} else if (error != 0) {
1977		adapter->no_tx_dma_setup++;
1978		m_freem(*m_headp);
1979		*m_headp = NULL;
1980		return (error);
1981	}
1982
1983	/*
1984	** Make sure we don't overrun the ring:
1985	** we need nsegs descriptors plus one for
1986	** the context descriptor used for the
1987	** offloads.
1988	*/
1989	if ((nsegs + 1) > (txr->tx_avail - 2)) {
1990		txr->no_desc_avail++;
1991		bus_dmamap_unload(txr->txtag, map);
1992		return (ENOBUFS);
1993	}
1994	m_head = *m_headp;
1995
1996	/* Do hardware assists:
1997         * Set up the context descriptor, used
1998         * when any hardware offload is done.
1999         * This includes CSUM, VLAN, and TSO.
2000         * It will use the first descriptor.
2001         */
2002
2003	if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
2004		if (igb_tso_setup(txr, m_head, ehdrlen, ip, th)) {
2005			cmd_type_len |= E1000_ADVTXD_DCMD_TSE;
2006			olinfo_status |= E1000_TXD_POPTS_IXSM << 8;
2007			olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
2008		} else
2009			return (ENXIO);
2010	} else if (igb_tx_ctx_setup(txr, m_head))
2011			olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
2012
2013	/* Calculate payload length */
2014	olinfo_status |= ((m_head->m_pkthdr.len - hdrlen)
2015	    << E1000_ADVTXD_PAYLEN_SHIFT);
2016
2017	/* 82575 needs the queue index added */
2018	if (adapter->hw.mac.type == e1000_82575)
2019		olinfo_status |= txr->me << 4;
2020
2021	/* Set up our transmit descriptors */
2022	i = txr->next_avail_desc;
2023	for (int j = 0; j < nsegs; j++) {
2024		bus_size_t seg_len;
2025		bus_addr_t seg_addr;
2026
2027		tx_buffer = &txr->tx_buffers[i];
2028		txd = (union e1000_adv_tx_desc *)&txr->tx_base[i];
2029		seg_addr = segs[j].ds_addr;
2030		seg_len  = segs[j].ds_len;
2031
2032		txd->read.buffer_addr = htole64(seg_addr);
2033		txd->read.cmd_type_len = htole32(cmd_type_len | seg_len);
2034		txd->read.olinfo_status = htole32(olinfo_status);
2035		last = i;
2036		if (++i == adapter->num_tx_desc)
2037			i = 0;
2038		tx_buffer->m_head = NULL;
2039		tx_buffer->next_eop = -1;
2040	}
2041
2042	txr->next_avail_desc = i;
2043	txr->tx_avail -= nsegs;
2044        tx_buffer->m_head = m_head;
2045
2046	/*
2047	** Here we swap the map so the last descriptor,
2048	** which gets the completion interrupt, has the
2049	** real map, and the first descriptor gets the
2050	** unused map from this descriptor.
2051	*/
2052	tx_buffer_mapped->map = tx_buffer->map;
2053	tx_buffer->map = map;
2054        bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);
2055
2056        /*
2057         * Last Descriptor of Packet
2058	 * needs End Of Packet (EOP)
2059	 * and Report Status (RS)
2060         */
2061        txd->read.cmd_type_len |=
2062	    htole32(E1000_ADVTXD_DCMD_EOP | E1000_ADVTXD_DCMD_RS);
2063	/*
2064	 * Keep track in the first buffer which
2065	 * descriptor will be written back
2066	 */
2067	tx_buffer = &txr->tx_buffers[first];
2068	tx_buffer->next_eop = last;
2069	/* Update the watchdog time early and often */
2070	txr->watchdog_time = ticks;
2071
2072	/*
2073	 * Advance the Transmit Descriptor Tail (TDT), this tells the E1000
2074	 * that this frame is available to transmit.
2075	 */
2076	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
2077	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
2078	E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), i);
2079	++txr->tx_packets;
2080
2081	return (0);
2082}
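/*
 * Illustrative sketch (not compiled into the driver): the TSO checksum
 * fixup performed above in igb_xmit(), pulled out as a standalone
 * helper.  th_sum is seeded with a pseudo-header checksum that
 * deliberately omits the TCP payload length, because the hardware fills
 * in the per-segment length itself when it splits the large send.  The
 * sketch assumes an IPv4 packet whose ethernet, IP and TCP headers have
 * already been coalesced at the front of the mbuf.
 */
#if 0
static void
example_tso_pseudo_csum(struct mbuf *m, int ehdrlen)
{
	struct ip	*ip;
	struct tcphdr	*th;

	ip = (struct ip *)(mtod(m, char *) + ehdrlen);
	th = (struct tcphdr *)((char *)ip + (ip->ip_hl << 2));

	/* Sum only the source/destination addresses and the protocol */
	th->th_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
	    htons(IPPROTO_TCP));
}
#endif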
2083static void
2084igb_set_promisc(struct adapter *adapter)
2085{
2086	struct ifnet	*ifp = adapter->ifp;
2087	struct e1000_hw *hw = &adapter->hw;
2088	u32		reg;
2089
2090	if (adapter->vf_ifp) {
2091		e1000_promisc_set_vf(hw, e1000_promisc_enabled);
2092		return;
2093	}
2094
2095	reg = E1000_READ_REG(hw, E1000_RCTL);
2096	if (ifp->if_flags & IFF_PROMISC) {
2097		reg |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
2098		E1000_WRITE_REG(hw, E1000_RCTL, reg);
2099	} else if (ifp->if_flags & IFF_ALLMULTI) {
2100		reg |= E1000_RCTL_MPE;
2101		reg &= ~E1000_RCTL_UPE;
2102		E1000_WRITE_REG(hw, E1000_RCTL, reg);
2103	}
2104}
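/*
 * For reference (illustrative summary of the RCTL bits used above and
 * in igb_disable_promisc() below): RCTL_UPE is unicast promiscuous
 * mode, accepting every unicast frame, and RCTL_MPE is multicast
 * promiscuous mode, accepting every multicast frame.  IFF_PROMISC sets
 * both bits, IFF_ALLMULTI sets only MPE, and igb_disable_promisc()
 * clears them again, keeping MPE only while the multicast list has
 * overflowed the hardware filter table.
 */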
2105
2106static void
2107igb_disable_promisc(struct adapter *adapter)
2108{
2109	struct e1000_hw *hw = &adapter->hw;
2110	struct ifnet	*ifp = adapter->ifp;
2111	u32		reg;
2112	int		mcnt = 0;
2113
2114	if (adapter->vf_ifp) {
2115		e1000_promisc_set_vf(hw, e1000_promisc_disabled);
2116		return;
2117	}
2118	reg = E1000_READ_REG(hw, E1000_RCTL);
2119	reg &=  (~E1000_RCTL_UPE);
2120	if (ifp->if_flags & IFF_ALLMULTI)
2121		mcnt = MAX_NUM_MULTICAST_ADDRESSES;
2122	else {
2123		struct  ifmultiaddr *ifma;
2124#if __FreeBSD_version < 800000
2125		IF_ADDR_LOCK(ifp);
2126#else
2127		if_maddr_rlock(ifp);
2128#endif
2129		TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
2130			if (ifma->ifma_addr->sa_family != AF_LINK)
2131				continue;
2132			if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
2133				break;
2134			mcnt++;
2135		}
2136#if __FreeBSD_version < 800000
2137		IF_ADDR_UNLOCK(ifp);
2138#else
2139		if_maddr_runlock(ifp);
2140#endif
2141	}
2142	/* Don't disable if in MAX groups */
2143	if (mcnt < MAX_NUM_MULTICAST_ADDRESSES)
2144		reg &=  (~E1000_RCTL_MPE);
2145	E1000_WRITE_REG(hw, E1000_RCTL, reg);
2146}
2147
2148
2149/*********************************************************************
2150 *  Multicast Update
2151 *
2152 *  This routine is called whenever multicast address list is updated.
2153 *
2154 **********************************************************************/
2155
2156static void
2157igb_set_multi(struct adapter *adapter)
2158{
2159	struct ifnet	*ifp = adapter->ifp;
2160	struct ifmultiaddr *ifma;
2161	u32 reg_rctl = 0;
2162	u8  *mta;
2163
2164	int mcnt = 0;
2165
2166	IOCTL_DEBUGOUT("igb_set_multi: begin");
2167
2168	mta = adapter->mta;
2169	bzero(mta, sizeof(uint8_t) * ETH_ADDR_LEN *
2170	    MAX_NUM_MULTICAST_ADDRESSES);
2171
2172#if __FreeBSD_version < 800000
2173	IF_ADDR_LOCK(ifp);
2174#else
2175	if_maddr_rlock(ifp);
2176#endif
2177	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
2178		if (ifma->ifma_addr->sa_family != AF_LINK)
2179			continue;
2180
2181		if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
2182			break;
2183
2184		bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
2185		    &mta[mcnt * ETH_ADDR_LEN], ETH_ADDR_LEN);
2186		mcnt++;
2187	}
2188#if __FreeBSD_version < 800000
2189	IF_ADDR_UNLOCK(ifp);
2190#else
2191	if_maddr_runlock(ifp);
2192#endif
2193
2194	if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) {
2195		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2196		reg_rctl |= E1000_RCTL_MPE;
2197		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2198	} else
2199		e1000_update_mc_addr_list(&adapter->hw, mta, mcnt);
2200}
2201
2202
2203/*********************************************************************
2204 *  Timer routine:
2205 *  	This routine checks for link status,
2206 *	updates statistics, and does the watchdog.
2207 *
2208 **********************************************************************/
2209
2210static void
2211igb_local_timer(void *arg)
2212{
2213	struct adapter		*adapter = arg;
2214	device_t		dev = adapter->dev;
2215	struct ifnet		*ifp = adapter->ifp;
2216	struct tx_ring		*txr = adapter->tx_rings;
2217	struct igb_queue	*que = adapter->queues;
2218	int			hung = 0, busy = 0;
2219
2220
2221	IGB_CORE_LOCK_ASSERT(adapter);
2222
2223	igb_update_link_status(adapter);
2224	igb_update_stats_counters(adapter);
2225
2226        /*
2227        ** Check the TX queues status
2228	**	- central locked handling of OACTIVE
2229	**	- watchdog only if all queues show hung
2230        */
2231	for (int i = 0; i < adapter->num_queues; i++, que++, txr++) {
2232		if ((txr->queue_status & IGB_QUEUE_HUNG) &&
2233		    (adapter->pause_frames == 0))
2234			++hung;
2235		if (txr->queue_status & IGB_QUEUE_DEPLETED)
2236			++busy;
2237		if ((txr->queue_status & IGB_QUEUE_IDLE) == 0)
2238			taskqueue_enqueue(que->tq, &que->que_task);
2239	}
2240	if (hung == adapter->num_queues)
2241		goto timeout;
2242	if (busy == adapter->num_queues)
2243		ifp->if_drv_flags |= IFF_DRV_OACTIVE;
2244	else if ((ifp->if_drv_flags & IFF_DRV_OACTIVE) &&
2245	    (busy < adapter->num_queues))
2246		ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
2247
2248	adapter->pause_frames = 0;
2249	callout_reset(&adapter->timer, hz, igb_local_timer, adapter);
2250#ifndef DEVICE_POLLING
2251	/* Schedule all queue interrupts - deadlock protection */
2252	E1000_WRITE_REG(&adapter->hw, E1000_EICS, adapter->que_mask);
2253#endif
2254	return;
2255
2256timeout:
2257	device_printf(adapter->dev, "Watchdog timeout -- resetting\n");
2258	device_printf(dev, "Queue(%d) tdh = %d, hw tdt = %d\n", txr->me,
2259	    E1000_READ_REG(&adapter->hw, E1000_TDH(txr->me)),
2260	    E1000_READ_REG(&adapter->hw, E1000_TDT(txr->me)));
2261	device_printf(dev, "TX(%d) desc avail = %d, "
2262	    "Next TX to Clean = %d\n",
2263	    txr->me, txr->tx_avail, txr->next_to_clean);
2264	adapter->ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2265	adapter->watchdog_events++;
2266	igb_init_locked(adapter);
2267}
2268
2269static void
2270igb_update_link_status(struct adapter *adapter)
2271{
2272	struct e1000_hw		*hw = &adapter->hw;
2273	struct e1000_fc_info	*fc = &hw->fc;
2274	struct ifnet		*ifp = adapter->ifp;
2275	device_t		dev = adapter->dev;
2276	struct tx_ring		*txr = adapter->tx_rings;
2277	u32			link_check, thstat, ctrl;
2278	char			*flowctl = NULL;
2279
2280	link_check = thstat = ctrl = 0;
2281
2282	/* Get the cached link value or read for real */
2283        switch (hw->phy.media_type) {
2284        case e1000_media_type_copper:
2285                if (hw->mac.get_link_status) {
2286			/* Do the work to read phy */
2287                        e1000_check_for_link(hw);
2288                        link_check = !hw->mac.get_link_status;
2289                } else
2290                        link_check = TRUE;
2291                break;
2292        case e1000_media_type_fiber:
2293                e1000_check_for_link(hw);
2294                link_check = (E1000_READ_REG(hw, E1000_STATUS) &
2295                                 E1000_STATUS_LU);
2296                break;
2297        case e1000_media_type_internal_serdes:
2298                e1000_check_for_link(hw);
2299                link_check = adapter->hw.mac.serdes_has_link;
2300                break;
2301	/* VF device is type_unknown */
2302        case e1000_media_type_unknown:
2303                e1000_check_for_link(hw);
2304		link_check = !hw->mac.get_link_status;
2305		/* Fall thru */
2306        default:
2307                break;
2308        }
2309
2310	/* Check for thermal downshift or shutdown */
2311	if (hw->mac.type == e1000_i350) {
2312		thstat = E1000_READ_REG(hw, E1000_THSTAT);
2313		ctrl = E1000_READ_REG(hw, E1000_CTRL_EXT);
2314	}
2315
2316	/* Get the flow control for display */
2317	switch (fc->current_mode) {
2318	case e1000_fc_rx_pause:
2319		flowctl = "RX";
2320		break;
2321	case e1000_fc_tx_pause:
2322		flowctl = "TX";
2323		break;
2324	case e1000_fc_full:
2325		flowctl = "Full";
2326		break;
2327	case e1000_fc_none:
2328	default:
2329		flowctl = "None";
2330		break;
2331	}
2332
2333	/* Now we check if a transition has happened */
2334	if (link_check && (adapter->link_active == 0)) {
2335		e1000_get_speed_and_duplex(&adapter->hw,
2336		    &adapter->link_speed, &adapter->link_duplex);
2337		if (bootverbose)
2338			device_printf(dev, "Link is up %d Mbps %s,"
2339			    " Flow Control: %s\n",
2340			    adapter->link_speed,
2341			    ((adapter->link_duplex == FULL_DUPLEX) ?
2342			    "Full Duplex" : "Half Duplex"), flowctl);
2343		adapter->link_active = 1;
2344		ifp->if_baudrate = adapter->link_speed * 1000000;
2345		if ((ctrl & E1000_CTRL_EXT_LINK_MODE_GMII) &&
2346		    (thstat & E1000_THSTAT_LINK_THROTTLE))
2347			device_printf(dev, "Link: thermal downshift\n");
2348		/* This can sleep */
2349		if_link_state_change(ifp, LINK_STATE_UP);
2350	} else if (!link_check && (adapter->link_active == 1)) {
2351		ifp->if_baudrate = adapter->link_speed = 0;
2352		adapter->link_duplex = 0;
2353		if (bootverbose)
2354			device_printf(dev, "Link is Down\n");
2355		if ((ctrl & E1000_CTRL_EXT_LINK_MODE_GMII) &&
2356		    (thstat & E1000_THSTAT_PWR_DOWN))
2357			device_printf(dev, "Link: thermal shutdown\n");
2358		adapter->link_active = 0;
2359		/* This can sleep */
2360		if_link_state_change(ifp, LINK_STATE_DOWN);
2361		/* Reset queue state */
2362		for (int i = 0; i < adapter->num_queues; i++, txr++)
2363			txr->queue_status = IGB_QUEUE_IDLE;
2364	}
2365}
2366
2367/*********************************************************************
2368 *
2369 *  This routine disables all traffic on the adapter by issuing a
2370 *  global reset on the MAC and deallocates TX/RX buffers.
2371 *
2372 **********************************************************************/
2373
2374static void
2375igb_stop(void *arg)
2376{
2377	struct adapter	*adapter = arg;
2378	struct ifnet	*ifp = adapter->ifp;
2379	struct tx_ring *txr = adapter->tx_rings;
2380
2381	IGB_CORE_LOCK_ASSERT(adapter);
2382
2383	INIT_DEBUGOUT("igb_stop: begin");
2384
2385	igb_disable_intr(adapter);
2386
2387	callout_stop(&adapter->timer);
2388
2389	/* Tell the stack that the interface is no longer active */
2390	ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2391	ifp->if_drv_flags |= IFF_DRV_OACTIVE;
2392
2393	/* Disarm watchdog timer. */
2394	for (int i = 0; i < adapter->num_queues; i++, txr++) {
2395		IGB_TX_LOCK(txr);
2396		txr->queue_status = IGB_QUEUE_IDLE;
2397		IGB_TX_UNLOCK(txr);
2398	}
2399
2400	e1000_reset_hw(&adapter->hw);
2401	E1000_WRITE_REG(&adapter->hw, E1000_WUC, 0);
2402
2403	e1000_led_off(&adapter->hw);
2404	e1000_cleanup_led(&adapter->hw);
2405}
2406
2407
2408/*********************************************************************
2409 *
2410 *  Determine hardware revision.
2411 *
2412 **********************************************************************/
2413static void
2414igb_identify_hardware(struct adapter *adapter)
2415{
2416	device_t dev = adapter->dev;
2417
2418	/* Make sure our PCI config space has the necessary stuff set */
2419	pci_enable_busmaster(dev);
2420	adapter->hw.bus.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2);
2421
2422	/* Save off the information about this board */
2423	adapter->hw.vendor_id = pci_get_vendor(dev);
2424	adapter->hw.device_id = pci_get_device(dev);
2425	adapter->hw.revision_id = pci_read_config(dev, PCIR_REVID, 1);
2426	adapter->hw.subsystem_vendor_id =
2427	    pci_read_config(dev, PCIR_SUBVEND_0, 2);
2428	adapter->hw.subsystem_device_id =
2429	    pci_read_config(dev, PCIR_SUBDEV_0, 2);
2430
2431	/* Set MAC type early for PCI setup */
2432	e1000_set_mac_type(&adapter->hw);
2433
2434	/* Are we a VF device? */
2435	if ((adapter->hw.mac.type == e1000_vfadapt) ||
2436	    (adapter->hw.mac.type == e1000_vfadapt_i350))
2437		adapter->vf_ifp = 1;
2438	else
2439		adapter->vf_ifp = 0;
2440}
2441
2442static int
2443igb_allocate_pci_resources(struct adapter *adapter)
2444{
2445	device_t	dev = adapter->dev;
2446	int		rid;
2447
2448	rid = PCIR_BAR(0);
2449	adapter->pci_mem = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
2450	    &rid, RF_ACTIVE);
2451	if (adapter->pci_mem == NULL) {
2452		device_printf(dev, "Unable to allocate bus resource: memory\n");
2453		return (ENXIO);
2454	}
2455	adapter->osdep.mem_bus_space_tag =
2456	    rman_get_bustag(adapter->pci_mem);
2457	adapter->osdep.mem_bus_space_handle =
2458	    rman_get_bushandle(adapter->pci_mem);
2459	adapter->hw.hw_addr = (u8 *)&adapter->osdep.mem_bus_space_handle;
2460
2461	adapter->num_queues = 1; /* Defaults for Legacy or MSI */
2462
2463	/* This will setup either MSI/X or MSI */
2464	adapter->msix = igb_setup_msix(adapter);
2465	adapter->hw.back = &adapter->osdep;
2466
2467	return (0);
2468}
2469
2470/*********************************************************************
2471 *
2472 *  Setup the Legacy or MSI Interrupt handler
2473 *
2474 **********************************************************************/
2475static int
2476igb_allocate_legacy(struct adapter *adapter)
2477{
2478	device_t		dev = adapter->dev;
2479	struct igb_queue	*que = adapter->queues;
2480	int			error, rid = 0;
2481
2482	/* Turn off all interrupts */
2483	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2484
2485	/* MSI RID is 1 */
2486	if (adapter->msix == 1)
2487		rid = 1;
2488
2489	/* We allocate a single interrupt resource */
2490	adapter->res = bus_alloc_resource_any(dev,
2491	    SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2492	if (adapter->res == NULL) {
2493		device_printf(dev, "Unable to allocate bus resource: "
2494		    "interrupt\n");
2495		return (ENXIO);
2496	}
2497
2498#ifndef IGB_LEGACY_TX
2499	TASK_INIT(&que->txr->txq_task, 0, igb_deferred_mq_start, que->txr);
2500#endif
2501
2502	/*
2503	 * Try allocating a fast interrupt and the associated deferred
2504	 * processing contexts.
2505	 */
2506	TASK_INIT(&que->que_task, 0, igb_handle_que, que);
2507	/* Make tasklet for deferred link handling */
2508	TASK_INIT(&adapter->link_task, 0, igb_handle_link, adapter);
2509	que->tq = taskqueue_create_fast("igb_taskq", M_NOWAIT,
2510	    taskqueue_thread_enqueue, &que->tq);
2511	taskqueue_start_threads(&que->tq, 1, PI_NET, "%s taskq",
2512	    device_get_nameunit(adapter->dev));
2513	if ((error = bus_setup_intr(dev, adapter->res,
2514	    INTR_TYPE_NET | INTR_MPSAFE, igb_irq_fast, NULL,
2515	    adapter, &adapter->tag)) != 0) {
2516		device_printf(dev, "Failed to register fast interrupt "
2517			    "handler: %d\n", error);
2518		taskqueue_free(que->tq);
2519		que->tq = NULL;
2520		return (error);
2521	}
2522
2523	return (0);
2524}
2525
2526
2527/*********************************************************************
2528 *
2529 *  Setup the MSIX Queue Interrupt handlers:
2530 *
2531 **********************************************************************/
2532static int
2533igb_allocate_msix(struct adapter *adapter)
2534{
2535	device_t		dev = adapter->dev;
2536	struct igb_queue	*que = adapter->queues;
2537	int			error, rid, vector = 0;
2538
2539	/* Be sure to start with all interrupts disabled */
2540	E1000_WRITE_REG(&adapter->hw, E1000_IMC, ~0);
2541	E1000_WRITE_FLUSH(&adapter->hw);
2542
2543	for (int i = 0; i < adapter->num_queues; i++, vector++, que++) {
2544		rid = vector +1;
2545		que->res = bus_alloc_resource_any(dev,
2546		    SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2547		if (que->res == NULL) {
2548			device_printf(dev,
2549			    "Unable to allocate bus resource: "
2550			    "MSIX Queue Interrupt\n");
2551			return (ENXIO);
2552		}
2553		error = bus_setup_intr(dev, que->res,
2554	    	    INTR_TYPE_NET | INTR_MPSAFE, NULL,
2555		    igb_msix_que, que, &que->tag);
2556		if (error) {
2557			que->res = NULL;
2558			device_printf(dev, "Failed to register Queue handler");
2559			return (error);
2560		}
2561#if __FreeBSD_version >= 800504
2562		bus_describe_intr(dev, que->res, que->tag, "que %d", i);
2563#endif
2564		que->msix = vector;
2565		if (adapter->hw.mac.type == e1000_82575)
2566			que->eims = E1000_EICR_TX_QUEUE0 << i;
2567		else
2568			que->eims = 1 << vector;
2569		/*
2570		** Bind the MSI-X vector, and thus the
2571		** rings, to the corresponding CPU.
2572		*/
2573		if (adapter->num_queues > 1) {
2574			if (igb_last_bind_cpu < 0)
2575				igb_last_bind_cpu = CPU_FIRST();
2576			bus_bind_intr(dev, que->res, igb_last_bind_cpu);
2577			device_printf(dev,
2578				"Bound queue %d to cpu %d\n",
2579				i,igb_last_bind_cpu);
2580			igb_last_bind_cpu = CPU_NEXT(igb_last_bind_cpu);
2581		}
2582#ifndef IGB_LEGACY_TX
2583		TASK_INIT(&que->txr->txq_task, 0, igb_deferred_mq_start,
2584		    que->txr);
2585#endif
2586		/* Make tasklet for deferred handling */
2587		TASK_INIT(&que->que_task, 0, igb_handle_que, que);
2588		que->tq = taskqueue_create("igb_que", M_NOWAIT,
2589		    taskqueue_thread_enqueue, &que->tq);
2590		taskqueue_start_threads(&que->tq, 1, PI_NET, "%s que",
2591		    device_get_nameunit(adapter->dev));
2592	}
2593
2594	/* And Link */
2595	rid = vector + 1;
2596	adapter->res = bus_alloc_resource_any(dev,
2597	    SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2598	if (adapter->res == NULL) {
2599		device_printf(dev,
2600		    "Unable to allocate bus resource: "
2601		    "MSIX Link Interrupt\n");
2602		return (ENXIO);
2603	}
2604	if ((error = bus_setup_intr(dev, adapter->res,
2605	    INTR_TYPE_NET | INTR_MPSAFE, NULL,
2606	    igb_msix_link, adapter, &adapter->tag)) != 0) {
2607		device_printf(dev, "Failed to register Link handler");
2608		return (error);
2609	}
2610#if __FreeBSD_version >= 800504
2611	bus_describe_intr(dev, adapter->res, adapter->tag, "link");
2612#endif
2613	adapter->linkvec = vector;
2614
2615	return (0);
2616}
2617
2618
2619static void
2620igb_configure_queues(struct adapter *adapter)
2621{
2622	struct	e1000_hw	*hw = &adapter->hw;
2623	struct	igb_queue	*que;
2624	u32			tmp, ivar = 0, newitr = 0;
2625
2626	/* First turn on RSS capability */
2627	if (adapter->hw.mac.type != e1000_82575)
2628		E1000_WRITE_REG(hw, E1000_GPIE,
2629		    E1000_GPIE_MSIX_MODE | E1000_GPIE_EIAME |
2630		    E1000_GPIE_PBA | E1000_GPIE_NSICR);
2631
2632	/* Turn on MSIX */
2633	switch (adapter->hw.mac.type) {
2634	case e1000_82580:
2635	case e1000_i350:
2636	case e1000_i210:
2637	case e1000_i211:
2638	case e1000_vfadapt:
2639	case e1000_vfadapt_i350:
2640		/* RX entries */
2641		for (int i = 0; i < adapter->num_queues; i++) {
2642			u32 index = i >> 1;
2643			ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2644			que = &adapter->queues[i];
2645			if (i & 1) {
2646				ivar &= 0xFF00FFFF;
2647				ivar |= (que->msix | E1000_IVAR_VALID) << 16;
2648			} else {
2649				ivar &= 0xFFFFFF00;
2650				ivar |= que->msix | E1000_IVAR_VALID;
2651			}
2652			E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2653		}
2654		/* TX entries */
2655		for (int i = 0; i < adapter->num_queues; i++) {
2656			u32 index = i >> 1;
2657			ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2658			que = &adapter->queues[i];
2659			if (i & 1) {
2660				ivar &= 0x00FFFFFF;
2661				ivar |= (que->msix | E1000_IVAR_VALID) << 24;
2662			} else {
2663				ivar &= 0xFFFF00FF;
2664				ivar |= (que->msix | E1000_IVAR_VALID) << 8;
2665			}
2666			E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2667			adapter->que_mask |= que->eims;
2668		}
2669
2670		/* And for the link interrupt */
2671		ivar = (adapter->linkvec | E1000_IVAR_VALID) << 8;
2672		adapter->link_mask = 1 << adapter->linkvec;
2673		E1000_WRITE_REG(hw, E1000_IVAR_MISC, ivar);
2674		break;
2675	case e1000_82576:
2676		/* RX entries */
2677		for (int i = 0; i < adapter->num_queues; i++) {
2678			u32 index = i & 0x7; /* Each IVAR has two entries */
2679			ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2680			que = &adapter->queues[i];
2681			if (i < 8) {
2682				ivar &= 0xFFFFFF00;
2683				ivar |= que->msix | E1000_IVAR_VALID;
2684			} else {
2685				ivar &= 0xFF00FFFF;
2686				ivar |= (que->msix | E1000_IVAR_VALID) << 16;
2687			}
2688			E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2689			adapter->que_mask |= que->eims;
2690		}
2691		/* TX entries */
2692		for (int i = 0; i < adapter->num_queues; i++) {
2693			u32 index = i & 0x7; /* Each IVAR has two entries */
2694			ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2695			que = &adapter->queues[i];
2696			if (i < 8) {
2697				ivar &= 0xFFFF00FF;
2698				ivar |= (que->msix | E1000_IVAR_VALID) << 8;
2699			} else {
2700				ivar &= 0x00FFFFFF;
2701				ivar |= (que->msix | E1000_IVAR_VALID) << 24;
2702			}
2703			E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2704			adapter->que_mask |= que->eims;
2705		}
2706
2707		/* And for the link interrupt */
2708		ivar = (adapter->linkvec | E1000_IVAR_VALID) << 8;
2709		adapter->link_mask = 1 << adapter->linkvec;
2710		E1000_WRITE_REG(hw, E1000_IVAR_MISC, ivar);
2711		break;
2712
2713	case e1000_82575:
2714		/* Enable MSI-X support */
2715		tmp = E1000_READ_REG(hw, E1000_CTRL_EXT);
2716		tmp |= E1000_CTRL_EXT_PBA_CLR;
2717		/* Auto-Mask interrupts upon ICR read. */
2718		tmp |= E1000_CTRL_EXT_EIAME;
2719		tmp |= E1000_CTRL_EXT_IRCA;
2720		E1000_WRITE_REG(hw, E1000_CTRL_EXT, tmp);
2721
2722		/* Queues */
2723		for (int i = 0; i < adapter->num_queues; i++) {
2724			que = &adapter->queues[i];
2725			tmp = E1000_EICR_RX_QUEUE0 << i;
2726			tmp |= E1000_EICR_TX_QUEUE0 << i;
2727			que->eims = tmp;
2728			E1000_WRITE_REG_ARRAY(hw, E1000_MSIXBM(0),
2729			    i, que->eims);
2730			adapter->que_mask |= que->eims;
2731		}
2732
2733		/* Link */
2734		E1000_WRITE_REG(hw, E1000_MSIXBM(adapter->linkvec),
2735		    E1000_EIMS_OTHER);
2736		adapter->link_mask |= E1000_EIMS_OTHER;
2737	default:
2738		break;
2739	}
2740
2741	/* Set the starting interrupt rate */
2742	if (igb_max_interrupt_rate > 0)
2743		newitr = (4000000 / igb_max_interrupt_rate) & 0x7FFC;
2744
2745        if (hw->mac.type == e1000_82575)
2746                newitr |= newitr << 16;
2747        else
2748                newitr |= E1000_EITR_CNT_IGNR;
2749
2750	for (int i = 0; i < adapter->num_queues; i++) {
2751		que = &adapter->queues[i];
2752		E1000_WRITE_REG(hw, E1000_EITR(que->msix), newitr);
2753	}
2754
2755	return;
2756}
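/*
 * Illustrative sketch (not compiled into the driver): the IVAR packing
 * used above for the 82580/i350 style layout.  Each 32-bit IVAR
 * register covers two queues (register index = queue >> 1) and holds
 * four 8-bit entries -- RX then TX for the even queue in the low half,
 * RX then TX for the odd queue in the high half.  The helper returns
 * the bit shift at which (vector | E1000_IVAR_VALID) is placed for a
 * given queue and direction.
 */
#if 0
static int
example_ivar_shift(int queue, int is_tx)
{
	int shift;

	shift = (queue & 1) ? 16 : 0;	/* odd queues use the high half */
	if (is_tx)
		shift += 8;		/* the TX entry follows the RX entry */
	return (shift);
}
#endif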
2757
2758
2759static void
2760igb_free_pci_resources(struct adapter *adapter)
2761{
2762	struct		igb_queue *que = adapter->queues;
2763	device_t	dev = adapter->dev;
2764	int		rid;
2765
2766	/*
2767	** There is a slight possibility of a failure mode
2768	** in attach that will result in entering this function
2769	** before interrupt resources have been initialized, and
2770	** in that case we do not want to execute the loops below.
2771	** We can detect this reliably by the state of the adapter's
2772	** res pointer.
2773	*/
2774	if (adapter->res == NULL)
2775		goto mem;
2776
2777	/*
2778	 * First release all the interrupt resources:
2779	 */
2780	for (int i = 0; i < adapter->num_queues; i++, que++) {
2781		rid = que->msix + 1;
2782		if (que->tag != NULL) {
2783			bus_teardown_intr(dev, que->res, que->tag);
2784			que->tag = NULL;
2785		}
2786		if (que->res != NULL)
2787			bus_release_resource(dev,
2788			    SYS_RES_IRQ, rid, que->res);
2789	}
2790
2791	/* Clean the Legacy or Link interrupt last */
2792	if (adapter->linkvec) /* we are doing MSIX */
2793		rid = adapter->linkvec + 1;
2794	else
2795		(adapter->msix != 0) ? (rid = 1):(rid = 0);
2796
2797	que = adapter->queues;
2798	if (adapter->tag != NULL) {
2799		taskqueue_drain(que->tq, &adapter->link_task);
2800		bus_teardown_intr(dev, adapter->res, adapter->tag);
2801		adapter->tag = NULL;
2802	}
2803	if (adapter->res != NULL)
2804		bus_release_resource(dev, SYS_RES_IRQ, rid, adapter->res);
2805
2806	for (int i = 0; i < adapter->num_queues; i++, que++) {
2807		if (que->tq != NULL) {
2808#ifndef IGB_LEGACY_TX
2809			taskqueue_drain(que->tq, &que->txr->txq_task);
2810#endif
2811			taskqueue_drain(que->tq, &que->que_task);
2812			taskqueue_free(que->tq);
2813		}
2814	}
2815mem:
2816	if (adapter->msix)
2817		pci_release_msi(dev);
2818
2819	if (adapter->msix_mem != NULL)
2820		bus_release_resource(dev, SYS_RES_MEMORY,
2821		    PCIR_BAR(IGB_MSIX_BAR), adapter->msix_mem);
2822
2823	if (adapter->pci_mem != NULL)
2824		bus_release_resource(dev, SYS_RES_MEMORY,
2825		    PCIR_BAR(0), adapter->pci_mem);
2826
2827}
2828
2829/*
2830 * Setup Either MSI/X or MSI
2831 */
2832static int
2833igb_setup_msix(struct adapter *adapter)
2834{
2835	device_t dev = adapter->dev;
2836	int rid, want, queues, msgs, maxqueues;
2837
2838	/* tuneable override */
2839	if (igb_enable_msix == 0)
2840		goto msi;
2841
2842	/* First try MSI/X */
2843	msgs = pci_msix_count(dev);
2844	if (msgs == 0)
2845		goto msi;
2846	rid = PCIR_BAR(IGB_MSIX_BAR);
2847	adapter->msix_mem = bus_alloc_resource_any(dev,
2848	    SYS_RES_MEMORY, &rid, RF_ACTIVE);
2849       	if (adapter->msix_mem == NULL) {
2850		/* May not be enabled */
2851		device_printf(adapter->dev,
2852		    "Unable to map MSIX table\n");
2853		goto msi;
2854	}
2855
2856	/* Figure out a reasonable auto config value */
2857	queues = (mp_ncpus > (msgs-1)) ? (msgs-1) : mp_ncpus;
2858
2859	/* Manual override */
2860	if (igb_num_queues != 0)
2861		queues = igb_num_queues;
2862
2863	/* Sanity check based on HW */
2864	switch (adapter->hw.mac.type) {
2865		case e1000_82575:
2866			maxqueues = 4;
2867			break;
2868		case e1000_82576:
2869		case e1000_82580:
2870		case e1000_i350:
2871			maxqueues = 8;
2872			break;
2873		case e1000_i210:
2874			maxqueues = 4;
2875			break;
2876		case e1000_i211:
2877			maxqueues = 2;
2878			break;
2879		default:  /* VF interfaces */
2880			maxqueues = 1;
2881			break;
2882	}
2883	if (queues > maxqueues)
2884		queues = maxqueues;
2885
2886	/* reflect correct sysctl value */
2887	igb_num_queues = queues;
2888
2889	/*
2890	** One vector (RX/TX pair) per queue
2891	** plus an additional one for the link interrupt
2892	*/
2893	want = queues + 1;
2894	if (msgs >= want)
2895		msgs = want;
2896	else {
2897               	device_printf(adapter->dev,
2898		    "MSIX Configuration Problem, "
2899		    "%d vectors configured, but %d queues wanted!\n",
2900		    msgs, want);
2901		goto msi;
2902	}
2903	if ((pci_alloc_msix(dev, &msgs) == 0) && (msgs == want)) {
2904               	device_printf(adapter->dev,
2905		    "Using MSIX interrupts with %d vectors\n", msgs);
2906		adapter->num_queues = queues;
2907		return (msgs);
2908	}
2909	/*
2910	** If MSIX alloc failed or provided us with
2911	** less than needed, free and fall through to MSI
2912	*/
2913	pci_release_msi(dev);
2914
2915msi:
2916       	if (adapter->msix_mem != NULL) {
2917		bus_release_resource(dev, SYS_RES_MEMORY,
2918		    PCIR_BAR(IGB_MSIX_BAR), adapter->msix_mem);
2919		adapter->msix_mem = NULL;
2920	}
2921       	msgs = 1;
2922	if (pci_alloc_msi(dev, &msgs) == 0) {
2923		device_printf(adapter->dev," Using an MSI interrupt\n");
2924		return (msgs);
2925	}
2926	device_printf(adapter->dev," Using a Legacy interrupt\n");
2927	return (0);
2928}
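/*
 * Worked example of the vector budgeting above (the numbers are only
 * illustrative): on an 8-core machine, an adapter advertising 10 MSI-X
 * messages gives queues = min(mp_ncpus, msgs - 1) = 8, which is within
 * the 82576 maximum of 8, so want = queues + 1 = 9 vectors -- one per
 * RX/TX queue pair plus one for the link interrupt.  If pci_alloc_msix()
 * cannot deliver all of them, the driver falls back to a single MSI or
 * legacy interrupt.
 */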
2929
2930/*********************************************************************
2931 *
2932 *  Set up a fresh starting state
2933 *
2934 **********************************************************************/
2935static void
2936igb_reset(struct adapter *adapter)
2937{
2938	device_t	dev = adapter->dev;
2939	struct e1000_hw *hw = &adapter->hw;
2940	struct e1000_fc_info *fc = &hw->fc;
2941	struct ifnet	*ifp = adapter->ifp;
2942	u32		pba = 0;
2943	u16		hwm;
2944
2945	INIT_DEBUGOUT("igb_reset: begin");
2946
2947	/* Let the firmware know the OS is in control */
2948	igb_get_hw_control(adapter);
2949
2950	/*
2951	 * Packet Buffer Allocation (PBA)
2952	 * Writing PBA sets the receive portion of the buffer;
2953	 * the remainder is used for the transmit buffer.
2954	 */
2955	switch (hw->mac.type) {
2956	case e1000_82575:
2957		pba = E1000_PBA_32K;
2958		break;
2959	case e1000_82576:
2960	case e1000_vfadapt:
2961		pba = E1000_READ_REG(hw, E1000_RXPBS);
2962		pba &= E1000_RXPBS_SIZE_MASK_82576;
2963		break;
2964	case e1000_82580:
2965	case e1000_i350:
2966	case e1000_vfadapt_i350:
2967		pba = E1000_READ_REG(hw, E1000_RXPBS);
2968		pba = e1000_rxpbs_adjust_82580(pba);
2969		break;
2970	case e1000_i210:
2971	case e1000_i211:
2972		pba = E1000_PBA_34K;
2973	default:
2974		break;
2975	}
2976
2977	/* Special needs in case of Jumbo frames */
2978	if ((hw->mac.type == e1000_82575) && (ifp->if_mtu > ETHERMTU)) {
2979		u32 tx_space, min_tx, min_rx;
2980		pba = E1000_READ_REG(hw, E1000_PBA);
2981		tx_space = pba >> 16;
2982		pba &= 0xffff;
2983		min_tx = (adapter->max_frame_size +
2984		    sizeof(struct e1000_tx_desc) - ETHERNET_FCS_SIZE) * 2;
2985		min_tx = roundup2(min_tx, 1024);
2986		min_tx >>= 10;
2987                min_rx = adapter->max_frame_size;
2988                min_rx = roundup2(min_rx, 1024);
2989                min_rx >>= 10;
2990		if (tx_space < min_tx &&
2991		    ((min_tx - tx_space) < pba)) {
2992			pba = pba - (min_tx - tx_space);
2993			/*
2994                         * if short on rx space, rx wins
2995                         * and must trump tx adjustment
2996			 */
2997                        if (pba < min_rx)
2998                                pba = min_rx;
2999		}
3000		E1000_WRITE_REG(hw, E1000_PBA, pba);
3001	}
3002
3003	INIT_DEBUGOUT1("igb_init: pba=%dK",pba);
3004
3005	/*
3006	 * These parameters control the automatic generation (Tx) and
3007	 * response (Rx) to Ethernet PAUSE frames.
3008	 * - High water mark should allow for at least two frames to be
3009	 *   received after sending an XOFF.
3010	 * - Low water mark works best when it is very near the high water mark.
3011	 *   This allows the receiver to restart by sending XON when it has
3012	 *   drained a bit.
3013	 */
3014	hwm = min(((pba << 10) * 9 / 10),
3015	    ((pba << 10) - 2 * adapter->max_frame_size));
3016
3017	if (hw->mac.type < e1000_82576) {
3018		fc->high_water = hwm & 0xFFF8;  /* 8-byte granularity */
3019		fc->low_water = fc->high_water - 8;
3020	} else {
3021		fc->high_water = hwm & 0xFFF0;  /* 16-byte granularity */
3022		fc->low_water = fc->high_water - 16;
3023	}
3024
3025	fc->pause_time = IGB_FC_PAUSE_TIME;
3026	fc->send_xon = TRUE;
3027	if (adapter->fc)
3028		fc->requested_mode = adapter->fc;
3029	else
3030		fc->requested_mode = e1000_fc_default;
3031
3032	/* Issue a global reset */
3033	e1000_reset_hw(hw);
3034	E1000_WRITE_REG(hw, E1000_WUC, 0);
3035
3036	if (e1000_init_hw(hw) < 0)
3037		device_printf(dev, "Hardware Initialization Failed\n");
3038
3039	/* Setup DMA Coalescing */
3040	if ((hw->mac.type > e1000_82580) &&
3041	    (hw->mac.type != e1000_i211)) {
3042		u32 dmac;
3043		u32 reg = ~E1000_DMACR_DMAC_EN;
3044
3045		if (adapter->dmac == 0) { /* Disabling it */
3046			E1000_WRITE_REG(hw, E1000_DMACR, reg);
3047			goto reset_out;
3048		}
3049
3050		/* Set starting thresholds */
3051		E1000_WRITE_REG(hw, E1000_DMCTXTH, 0);
3052		E1000_WRITE_REG(hw, E1000_DMCRTRH, 0);
3053
3054		hwm = 64 * pba - adapter->max_frame_size / 16;
3055		if (hwm < 64 * (pba - 6))
3056			hwm = 64 * (pba - 6);
3057		reg = E1000_READ_REG(hw, E1000_FCRTC);
3058		reg &= ~E1000_FCRTC_RTH_COAL_MASK;
3059		reg |= ((hwm << E1000_FCRTC_RTH_COAL_SHIFT)
3060		    & E1000_FCRTC_RTH_COAL_MASK);
3061		E1000_WRITE_REG(hw, E1000_FCRTC, reg);
3062
3063
3064		dmac = pba - adapter->max_frame_size / 512;
3065		if (dmac < pba - 10)
3066			dmac = pba - 10;
3067		reg = E1000_READ_REG(hw, E1000_DMACR);
3068		reg &= ~E1000_DMACR_DMACTHR_MASK;
3069		reg = ((dmac << E1000_DMACR_DMACTHR_SHIFT)
3070		    & E1000_DMACR_DMACTHR_MASK);
3071		/* transition to L0x or L1 if available..*/
3072		reg |= (E1000_DMACR_DMAC_EN | E1000_DMACR_DMAC_LX_MASK);
3073		/* timer = value in adapter->dmac in 32usec intervals */
3074		reg |= (adapter->dmac >> 5);
3075		E1000_WRITE_REG(hw, E1000_DMACR, reg);
3076
3077		/* Set the interval before transition */
3078		reg = E1000_READ_REG(hw, E1000_DMCTLX);
3079		reg |= 0x80000004;
3080		E1000_WRITE_REG(hw, E1000_DMCTLX, reg);
3081
3082		/* free space in tx packet buffer to wake from DMA coal */
3083		E1000_WRITE_REG(hw, E1000_DMCTXTH,
3084		    (20480 - (2 * adapter->max_frame_size)) >> 6);
3085
3086		/* make low power state decision controlled by DMA coal */
3087		reg = E1000_READ_REG(hw, E1000_PCIEMISC);
3088		reg &= ~E1000_PCIEMISC_LX_DECISION;
3089		E1000_WRITE_REG(hw, E1000_PCIEMISC, reg);
3090		device_printf(dev, "DMA Coalescing enabled\n");
3091
3092	} else if (hw->mac.type == e1000_82580) {
3093		u32 reg = E1000_READ_REG(hw, E1000_PCIEMISC);
3094		E1000_WRITE_REG(hw, E1000_DMACR, 0);
3095		E1000_WRITE_REG(hw, E1000_PCIEMISC,
3096		    reg & ~E1000_PCIEMISC_LX_DECISION);
3097	}
3098
3099reset_out:
3100	E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);
3101	e1000_get_phy_info(hw);
3102	e1000_check_for_link(hw);
3103	return;
3104}
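/*
 * Worked example of the flow control watermark math above (values are
 * approximate and only illustrative): with a 34KB receive packet buffer
 * (pba = 34) and a standard 1518-byte maximum frame,
 *
 *	hwm = min(34816 * 9 / 10, 34816 - 2 * 1518)
 *	    = min(31334, 31780) = 31334
 *
 * so on 82576 and later parts high_water = 31334 & 0xFFF0 = 31328 and
 * low_water = 31312, leaving room for roughly two more full frames to
 * arrive after an XOFF has been sent.
 */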
3105
3106/*********************************************************************
3107 *
3108 *  Setup networking device structure and register an interface.
3109 *
3110 **********************************************************************/
3111static int
3112igb_setup_interface(device_t dev, struct adapter *adapter)
3113{
3114	struct ifnet   *ifp;
3115
3116	INIT_DEBUGOUT("igb_setup_interface: begin");
3117
3118	ifp = adapter->ifp = if_alloc(IFT_ETHER);
3119	if (ifp == NULL) {
3120		device_printf(dev, "can not allocate ifnet structure\n");
3121		return (-1);
3122	}
3123	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
3124	ifp->if_init =  igb_init;
3125	ifp->if_softc = adapter;
3126	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
3127	ifp->if_ioctl = igb_ioctl;
3128#ifndef IGB_LEGACY_TX
3129	ifp->if_transmit = igb_mq_start;
3130	ifp->if_qflush = igb_qflush;
3131#else
3132	ifp->if_start = igb_start;
3133	IFQ_SET_MAXLEN(&ifp->if_snd, adapter->num_tx_desc - 1);
3134	ifp->if_snd.ifq_drv_maxlen = adapter->num_tx_desc - 1;
3135	IFQ_SET_READY(&ifp->if_snd);
3136#endif
3137
3138	ether_ifattach(ifp, adapter->hw.mac.addr);
3139
3140	ifp->if_capabilities = ifp->if_capenable = 0;
3141
3142	ifp->if_capabilities = IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM;
3143	ifp->if_capabilities |= IFCAP_TSO4;
3144	ifp->if_capabilities |= IFCAP_JUMBO_MTU;
3145	ifp->if_capenable = ifp->if_capabilities;
3146
3147	/* Advertise LRO capability, but don't enable it by default */
3148	ifp->if_capabilities |= IFCAP_LRO;
3149
3150#ifdef DEVICE_POLLING
3151	ifp->if_capabilities |= IFCAP_POLLING;
3152#endif
3153
3154	/*
3155	 * Tell the upper layer(s) we
3156	 * support full VLAN capability.
3157	 */
3158	ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);
3159	ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING
3160			     |  IFCAP_VLAN_HWTSO
3161			     |  IFCAP_VLAN_MTU;
3162	ifp->if_capenable |= IFCAP_VLAN_HWTAGGING
3163			  |  IFCAP_VLAN_HWTSO
3164			  |  IFCAP_VLAN_MTU;
3165
3166	/*
3167	** Don't turn this on by default: if VLANs are
3168	** created on another pseudo device (e.g. lagg),
3169	** VLAN events are not passed thru and operation
3170	** breaks, but with HW FILTER off it works. When
3171	** using VLANs directly on the igb driver you can
3172	** enable this and get full hardware tag filtering.
3173	*/
3174	ifp->if_capabilities |= IFCAP_VLAN_HWFILTER;
3175
3176	/*
3177	 * Specify the media types supported by this adapter and register
3178	 * callbacks to update media and link information
3179	 */
3180	ifmedia_init(&adapter->media, IFM_IMASK,
3181	    igb_media_change, igb_media_status);
3182	if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
3183	    (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
3184		ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_SX | IFM_FDX,
3185			    0, NULL);
3186		ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_SX, 0, NULL);
3187	} else {
3188		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T, 0, NULL);
3189		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T | IFM_FDX,
3190			    0, NULL);
3191		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX,
3192			    0, NULL);
3193		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
3194			    0, NULL);
3195		if (adapter->hw.phy.type != e1000_phy_ife) {
3196			ifmedia_add(&adapter->media,
3197				IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
3198			ifmedia_add(&adapter->media,
3199				IFM_ETHER | IFM_1000_T, 0, NULL);
3200		}
3201	}
3202	ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
3203	ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
3204	return (0);
3205}
3206
3207
3208/*
3209 * Manage DMA'able memory.
3210 */
3211static void
3212igb_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
3213{
3214	if (error)
3215		return;
3216	*(bus_addr_t *) arg = segs[0].ds_addr;
3217}
3218
3219static int
3220igb_dma_malloc(struct adapter *adapter, bus_size_t size,
3221        struct igb_dma_alloc *dma, int mapflags)
3222{
3223	int error;
3224
3225	error = bus_dma_tag_create(bus_get_dma_tag(adapter->dev), /* parent */
3226				IGB_DBA_ALIGN, 0,	/* alignment, bounds */
3227				BUS_SPACE_MAXADDR,	/* lowaddr */
3228				BUS_SPACE_MAXADDR,	/* highaddr */
3229				NULL, NULL,		/* filter, filterarg */
3230				size,			/* maxsize */
3231				1,			/* nsegments */
3232				size,			/* maxsegsize */
3233				0,			/* flags */
3234				NULL,			/* lockfunc */
3235				NULL,			/* lockarg */
3236				&dma->dma_tag);
3237	if (error) {
3238		device_printf(adapter->dev,
3239		    "%s: bus_dma_tag_create failed: %d\n",
3240		    __func__, error);
3241		goto fail_0;
3242	}
3243
3244	error = bus_dmamem_alloc(dma->dma_tag, (void**) &dma->dma_vaddr,
3245	    BUS_DMA_NOWAIT | BUS_DMA_COHERENT, &dma->dma_map);
3246	if (error) {
3247		device_printf(adapter->dev,
3248		    "%s: bus_dmamem_alloc(%ju) failed: %d\n",
3249		    __func__, (uintmax_t)size, error);
3250		goto fail_2;
3251	}
3252
3253	dma->dma_paddr = 0;
3254	error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
3255	    size, igb_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT);
3256	if (error || dma->dma_paddr == 0) {
3257		device_printf(adapter->dev,
3258		    "%s: bus_dmamap_load failed: %d\n",
3259		    __func__, error);
3260		goto fail_3;
3261	}
3262
3263	return (0);
3264
3265fail_3:
3266	bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3267fail_2:
3268	bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
3269	bus_dma_tag_destroy(dma->dma_tag);
3270fail_0:
3271	dma->dma_map = NULL;
3272	dma->dma_tag = NULL;
3273
3274	return (error);
3275}
3276
3277static void
3278igb_dma_free(struct adapter *adapter, struct igb_dma_alloc *dma)
3279{
3280	if (dma->dma_tag == NULL)
3281		return;
3282	if (dma->dma_map != NULL) {
3283		bus_dmamap_sync(dma->dma_tag, dma->dma_map,
3284		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
3285		bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3286		bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
3287		dma->dma_map = NULL;
3288	}
3289	bus_dma_tag_destroy(dma->dma_tag);
3290	dma->dma_tag = NULL;
3291}
3292
3293
3294/*********************************************************************
3295 *
3296 *  Allocate memory for the transmit and receive rings, and then
3297 *  the descriptors associated with each, called only once at attach.
3298 *
3299 **********************************************************************/
3300static int
3301igb_allocate_queues(struct adapter *adapter)
3302{
3303	device_t dev = adapter->dev;
3304	struct igb_queue	*que = NULL;
3305	struct tx_ring		*txr = NULL;
3306	struct rx_ring		*rxr = NULL;
3307	int rsize, tsize, error = E1000_SUCCESS;
3308	int txconf = 0, rxconf = 0;
3309
3310	/* First allocate the top level queue structs */
3311	if (!(adapter->queues =
3312	    (struct igb_queue *) malloc(sizeof(struct igb_queue) *
3313	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3314		device_printf(dev, "Unable to allocate queue memory\n");
3315		error = ENOMEM;
3316		goto fail;
3317	}
3318
3319	/* Next allocate the TX ring struct memory */
3320	if (!(adapter->tx_rings =
3321	    (struct tx_ring *) malloc(sizeof(struct tx_ring) *
3322	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3323		device_printf(dev, "Unable to allocate TX ring memory\n");
3324		error = ENOMEM;
3325		goto tx_fail;
3326	}
3327
3328	/* Now allocate the RX */
3329	if (!(adapter->rx_rings =
3330	    (struct rx_ring *) malloc(sizeof(struct rx_ring) *
3331	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3332		device_printf(dev, "Unable to allocate RX ring memory\n");
3333		error = ENOMEM;
3334		goto rx_fail;
3335	}
3336
3337	tsize = roundup2(adapter->num_tx_desc *
3338	    sizeof(union e1000_adv_tx_desc), IGB_DBA_ALIGN);
3339	/*
3340	 * Now set up the TX queues, txconf is needed to handle the
3341	 * possibility that things fail midcourse and we need to
3342	 * undo memory gracefully
3343	 */
3344	for (int i = 0; i < adapter->num_queues; i++, txconf++) {
3345		/* Set up some basics */
3346		txr = &adapter->tx_rings[i];
3347		txr->adapter = adapter;
3348		txr->me = i;
3349
3350		/* Initialize the TX lock */
3351		snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
3352		    device_get_nameunit(dev), txr->me);
3353		mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF);
3354
3355		if (igb_dma_malloc(adapter, tsize,
3356			&txr->txdma, BUS_DMA_NOWAIT)) {
3357			device_printf(dev,
3358			    "Unable to allocate TX Descriptor memory\n");
3359			error = ENOMEM;
3360			goto err_tx_desc;
3361		}
3362		txr->tx_base = (struct e1000_tx_desc *)txr->txdma.dma_vaddr;
3363		bzero((void *)txr->tx_base, tsize);
3364
3365        	/* Now allocate transmit buffers for the ring */
3366        	if (igb_allocate_transmit_buffers(txr)) {
3367			device_printf(dev,
3368			    "Critical Failure setting up transmit buffers\n");
3369			error = ENOMEM;
3370			goto err_tx_desc;
3371        	}
3372#ifndef IGB_LEGACY_TX
3373		/* Allocate a buf ring */
3374		txr->br = buf_ring_alloc(igb_buf_ring_size, M_DEVBUF,
3375		    M_WAITOK, &txr->tx_mtx);
3376#endif
3377	}
3378
3379	/*
3380	 * Next the RX queues...
3381	 */
3382	rsize = roundup2(adapter->num_rx_desc *
3383	    sizeof(union e1000_adv_rx_desc), IGB_DBA_ALIGN);
3384	for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
3385		rxr = &adapter->rx_rings[i];
3386		rxr->adapter = adapter;
3387		rxr->me = i;
3388
3389		/* Initialize the RX lock */
3390		snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
3391		    device_get_nameunit(dev), rxr->me);
3392		mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF);
3393
3394		if (igb_dma_malloc(adapter, rsize,
3395			&rxr->rxdma, BUS_DMA_NOWAIT)) {
3396			device_printf(dev,
3397			    "Unable to allocate RxDescriptor memory\n");
3398			error = ENOMEM;
3399			goto err_rx_desc;
3400		}
3401		rxr->rx_base = (union e1000_adv_rx_desc *)rxr->rxdma.dma_vaddr;
3402		bzero((void *)rxr->rx_base, rsize);
3403
3404        	/* Allocate receive buffers for the ring*/
3405		if (igb_allocate_receive_buffers(rxr)) {
3406			device_printf(dev,
3407			    "Critical Failure setting up receive buffers\n");
3408			error = ENOMEM;
3409			goto err_rx_desc;
3410		}
3411	}
3412
3413	/*
3414	** Finally set up the queue holding structs
3415	*/
3416	for (int i = 0; i < adapter->num_queues; i++) {
3417		que = &adapter->queues[i];
3418		que->adapter = adapter;
3419		que->txr = &adapter->tx_rings[i];
3420		que->rxr = &adapter->rx_rings[i];
3421	}
3422
3423	return (0);
3424
3425err_rx_desc:
3426	for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
3427		igb_dma_free(adapter, &rxr->rxdma);
3428err_tx_desc:
3429	for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
3430		igb_dma_free(adapter, &txr->txdma);
3431	free(adapter->rx_rings, M_DEVBUF);
3432rx_fail:
3433#ifndef IGB_LEGACY_TX
3434	buf_ring_free(txr->br, M_DEVBUF);
3435#endif
3436	free(adapter->tx_rings, M_DEVBUF);
3437tx_fail:
3438	free(adapter->queues, M_DEVBUF);
3439fail:
3440	return (error);
3441}
3442
3443/*********************************************************************
3444 *
3445 *  Allocate memory for tx_buffer structures. The tx_buffer stores all
3446 *  the information needed to transmit a packet on the wire. This is
3447 *  called only once at attach, setup is done every reset.
3448 *
3449 **********************************************************************/
3450static int
3451igb_allocate_transmit_buffers(struct tx_ring *txr)
3452{
3453	struct adapter *adapter = txr->adapter;
3454	device_t dev = adapter->dev;
3455	struct igb_tx_buffer *txbuf;
3456	int error, i;
3457
3458	/*
3459	 * Setup DMA descriptor areas.
3460	 */
3461	if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
3462			       1, 0,			/* alignment, bounds */
3463			       BUS_SPACE_MAXADDR,	/* lowaddr */
3464			       BUS_SPACE_MAXADDR,	/* highaddr */
3465			       NULL, NULL,		/* filter, filterarg */
3466			       IGB_TSO_SIZE,		/* maxsize */
3467			       IGB_MAX_SCATTER,		/* nsegments */
3468			       PAGE_SIZE,		/* maxsegsize */
3469			       0,			/* flags */
3470			       NULL,			/* lockfunc */
3471			       NULL,			/* lockfuncarg */
3472			       &txr->txtag))) {
3473		device_printf(dev,"Unable to allocate TX DMA tag\n");
3474		goto fail;
3475	}
3476
3477	if (!(txr->tx_buffers =
3478	    (struct igb_tx_buffer *) malloc(sizeof(struct igb_tx_buffer) *
3479	    adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3480		device_printf(dev, "Unable to allocate tx_buffer memory\n");
3481		error = ENOMEM;
3482		goto fail;
3483	}
3484
3485        /* Create the descriptor buffer dma maps */
3486	txbuf = txr->tx_buffers;
3487	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3488		error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
3489		if (error != 0) {
3490			device_printf(dev, "Unable to create TX DMA map\n");
3491			goto fail;
3492		}
3493	}
3494
3495	return 0;
3496fail:
3497	/* We free all, it handles case where we are in the middle */
3498	igb_free_transmit_structures(adapter);
3499	return (error);
3500}
3501
3502/*********************************************************************
3503 *
3504 *  Initialize a transmit ring.
3505 *
3506 **********************************************************************/
3507static void
3508igb_setup_transmit_ring(struct tx_ring *txr)
3509{
3510	struct adapter *adapter = txr->adapter;
3511	struct igb_tx_buffer *txbuf;
3512	int i;
3513#ifdef DEV_NETMAP
3514	struct netmap_adapter *na = NA(adapter->ifp);
3515	struct netmap_slot *slot;
3516#endif /* DEV_NETMAP */
3517
3518	/* Clear the old descriptor contents */
3519	IGB_TX_LOCK(txr);
3520#ifdef DEV_NETMAP
3521	slot = netmap_reset(na, NR_TX, txr->me, 0);
3522#endif /* DEV_NETMAP */
3523	bzero((void *)txr->tx_base,
3524	      (sizeof(union e1000_adv_tx_desc)) * adapter->num_tx_desc);
3525	/* Reset indices */
3526	txr->next_avail_desc = 0;
3527	txr->next_to_clean = 0;
3528
3529	/* Free any existing tx buffers. */
3530        txbuf = txr->tx_buffers;
3531	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3532		if (txbuf->m_head != NULL) {
3533			bus_dmamap_sync(txr->txtag, txbuf->map,
3534			    BUS_DMASYNC_POSTWRITE);
3535			bus_dmamap_unload(txr->txtag, txbuf->map);
3536			m_freem(txbuf->m_head);
3537			txbuf->m_head = NULL;
3538		}
3539#ifdef DEV_NETMAP
3540		if (slot) {
3541			int si = netmap_idx_n2k(&na->tx_rings[txr->me], i);
3542			/* no need to set the address */
3543			netmap_load_map(txr->txtag, txbuf->map, NMB(slot + si));
3544		}
3545#endif /* DEV_NETMAP */
3546		/* clear the watch index */
3547		txbuf->next_eop = -1;
3548        }
3549
3550	/* Set number of descriptors available */
3551	txr->tx_avail = adapter->num_tx_desc;
3552
3553	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3554	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3555	IGB_TX_UNLOCK(txr);
3556}
3557
3558/*********************************************************************
3559 *
3560 *  Initialize all transmit rings.
3561 *
3562 **********************************************************************/
3563static void
3564igb_setup_transmit_structures(struct adapter *adapter)
3565{
3566	struct tx_ring *txr = adapter->tx_rings;
3567
3568	for (int i = 0; i < adapter->num_queues; i++, txr++)
3569		igb_setup_transmit_ring(txr);
3570
3571	return;
3572}
3573
3574/*********************************************************************
3575 *
3576 *  Enable transmit unit.
3577 *
3578 **********************************************************************/
3579static void
3580igb_initialize_transmit_units(struct adapter *adapter)
3581{
3582	struct tx_ring	*txr = adapter->tx_rings;
3583	struct e1000_hw *hw = &adapter->hw;
3584	u32		tctl, txdctl;
3585
3586	INIT_DEBUGOUT("igb_initialize_transmit_units: begin");
3587	tctl = txdctl = 0;
3588
3589	/* Setup the Tx Descriptor Rings */
3590	for (int i = 0; i < adapter->num_queues; i++, txr++) {
3591		u64 bus_addr = txr->txdma.dma_paddr;
3592
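		/*
		** Program this queue's ring length (in bytes) and its
		** 64-bit DMA base address, split across TDBAH/TDBAL.
		*/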
3593		E1000_WRITE_REG(hw, E1000_TDLEN(i),
3594		    adapter->num_tx_desc * sizeof(struct e1000_tx_desc));
3595		E1000_WRITE_REG(hw, E1000_TDBAH(i),
3596		    (uint32_t)(bus_addr >> 32));
3597		E1000_WRITE_REG(hw, E1000_TDBAL(i),
3598		    (uint32_t)bus_addr);
3599
3600		/* Setup the HW Tx Head and Tail descriptor pointers */
3601		E1000_WRITE_REG(hw, E1000_TDT(i), 0);
3602		E1000_WRITE_REG(hw, E1000_TDH(i), 0);
3603
3604		HW_DEBUGOUT2("Base = %x, Length = %x\n",
3605		    E1000_READ_REG(hw, E1000_TDBAL(i)),
3606		    E1000_READ_REG(hw, E1000_TDLEN(i)));
3607
3608		txr->queue_status = IGB_QUEUE_IDLE;
3609
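		/*
		** Pack the prefetch, host and write-back thresholds into
		** TXDCTL (at bit offsets 0, 8 and 16) and enable the queue.
		*/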
3610		txdctl |= IGB_TX_PTHRESH;
3611		txdctl |= IGB_TX_HTHRESH << 8;
3612		txdctl |= IGB_TX_WTHRESH << 16;
3613		txdctl |= E1000_TXDCTL_QUEUE_ENABLE;
3614		E1000_WRITE_REG(hw, E1000_TXDCTL(i), txdctl);
3615	}
3616
3617	if (adapter->vf_ifp)
3618		return;
3619
3620	e1000_config_collision_dist(hw);
3621
3622	/* Program the Transmit Control Register */
3623	tctl = E1000_READ_REG(hw, E1000_TCTL);
3624	tctl &= ~E1000_TCTL_CT;
3625	tctl |= (E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN |
3626		   (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT));
3627
3628	/* This write will effectively turn on the transmit unit. */
3629	E1000_WRITE_REG(hw, E1000_TCTL, tctl);
3630}
3631
3632/*********************************************************************
3633 *
3634 *  Free all transmit rings.
3635 *
3636 **********************************************************************/
3637static void
3638igb_free_transmit_structures(struct adapter *adapter)
3639{
3640	struct tx_ring *txr = adapter->tx_rings;
3641
3642	for (int i = 0; i < adapter->num_queues; i++, txr++) {
3643		IGB_TX_LOCK(txr);
3644		igb_free_transmit_buffers(txr);
3645		igb_dma_free(adapter, &txr->txdma);
3646		IGB_TX_UNLOCK(txr);
3647		IGB_TX_LOCK_DESTROY(txr);
3648	}
3649	free(adapter->tx_rings, M_DEVBUF);
3650}
3651
3652/*********************************************************************
3653 *
3654 *  Free transmit ring related data structures.
3655 *
3656 **********************************************************************/
3657static void
3658igb_free_transmit_buffers(struct tx_ring *txr)
3659{
3660	struct adapter *adapter = txr->adapter;
3661	struct igb_tx_buffer *tx_buffer;
3662	int             i;
3663
3664	INIT_DEBUGOUT("free_transmit_ring: begin");
3665
3666	if (txr->tx_buffers == NULL)
3667		return;
3668
3669	tx_buffer = txr->tx_buffers;
3670	for (i = 0; i < adapter->num_tx_desc; i++, tx_buffer++) {
3671		if (tx_buffer->m_head != NULL) {
3672			bus_dmamap_sync(txr->txtag, tx_buffer->map,
3673			    BUS_DMASYNC_POSTWRITE);
3674			bus_dmamap_unload(txr->txtag,
3675			    tx_buffer->map);
3676			m_freem(tx_buffer->m_head);
3677			tx_buffer->m_head = NULL;
3678			if (tx_buffer->map != NULL) {
3679				bus_dmamap_destroy(txr->txtag,
3680				    tx_buffer->map);
3681				tx_buffer->map = NULL;
3682			}
3683		} else if (tx_buffer->map != NULL) {
3684			bus_dmamap_unload(txr->txtag,
3685			    tx_buffer->map);
3686			bus_dmamap_destroy(txr->txtag,
3687			    tx_buffer->map);
3688			tx_buffer->map = NULL;
3689		}
3690	}
3691#ifndef IGB_LEGACY_TX
3692	if (txr->br != NULL)
3693		buf_ring_free(txr->br, M_DEVBUF);
3694#endif
3695	if (txr->tx_buffers != NULL) {
3696		free(txr->tx_buffers, M_DEVBUF);
3697		txr->tx_buffers = NULL;
3698	}
3699	if (txr->txtag != NULL) {
3700		bus_dma_tag_destroy(txr->txtag);
3701		txr->txtag = NULL;
3702	}
3703	return;
3704}
3705
3706/**********************************************************************
3707 *
3708 *  Setup work for hardware segmentation offload (TSO)
3709 *
3710 **********************************************************************/
3711static bool
3712igb_tso_setup(struct tx_ring *txr, struct mbuf *mp, int ehdrlen,
3713	struct ip *ip, struct tcphdr *th)
3714{
3715	struct adapter *adapter = txr->adapter;
3716	struct e1000_adv_tx_context_desc *TXD;
3717	struct igb_tx_buffer        *tx_buffer;
3718	u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0;
3719	u32 mss_l4len_idx = 0;
3720	u16 vtag = 0;
3721	int ctxd, ip_hlen, tcp_hlen;
3722
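	/*
	 * TSO consumes one advanced context descriptor, built in place
	 * in the ring, carrying the header lengths, MSS and L4 type
	 * for the packet data descriptors that follow.
	 */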
3723	ctxd = txr->next_avail_desc;
3724	tx_buffer = &txr->tx_buffers[ctxd];
3725	TXD = (struct e1000_adv_tx_context_desc *) &txr->tx_base[ctxd];
3726
3727	ip->ip_sum = 0;
3728	ip_hlen = ip->ip_hl << 2;
3729	tcp_hlen = th->th_off << 2;
3730
3731	/* VLAN MACLEN IPLEN */
3732	if (mp->m_flags & M_VLANTAG) {
3733		vtag = htole16(mp->m_pkthdr.ether_vtag);
3734		vlan_macip_lens |= (vtag << E1000_ADVTXD_VLAN_SHIFT);
3735	}
3736
3737	vlan_macip_lens |= (ehdrlen << E1000_ADVTXD_MACLEN_SHIFT);
3738	vlan_macip_lens |= ip_hlen;
3739	TXD->vlan_macip_lens |= htole32(vlan_macip_lens);
3740
3741	/* ADV DTYPE TUCMD */
3742	type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
3743	type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP;
3744	type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4;
3745	TXD->type_tucmd_mlhl |= htole32(type_tucmd_mlhl);
3746
3747	/* MSS L4LEN IDX */
3748	mss_l4len_idx |= (mp->m_pkthdr.tso_segsz << E1000_ADVTXD_MSS_SHIFT);
3749	mss_l4len_idx |= (tcp_hlen << E1000_ADVTXD_L4LEN_SHIFT);
3750	/* 82575 needs the queue index added */
3751	if (adapter->hw.mac.type == e1000_82575)
3752		mss_l4len_idx |= txr->me << 4;
3753	TXD->mss_l4len_idx = htole32(mss_l4len_idx);
3754
3755	TXD->seqnum_seed = htole32(0);
3756	tx_buffer->m_head = NULL;
3757	tx_buffer->next_eop = -1;
3758
3759	if (++ctxd == adapter->num_tx_desc)
3760		ctxd = 0;
3761
3762	txr->tx_avail--;
3763	txr->next_avail_desc = ctxd;
3764	return TRUE;
3765}
3766
3767
3768/*********************************************************************
3769 *
3770 *  Context Descriptor setup for VLAN or CSUM
3771 *
3772 **********************************************************************/
3773
3774static bool
3775igb_tx_ctx_setup(struct tx_ring *txr, struct mbuf *mp)
3776{
3777	struct adapter *adapter = txr->adapter;
3778	struct e1000_adv_tx_context_desc *TXD;
3779	struct igb_tx_buffer        *tx_buffer;
3780	u32 vlan_macip_lens, type_tucmd_mlhl, mss_l4len_idx;
3781	struct ether_vlan_header *eh;
3782	struct ip *ip = NULL;
3783	struct ip6_hdr *ip6;
3784	int  ehdrlen, ctxd, ip_hlen = 0;
3785	u16	etype, vtag = 0;
3786	u8	ipproto = 0;
3787	bool	offload = TRUE;
3788
3789	if ((mp->m_pkthdr.csum_flags & CSUM_OFFLOAD) == 0)
3790		offload = FALSE;
3791
3792	vlan_macip_lens = type_tucmd_mlhl = mss_l4len_idx = 0;
3793	ctxd = txr->next_avail_desc;
3794	tx_buffer = &txr->tx_buffers[ctxd];
3795	TXD = (struct e1000_adv_tx_context_desc *) &txr->tx_base[ctxd];
3796
3797	/*
3798	** In advanced descriptors the vlan tag must
3799	** be placed into the context descriptor, thus
3800	** we need to be here just for that setup.
3801	*/
3802	if (mp->m_flags & M_VLANTAG) {
3803		vtag = htole16(mp->m_pkthdr.ether_vtag);
3804		vlan_macip_lens |= (vtag << E1000_ADVTXD_VLAN_SHIFT);
3805	} else if (offload == FALSE)
3806		return FALSE;
3807
3808	/*
3809	 * Determine where frame payload starts.
3810	 * Jump over vlan headers if already present,
3811	 * helpful for QinQ too.
3812	 */
3813	eh = mtod(mp, struct ether_vlan_header *);
3814	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
3815		etype = ntohs(eh->evl_proto);
3816		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3817	} else {
3818		etype = ntohs(eh->evl_encap_proto);
3819		ehdrlen = ETHER_HDR_LEN;
3820	}
3821
3822	/* Set the ether header length */
3823	vlan_macip_lens |= ehdrlen << E1000_ADVTXD_MACLEN_SHIFT;
3824
3825	switch (etype) {
3826		case ETHERTYPE_IP:
3827			ip = (struct ip *)(mp->m_data + ehdrlen);
3828			ip_hlen = ip->ip_hl << 2;
3829			if (mp->m_len < ehdrlen + ip_hlen) {
3830				offload = FALSE;
3831				break;
3832			}
3833			ipproto = ip->ip_p;
3834			type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4;
3835			break;
3836		case ETHERTYPE_IPV6:
3837			ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
3838			ip_hlen = sizeof(struct ip6_hdr);
3839			ipproto = ip6->ip6_nxt;
3840			type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV6;
3841			break;
3842		default:
3843			offload = FALSE;
3844			break;
3845	}
3846
3847	vlan_macip_lens |= ip_hlen;
3848	type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
3849
3850	switch (ipproto) {
3851		case IPPROTO_TCP:
3852			if (mp->m_pkthdr.csum_flags & CSUM_TCP)
3853				type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP;
3854			break;
3855		case IPPROTO_UDP:
3856			if (mp->m_pkthdr.csum_flags & CSUM_UDP)
3857				type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_UDP;
3858			break;
3859#if __FreeBSD_version >= 800000
3860		case IPPROTO_SCTP:
3861			if (mp->m_pkthdr.csum_flags & CSUM_SCTP)
3862				type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_SCTP;
3863			break;
3864#endif
3865		default:
3866			offload = FALSE;
3867			break;
3868	}
3869
3870	/* 82575 needs the queue index added */
3871	if (adapter->hw.mac.type == e1000_82575)
3872		mss_l4len_idx = txr->me << 4;
3873
3874	/* Now copy bits into descriptor */
3875	TXD->vlan_macip_lens |= htole32(vlan_macip_lens);
3876	TXD->type_tucmd_mlhl |= htole32(type_tucmd_mlhl);
3877	TXD->seqnum_seed = htole32(0);
3878	TXD->mss_l4len_idx = htole32(mss_l4len_idx);
3879
3880	tx_buffer->m_head = NULL;
3881	tx_buffer->next_eop = -1;
3882
3883	/* We've consumed the first desc, adjust counters */
3884	if (++ctxd == adapter->num_tx_desc)
3885		ctxd = 0;
3886	txr->next_avail_desc = ctxd;
3887	--txr->tx_avail;
3888
3889        return (offload);
3890}
3891
3892
3893/**********************************************************************
3894 *
3895 *  Examine each tx_buffer in the used queue. If the hardware is done
3896 *  processing the packet then free associated resources. The
3897 *  tx_buffer is put back on the free queue.
3898 *
3899 *  A TRUE return means there's work in the ring to clean; FALSE means it's empty.
3900 **********************************************************************/
3901static bool
3902igb_txeof(struct tx_ring *txr)
3903{
3904	struct adapter	*adapter = txr->adapter;
3905        int first, last, done, processed;
3906        struct igb_tx_buffer *tx_buffer;
3907        struct e1000_tx_desc   *tx_desc, *eop_desc;
3908	struct ifnet   *ifp = adapter->ifp;
3909
3910	IGB_TX_LOCK_ASSERT(txr);
3911
3912#ifdef DEV_NETMAP
3913	if (netmap_tx_irq(ifp, txr->me))
3914		return (FALSE);
3915#endif /* DEV_NETMAP */
3916        if (txr->tx_avail == adapter->num_tx_desc) {
3917		txr->queue_status = IGB_QUEUE_IDLE;
3918                return FALSE;
3919	}
3920
3921	processed = 0;
3922        first = txr->next_to_clean;
3923        tx_desc = &txr->tx_base[first];
3924        tx_buffer = &txr->tx_buffers[first];
3925	last = tx_buffer->next_eop;
3926        eop_desc = &txr->tx_base[last];
3927
3928	/*
3929	 * What this does is get the index of the first
3930	 * descriptor AFTER the EOP of the first packet,
3931	 * so that the inner while loop can use a
3932	 * simple comparison.
3933	 */
3934	if (++last == adapter->num_tx_desc)
3935 		last = 0;
3936	done = last;
3937
3938        bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3939            BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
3940
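	/*
	 * The outer loop advances one packet at a time, gated on the
	 * EOP descriptor's DD (done) bit; the inner loop releases every
	 * descriptor and mbuf belonging to that packet.
	 */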
3941        while (eop_desc->upper.fields.status & E1000_TXD_STAT_DD) {
3942		/* We clean the range of the packet */
3943		while (first != done) {
3944                	tx_desc->upper.data = 0;
3945                	tx_desc->lower.data = 0;
3946                	tx_desc->buffer_addr = 0;
3947                	++txr->tx_avail;
3948			++processed;
3949
3950			if (tx_buffer->m_head) {
3951				txr->bytes +=
3952				    tx_buffer->m_head->m_pkthdr.len;
3953				bus_dmamap_sync(txr->txtag,
3954				    tx_buffer->map,
3955				    BUS_DMASYNC_POSTWRITE);
3956				bus_dmamap_unload(txr->txtag,
3957				    tx_buffer->map);
3958
3959                        	m_freem(tx_buffer->m_head);
3960                        	tx_buffer->m_head = NULL;
3961                	}
3962			tx_buffer->next_eop = -1;
3963			txr->watchdog_time = ticks;
3964
3965	                if (++first == adapter->num_tx_desc)
3966				first = 0;
3967
3968	                tx_buffer = &txr->tx_buffers[first];
3969			tx_desc = &txr->tx_base[first];
3970		}
3971		++txr->packets;
3972		++ifp->if_opackets;
3973		/* See if we can continue to the next packet */
3974		last = tx_buffer->next_eop;
3975		if (last != -1) {
3976        		eop_desc = &txr->tx_base[last];
3977			/* Get new done point */
3978			if (++last == adapter->num_tx_desc) last = 0;
3979			done = last;
3980		} else
3981			break;
3982        }
3983        bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3984            BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3985
3986        txr->next_to_clean = first;
3987
3988	/*
3989	** Watchdog calculation: we know there's work
3990	** outstanding or the first return would have
3991	** been taken, so nothing processed for too
3992	** long indicates a hang.
3993	*/
3994	if ((!processed) && ((ticks - txr->watchdog_time) > IGB_WATCHDOG))
3995		txr->queue_status |= IGB_QUEUE_HUNG;
3996        /*
3997         * If we have a minimum free,
3998         * clear depleted state bit
3999         */
4000        if (txr->tx_avail >= IGB_QUEUE_THRESHOLD)
4001                txr->queue_status &= ~IGB_QUEUE_DEPLETED;
4002
4003	/* All clean, turn off the watchdog */
4004	if (txr->tx_avail == adapter->num_tx_desc) {
4005		txr->queue_status = IGB_QUEUE_IDLE;
4006		return (FALSE);
4007        }
4008
4009	return (TRUE);
4010}
4011
4012/*********************************************************************
4013 *
4014 *  Refresh mbuf buffers for RX descriptor rings
4015 *   - now keeps its own state so discards due to resource
4016 *     exhaustion are unnecessary; if an mbuf cannot be obtained
4017 *     it just returns, keeping its placeholder, so it can simply
4018 *     be called again later to retry.
4019 *
4020 **********************************************************************/
4021static void
4022igb_refresh_mbufs(struct rx_ring *rxr, int limit)
4023{
4024	struct adapter		*adapter = rxr->adapter;
4025	bus_dma_segment_t	hseg[1];
4026	bus_dma_segment_t	pseg[1];
4027	struct igb_rx_buf	*rxbuf;
4028	struct mbuf		*mh, *mp;
4029	int			i, j, nsegs, error;
4030	bool			refreshed = FALSE;
4031
4032	i = j = rxr->next_to_refresh;
4033	/*
4034	** Get one descriptor beyond
4035	** our work mark to control
4036	** the loop.
4037	*/
4038	if (++j == adapter->num_rx_desc)
4039		j = 0;
4040
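	/*
	 * 'i' indexes the slot being refreshed while 'j' runs one
	 * descriptor ahead as the loop-control mark; the loop ends
	 * once 'j' reaches 'limit'.
	 */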
4041	while (j != limit) {
4042		rxbuf = &rxr->rx_buffers[i];
4043		/* No hdr mbuf used with header split off */
4044		if (rxr->hdr_split == FALSE)
4045			goto no_split;
4046		if (rxbuf->m_head == NULL) {
4047			mh = m_gethdr(M_NOWAIT, MT_DATA);
4048			if (mh == NULL)
4049				goto update;
4050		} else
4051			mh = rxbuf->m_head;
4052
4053		mh->m_pkthdr.len = mh->m_len = MHLEN;
4055		mh->m_flags |= M_PKTHDR;
4056		/* Get the memory mapping */
4057		error = bus_dmamap_load_mbuf_sg(rxr->htag,
4058		    rxbuf->hmap, mh, hseg, &nsegs, BUS_DMA_NOWAIT);
4059		if (error != 0) {
4060			printf("Refresh mbufs: hdr dmamap load"
4061			    " failure - %d\n", error);
4062			m_free(mh);
4063			rxbuf->m_head = NULL;
4064			goto update;
4065		}
4066		rxbuf->m_head = mh;
4067		bus_dmamap_sync(rxr->htag, rxbuf->hmap,
4068		    BUS_DMASYNC_PREREAD);
4069		rxr->rx_base[i].read.hdr_addr =
4070		    htole64(hseg[0].ds_addr);
4071no_split:
4072		if (rxbuf->m_pack == NULL) {
4073			mp = m_getjcl(M_NOWAIT, MT_DATA,
4074			    M_PKTHDR, adapter->rx_mbuf_sz);
4075			if (mp == NULL)
4076				goto update;
4077		} else
4078			mp = rxbuf->m_pack;
4079
4080		mp->m_pkthdr.len = mp->m_len = adapter->rx_mbuf_sz;
4081		/* Get the memory mapping */
4082		error = bus_dmamap_load_mbuf_sg(rxr->ptag,
4083		    rxbuf->pmap, mp, pseg, &nsegs, BUS_DMA_NOWAIT);
4084		if (error != 0) {
4085			printf("Refresh mbufs: payload dmamap load"
4086			    " failure - %d\n", error);
4087			m_free(mp);
4088			rxbuf->m_pack = NULL;
4089			goto update;
4090		}
4091		rxbuf->m_pack = mp;
4092		bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
4093		    BUS_DMASYNC_PREREAD);
4094		rxr->rx_base[i].read.pkt_addr =
4095		    htole64(pseg[0].ds_addr);
4096		refreshed = TRUE; /* I feel wefreshed :) */
4097
4098		i = j; /* our next is precalculated */
4099		rxr->next_to_refresh = i;
4100		if (++j == adapter->num_rx_desc)
4101			j = 0;
4102	}
4103update:
4104	if (refreshed) /* update tail */
4105		E1000_WRITE_REG(&adapter->hw,
4106		    E1000_RDT(rxr->me), rxr->next_to_refresh);
4107	return;
4108}
4109
4110
4111/*********************************************************************
4112 *
4113 *  Allocate memory for rx_buffer structures. Since we use one
4114 *  rx_buffer per received packet, the maximum number of rx_buffer's
4115 *  that we'll need is equal to the number of receive descriptors
4116 *  that we've allocated.
4117 *
4118 **********************************************************************/
4119static int
4120igb_allocate_receive_buffers(struct rx_ring *rxr)
4121{
4122	struct	adapter 	*adapter = rxr->adapter;
4123	device_t 		dev = adapter->dev;
4124	struct igb_rx_buf	*rxbuf;
4125	int             	i, bsize, error;
4126
4127	bsize = sizeof(struct igb_rx_buf) * adapter->num_rx_desc;
4128	if (!(rxr->rx_buffers =
4129	    (struct igb_rx_buf *) malloc(bsize,
4130	    M_DEVBUF, M_NOWAIT | M_ZERO))) {
4131		device_printf(dev, "Unable to allocate rx_buffer memory\n");
4132		error = ENOMEM;
4133		goto fail;
4134	}
4135
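	/*
	 * Two DMA tags are used: htag maps the small header mbufs
	 * (up to MSIZE) and ptag maps the payload clusters (up to
	 * MJUM9BYTES), matching the header-split receive scheme.
	 */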
4136	if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
4137				   1, 0,		/* alignment, bounds */
4138				   BUS_SPACE_MAXADDR,	/* lowaddr */
4139				   BUS_SPACE_MAXADDR,	/* highaddr */
4140				   NULL, NULL,		/* filter, filterarg */
4141				   MSIZE,		/* maxsize */
4142				   1,			/* nsegments */
4143				   MSIZE,		/* maxsegsize */
4144				   0,			/* flags */
4145				   NULL,		/* lockfunc */
4146				   NULL,		/* lockfuncarg */
4147				   &rxr->htag))) {
4148		device_printf(dev, "Unable to create RX DMA tag\n");
4149		goto fail;
4150	}
4151
4152	if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
4153				   1, 0,		/* alignment, bounds */
4154				   BUS_SPACE_MAXADDR,	/* lowaddr */
4155				   BUS_SPACE_MAXADDR,	/* highaddr */
4156				   NULL, NULL,		/* filter, filterarg */
4157				   MJUM9BYTES,		/* maxsize */
4158				   1,			/* nsegments */
4159				   MJUM9BYTES,		/* maxsegsize */
4160				   0,			/* flags */
4161				   NULL,		/* lockfunc */
4162				   NULL,		/* lockfuncarg */
4163				   &rxr->ptag))) {
4164		device_printf(dev, "Unable to create RX payload DMA tag\n");
4165		goto fail;
4166	}
4167
4168	for (i = 0; i < adapter->num_rx_desc; i++) {
4169		rxbuf = &rxr->rx_buffers[i];
4170		error = bus_dmamap_create(rxr->htag,
4171		    BUS_DMA_NOWAIT, &rxbuf->hmap);
4172		if (error) {
4173			device_printf(dev,
4174			    "Unable to create RX head DMA maps\n");
4175			goto fail;
4176		}
4177		error = bus_dmamap_create(rxr->ptag,
4178		    BUS_DMA_NOWAIT, &rxbuf->pmap);
4179		if (error) {
4180			device_printf(dev,
4181			    "Unable to create RX packet DMA maps\n");
4182			goto fail;
4183		}
4184	}
4185
4186	return (0);
4187
4188fail:
4189	/* Frees all, but can handle partial completion */
4190	igb_free_receive_structures(adapter);
4191	return (error);
4192}
4193
4194
4195static void
4196igb_free_receive_ring(struct rx_ring *rxr)
4197{
4198	struct	adapter		*adapter = rxr->adapter;
4199	struct igb_rx_buf	*rxbuf;
4200
4201
4202	for (int i = 0; i < adapter->num_rx_desc; i++) {
4203		rxbuf = &rxr->rx_buffers[i];
4204		if (rxbuf->m_head != NULL) {
4205			bus_dmamap_sync(rxr->htag, rxbuf->hmap,
4206			    BUS_DMASYNC_POSTREAD);
4207			bus_dmamap_unload(rxr->htag, rxbuf->hmap);
4208			rxbuf->m_head->m_flags |= M_PKTHDR;
4209			m_freem(rxbuf->m_head);
4210		}
4211		if (rxbuf->m_pack != NULL) {
4212			bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
4213			    BUS_DMASYNC_POSTREAD);
4214			bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
4215			rxbuf->m_pack->m_flags |= M_PKTHDR;
4216			m_freem(rxbuf->m_pack);
4217		}
4218		rxbuf->m_head = NULL;
4219		rxbuf->m_pack = NULL;
4220	}
4221}
4222
4223
4224/*********************************************************************
4225 *
4226 *  Initialize a receive ring and its buffers.
4227 *
4228 **********************************************************************/
4229static int
4230igb_setup_receive_ring(struct rx_ring *rxr)
4231{
4232	struct	adapter		*adapter;
4233	struct  ifnet		*ifp;
4234	device_t		dev;
4235	struct igb_rx_buf	*rxbuf;
4236	bus_dma_segment_t	pseg[1], hseg[1];
4237	struct lro_ctrl		*lro = &rxr->lro;
4238	int			rsize, nsegs, error = 0;
4239#ifdef DEV_NETMAP
4240	struct netmap_adapter *na = NA(rxr->adapter->ifp);
4241	struct netmap_slot *slot;
4242#endif /* DEV_NETMAP */
4243
4244	adapter = rxr->adapter;
4245	dev = adapter->dev;
4246	ifp = adapter->ifp;
4247
4248	/* Clear the ring contents */
4249	IGB_RX_LOCK(rxr);
4250#ifdef DEV_NETMAP
4251	slot = netmap_reset(na, NR_RX, rxr->me, 0);
4252#endif /* DEV_NETMAP */
4253	rsize = roundup2(adapter->num_rx_desc *
4254	    sizeof(union e1000_adv_rx_desc), IGB_DBA_ALIGN);
4255	bzero((void *)rxr->rx_base, rsize);
4256
4257	/*
4258	** Free current RX buffer structures and their mbufs
4259	*/
4260	igb_free_receive_ring(rxr);
4261
4262	/* Configure for header split? */
4263	if (igb_header_split)
4264		rxr->hdr_split = TRUE;
4265
4266        /* Now replenish the ring mbufs */
4267	for (int j = 0; j < adapter->num_rx_desc; ++j) {
4268		struct mbuf	*mh, *mp;
4269
4270		rxbuf = &rxr->rx_buffers[j];
4271#ifdef DEV_NETMAP
4272		if (slot) {
4273			/* slot sj is mapped to the i-th NIC-ring entry */
4274			int sj = netmap_idx_n2k(&na->rx_rings[rxr->me], j);
4275			uint64_t paddr;
4276			void *addr;
4277
4278			addr = PNMB(slot + sj, &paddr);
4279			netmap_load_map(rxr->ptag, rxbuf->pmap, addr);
4280			/* Update descriptor */
4281			rxr->rx_base[j].read.pkt_addr = htole64(paddr);
4282			continue;
4283		}
4284#endif /* DEV_NETMAP */
4285		if (rxr->hdr_split == FALSE)
4286			goto skip_head;
4287
4288		/* First the header */
4289		rxbuf->m_head = m_gethdr(M_NOWAIT, MT_DATA);
4290		if (rxbuf->m_head == NULL) {
4291			error = ENOBUFS;
4292                        goto fail;
4293		}
4294		m_adj(rxbuf->m_head, ETHER_ALIGN);
4295		mh = rxbuf->m_head;
4296		mh->m_len = mh->m_pkthdr.len = MHLEN;
4297		mh->m_flags |= M_PKTHDR;
4298		/* Get the memory mapping */
4299		error = bus_dmamap_load_mbuf_sg(rxr->htag,
4300		    rxbuf->hmap, rxbuf->m_head, hseg,
4301		    &nsegs, BUS_DMA_NOWAIT);
4302		if (error != 0) /* Nothing elegant to do here */
4303                        goto fail;
4304		bus_dmamap_sync(rxr->htag,
4305		    rxbuf->hmap, BUS_DMASYNC_PREREAD);
4306		/* Update descriptor */
4307		rxr->rx_base[j].read.hdr_addr = htole64(hseg[0].ds_addr);
4308
4309skip_head:
4310		/* Now the payload cluster */
4311		rxbuf->m_pack = m_getjcl(M_NOWAIT, MT_DATA,
4312		    M_PKTHDR, adapter->rx_mbuf_sz);
4313		if (rxbuf->m_pack == NULL) {
4314			error = ENOBUFS;
4315                        goto fail;
4316		}
4317		mp = rxbuf->m_pack;
4318		mp->m_pkthdr.len = mp->m_len = adapter->rx_mbuf_sz;
4319		/* Get the memory mapping */
4320		error = bus_dmamap_load_mbuf_sg(rxr->ptag,
4321		    rxbuf->pmap, mp, pseg,
4322		    &nsegs, BUS_DMA_NOWAIT);
4323		if (error != 0)
4324                        goto fail;
4325		bus_dmamap_sync(rxr->ptag,
4326		    rxbuf->pmap, BUS_DMASYNC_PREREAD);
4327		/* Update descriptor */
4328		rxr->rx_base[j].read.pkt_addr = htole64(pseg[0].ds_addr);
4329        }
4330
4331	/* Setup our descriptor indices */
4332	rxr->next_to_check = 0;
4333	rxr->next_to_refresh = adapter->num_rx_desc - 1;
4334	rxr->lro_enabled = FALSE;
4335	rxr->rx_split_packets = 0;
4336	rxr->rx_bytes = 0;
4337
4338	rxr->fmp = NULL;
4339	rxr->lmp = NULL;
4340	rxr->discard = FALSE;
4341
4342	bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4343	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4344
4345	/*
4346	** Now set up the LRO interface; we
4347	** also only do header split when LRO
4348	** is enabled, since it is so often
4349	** undesirable in other setups.
4350	*/
4351	if (ifp->if_capenable & IFCAP_LRO) {
4352		error = tcp_lro_init(lro);
4353		if (error) {
4354			device_printf(dev, "LRO Initialization failed!\n");
4355			goto fail;
4356		}
4357		INIT_DEBUGOUT("RX LRO Initialized\n");
4358		rxr->lro_enabled = TRUE;
4359		lro->ifp = adapter->ifp;
4360	}
4361
4362	IGB_RX_UNLOCK(rxr);
4363	return (0);
4364
4365fail:
4366	igb_free_receive_ring(rxr);
4367	IGB_RX_UNLOCK(rxr);
4368	return (error);
4369}
4370
4371
4372/*********************************************************************
4373 *
4374 *  Initialize all receive rings.
4375 *
4376 **********************************************************************/
4377static int
4378igb_setup_receive_structures(struct adapter *adapter)
4379{
4380	struct rx_ring *rxr = adapter->rx_rings;
4381	int i;
4382
4383	for (i = 0; i < adapter->num_queues; i++, rxr++)
4384		if (igb_setup_receive_ring(rxr))
4385			goto fail;
4386
4387	return (0);
4388fail:
4389	/*
4390	 * Free the RX buffers allocated so far; we only handle
4391	 * the rings that completed, since the failing ring will
4392	 * have cleaned up after itself. 'i' is the endpoint.
4393	 */
4394	for (int j = 0; j < i; ++j) {
4395		rxr = &adapter->rx_rings[j];
4396		IGB_RX_LOCK(rxr);
4397		igb_free_receive_ring(rxr);
4398		IGB_RX_UNLOCK(rxr);
4399	}
4400
4401	return (ENOBUFS);
4402}
4403
4404/*********************************************************************
4405 *
4406 *  Enable receive unit.
4407 *
4408 **********************************************************************/
4409static void
4410igb_initialize_receive_units(struct adapter *adapter)
4411{
4412	struct rx_ring	*rxr = adapter->rx_rings;
4413	struct ifnet	*ifp = adapter->ifp;
4414	struct e1000_hw *hw = &adapter->hw;
4415	u32		rctl, rxcsum, psize, srrctl = 0;
4416
4417	INIT_DEBUGOUT("igb_initialize_receive_units: begin");
4418
4419	/*
4420	 * Make sure receives are disabled while setting
4421	 * up the descriptor ring
4422	 */
4423	rctl = E1000_READ_REG(hw, E1000_RCTL);
4424	E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
4425
4426	/*
4427	** Set up for header split
4428	*/
4429	if (igb_header_split) {
4430		/* Use a standard mbuf for the header */
4431		srrctl |= IGB_HDR_BUF << E1000_SRRCTL_BSIZEHDRSIZE_SHIFT;
4432		srrctl |= E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
4433	} else
4434		srrctl |= E1000_SRRCTL_DESCTYPE_ADV_ONEBUF;
4435
4436	/*
4437	** Set up for jumbo frames
4438	*/
4439	if (ifp->if_mtu > ETHERMTU) {
4440		rctl |= E1000_RCTL_LPE;
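		/*
		** The SRRCTL buffer size field is in 1KB units, hence
		** the right shift when programming it below.
		*/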
4441		if (adapter->rx_mbuf_sz == MJUMPAGESIZE) {
4442			srrctl |= 4096 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
4443			rctl |= E1000_RCTL_SZ_4096 | E1000_RCTL_BSEX;
4444		} else if (adapter->rx_mbuf_sz > MJUMPAGESIZE) {
4445			srrctl |= 8192 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
4446			rctl |= E1000_RCTL_SZ_8192 | E1000_RCTL_BSEX;
4447		}
4448		/* Set maximum packet len */
4449		psize = adapter->max_frame_size;
4450		/* are we on a vlan? */
4451		if (adapter->ifp->if_vlantrunk != NULL)
4452			psize += VLAN_TAG_SIZE;
4453		E1000_WRITE_REG(&adapter->hw, E1000_RLPML, psize);
4454	} else {
4455		rctl &= ~E1000_RCTL_LPE;
4456		srrctl |= 2048 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
4457		rctl |= E1000_RCTL_SZ_2048;
4458	}
4459
4460	/* Setup the Base and Length of the Rx Descriptor Rings */
4461	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4462		u64 bus_addr = rxr->rxdma.dma_paddr;
4463		u32 rxdctl;
4464
4465		E1000_WRITE_REG(hw, E1000_RDLEN(i),
4466		    adapter->num_rx_desc * sizeof(struct e1000_rx_desc));
4467		E1000_WRITE_REG(hw, E1000_RDBAH(i),
4468		    (uint32_t)(bus_addr >> 32));
4469		E1000_WRITE_REG(hw, E1000_RDBAL(i),
4470		    (uint32_t)bus_addr);
4471		E1000_WRITE_REG(hw, E1000_SRRCTL(i), srrctl);
4472		/* Enable this Queue */
4473		rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(i));
4474		rxdctl |= E1000_RXDCTL_QUEUE_ENABLE;
4475		rxdctl &= 0xFFF00000;
4476		rxdctl |= IGB_RX_PTHRESH;
4477		rxdctl |= IGB_RX_HTHRESH << 8;
4478		rxdctl |= IGB_RX_WTHRESH << 16;
4479		E1000_WRITE_REG(hw, E1000_RXDCTL(i), rxdctl);
4480	}
4481
4482	/*
4483	** Setup for RX MultiQueue
4484	*/
4485	rxcsum = E1000_READ_REG(hw, E1000_RXCSUM);
4486	if (adapter->num_queues > 1) {
4487		u32 random[10], mrqc, shift = 0;
4488		union igb_reta {
4489			u32 dword;
4490			u8  bytes[4];
4491		} reta;
4492
4493		arc4rand(&random, sizeof(random), 0);
4494		if (adapter->hw.mac.type == e1000_82575)
4495			shift = 6;
4496		/* Warning FM follows */
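		/*
		** Fill the 128-entry RSS redirection table (RETA),
		** four 8-bit entries packed per register write, to
		** spread flows round-robin across the queues; the
		** 82575 keeps the queue number in the upper bits of
		** each entry, hence shift = 6 above.
		*/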
4497		for (int i = 0; i < 128; i++) {
4498			reta.bytes[i & 3] =
4499			    (i % adapter->num_queues) << shift;
4500			if ((i & 3) == 3)
4501				E1000_WRITE_REG(hw,
4502				    E1000_RETA(i >> 2), reta.dword);
4503		}
4504		/* Now program the RSS random key and enable RSS */
4505		mrqc = E1000_MRQC_ENABLE_RSS_4Q;
4506		for (int i = 0; i < 10; i++)
4507			E1000_WRITE_REG_ARRAY(hw,
4508			    E1000_RSSRK(0), i, random[i]);
4509
4510		mrqc |= (E1000_MRQC_RSS_FIELD_IPV4 |
4511		    E1000_MRQC_RSS_FIELD_IPV4_TCP);
4512		mrqc |= (E1000_MRQC_RSS_FIELD_IPV6 |
4513		    E1000_MRQC_RSS_FIELD_IPV6_TCP);
4514		mrqc |= (E1000_MRQC_RSS_FIELD_IPV4_UDP |
4515		    E1000_MRQC_RSS_FIELD_IPV6_UDP);
4516		mrqc |= (E1000_MRQC_RSS_FIELD_IPV6_UDP_EX |
4517		    E1000_MRQC_RSS_FIELD_IPV6_TCP_EX);
4518
4519		E1000_WRITE_REG(hw, E1000_MRQC, mrqc);
4520
4521		/*
4522		** NOTE: Receive Full-Packet Checksum Offload
4523		** is mutually exclusive with Multiqueue. This
4524		** is not the same as the TCP/IP checksums,
4525		** which still work.
4526		*/
4527		rxcsum |= E1000_RXCSUM_PCSD;
4528#if __FreeBSD_version >= 800000
4529		/* For SCTP Offload */
4530		if ((hw->mac.type == e1000_82576)
4531		    && (ifp->if_capenable & IFCAP_RXCSUM))
4532			rxcsum |= E1000_RXCSUM_CRCOFL;
4533#endif
4534	} else {
4535		/* Non RSS setup */
4536		if (ifp->if_capenable & IFCAP_RXCSUM) {
4537			rxcsum |= E1000_RXCSUM_IPPCSE;
4538#if __FreeBSD_version >= 800000
4539			if (adapter->hw.mac.type == e1000_82576)
4540				rxcsum |= E1000_RXCSUM_CRCOFL;
4541#endif
4542		} else
4543			rxcsum &= ~E1000_RXCSUM_TUOFL;
4544	}
4545	E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum);
4546
4547	/* Setup the Receive Control Register */
4548	rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
4549	rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_LBM_NO |
4550		   E1000_RCTL_RDMTS_HALF |
4551		   (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
4552	/* Strip CRC bytes. */
4553	rctl |= E1000_RCTL_SECRC;
4554	/* Make sure VLAN Filters are off */
4555	rctl &= ~E1000_RCTL_VFE;
4556	/* Don't store bad packets */
4557	rctl &= ~E1000_RCTL_SBP;
4558
4559	/* Enable Receives */
4560	E1000_WRITE_REG(hw, E1000_RCTL, rctl);
4561
4562	/*
4563	 * Setup the HW Rx Head and Tail Descriptor Pointers
4564	 *   - needs to be after enable
4565	 */
4566	for (int i = 0; i < adapter->num_queues; i++) {
4567		rxr = &adapter->rx_rings[i];
4568		E1000_WRITE_REG(hw, E1000_RDH(i), rxr->next_to_check);
4569#ifdef DEV_NETMAP
4570		/*
4571		 * an init() while a netmap client is active must
4572		 * preserve the rx buffers passed to userspace.
4573		 * In this driver it means we adjust RDT to
4574		 * something different from next_to_refresh
4575		 * (which is not used in netmap mode).
4576		 */
4577		if (ifp->if_capenable & IFCAP_NETMAP) {
4578			struct netmap_adapter *na = NA(adapter->ifp);
4579			struct netmap_kring *kring = &na->rx_rings[i];
4580			int t = rxr->next_to_refresh - nm_kr_rxspace(kring);
4581
4582			if (t >= adapter->num_rx_desc)
4583				t -= adapter->num_rx_desc;
4584			else if (t < 0)
4585				t += adapter->num_rx_desc;
4586			E1000_WRITE_REG(hw, E1000_RDT(i), t);
4587		} else
4588#endif /* DEV_NETMAP */
4589		E1000_WRITE_REG(hw, E1000_RDT(i), rxr->next_to_refresh);
4590	}
4591	return;
4592}
4593
4594/*********************************************************************
4595 *
4596 *  Free receive rings.
4597 *
4598 **********************************************************************/
4599static void
4600igb_free_receive_structures(struct adapter *adapter)
4601{
4602	struct rx_ring *rxr = adapter->rx_rings;
4603
4604	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4605		struct lro_ctrl	*lro = &rxr->lro;
4606		igb_free_receive_buffers(rxr);
4607		tcp_lro_free(lro);
4608		igb_dma_free(adapter, &rxr->rxdma);
4609	}
4610
4611	free(adapter->rx_rings, M_DEVBUF);
4612}
4613
4614/*********************************************************************
4615 *
4616 *  Free receive ring data structures.
4617 *
4618 **********************************************************************/
4619static void
4620igb_free_receive_buffers(struct rx_ring *rxr)
4621{
4622	struct adapter		*adapter = rxr->adapter;
4623	struct igb_rx_buf	*rxbuf;
4624	int i;
4625
4626	INIT_DEBUGOUT("free_receive_structures: begin");
4627
4628	/* Cleanup any existing buffers */
4629	if (rxr->rx_buffers != NULL) {
4630		for (i = 0; i < adapter->num_rx_desc; i++) {
4631			rxbuf = &rxr->rx_buffers[i];
4632			if (rxbuf->m_head != NULL) {
4633				bus_dmamap_sync(rxr->htag, rxbuf->hmap,
4634				    BUS_DMASYNC_POSTREAD);
4635				bus_dmamap_unload(rxr->htag, rxbuf->hmap);
4636				rxbuf->m_head->m_flags |= M_PKTHDR;
4637				m_freem(rxbuf->m_head);
4638			}
4639			if (rxbuf->m_pack != NULL) {
4640				bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
4641				    BUS_DMASYNC_POSTREAD);
4642				bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
4643				rxbuf->m_pack->m_flags |= M_PKTHDR;
4644				m_freem(rxbuf->m_pack);
4645			}
4646			rxbuf->m_head = NULL;
4647			rxbuf->m_pack = NULL;
4648			if (rxbuf->hmap != NULL) {
4649				bus_dmamap_destroy(rxr->htag, rxbuf->hmap);
4650				rxbuf->hmap = NULL;
4651			}
4652			if (rxbuf->pmap != NULL) {
4653				bus_dmamap_destroy(rxr->ptag, rxbuf->pmap);
4654				rxbuf->pmap = NULL;
4655			}
4656		}
4657		if (rxr->rx_buffers != NULL) {
4658			free(rxr->rx_buffers, M_DEVBUF);
4659			rxr->rx_buffers = NULL;
4660		}
4661	}
4662
4663	if (rxr->htag != NULL) {
4664		bus_dma_tag_destroy(rxr->htag);
4665		rxr->htag = NULL;
4666	}
4667	if (rxr->ptag != NULL) {
4668		bus_dma_tag_destroy(rxr->ptag);
4669		rxr->ptag = NULL;
4670	}
4671}
4672
4673static __inline void
4674igb_rx_discard(struct rx_ring *rxr, int i)
4675{
4676	struct igb_rx_buf	*rbuf;
4677
4678	rbuf = &rxr->rx_buffers[i];
4679
4680	/* Partially received? Free the chain */
4681	if (rxr->fmp != NULL) {
4682		rxr->fmp->m_flags |= M_PKTHDR;
4683		m_freem(rxr->fmp);
4684		rxr->fmp = NULL;
4685		rxr->lmp = NULL;
4686	}
4687
4688	/*
4689	** With advanced descriptors the writeback
4690	** clobbers the buffer addrs, so it's easier
4691	** to just free the existing mbufs and take
4692	** the normal refresh path to get new buffers
4693	** and mapping.
4694	*/
4695	if (rbuf->m_head) {
4696		m_free(rbuf->m_head);
4697		rbuf->m_head = NULL;
4698	}
4699
4700	if (rbuf->m_pack) {
4701		m_free(rbuf->m_pack);
4702		rbuf->m_pack = NULL;
4703	}
4704
4705	return;
4706}
4707
4708static __inline void
4709igb_rx_input(struct rx_ring *rxr, struct ifnet *ifp, struct mbuf *m, u32 ptype)
4710{
4711
4712	/*
4713	 * At the moment LRO is only done for IPv4/TCP packets whose TCP
4714	 * checksum has been computed by the hardware and which carry no
4715	 * VLAN tag in the ethernet header.
4716	 */
4717	if (rxr->lro_enabled &&
4718	    (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 &&
4719	    (ptype & E1000_RXDADV_PKTTYPE_ETQF) == 0 &&
4720	    (ptype & (E1000_RXDADV_PKTTYPE_IPV4 | E1000_RXDADV_PKTTYPE_TCP)) ==
4721	    (E1000_RXDADV_PKTTYPE_IPV4 | E1000_RXDADV_PKTTYPE_TCP) &&
4722	    (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) ==
4723	    (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) {
4724		/*
4725		 * Send to the stack if:
4726		 *  - LRO is not enabled, or
4727		 *  - there are no LRO resources, or
4728		 *  - the LRO enqueue fails
4729		 */
4730		if (rxr->lro.lro_cnt != 0)
4731			if (tcp_lro_rx(&rxr->lro, m, 0) == 0)
4732				return;
4733	}
4734	IGB_RX_UNLOCK(rxr);
4735	(*ifp->if_input)(ifp, m);
4736	IGB_RX_LOCK(rxr);
4737}
4738
4739/*********************************************************************
4740 *
4741 *  This routine executes in interrupt context. It replenishes
4742 *  the mbufs in the descriptor and sends data which has been
4743 *  dma'ed into host memory to upper layer.
4744 *
4745 *  We loop at most count times if count is > 0, or until done if
4746 *  count < 0.
4747 *
4748 *  Return TRUE if more to clean, FALSE otherwise
4749 *********************************************************************/
4750static bool
4751igb_rxeof(struct igb_queue *que, int count, int *done)
4752{
4753	struct adapter		*adapter = que->adapter;
4754	struct rx_ring		*rxr = que->rxr;
4755	struct ifnet		*ifp = adapter->ifp;
4756	struct lro_ctrl		*lro = &rxr->lro;
4757	struct lro_entry	*queued;
4758	int			i, processed = 0, rxdone = 0;
4759	u32			ptype, staterr = 0;
4760	union e1000_adv_rx_desc	*cur;
4761
4762	IGB_RX_LOCK(rxr);
4763	/* Sync the ring. */
4764	bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4765	    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
4766
4767#ifdef DEV_NETMAP
4768	if (netmap_rx_irq(ifp, rxr->me, &processed)) {
4769		IGB_RX_UNLOCK(rxr);
4770		return (FALSE);
4771	}
4772#endif /* DEV_NETMAP */
4773
4774	/* Main clean loop */
4775	for (i = rxr->next_to_check; count != 0;) {
4776		struct mbuf		*sendmp, *mh, *mp;
4777		struct igb_rx_buf	*rxbuf;
4778		u16			hlen, plen, hdr, vtag;
4779		bool			eop = FALSE;
4780
4781		cur = &rxr->rx_base[i];
4782		staterr = le32toh(cur->wb.upper.status_error);
4783		if ((staterr & E1000_RXD_STAT_DD) == 0)
4784			break;
4785		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
4786			break;
4787		count--;
4788		sendmp = mh = mp = NULL;
4789		cur->wb.upper.status_error = 0;
4790		rxbuf = &rxr->rx_buffers[i];
4791		plen = le16toh(cur->wb.upper.length);
4792		ptype = le32toh(cur->wb.lower.lo_dword.data) & IGB_PKTTYPE_MASK;
4793		if ((adapter->hw.mac.type == e1000_i350) &&
4794		    (staterr & E1000_RXDEXT_STATERR_LB))
4795			vtag = be16toh(cur->wb.upper.vlan);
4796		else
4797			vtag = le16toh(cur->wb.upper.vlan);
4798		hdr = le16toh(cur->wb.lower.lo_dword.hs_rss.hdr_info);
4799		eop = ((staterr & E1000_RXD_STAT_EOP) == E1000_RXD_STAT_EOP);
4800
4801		/* Make sure all segments of a bad packet are discarded */
4802		if (((staterr & E1000_RXDEXT_ERR_FRAME_ERR_MASK) != 0) ||
4803		    (rxr->discard)) {
4804			adapter->dropped_pkts++;
4805			++rxr->rx_discarded;
4806			if (!eop) /* Catch subsequent segs */
4807				rxr->discard = TRUE;
4808			else
4809				rxr->discard = FALSE;
4810			igb_rx_discard(rxr, i);
4811			goto next_desc;
4812		}
4813
4814		/*
4815		** The way the hardware is configured to
4816		** split, it will ONLY use the header buffer
4817		** when header split is enabled, otherwise we
4818		** get normal behavior, i.e., both header and
4819		** payload are DMA'd into the payload buffer.
4820		**
4821		** The fmp test is to catch the case where a
4822		** packet spans multiple descriptors, in that
4823		** case only the first header is valid.
4824		*/
4825		if (rxr->hdr_split && rxr->fmp == NULL) {
4826			hlen = (hdr & E1000_RXDADV_HDRBUFLEN_MASK) >>
4827			    E1000_RXDADV_HDRBUFLEN_SHIFT;
4828			if (hlen > IGB_HDR_BUF)
4829				hlen = IGB_HDR_BUF;
4830			mh = rxr->rx_buffers[i].m_head;
4831			mh->m_len = hlen;
4832			/* clear buf pointer for refresh */
4833			rxbuf->m_head = NULL;
4834			/*
4835			** Get the payload length; this
4836			** could be zero if it's a small
4837			** packet.
4838			*/
4839			if (plen > 0) {
4840				mp = rxr->rx_buffers[i].m_pack;
4841				mp->m_len = plen;
4842				mh->m_next = mp;
4843				/* clear buf pointer */
4844				rxbuf->m_pack = NULL;
4845				rxr->rx_split_packets++;
4846			}
4847		} else {
4848			/*
4849			** Either no header split, or a
4850			** secondary piece of a fragmented
4851			** split packet.
4852			*/
4853			mh = rxr->rx_buffers[i].m_pack;
4854			mh->m_len = plen;
4855			/* clear buf info for refresh */
4856			rxbuf->m_pack = NULL;
4857		}
4858
4859		++processed; /* So we know when to refresh */
4860
4861		/* Initial frame - setup */
4862		if (rxr->fmp == NULL) {
4863			mh->m_pkthdr.len = mh->m_len;
4864			/* Save the head of the chain */
4865			rxr->fmp = mh;
4866			rxr->lmp = mh;
4867			if (mp != NULL) {
4868				/* Add payload if split */
4869				mh->m_pkthdr.len += mp->m_len;
4870				rxr->lmp = mh->m_next;
4871			}
4872		} else {
4873			/* Chain mbuf's together */
4874			rxr->lmp->m_next = mh;
4875			rxr->lmp = rxr->lmp->m_next;
4876			rxr->fmp->m_pkthdr.len += mh->m_len;
4877		}
4878
4879		if (eop) {
4880			rxr->fmp->m_pkthdr.rcvif = ifp;
4881			ifp->if_ipackets++;
4882			rxr->rx_packets++;
4883			/* capture data for AIM */
4884			rxr->packets++;
4885			rxr->bytes += rxr->fmp->m_pkthdr.len;
4886			rxr->rx_bytes += rxr->fmp->m_pkthdr.len;
4887
4888			if ((ifp->if_capenable & IFCAP_RXCSUM) != 0)
4889				igb_rx_checksum(staterr, rxr->fmp, ptype);
4890
4891			if ((ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 &&
4892			    (staterr & E1000_RXD_STAT_VP) != 0) {
4893				rxr->fmp->m_pkthdr.ether_vtag = vtag;
4894				rxr->fmp->m_flags |= M_VLANTAG;
4895			}
4896#ifndef IGB_LEGACY_TX
4897			rxr->fmp->m_pkthdr.flowid = que->msix;
4898			rxr->fmp->m_flags |= M_FLOWID;
4899#endif
4900			sendmp = rxr->fmp;
4901			/* Make sure to set M_PKTHDR. */
4902			sendmp->m_flags |= M_PKTHDR;
4903			rxr->fmp = NULL;
4904			rxr->lmp = NULL;
4905		}
4906
4907next_desc:
4908		bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4909		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4910
4911		/* Advance our pointers to the next descriptor. */
4912		if (++i == adapter->num_rx_desc)
4913			i = 0;
4914		/*
4915		** Send to the stack or LRO
4916		*/
4917		if (sendmp != NULL) {
4918			rxr->next_to_check = i;
4919			igb_rx_input(rxr, ifp, sendmp, ptype);
4920			i = rxr->next_to_check;
4921			rxdone++;
4922		}
4923
4924		/* Every 8 descriptors we go to refresh mbufs */
4925		if (processed == 8) {
4926                        igb_refresh_mbufs(rxr, i);
4927                        processed = 0;
4928		}
4929	}
4930
4931	/* Catch any remainders */
4932	if (igb_rx_unrefreshed(rxr))
4933		igb_refresh_mbufs(rxr, i);
4934
4935	rxr->next_to_check = i;
4936
4937	/*
4938	 * Flush any outstanding LRO work
4939	 */
4940	while ((queued = SLIST_FIRST(&lro->lro_active)) != NULL) {
4941		SLIST_REMOVE_HEAD(&lro->lro_active, next);
4942		tcp_lro_flush(lro, queued);
4943	}
4944
4945	if (done != NULL)
4946		*done += rxdone;
4947
4948	IGB_RX_UNLOCK(rxr);
4949	return ((staterr & E1000_RXD_STAT_DD) ? TRUE : FALSE);
4950}
4951
4952/*********************************************************************
4953 *
4954 *  Verify that the hardware indicated that the checksum is valid.
4955 *  Inform the stack about the status of checksum so that stack
4956 *  Inform the stack about the status of the checksum so that the stack
4957 *
4958 *********************************************************************/
4959static void
4960igb_rx_checksum(u32 staterr, struct mbuf *mp, u32 ptype)
4961{
4962	u16 status = (u16)staterr;
4963	u8  errors = (u8) (staterr >> 24);
4964	int sctp;
4965
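	/*
	 * Translate the hardware receive status/error bits into mbuf
	 * csum_flags: IPCS reports the IP header checksum, while
	 * TCPCS/UDPCS report the L4 checksum (with pseudo-header).
	 */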
4966	/* Ignore Checksum bit is set */
4967	if (status & E1000_RXD_STAT_IXSM) {
4968		mp->m_pkthdr.csum_flags = 0;
4969		return;
4970	}
4971
4972	if ((ptype & E1000_RXDADV_PKTTYPE_ETQF) == 0 &&
4973	    (ptype & E1000_RXDADV_PKTTYPE_SCTP) != 0)
4974		sctp = 1;
4975	else
4976		sctp = 0;
4977	if (status & E1000_RXD_STAT_IPCS) {
4978		/* Did it pass? */
4979		if (!(errors & E1000_RXD_ERR_IPE)) {
4980			/* IP Checksum Good */
4981			mp->m_pkthdr.csum_flags = CSUM_IP_CHECKED;
4982			mp->m_pkthdr.csum_flags |= CSUM_IP_VALID;
4983		} else
4984			mp->m_pkthdr.csum_flags = 0;
4985	}
4986
4987	if (status & (E1000_RXD_STAT_TCPCS | E1000_RXD_STAT_UDPCS)) {
4988		u16 type = (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
4989#if __FreeBSD_version >= 800000
4990		if (sctp) /* reassign */
4991			type = CSUM_SCTP_VALID;
4992#endif
4993		/* Did it pass? */
4994		if (!(errors & E1000_RXD_ERR_TCPE)) {
4995			mp->m_pkthdr.csum_flags |= type;
4996			if (sctp == 0)
4997				mp->m_pkthdr.csum_data = htons(0xffff);
4998		}
4999	}
5000	return;
5001}
5002
5003/*
5004 * This routine is run via a vlan
5005 * config EVENT
5006 */
5007static void
5008igb_register_vlan(void *arg, struct ifnet *ifp, u16 vtag)
5009{
5010	struct adapter	*adapter = ifp->if_softc;
5011	u32		index, bit;
5012
5013	if (ifp->if_softc !=  arg)   /* Not our event */
5014		return;
5015
5016	if ((vtag == 0) || (vtag > 4095))       /* Invalid */
5017                return;
5018
5019	IGB_CORE_LOCK(adapter);
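	/*
	 * The VFTA is an array of 32-bit registers with one bit per
	 * possible VLAN ID: the upper bits of the tag select the
	 * register (index) and the low 5 bits select the bit.
	 */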
5020	index = (vtag >> 5) & 0x7F;
5021	bit = vtag & 0x1F;
5022	adapter->shadow_vfta[index] |= (1 << bit);
5023	++adapter->num_vlans;
5024	/* Change hw filter setting */
5025	if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
5026		igb_setup_vlan_hw_support(adapter);
5027	IGB_CORE_UNLOCK(adapter);
5028}
5029
5030/*
5031 * This routine is run via a vlan
5032 * unconfig EVENT
5033 */
5034static void
5035igb_unregister_vlan(void *arg, struct ifnet *ifp, u16 vtag)
5036{
5037	struct adapter	*adapter = ifp->if_softc;
5038	u32		index, bit;
5039
5040	if (ifp->if_softc !=  arg)
5041		return;
5042
5043	if ((vtag == 0) || (vtag > 4095))       /* Invalid */
5044                return;
5045
5046	IGB_CORE_LOCK(adapter);
5047	index = (vtag >> 5) & 0x7F;
5048	bit = vtag & 0x1F;
5049	adapter->shadow_vfta[index] &= ~(1 << bit);
5050	--adapter->num_vlans;
5051	/* Change hw filter setting */
5052	if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
5053		igb_setup_vlan_hw_support(adapter);
5054	IGB_CORE_UNLOCK(adapter);
5055}
5056
5057static void
5058igb_setup_vlan_hw_support(struct adapter *adapter)
5059{
5060	struct e1000_hw *hw = &adapter->hw;
5061	struct ifnet	*ifp = adapter->ifp;
5062	u32             reg;
5063
5064	if (adapter->vf_ifp) {
5065		e1000_rlpml_set_vf(hw,
5066		    adapter->max_frame_size + VLAN_TAG_SIZE);
5067		return;
5068	}
5069
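	/* Turn on the VLAN Mode Enable (VME) bit for hardware tag handling */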
5070	reg = E1000_READ_REG(hw, E1000_CTRL);
5071	reg |= E1000_CTRL_VME;
5072	E1000_WRITE_REG(hw, E1000_CTRL, reg);
5073
5074	/* Enable the Filter Table */
5075	if (ifp->if_capenable & IFCAP_VLAN_HWFILTER) {
5076		reg = E1000_READ_REG(hw, E1000_RCTL);
5077		reg &= ~E1000_RCTL_CFIEN;
5078		reg |= E1000_RCTL_VFE;
5079		E1000_WRITE_REG(hw, E1000_RCTL, reg);
5080	}
5081
5082	/* Update the frame size */
5083	E1000_WRITE_REG(&adapter->hw, E1000_RLPML,
5084	    adapter->max_frame_size + VLAN_TAG_SIZE);
5085
5086	/* Don't bother with table if no vlans */
5087	if ((adapter->num_vlans == 0) ||
5088	    ((ifp->if_capenable & IFCAP_VLAN_HWFILTER) == 0))
5089                return;
5090	/*
5091	** A soft reset zeroes out the VFTA, so
5092	** we need to repopulate it now.
5093	*/
5094	for (int i = 0; i < IGB_VFTA_SIZE; i++)
5095                if (adapter->shadow_vfta[i] != 0) {
5096			if (adapter->vf_ifp)
5097				e1000_vfta_set_vf(hw,
5098				    adapter->shadow_vfta[i], TRUE);
5099			else
5100				e1000_write_vfta(hw,
5101				    i, adapter->shadow_vfta[i]);
5102		}
5103}
5104
5105static void
5106igb_enable_intr(struct adapter *adapter)
5107{
5108	/* With RSS set up what to auto clear */
5109	if (adapter->msix_mem) {
5110		u32 mask = (adapter->que_mask | adapter->link_mask);
5111		E1000_WRITE_REG(&adapter->hw, E1000_EIAC, mask);
5112		E1000_WRITE_REG(&adapter->hw, E1000_EIAM, mask);
5113		E1000_WRITE_REG(&adapter->hw, E1000_EIMS, mask);
5114		E1000_WRITE_REG(&adapter->hw, E1000_IMS,
5115		    E1000_IMS_LSC);
5116	} else {
5117		E1000_WRITE_REG(&adapter->hw, E1000_IMS,
5118		    IMS_ENABLE_MASK);
5119	}
5120	E1000_WRITE_FLUSH(&adapter->hw);
5121
5122	return;
5123}
5124
5125static void
5126igb_disable_intr(struct adapter *adapter)
5127{
5128	if (adapter->msix_mem) {
5129		E1000_WRITE_REG(&adapter->hw, E1000_EIMC, ~0);
5130		E1000_WRITE_REG(&adapter->hw, E1000_EIAC, 0);
5131	}
5132	E1000_WRITE_REG(&adapter->hw, E1000_IMC, ~0);
5133	E1000_WRITE_FLUSH(&adapter->hw);
5134	return;
5135}
5136
5137/*
5138 * Bit of a misnomer: what this really means is
5139 * to enable OS management of the system, i.e.
5140 * to disable special hardware management features.
5141 */
5142static void
5143igb_init_manageability(struct adapter *adapter)
5144{
5145	if (adapter->has_manage) {
5146		int manc2h = E1000_READ_REG(&adapter->hw, E1000_MANC2H);
5147		int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
5148
5149		/* disable hardware interception of ARP */
5150		manc &= ~(E1000_MANC_ARP_EN);
5151
5152                /* enable receiving management packets to the host */
5153		manc |= E1000_MANC_EN_MNG2HOST;
5154		manc2h |= 1 << 5;  /* Mng Port 623 */
5155		manc2h |= 1 << 6;  /* Mng Port 664 */
5156		E1000_WRITE_REG(&adapter->hw, E1000_MANC2H, manc2h);
5157		E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
5158	}
5159}
5160
5161/*
5162 * Give control back to hardware management
5163 * controller if there is one.
5164 */
5165static void
5166igb_release_manageability(struct adapter *adapter)
5167{
5168	if (adapter->has_manage) {
5169		int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
5170
5171		/* re-enable hardware interception of ARP */
5172		manc |= E1000_MANC_ARP_EN;
5173		manc &= ~E1000_MANC_EN_MNG2HOST;
5174
5175		E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
5176	}
5177}
5178
5179/*
5180 * igb_get_hw_control sets CTRL_EXT:DRV_LOAD bit.
5181 * For ASF and Pass Through versions of f/w this means that
5182 * the driver is loaded.
5183 *
5184 */
5185static void
5186igb_get_hw_control(struct adapter *adapter)
5187{
5188	u32 ctrl_ext;
5189
5190	if (adapter->vf_ifp)
5191		return;
5192
5193	/* Let firmware know the driver has taken over */
5194	ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
5195	E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
5196	    ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
5197}
5198
5199/*
5200 * igb_release_hw_control resets CTRL_EXT:DRV_LOAD bit.
5201 * For ASF and Pass Through versions of f/w this means that the
5202 * driver is no longer loaded.
5203 *
5204 */
5205static void
5206igb_release_hw_control(struct adapter *adapter)
5207{
5208	u32 ctrl_ext;
5209
5210	if (adapter->vf_ifp)
5211		return;
5212
5213	/* Let firmware take over control of h/w */
5214	ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
5215	E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
5216	    ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
5217}
5218
5219static int
5220igb_is_valid_ether_addr(uint8_t *addr)
5221{
5222	char zero_addr[6] = { 0, 0, 0, 0, 0, 0 };
5223
5224	if ((addr[0] & 1) || (!bcmp(addr, zero_addr, ETHER_ADDR_LEN))) {
5225		return (FALSE);
5226	}
5227
5228	return (TRUE);
5229}
5230
5231
5232/*
5233 * Enable PCI Wake On Lan capability
5234 */
5235static void
5236igb_enable_wakeup(device_t dev)
5237{
5238	u16     cap, status;
5239	u8      id;
5240
5241	/* First find the capabilities pointer */
5242	cap = pci_read_config(dev, PCIR_CAP_PTR, 2);
5243	/* Read the PM Capabilities */
5244	id = pci_read_config(dev, cap, 1);
5245	if (id != PCIY_PMG)     /* Something wrong */
5246		return;
5247	/* OK, we have the power capabilities, so
5248	   now get the status register */
5249	cap += PCIR_POWER_STATUS;
5250	status = pci_read_config(dev, cap, 2);
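	/* Set the PME status and enable bits so the device can wake the system */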
5251	status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE;
5252	pci_write_config(dev, cap, status, 2);
5253	return;
5254}
5255
5256static void
5257igb_led_func(void *arg, int onoff)
5258{
5259	struct adapter	*adapter = arg;
5260
5261	IGB_CORE_LOCK(adapter);
5262	if (onoff) {
5263		e1000_setup_led(&adapter->hw);
5264		e1000_led_on(&adapter->hw);
5265	} else {
5266		e1000_led_off(&adapter->hw);
5267		e1000_cleanup_led(&adapter->hw);
5268	}
5269	IGB_CORE_UNLOCK(adapter);
5270}
5271
5272/**********************************************************************
5273 *
5274 *  Update the board statistics counters.
5275 *
5276 **********************************************************************/
5277static void
5278igb_update_stats_counters(struct adapter *adapter)
5279{
5280	struct ifnet		*ifp;
5281        struct e1000_hw		*hw = &adapter->hw;
5282	struct e1000_hw_stats	*stats;
5283
5284	/*
5285	** The virtual function adapter has only a
5286	** small controlled set of stats, so do only
5287	** those and return.
5288	*/
5289	if (adapter->vf_ifp) {
5290		igb_update_vf_stats_counters(adapter);
5291		return;
5292	}
5293
5294	stats = (struct e1000_hw_stats	*)adapter->stats;
5295
5296	if (adapter->hw.phy.media_type == e1000_media_type_copper ||
5297	   (E1000_READ_REG(hw, E1000_STATUS) & E1000_STATUS_LU)) {
5298		stats->symerrs +=
5299		    E1000_READ_REG(hw, E1000_SYMERRS);
5300		stats->sec += E1000_READ_REG(hw, E1000_SEC);
5301	}
5302
5303	stats->crcerrs += E1000_READ_REG(hw, E1000_CRCERRS);
5304	stats->mpc += E1000_READ_REG(hw, E1000_MPC);
5305	stats->scc += E1000_READ_REG(hw, E1000_SCC);
5306	stats->ecol += E1000_READ_REG(hw, E1000_ECOL);
5307
5308	stats->mcc += E1000_READ_REG(hw, E1000_MCC);
5309	stats->latecol += E1000_READ_REG(hw, E1000_LATECOL);
5310	stats->colc += E1000_READ_REG(hw, E1000_COLC);
5311	stats->dc += E1000_READ_REG(hw, E1000_DC);
5312	stats->rlec += E1000_READ_REG(hw, E1000_RLEC);
5313	stats->xonrxc += E1000_READ_REG(hw, E1000_XONRXC);
5314	stats->xontxc += E1000_READ_REG(hw, E1000_XONTXC);
5315	/*
5316	** For watchdog management we need to know if we have been
5317	** paused during the last interval, so capture that here.
5318	*/
5319        adapter->pause_frames = E1000_READ_REG(&adapter->hw, E1000_XOFFRXC);
5320        stats->xoffrxc += adapter->pause_frames;
5321	stats->xofftxc += E1000_READ_REG(hw, E1000_XOFFTXC);
5322	stats->fcruc += E1000_READ_REG(hw, E1000_FCRUC);
5323	stats->prc64 += E1000_READ_REG(hw, E1000_PRC64);
5324	stats->prc127 += E1000_READ_REG(hw, E1000_PRC127);
5325	stats->prc255 += E1000_READ_REG(hw, E1000_PRC255);
5326	stats->prc511 += E1000_READ_REG(hw, E1000_PRC511);
5327	stats->prc1023 += E1000_READ_REG(hw, E1000_PRC1023);
5328	stats->prc1522 += E1000_READ_REG(hw, E1000_PRC1522);
5329	stats->gprc += E1000_READ_REG(hw, E1000_GPRC);
5330	stats->bprc += E1000_READ_REG(hw, E1000_BPRC);
5331	stats->mprc += E1000_READ_REG(hw, E1000_MPRC);
5332	stats->gptc += E1000_READ_REG(hw, E1000_GPTC);
5333
5334	/* For the 64-bit byte counters the low dword must be read first. */
5335	/* Both registers clear on the read of the high dword */
5336
5337	stats->gorc += E1000_READ_REG(hw, E1000_GORCL) +
5338	    ((u64)E1000_READ_REG(hw, E1000_GORCH) << 32);
5339	stats->gotc += E1000_READ_REG(hw, E1000_GOTCL) +
5340	    ((u64)E1000_READ_REG(hw, E1000_GOTCH) << 32);
5341
5342	stats->rnbc += E1000_READ_REG(hw, E1000_RNBC);
5343	stats->ruc += E1000_READ_REG(hw, E1000_RUC);
5344	stats->rfc += E1000_READ_REG(hw, E1000_RFC);
5345	stats->roc += E1000_READ_REG(hw, E1000_ROC);
5346	stats->rjc += E1000_READ_REG(hw, E1000_RJC);
5347
5348	stats->tor += E1000_READ_REG(hw, E1000_TORH);
5349	stats->tot += E1000_READ_REG(hw, E1000_TOTH);
5350
5351	stats->tpr += E1000_READ_REG(hw, E1000_TPR);
5352	stats->tpt += E1000_READ_REG(hw, E1000_TPT);
5353	stats->ptc64 += E1000_READ_REG(hw, E1000_PTC64);
5354	stats->ptc127 += E1000_READ_REG(hw, E1000_PTC127);
5355	stats->ptc255 += E1000_READ_REG(hw, E1000_PTC255);
5356	stats->ptc511 += E1000_READ_REG(hw, E1000_PTC511);
5357	stats->ptc1023 += E1000_READ_REG(hw, E1000_PTC1023);
5358	stats->ptc1522 += E1000_READ_REG(hw, E1000_PTC1522);
5359	stats->mptc += E1000_READ_REG(hw, E1000_MPTC);
5360	stats->bptc += E1000_READ_REG(hw, E1000_BPTC);
5361
5362	/* Interrupt Counts */
5363
5364	stats->iac += E1000_READ_REG(hw, E1000_IAC);
5365	stats->icrxptc += E1000_READ_REG(hw, E1000_ICRXPTC);
5366	stats->icrxatc += E1000_READ_REG(hw, E1000_ICRXATC);
5367	stats->ictxptc += E1000_READ_REG(hw, E1000_ICTXPTC);
5368	stats->ictxatc += E1000_READ_REG(hw, E1000_ICTXATC);
5369	stats->ictxqec += E1000_READ_REG(hw, E1000_ICTXQEC);
5370	stats->ictxqmtc += E1000_READ_REG(hw, E1000_ICTXQMTC);
5371	stats->icrxdmtc += E1000_READ_REG(hw, E1000_ICRXDMTC);
5372	stats->icrxoc += E1000_READ_REG(hw, E1000_ICRXOC);
5373
5374	/* Host to Card Statistics */
5375
5376	stats->cbtmpc += E1000_READ_REG(hw, E1000_CBTMPC);
5377	stats->htdpmc += E1000_READ_REG(hw, E1000_HTDPMC);
5378	stats->cbrdpc += E1000_READ_REG(hw, E1000_CBRDPC);
5379	stats->cbrmpc += E1000_READ_REG(hw, E1000_CBRMPC);
5380	stats->rpthc += E1000_READ_REG(hw, E1000_RPTHC);
5381	stats->hgptc += E1000_READ_REG(hw, E1000_HGPTC);
5382	stats->htcbdpc += E1000_READ_REG(hw, E1000_HTCBDPC);
5383	stats->hgorc += (E1000_READ_REG(hw, E1000_HGORCL) +
5384	    ((u64)E1000_READ_REG(hw, E1000_HGORCH) << 32));
5385	stats->hgotc += (E1000_READ_REG(hw, E1000_HGOTCL) +
5386	    ((u64)E1000_READ_REG(hw, E1000_HGOTCH) << 32));
5387	stats->lenerrs += E1000_READ_REG(hw, E1000_LENERRS);
5388	stats->scvpc += E1000_READ_REG(hw, E1000_SCVPC);
5389	stats->hrmpc += E1000_READ_REG(hw, E1000_HRMPC);
5390
5391	stats->algnerrc += E1000_READ_REG(hw, E1000_ALGNERRC);
5392	stats->rxerrc += E1000_READ_REG(hw, E1000_RXERRC);
5393	stats->tncrs += E1000_READ_REG(hw, E1000_TNCRS);
5394	stats->cexterr += E1000_READ_REG(hw, E1000_CEXTERR);
5395	stats->tsctc += E1000_READ_REG(hw, E1000_TSCTC);
5396	stats->tsctfc += E1000_READ_REG(hw, E1000_TSCTFC);
5397
5398	ifp = adapter->ifp;
5399	ifp->if_collisions = stats->colc;
5400
5401	/* Rx Errors */
5402	ifp->if_ierrors = adapter->dropped_pkts + stats->rxerrc +
5403	    stats->crcerrs + stats->algnerrc +
5404	    stats->ruc + stats->roc + stats->mpc + stats->cexterr;
5405
5406	/* Tx Errors */
5407	ifp->if_oerrors = stats->ecol +
5408	    stats->latecol + adapter->watchdog_events;
5409
5410	/* Driver specific counters */
5411	adapter->device_control = E1000_READ_REG(hw, E1000_CTRL);
5412	adapter->rx_control = E1000_READ_REG(hw, E1000_RCTL);
5413	adapter->int_mask = E1000_READ_REG(hw, E1000_IMS);
5414	adapter->eint_mask = E1000_READ_REG(hw, E1000_EIMS);
5415	adapter->packet_buf_alloc_tx =
5416	    ((E1000_READ_REG(hw, E1000_PBA) & 0xffff0000) >> 16);
5417	adapter->packet_buf_alloc_rx =
5418	    (E1000_READ_REG(hw, E1000_PBA) & 0xffff);
5419}
5420
5421
5422/**********************************************************************
5423 *
5424 *  Initialize the VF board statistics counters.
5425 *
5426 **********************************************************************/
5427static void
5428igb_vf_init_stats(struct adapter *adapter)
5429{
	struct e1000_hw		*hw = &adapter->hw;
	struct e1000_vf_stats	*stats;

	stats = (struct e1000_vf_stats *)adapter->stats;
	if (stats == NULL)
		return;
	/* Seed the last_* snapshots used by UPDATE_VF_REG(). */
	stats->last_gprc = E1000_READ_REG(hw, E1000_VFGPRC);
	stats->last_gorc = E1000_READ_REG(hw, E1000_VFGORC);
	stats->last_gptc = E1000_READ_REG(hw, E1000_VFGPTC);
	stats->last_gotc = E1000_READ_REG(hw, E1000_VFGOTC);
	stats->last_mprc = E1000_READ_REG(hw, E1000_VFMPRC);
5441}
5442
5443/**********************************************************************
5444 *
5445 *  Update the VF board statistics counters.
5446 *
5447 **********************************************************************/
5448static void
5449igb_update_vf_stats_counters(struct adapter *adapter)
5450{
5451	struct e1000_hw *hw = &adapter->hw;
5452	struct e1000_vf_stats	*stats;
5453
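	/* Nothing to update until the link has come up. */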
5454	if (adapter->link_speed == 0)
5455		return;
5456
	stats = (struct e1000_vf_stats *)adapter->stats;
5458
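	/*
	** UPDATE_VF_REG() (defined in the driver header) maintains each
	** 64-bit counter from its 32-bit hardware register, using the
	** last_* snapshot to detect register wrap.
	*/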
5459	UPDATE_VF_REG(E1000_VFGPRC,
5460	    stats->last_gprc, stats->gprc);
5461	UPDATE_VF_REG(E1000_VFGORC,
5462	    stats->last_gorc, stats->gorc);
5463	UPDATE_VF_REG(E1000_VFGPTC,
5464	    stats->last_gptc, stats->gptc);
5465	UPDATE_VF_REG(E1000_VFGOTC,
5466	    stats->last_gotc, stats->gotc);
5467	UPDATE_VF_REG(E1000_VFMPRC,
5468	    stats->last_mprc, stats->mprc);
5469}
5470
5471/* Export a single 32-bit register via a read-only sysctl. */
5472static int
5473igb_sysctl_reg_handler(SYSCTL_HANDLER_ARGS)
5474{
5475	struct adapter *adapter;
5476	u_int val;
5477
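	/* arg1 carries the adapter and arg2 the register offset. */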
5478	adapter = oidp->oid_arg1;
5479	val = E1000_READ_REG(&adapter->hw, oidp->oid_arg2);
5480	return (sysctl_handle_int(oidp, &val, 0, req));
5481}
5482
5483/*
5484**  Tuneable interrupt rate handler
5485*/
5486static int
5487igb_sysctl_interrupt_rate_handler(SYSCTL_HANDLER_ARGS)
5488{
5489	struct igb_queue	*que = ((struct igb_queue *)oidp->oid_arg1);
5490	int			error;
5491	u32			reg, usec, rate;
5492
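	/*
	** The code below treats EITR bits 14:2 as the interrupt interval
	** in microseconds and reports an approximate interrupts/second.
	*/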
5493	reg = E1000_READ_REG(&que->adapter->hw, E1000_EITR(que->msix));
5494	usec = ((reg & 0x7FFC) >> 2);
5495	if (usec > 0)
5496		rate = 1000000 / usec;
5497	else
5498		rate = 0;
5499	error = sysctl_handle_int(oidp, &rate, 0, req);
	if (error || !req->newptr)
		return (error);
	return (0);
5503}
5504
5505/*
5506 * Add sysctl variables, one per statistic, to the system.
5507 */
5508static void
5509igb_add_hw_stats(struct adapter *adapter)
5510{
5511	device_t dev = adapter->dev;
5512
5513	struct tx_ring *txr = adapter->tx_rings;
5514	struct rx_ring *rxr = adapter->rx_rings;
5515
5516	struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev);
5517	struct sysctl_oid *tree = device_get_sysctl_tree(dev);
5518	struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree);
5519	struct e1000_hw_stats *stats = adapter->stats;
5520
5521	struct sysctl_oid *stat_node, *queue_node, *int_node, *host_node;
5522	struct sysctl_oid_list *stat_list, *queue_list, *int_list, *host_list;
5523
5524#define QUEUE_NAME_LEN 32
5525	char namebuf[QUEUE_NAME_LEN];
5526
5527	/* Driver Statistics */
5528	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "link_irq",
5529			CTLFLAG_RD, &adapter->link_irq, 0,
5530			"Link MSIX IRQ Handled");
5531	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "dropped",
5532			CTLFLAG_RD, &adapter->dropped_pkts,
5533			"Driver dropped packets");
5534	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_dma_fail",
5535			CTLFLAG_RD, &adapter->no_tx_dma_setup,
5536			"Driver tx dma failure in xmit");
5537	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_overruns",
5538			CTLFLAG_RD, &adapter->rx_overruns,
5539			"RX overruns");
5540	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "watchdog_timeouts",
5541			CTLFLAG_RD, &adapter->watchdog_events,
5542			"Watchdog timeouts");
5543
5544	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "device_control",
5545			CTLFLAG_RD, &adapter->device_control,
5546			"Device Control Register");
5547	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_control",
5548			CTLFLAG_RD, &adapter->rx_control,
5549			"Receiver Control Register");
5550	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "interrupt_mask",
5551			CTLFLAG_RD, &adapter->int_mask,
5552			"Interrupt Mask");
5553	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "extended_int_mask",
5554			CTLFLAG_RD, &adapter->eint_mask,
5555			"Extended Interrupt Mask");
5556	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_buf_alloc",
5557			CTLFLAG_RD, &adapter->packet_buf_alloc_tx,
5558			"Transmit Buffer Packet Allocation");
5559	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_buf_alloc",
5560			CTLFLAG_RD, &adapter->packet_buf_alloc_rx,
5561			"Receive Buffer Packet Allocation");
5562	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_high_water",
5563			CTLFLAG_RD, &adapter->hw.fc.high_water, 0,
5564			"Flow Control High Watermark");
5565	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_low_water",
5566			CTLFLAG_RD, &adapter->hw.fc.low_water, 0,
5567			"Flow Control Low Watermark");
5568
5569	for (int i = 0; i < adapter->num_queues; i++, rxr++, txr++) {
5570		struct lro_ctrl *lro = &rxr->lro;
5571
5572		snprintf(namebuf, QUEUE_NAME_LEN, "queue%d", i);
5573		queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
5574					    CTLFLAG_RD, NULL, "Queue Name");
5575		queue_list = SYSCTL_CHILDREN(queue_node);
5576
5577		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "interrupt_rate",
5578				CTLFLAG_RD, &adapter->queues[i],
5579				sizeof(&adapter->queues[i]),
5580				igb_sysctl_interrupt_rate_handler,
5581				"IU", "Interrupt Rate");
5582
		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_head",
				CTLFLAG_RD, adapter, E1000_TDH(txr->me),
				igb_sysctl_reg_handler, "IU",
				"Transmit Descriptor Head");
		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_tail",
				CTLFLAG_RD, adapter, E1000_TDT(txr->me),
				igb_sysctl_reg_handler, "IU",
				"Transmit Descriptor Tail");
5591		SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "no_desc_avail",
5592				CTLFLAG_RD, &txr->no_desc_avail,
5593				"Queue No Descriptor Available");
5594		SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "tx_packets",
5595				CTLFLAG_RD, &txr->tx_packets,
5596				"Queue Packets Transmitted");
5597
5598		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_head",
5599				CTLFLAG_RD, adapter, E1000_RDH(rxr->me),
5600				igb_sysctl_reg_handler, "IU",
5601				"Receive Descriptor Head");
5602		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_tail",
5603				CTLFLAG_RD, adapter, E1000_RDT(rxr->me),
5604				igb_sysctl_reg_handler, "IU",
5605				"Receive Descriptor Tail");
5606		SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "rx_packets",
5607				CTLFLAG_RD, &rxr->rx_packets,
5608				"Queue Packets Received");
5609		SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "rx_bytes",
5610				CTLFLAG_RD, &rxr->rx_bytes,
5611				"Queue Bytes Received");
5612		SYSCTL_ADD_UINT(ctx, queue_list, OID_AUTO, "lro_queued",
5613				CTLFLAG_RD, &lro->lro_queued, 0,
5614				"LRO Queued");
5615		SYSCTL_ADD_UINT(ctx, queue_list, OID_AUTO, "lro_flushed",
5616				CTLFLAG_RD, &lro->lro_flushed, 0,
5617				"LRO Flushed");
5618	}
5619
5620	/* MAC stats get their own sub node */
5621
5622	stat_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "mac_stats",
5623				    CTLFLAG_RD, NULL, "MAC Statistics");
5624	stat_list = SYSCTL_CHILDREN(stat_node);
5625
	/*
	** The VF adapter has a very limited set of stats
	** since it is not managing the physical hardware.
	*/
	if (adapter->vf_ifp) {
		SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd",
				CTLFLAG_RD, &stats->gprc,
				"Good Packets Received");
		SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
				CTLFLAG_RD, &stats->gptc,
				"Good Packets Transmitted");
		SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd",
				CTLFLAG_RD, &stats->gorc,
				"Good Octets Received");
		SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_txd",
				CTLFLAG_RD, &stats->gotc,
				"Good Octets Transmitted");
		SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd",
				CTLFLAG_RD, &stats->mprc,
				"Multicast Packets Received");
		return;
	}
5648
5649	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "excess_coll",
5650			CTLFLAG_RD, &stats->ecol,
5651			"Excessive collisions");
5652	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "single_coll",
5653			CTLFLAG_RD, &stats->scc,
5654			"Single collisions");
5655	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "multiple_coll",
5656			CTLFLAG_RD, &stats->mcc,
5657			"Multiple collisions");
5658	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "late_coll",
5659			CTLFLAG_RD, &stats->latecol,
5660			"Late collisions");
5661	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "collision_count",
5662			CTLFLAG_RD, &stats->colc,
5663			"Collision Count");
5664	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "symbol_errors",
5665			CTLFLAG_RD, &stats->symerrs,
5666			"Symbol Errors");
5667	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "sequence_errors",
5668			CTLFLAG_RD, &stats->sec,
5669			"Sequence Errors");
5670	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "defer_count",
5671			CTLFLAG_RD, &stats->dc,
5672			"Defer Count");
5673	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "missed_packets",
5674			CTLFLAG_RD, &stats->mpc,
5675			"Missed Packets");
5676	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_no_buff",
5677			CTLFLAG_RD, &stats->rnbc,
5678			"Receive No Buffers");
5679	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_undersize",
5680			CTLFLAG_RD, &stats->ruc,
5681			"Receive Undersize");
5682	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_fragmented",
5683			CTLFLAG_RD, &stats->rfc,
			"Fragmented Packets Received");
5685	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_oversize",
5686			CTLFLAG_RD, &stats->roc,
5687			"Oversized Packets Received");
5688	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_jabber",
5689			CTLFLAG_RD, &stats->rjc,
			"Received Jabber");
5691	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_errs",
5692			CTLFLAG_RD, &stats->rxerrc,
5693			"Receive Errors");
5694	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "crc_errs",
5695			CTLFLAG_RD, &stats->crcerrs,
5696			"CRC errors");
5697	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "alignment_errs",
5698			CTLFLAG_RD, &stats->algnerrc,
5699			"Alignment Errors");
5700	/* On 82575 these are collision counts */
5701	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "coll_ext_errs",
5702			CTLFLAG_RD, &stats->cexterr,
5703			"Collision/Carrier extension errors");
5704	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xon_recvd",
5705			CTLFLAG_RD, &stats->xonrxc,
5706			"XON Received");
5707	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xon_txd",
5708			CTLFLAG_RD, &stats->xontxc,
5709			"XON Transmitted");
5710	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xoff_recvd",
5711			CTLFLAG_RD, &stats->xoffrxc,
5712			"XOFF Received");
5713	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xoff_txd",
5714			CTLFLAG_RD, &stats->xofftxc,
5715			"XOFF Transmitted");
5716	/* Packet Reception Stats */
5717	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "total_pkts_recvd",
5718			CTLFLAG_RD, &stats->tpr,
			"Total Packets Received");
5720	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd",
5721			CTLFLAG_RD, &stats->gprc,
5722			"Good Packets Received");
5723	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_recvd",
5724			CTLFLAG_RD, &stats->bprc,
5725			"Broadcast Packets Received");
5726	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd",
5727			CTLFLAG_RD, &stats->mprc,
5728			"Multicast Packets Received");
5729	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_64",
5730			CTLFLAG_RD, &stats->prc64,
			"64 byte frames received");
5732	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_65_127",
5733			CTLFLAG_RD, &stats->prc127,
5734			"65-127 byte frames received");
5735	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_128_255",
5736			CTLFLAG_RD, &stats->prc255,
5737			"128-255 byte frames received");
5738	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_256_511",
5739			CTLFLAG_RD, &stats->prc511,
5740			"256-511 byte frames received");
5741	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_512_1023",
5742			CTLFLAG_RD, &stats->prc1023,
5743			"512-1023 byte frames received");
5744	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_1024_1522",
5745			CTLFLAG_RD, &stats->prc1522,
			"1024-1522 byte frames received");
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd",
			CTLFLAG_RD, &stats->gorc,
			"Good Octets Received");

	/* Packet Transmission Stats */
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_txd",
			CTLFLAG_RD, &stats->gotc,
			"Good Octets Transmitted");
5755	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "total_pkts_txd",
5756			CTLFLAG_RD, &stats->tpt,
5757			"Total Packets Transmitted");
5758	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
5759			CTLFLAG_RD, &stats->gptc,
5760			"Good Packets Transmitted");
5761	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_txd",
5762			CTLFLAG_RD, &stats->bptc,
5763			"Broadcast Packets Transmitted");
5764	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_txd",
5765			CTLFLAG_RD, &stats->mptc,
5766			"Multicast Packets Transmitted");
5767	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_64",
5768			CTLFLAG_RD, &stats->ptc64,
			"64 byte frames transmitted");
5770	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_65_127",
5771			CTLFLAG_RD, &stats->ptc127,
5772			"65-127 byte frames transmitted");
5773	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_128_255",
5774			CTLFLAG_RD, &stats->ptc255,
5775			"128-255 byte frames transmitted");
5776	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_256_511",
5777			CTLFLAG_RD, &stats->ptc511,
5778			"256-511 byte frames transmitted");
5779	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_512_1023",
5780			CTLFLAG_RD, &stats->ptc1023,
5781			"512-1023 byte frames transmitted");
5782	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_1024_1522",
5783			CTLFLAG_RD, &stats->ptc1522,
5784			"1024-1522 byte frames transmitted");
5785	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tso_txd",
5786			CTLFLAG_RD, &stats->tsctc,
5787			"TSO Contexts Transmitted");
5788	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tso_ctx_fail",
5789			CTLFLAG_RD, &stats->tsctfc,
5790			"TSO Contexts Failed");
5791
5792
5793	/* Interrupt Stats */
5794
5795	int_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "interrupts",
5796				    CTLFLAG_RD, NULL, "Interrupt Statistics");
5797	int_list = SYSCTL_CHILDREN(int_node);
5798
5799	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "asserts",
5800			CTLFLAG_RD, &stats->iac,
5801			"Interrupt Assertion Count");
5802
5803	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_pkt_timer",
5804			CTLFLAG_RD, &stats->icrxptc,
5805			"Interrupt Cause Rx Pkt Timer Expire Count");
5806
5807	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_abs_timer",
5808			CTLFLAG_RD, &stats->icrxatc,
5809			"Interrupt Cause Rx Abs Timer Expire Count");
5810
5811	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_pkt_timer",
5812			CTLFLAG_RD, &stats->ictxptc,
5813			"Interrupt Cause Tx Pkt Timer Expire Count");
5814
5815	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_abs_timer",
5816			CTLFLAG_RD, &stats->ictxatc,
5817			"Interrupt Cause Tx Abs Timer Expire Count");
5818
5819	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_queue_empty",
5820			CTLFLAG_RD, &stats->ictxqec,
5821			"Interrupt Cause Tx Queue Empty Count");
5822
5823	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_queue_min_thresh",
5824			CTLFLAG_RD, &stats->ictxqmtc,
5825			"Interrupt Cause Tx Queue Min Thresh Count");
5826
5827	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_desc_min_thresh",
5828			CTLFLAG_RD, &stats->icrxdmtc,
5829			"Interrupt Cause Rx Desc Min Thresh Count");
5830
5831	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_overrun",
5832			CTLFLAG_RD, &stats->icrxoc,
5833			"Interrupt Cause Receiver Overrun Count");
5834
5835	/* Host to Card Stats */
5836
5837	host_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "host",
5838				    CTLFLAG_RD, NULL,
5839				    "Host to Card Statistics");
5840
5841	host_list = SYSCTL_CHILDREN(host_node);
5842
5843	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_tx_pkt",
5844			CTLFLAG_RD, &stats->cbtmpc,
5845			"Circuit Breaker Tx Packet Count");
5846
5847	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "host_tx_pkt_discard",
5848			CTLFLAG_RD, &stats->htdpmc,
5849			"Host Transmit Discarded Packets");
5850
5851	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "rx_pkt",
5852			CTLFLAG_RD, &stats->rpthc,
5853			"Rx Packets To Host");
5854
5855	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_rx_pkts",
5856			CTLFLAG_RD, &stats->cbrmpc,
5857			"Circuit Breaker Rx Packet Count");
5858
5859	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_rx_pkt_drop",
5860			CTLFLAG_RD, &stats->cbrdpc,
5861			"Circuit Breaker Rx Dropped Count");
5862
5863	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "tx_good_pkt",
5864			CTLFLAG_RD, &stats->hgptc,
5865			"Host Good Packets Tx Count");
5866
5867	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_tx_pkt_drop",
5868			CTLFLAG_RD, &stats->htcbdpc,
5869			"Host Tx Circuit Breaker Dropped Count");
5870
5871	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "rx_good_bytes",
5872			CTLFLAG_RD, &stats->hgorc,
5873			"Host Good Octets Received Count");
5874
5875	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "tx_good_bytes",
5876			CTLFLAG_RD, &stats->hgotc,
5877			"Host Good Octets Transmit Count");
5878
5879	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "length_errors",
5880			CTLFLAG_RD, &stats->lenerrs,
5881			"Length Errors");
5882
5883	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "serdes_violation_pkt",
5884			CTLFLAG_RD, &stats->scvpc,
5885			"SerDes/SGMII Code Violation Pkt Count");
5886
5887	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "header_redir_missed",
5888			CTLFLAG_RD, &stats->hrmpc,
5889			"Header Redirection Missed Packet Count");
5890}
5891
5892
5893/**********************************************************************
5894 *
 *  This routine provides a way to dump the adapter EEPROM,
 *  which is often a useful debug/service tool. Only the first
 *  32 words are dumped; the fields that matter lie in that range.
5898 *
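 *  Usage sketch (the oid is registered at attach time, outside this
 *  section; assuming it appears as dev.igb.<unit>.nvm, writing 1
 *  triggers the dump):
 *
 *	sysctl dev.igb.0.nvm=1
 *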
5899 **********************************************************************/
5900static int
5901igb_sysctl_nvm_info(SYSCTL_HANDLER_ARGS)
5902{
5903	struct adapter *adapter;
5904	int error;
5905	int result;
5906
5907	result = -1;
5908	error = sysctl_handle_int(oidp, &result, 0, req);
5909
5910	if (error || !req->newptr)
5911		return (error);
5912
5913	/*
5914	 * This value will cause a hex dump of the
5915	 * first 32 16-bit words of the EEPROM to
5916	 * the screen.
5917	 */
5918	if (result == 1) {
5919		adapter = (struct adapter *)arg1;
5920		igb_print_nvm_info(adapter);
	}
5922
5923	return (error);
5924}
5925
5926static void
5927igb_print_nvm_info(struct adapter *adapter)
5928{
5929	u16	eeprom_data;
5930	int	i, j, row = 0;
5931
	/* It's a bit crude, but it gets the job done */
5933	printf("\nInterface EEPROM Dump:\n");
5934	printf("Offset\n0x0000  ");
5935	for (i = 0, j = 0; i < 32; i++, j++) {
5936		if (j == 8) { /* Make the offset block */
5937			j = 0; ++row;
			printf("\n0x00%x0  ", row);
5939		}
5940		e1000_read_nvm(&adapter->hw, i, 1, &eeprom_data);
5941		printf("%04x ", eeprom_data);
5942	}
5943	printf("\n");
5944}
5945
5946static void
5947igb_set_sysctl_value(struct adapter *adapter, const char *name,
5948	const char *description, int *limit, int value)
5949{
5950	*limit = value;
5951	SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
5952	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
5953	    OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW, limit, value, description);
5954}
5955
5956/*
5957** Set flow control using sysctl:
5958** Flow control values:
5959** 	0 - off
5960**	1 - rx pause
5961**	2 - tx pause
5962**	3 - full
5963*/
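/*
** Usage sketch (the oid is registered at attach time, outside this
** section; assuming it appears as dev.igb.<unit>.fc):
**	sysctl dev.igb.0.fc=3	(request full flow control)
*/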
5964static int
5965igb_set_flowcntl(SYSCTL_HANDLER_ARGS)
5966{
5967	int		error;
5968	static int	input = 3; /* default is full */
5969	struct adapter	*adapter = (struct adapter *) arg1;
5970
5971	error = sysctl_handle_int(oidp, &input, 0, req);
5972
5973	if ((error) || (req->newptr == NULL))
5974		return (error);
5975
5976	switch (input) {
5977		case e1000_fc_rx_pause:
5978		case e1000_fc_tx_pause:
5979		case e1000_fc_full:
5980		case e1000_fc_none:
5981			adapter->hw.fc.requested_mode = input;
5982			adapter->fc = input;
5983			break;
5984		default:
5985			/* Do nothing */
5986			return (error);
5987	}
5988
5989	adapter->hw.fc.current_mode = adapter->hw.fc.requested_mode;
5990	e1000_force_mac_fc(&adapter->hw);
5991	return (error);
5992}
5993
/*
** Manage DMA Coalescing:
** Control values:
** 	0/1 - off/on (1 selects the default timer of 1000)
**	Legal timer values are:
**	250, 500, and 1000-10000 in increments of 1000
*/
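/*
** Usage sketch (the oid is registered at attach time, outside this
** section; assuming it appears as dev.igb.<unit>.dmac):
**	sysctl dev.igb.0.dmac=1000	(enable with the default timer)
*/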
6001static int
6002igb_sysctl_dmac(SYSCTL_HANDLER_ARGS)
6003{
6004	struct adapter *adapter = (struct adapter *) arg1;
6005	int		error;
6006
6007	error = sysctl_handle_int(oidp, &adapter->dmac, 0, req);
6008
6009	if ((error) || (req->newptr == NULL))
6010		return (error);
6011
6012	switch (adapter->dmac) {
6013		case 0:
			/* Disabling */
6015			break;
6016		case 1: /* Just enable and use default */
6017			adapter->dmac = 1000;
6018			break;
6019		case 250:
6020		case 500:
6021		case 1000:
6022		case 2000:
6023		case 3000:
6024		case 4000:
6025		case 5000:
6026		case 6000:
6027		case 7000:
6028		case 8000:
6029		case 9000:
6030		case 10000:
6031			/* Legal values - allow */
6032			break;
6033		default:
6034			/* Do nothing, illegal value */
6035			adapter->dmac = 0;
6036			return (error);
6037	}
6038	/* Reinit the interface */
6039	igb_init(adapter);
6040	return (error);
6041}
6042
/*
** Manage Energy Efficient Ethernet:
** Control values (mirroring the hardware eee_disable flag):
**	0 - EEE enabled
**	1 - EEE disabled
*/
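/*
** Usage sketch (the oid name and path are assigned at attach time,
** outside this section; dev.igb.<unit>.eee_disabled is an assumption):
**	sysctl dev.igb.0.eee_disabled=1		(disables EEE)
*/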
6048static int
6049igb_sysctl_eee(SYSCTL_HANDLER_ARGS)
6050{
6051	struct adapter	*adapter = (struct adapter *) arg1;
6052	int		error, value;
6053
6054	value = adapter->hw.dev_spec._82575.eee_disable;
6055	error = sysctl_handle_int(oidp, &value, 0, req);
6056	if (error || req->newptr == NULL)
6057		return (error);
6058	IGB_CORE_LOCK(adapter);
6059	adapter->hw.dev_spec._82575.eee_disable = (value != 0);
6060	igb_init_locked(adapter);
6061	IGB_CORE_UNLOCK(adapter);
6062	return (0);
6063}
6064