1/******************************************************************************
2
3  Copyright (c) 2001-2013, Intel Corporation
4  All rights reserved.
5
6  Redistribution and use in source and binary forms, with or without
7  modification, are permitted provided that the following conditions are met:
8
9   1. Redistributions of source code must retain the above copyright notice,
10      this list of conditions and the following disclaimer.
11
12   2. Redistributions in binary form must reproduce the above copyright
13      notice, this list of conditions and the following disclaimer in the
14      documentation and/or other materials provided with the distribution.
15
16   3. Neither the name of the Intel Corporation nor the names of its
17      contributors may be used to endorse or promote products derived from
18      this software without specific prior written permission.
19
20  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
24  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30  POSSIBILITY OF SUCH DAMAGE.
31
32******************************************************************************/
33/*$FreeBSD: stable/10/sys/dev/e1000/if_igb.c 262151 2014-02-18 05:01:04Z luigi $*/
34
35
36#include "opt_inet.h"
37#include "opt_inet6.h"
38
39#ifdef HAVE_KERNEL_OPTION_HEADERS
40#include "opt_device_polling.h"
41#include "opt_altq.h"
42#endif
43
44#include <sys/param.h>
45#include <sys/systm.h>
46#ifndef IGB_LEGACY_TX
47#include <sys/buf_ring.h>
48#endif
49#include <sys/bus.h>
50#include <sys/endian.h>
51#include <sys/kernel.h>
52#include <sys/kthread.h>
53#include <sys/malloc.h>
54#include <sys/mbuf.h>
55#include <sys/module.h>
56#include <sys/rman.h>
57#include <sys/socket.h>
58#include <sys/sockio.h>
59#include <sys/sysctl.h>
60#include <sys/taskqueue.h>
61#include <sys/eventhandler.h>
62#include <sys/pcpu.h>
63#include <sys/smp.h>
64#include <machine/smp.h>
65#include <machine/bus.h>
66#include <machine/resource.h>
67
68#include <net/bpf.h>
69#include <net/ethernet.h>
70#include <net/if.h>
71#include <net/if_arp.h>
72#include <net/if_dl.h>
73#include <net/if_media.h>
74
75#include <net/if_types.h>
76#include <net/if_vlan_var.h>
77
78#include <netinet/in_systm.h>
79#include <netinet/in.h>
80#include <netinet/if_ether.h>
81#include <netinet/ip.h>
82#include <netinet/ip6.h>
83#include <netinet/tcp.h>
84#include <netinet/tcp_lro.h>
85#include <netinet/udp.h>
86
87#include <machine/in_cksum.h>
88#include <dev/led/led.h>
89#include <dev/pci/pcivar.h>
90#include <dev/pci/pcireg.h>
91
92#include "e1000_api.h"
93#include "e1000_82575.h"
94#include "if_igb.h"
95
96/*********************************************************************
97 *  Set this to one to display debug statistics
98 *********************************************************************/
99int	igb_display_debug_stats = 0;
100
101/*********************************************************************
102 *  Driver version:
103 *********************************************************************/
104char igb_driver_version[] = "version - 2.4.0";
105
106
107/*********************************************************************
108 *  PCI Device ID Table
109 *
 110 *  Used by probe to select the devices to load on.
 111 *  The last field stores an index into igb_strings.
 112 *  The last entry must be all 0s.
113 *
114 *  { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
115 *********************************************************************/
116
117static igb_vendor_info_t igb_vendor_info_array[] =
118{
119	{ 0x8086, E1000_DEV_ID_82575EB_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
120	{ 0x8086, E1000_DEV_ID_82575EB_FIBER_SERDES,
121						PCI_ANY_ID, PCI_ANY_ID, 0},
122	{ 0x8086, E1000_DEV_ID_82575GB_QUAD_COPPER,
123						PCI_ANY_ID, PCI_ANY_ID, 0},
124	{ 0x8086, E1000_DEV_ID_82576,		PCI_ANY_ID, PCI_ANY_ID, 0},
125	{ 0x8086, E1000_DEV_ID_82576_NS,	PCI_ANY_ID, PCI_ANY_ID, 0},
126	{ 0x8086, E1000_DEV_ID_82576_NS_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
127	{ 0x8086, E1000_DEV_ID_82576_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
128	{ 0x8086, E1000_DEV_ID_82576_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
129	{ 0x8086, E1000_DEV_ID_82576_SERDES_QUAD,
130						PCI_ANY_ID, PCI_ANY_ID, 0},
131	{ 0x8086, E1000_DEV_ID_82576_QUAD_COPPER,
132						PCI_ANY_ID, PCI_ANY_ID, 0},
133	{ 0x8086, E1000_DEV_ID_82576_QUAD_COPPER_ET2,
134						PCI_ANY_ID, PCI_ANY_ID, 0},
135	{ 0x8086, E1000_DEV_ID_82576_VF,	PCI_ANY_ID, PCI_ANY_ID, 0},
136	{ 0x8086, E1000_DEV_ID_82580_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
137	{ 0x8086, E1000_DEV_ID_82580_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
138	{ 0x8086, E1000_DEV_ID_82580_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
139	{ 0x8086, E1000_DEV_ID_82580_SGMII,	PCI_ANY_ID, PCI_ANY_ID, 0},
140	{ 0x8086, E1000_DEV_ID_82580_COPPER_DUAL,
141						PCI_ANY_ID, PCI_ANY_ID, 0},
142	{ 0x8086, E1000_DEV_ID_82580_QUAD_FIBER,
143						PCI_ANY_ID, PCI_ANY_ID, 0},
144	{ 0x8086, E1000_DEV_ID_DH89XXCC_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
145	{ 0x8086, E1000_DEV_ID_DH89XXCC_SGMII,	PCI_ANY_ID, PCI_ANY_ID, 0},
146	{ 0x8086, E1000_DEV_ID_DH89XXCC_SFP,	PCI_ANY_ID, PCI_ANY_ID, 0},
147	{ 0x8086, E1000_DEV_ID_DH89XXCC_BACKPLANE,
148						PCI_ANY_ID, PCI_ANY_ID, 0},
149	{ 0x8086, E1000_DEV_ID_I350_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
150	{ 0x8086, E1000_DEV_ID_I350_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
151	{ 0x8086, E1000_DEV_ID_I350_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
152	{ 0x8086, E1000_DEV_ID_I350_SGMII,	PCI_ANY_ID, PCI_ANY_ID, 0},
153	{ 0x8086, E1000_DEV_ID_I350_VF,		PCI_ANY_ID, PCI_ANY_ID, 0},
154	{ 0x8086, E1000_DEV_ID_I210_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
155	{ 0x8086, E1000_DEV_ID_I210_COPPER_IT,	PCI_ANY_ID, PCI_ANY_ID, 0},
156	{ 0x8086, E1000_DEV_ID_I210_COPPER_OEM1,
157						PCI_ANY_ID, PCI_ANY_ID, 0},
158	{ 0x8086, E1000_DEV_ID_I210_COPPER_FLASHLESS,
159						PCI_ANY_ID, PCI_ANY_ID, 0},
160	{ 0x8086, E1000_DEV_ID_I210_SERDES_FLASHLESS,
161						PCI_ANY_ID, PCI_ANY_ID, 0},
162	{ 0x8086, E1000_DEV_ID_I210_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
163	{ 0x8086, E1000_DEV_ID_I210_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
164	{ 0x8086, E1000_DEV_ID_I210_SGMII,	PCI_ANY_ID, PCI_ANY_ID, 0},
165	{ 0x8086, E1000_DEV_ID_I211_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
166	{ 0x8086, E1000_DEV_ID_I354_BACKPLANE_1GBPS,
167						PCI_ANY_ID, PCI_ANY_ID, 0},
168	{ 0x8086, E1000_DEV_ID_I354_BACKPLANE_2_5GBPS,
169						PCI_ANY_ID, PCI_ANY_ID, 0},
170	{ 0x8086, E1000_DEV_ID_I354_SGMII,	PCI_ANY_ID, PCI_ANY_ID, 0},
171	/* required last entry */
172	{ 0, 0, 0, 0, 0}
173};
174
175/*********************************************************************
176 *  Table of branding strings for all supported NICs.
177 *********************************************************************/
178
179static char *igb_strings[] = {
180	"Intel(R) PRO/1000 Network Connection"
181};
182
183/*********************************************************************
184 *  Function prototypes
185 *********************************************************************/
186static int	igb_probe(device_t);
187static int	igb_attach(device_t);
188static int	igb_detach(device_t);
189static int	igb_shutdown(device_t);
190static int	igb_suspend(device_t);
191static int	igb_resume(device_t);
192#ifndef IGB_LEGACY_TX
193static int	igb_mq_start(struct ifnet *, struct mbuf *);
194static int	igb_mq_start_locked(struct ifnet *, struct tx_ring *);
195static void	igb_qflush(struct ifnet *);
196static void	igb_deferred_mq_start(void *, int);
197#else
198static void	igb_start(struct ifnet *);
199static void	igb_start_locked(struct tx_ring *, struct ifnet *ifp);
200#endif
201static int	igb_ioctl(struct ifnet *, u_long, caddr_t);
202static void	igb_init(void *);
203static void	igb_init_locked(struct adapter *);
204static void	igb_stop(void *);
205static void	igb_media_status(struct ifnet *, struct ifmediareq *);
206static int	igb_media_change(struct ifnet *);
207static void	igb_identify_hardware(struct adapter *);
208static int	igb_allocate_pci_resources(struct adapter *);
209static int	igb_allocate_msix(struct adapter *);
210static int	igb_allocate_legacy(struct adapter *);
211static int	igb_setup_msix(struct adapter *);
212static void	igb_free_pci_resources(struct adapter *);
213static void	igb_local_timer(void *);
214static void	igb_reset(struct adapter *);
215static int	igb_setup_interface(device_t, struct adapter *);
216static int	igb_allocate_queues(struct adapter *);
217static void	igb_configure_queues(struct adapter *);
218
219static int	igb_allocate_transmit_buffers(struct tx_ring *);
220static void	igb_setup_transmit_structures(struct adapter *);
221static void	igb_setup_transmit_ring(struct tx_ring *);
222static void	igb_initialize_transmit_units(struct adapter *);
223static void	igb_free_transmit_structures(struct adapter *);
224static void	igb_free_transmit_buffers(struct tx_ring *);
225
226static int	igb_allocate_receive_buffers(struct rx_ring *);
227static int	igb_setup_receive_structures(struct adapter *);
228static int	igb_setup_receive_ring(struct rx_ring *);
229static void	igb_initialize_receive_units(struct adapter *);
230static void	igb_free_receive_structures(struct adapter *);
231static void	igb_free_receive_buffers(struct rx_ring *);
232static void	igb_free_receive_ring(struct rx_ring *);
233
234static void	igb_enable_intr(struct adapter *);
235static void	igb_disable_intr(struct adapter *);
236static void	igb_update_stats_counters(struct adapter *);
237static bool	igb_txeof(struct tx_ring *);
238
239static __inline	void igb_rx_discard(struct rx_ring *, int);
240static __inline void igb_rx_input(struct rx_ring *,
241		    struct ifnet *, struct mbuf *, u32);
242
243static bool	igb_rxeof(struct igb_queue *, int, int *);
244static void	igb_rx_checksum(u32, struct mbuf *, u32);
245static int	igb_tx_ctx_setup(struct tx_ring *,
246		    struct mbuf *, u32 *, u32 *);
247static int	igb_tso_setup(struct tx_ring *,
248		    struct mbuf *, u32 *, u32 *);
249static void	igb_set_promisc(struct adapter *);
250static void	igb_disable_promisc(struct adapter *);
251static void	igb_set_multi(struct adapter *);
252static void	igb_update_link_status(struct adapter *);
253static void	igb_refresh_mbufs(struct rx_ring *, int);
254
255static void	igb_register_vlan(void *, struct ifnet *, u16);
256static void	igb_unregister_vlan(void *, struct ifnet *, u16);
257static void	igb_setup_vlan_hw_support(struct adapter *);
258
259static int	igb_xmit(struct tx_ring *, struct mbuf **);
260static int	igb_dma_malloc(struct adapter *, bus_size_t,
261		    struct igb_dma_alloc *, int);
262static void	igb_dma_free(struct adapter *, struct igb_dma_alloc *);
263static int	igb_sysctl_nvm_info(SYSCTL_HANDLER_ARGS);
264static void	igb_print_nvm_info(struct adapter *);
265static int 	igb_is_valid_ether_addr(u8 *);
266static void     igb_add_hw_stats(struct adapter *);
267
268static void	igb_vf_init_stats(struct adapter *);
269static void	igb_update_vf_stats_counters(struct adapter *);
270
271/* Management and WOL Support */
272static void	igb_init_manageability(struct adapter *);
273static void	igb_release_manageability(struct adapter *);
274static void     igb_get_hw_control(struct adapter *);
275static void     igb_release_hw_control(struct adapter *);
276static void     igb_enable_wakeup(device_t);
277static void     igb_led_func(void *, int);
278
279static int	igb_irq_fast(void *);
280static void	igb_msix_que(void *);
281static void	igb_msix_link(void *);
282static void	igb_handle_que(void *context, int pending);
283static void	igb_handle_link(void *context, int pending);
284static void	igb_handle_link_locked(struct adapter *);
285
286static void	igb_set_sysctl_value(struct adapter *, const char *,
287		    const char *, int *, int);
288static int	igb_set_flowcntl(SYSCTL_HANDLER_ARGS);
289static int	igb_sysctl_dmac(SYSCTL_HANDLER_ARGS);
290static int	igb_sysctl_eee(SYSCTL_HANDLER_ARGS);
291
292#ifdef DEVICE_POLLING
293static poll_handler_t igb_poll;
294#endif /* DEVICE_POLLING */
295
296/*********************************************************************
297 *  FreeBSD Device Interface Entry Points
298 *********************************************************************/
299
300static device_method_t igb_methods[] = {
301	/* Device interface */
302	DEVMETHOD(device_probe, igb_probe),
303	DEVMETHOD(device_attach, igb_attach),
304	DEVMETHOD(device_detach, igb_detach),
305	DEVMETHOD(device_shutdown, igb_shutdown),
306	DEVMETHOD(device_suspend, igb_suspend),
307	DEVMETHOD(device_resume, igb_resume),
308	DEVMETHOD_END
309};
310
311static driver_t igb_driver = {
312	"igb", igb_methods, sizeof(struct adapter),
313};
314
315static devclass_t igb_devclass;
316DRIVER_MODULE(igb, pci, igb_driver, igb_devclass, 0, 0);
317MODULE_DEPEND(igb, pci, 1, 1, 1);
318MODULE_DEPEND(igb, ether, 1, 1, 1);
319
320/*********************************************************************
321 *  Tunable default values.
322 *********************************************************************/
323
324static SYSCTL_NODE(_hw, OID_AUTO, igb, CTLFLAG_RD, 0, "IGB driver parameters");
325
326/* Descriptor defaults */
327static int igb_rxd = IGB_DEFAULT_RXD;
328static int igb_txd = IGB_DEFAULT_TXD;
329TUNABLE_INT("hw.igb.rxd", &igb_rxd);
330TUNABLE_INT("hw.igb.txd", &igb_txd);
331SYSCTL_INT(_hw_igb, OID_AUTO, rxd, CTLFLAG_RDTUN, &igb_rxd, 0,
332    "Number of receive descriptors per queue");
333SYSCTL_INT(_hw_igb, OID_AUTO, txd, CTLFLAG_RDTUN, &igb_txd, 0,
334    "Number of transmit descriptors per queue");
335
336/*
337** AIM: Adaptive Interrupt Moderation,
338** which means that the interrupt rate
339** is varied over time based on the
340** traffic seen on each interrupt vector.
341*/
342static int igb_enable_aim = TRUE;
343TUNABLE_INT("hw.igb.enable_aim", &igb_enable_aim);
344SYSCTL_INT(_hw_igb, OID_AUTO, enable_aim, CTLFLAG_RW, &igb_enable_aim, 0,
345    "Enable adaptive interrupt moderation");
346
347/*
348 * MSIX should be the default for best performance,
349 * but this allows it to be forced off for testing.
350 */
351static int igb_enable_msix = 1;
352TUNABLE_INT("hw.igb.enable_msix", &igb_enable_msix);
353SYSCTL_INT(_hw_igb, OID_AUTO, enable_msix, CTLFLAG_RDTUN, &igb_enable_msix, 0,
354    "Enable MSI-X interrupts");
355
356/*
357** Tunable interrupt rate
358*/
359static int igb_max_interrupt_rate = 8000;
360TUNABLE_INT("hw.igb.max_interrupt_rate", &igb_max_interrupt_rate);
361SYSCTL_INT(_hw_igb, OID_AUTO, max_interrupt_rate, CTLFLAG_RDTUN,
362    &igb_max_interrupt_rate, 0, "Maximum interrupts per second");
363
364#ifndef IGB_LEGACY_TX
365/*
366** Tunable number of buffers in the buf-ring (drbr_xxx)
367*/
368static int igb_buf_ring_size = IGB_BR_SIZE;
369TUNABLE_INT("hw.igb.buf_ring_size", &igb_buf_ring_size);
370SYSCTL_INT(_hw_igb, OID_AUTO, buf_ring_size, CTLFLAG_RDTUN,
371    &igb_buf_ring_size, 0, "Size of the bufring");
372#endif
373
374/*
375** Header split causes the packet header to
376** be dma'd to a seperate mbuf from the payload.
377** this can have memory alignment benefits. But
378** another plus is that small packets often fit
379** into the header and thus use no cluster. Its
380** a very workload dependent type feature.
381*/
382static int igb_header_split = FALSE;
383TUNABLE_INT("hw.igb.hdr_split", &igb_header_split);
384SYSCTL_INT(_hw_igb, OID_AUTO, header_split, CTLFLAG_RDTUN, &igb_header_split, 0,
385    "Enable receive mbuf header split");
386
387/*
388** This will autoconfigure based on the
389** number of CPUs and max supported
390** MSIX messages if left at 0.
391*/
392static int igb_num_queues = 0;
393TUNABLE_INT("hw.igb.num_queues", &igb_num_queues);
394SYSCTL_INT(_hw_igb, OID_AUTO, num_queues, CTLFLAG_RDTUN, &igb_num_queues, 0,
395    "Number of queues to configure, 0 indicates autoconfigure");
396
397/*
398** Global variable to store last used CPU when binding queues
399** to CPUs in igb_allocate_msix.  Starts at CPU_FIRST and increments when a
400** queue is bound to a cpu.
401*/
402static int igb_last_bind_cpu = -1;
403
404/* How many packets rxeof tries to clean at a time */
405static int igb_rx_process_limit = 100;
406TUNABLE_INT("hw.igb.rx_process_limit", &igb_rx_process_limit);
407SYSCTL_INT(_hw_igb, OID_AUTO, rx_process_limit, CTLFLAG_RDTUN,
408    &igb_rx_process_limit, 0,
409    "Maximum number of received packets to process at a time, -1 means unlimited");
410
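/*
** Example (illustrative values only): these loader tunables are normally
** set in /boot/loader.conf before the module loads, e.g.:
**
**   hw.igb.rxd="2048"
**   hw.igb.txd="2048"
**   hw.igb.max_interrupt_rate="16000"
**
** The values are picked up by the TUNABLE_INT() declarations above.
*/
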
411#ifdef DEV_NETMAP	/* see ixgbe.c for details */
412#include <dev/netmap/if_igb_netmap.h>
413#endif /* DEV_NETMAP */
414/*********************************************************************
415 *  Device identification routine
416 *
 417 *  igb_probe determines whether the driver should be loaded on an
 418 *  adapter, based on the PCI vendor/device ID of that adapter.
419 *
420 *  return BUS_PROBE_DEFAULT on success, positive on failure
421 *********************************************************************/
422
423static int
424igb_probe(device_t dev)
425{
426	char		adapter_name[60];
427	uint16_t	pci_vendor_id = 0;
428	uint16_t	pci_device_id = 0;
429	uint16_t	pci_subvendor_id = 0;
430	uint16_t	pci_subdevice_id = 0;
431	igb_vendor_info_t *ent;
432
433	INIT_DEBUGOUT("igb_probe: begin");
434
435	pci_vendor_id = pci_get_vendor(dev);
436	if (pci_vendor_id != IGB_VENDOR_ID)
437		return (ENXIO);
438
439	pci_device_id = pci_get_device(dev);
440	pci_subvendor_id = pci_get_subvendor(dev);
441	pci_subdevice_id = pci_get_subdevice(dev);
442
443	ent = igb_vendor_info_array;
444	while (ent->vendor_id != 0) {
445		if ((pci_vendor_id == ent->vendor_id) &&
446		    (pci_device_id == ent->device_id) &&
447
448		    ((pci_subvendor_id == ent->subvendor_id) ||
449		    (ent->subvendor_id == PCI_ANY_ID)) &&
450
451		    ((pci_subdevice_id == ent->subdevice_id) ||
452		    (ent->subdevice_id == PCI_ANY_ID))) {
453			sprintf(adapter_name, "%s %s",
454				igb_strings[ent->index],
455				igb_driver_version);
456			device_set_desc_copy(dev, adapter_name);
457			return (BUS_PROBE_DEFAULT);
458		}
459		ent++;
460	}
461
462	return (ENXIO);
463}
464
465/*********************************************************************
466 *  Device initialization routine
467 *
468 *  The attach entry point is called when the driver is being loaded.
469 *  This routine identifies the type of hardware, allocates all resources
470 *  and initializes the hardware.
471 *
472 *  return 0 on success, positive on failure
473 *********************************************************************/
474
475static int
476igb_attach(device_t dev)
477{
478	struct adapter	*adapter;
479	int		error = 0;
480	u16		eeprom_data;
481
482	INIT_DEBUGOUT("igb_attach: begin");
483
484	if (resource_disabled("igb", device_get_unit(dev))) {
485		device_printf(dev, "Disabled by device hint\n");
486		return (ENXIO);
487	}
488
489	adapter = device_get_softc(dev);
490	adapter->dev = adapter->osdep.dev = dev;
491	IGB_CORE_LOCK_INIT(adapter, device_get_nameunit(dev));
492
493	/* SYSCTL stuff */
494	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
495	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
496	    OID_AUTO, "nvm", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
497	    igb_sysctl_nvm_info, "I", "NVM Information");
498
499	igb_set_sysctl_value(adapter, "enable_aim",
500	    "Interrupt Moderation", &adapter->enable_aim,
501	    igb_enable_aim);
502
503	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
504	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
505	    OID_AUTO, "fc", CTLTYPE_INT|CTLFLAG_RW,
506	    adapter, 0, igb_set_flowcntl, "I", "Flow Control");
507
508	callout_init_mtx(&adapter->timer, &adapter->core_mtx, 0);
509
510	/* Determine hardware and mac info */
511	igb_identify_hardware(adapter);
512
513	/* Setup PCI resources */
514	if (igb_allocate_pci_resources(adapter)) {
515		device_printf(dev, "Allocation of PCI resources failed\n");
516		error = ENXIO;
517		goto err_pci;
518	}
519
520	/* Do Shared Code initialization */
521	if (e1000_setup_init_funcs(&adapter->hw, TRUE)) {
522		device_printf(dev, "Setup of Shared code failed\n");
523		error = ENXIO;
524		goto err_pci;
525	}
526
527	e1000_get_bus_info(&adapter->hw);
528
529	/* Sysctl for limiting the amount of work done in the taskqueue */
530	igb_set_sysctl_value(adapter, "rx_processing_limit",
531	    "max number of rx packets to process",
532	    &adapter->rx_process_limit, igb_rx_process_limit);
533
534	/*
535	 * Validate the number of transmit and receive descriptors. It
536	 * must not exceed the hardware maximum and must be a multiple
537	 * of IGB_DBA_ALIGN.
538	 */
539	if (((igb_txd * sizeof(struct e1000_tx_desc)) % IGB_DBA_ALIGN) != 0 ||
540	    (igb_txd > IGB_MAX_TXD) || (igb_txd < IGB_MIN_TXD)) {
541		device_printf(dev, "Using %d TX descriptors instead of %d!\n",
542		    IGB_DEFAULT_TXD, igb_txd);
543		adapter->num_tx_desc = IGB_DEFAULT_TXD;
544	} else
545		adapter->num_tx_desc = igb_txd;
546	if (((igb_rxd * sizeof(struct e1000_rx_desc)) % IGB_DBA_ALIGN) != 0 ||
547	    (igb_rxd > IGB_MAX_RXD) || (igb_rxd < IGB_MIN_RXD)) {
548		device_printf(dev, "Using %d RX descriptors instead of %d!\n",
549		    IGB_DEFAULT_RXD, igb_rxd);
550		adapter->num_rx_desc = IGB_DEFAULT_RXD;
551	} else
552		adapter->num_rx_desc = igb_rxd;
553
554	adapter->hw.mac.autoneg = DO_AUTO_NEG;
555	adapter->hw.phy.autoneg_wait_to_complete = FALSE;
556	adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
557
558	/* Copper options */
559	if (adapter->hw.phy.media_type == e1000_media_type_copper) {
560		adapter->hw.phy.mdix = AUTO_ALL_MODES;
561		adapter->hw.phy.disable_polarity_correction = FALSE;
562		adapter->hw.phy.ms_type = IGB_MASTER_SLAVE;
563	}
564
565	/*
566	 * Set the frame limits assuming
567	 * standard ethernet sized frames.
568	 */
569	adapter->max_frame_size = ETHERMTU + ETHER_HDR_LEN + ETHERNET_FCS_SIZE;
570
571	/*
572	** Allocate and Setup Queues
573	*/
574	if (igb_allocate_queues(adapter)) {
575		error = ENOMEM;
576		goto err_pci;
577	}
578
579	/* Allocate the appropriate stats memory */
580	if (adapter->vf_ifp) {
581		adapter->stats =
582		    (struct e1000_vf_stats *)malloc(sizeof \
583		    (struct e1000_vf_stats), M_DEVBUF, M_NOWAIT | M_ZERO);
584		igb_vf_init_stats(adapter);
585	} else
586		adapter->stats =
587		    (struct e1000_hw_stats *)malloc(sizeof \
588		    (struct e1000_hw_stats), M_DEVBUF, M_NOWAIT | M_ZERO);
589	if (adapter->stats == NULL) {
590		device_printf(dev, "Can not allocate stats memory\n");
591		error = ENOMEM;
592		goto err_late;
593	}
594
595	/* Allocate multicast array memory. */
596	adapter->mta = malloc(sizeof(u8) * ETH_ADDR_LEN *
597	    MAX_NUM_MULTICAST_ADDRESSES, M_DEVBUF, M_NOWAIT);
598	if (adapter->mta == NULL) {
599		device_printf(dev, "Can not allocate multicast setup array\n");
600		error = ENOMEM;
601		goto err_late;
602	}
603
604	/* Some adapter-specific advanced features */
605	if (adapter->hw.mac.type >= e1000_i350) {
606		SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
607		    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
608		    OID_AUTO, "dmac", CTLTYPE_INT|CTLFLAG_RW,
609		    adapter, 0, igb_sysctl_dmac, "I", "DMA Coalesce");
610		SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
611		    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
612		    OID_AUTO, "eee_disabled", CTLTYPE_INT|CTLFLAG_RW,
613		    adapter, 0, igb_sysctl_eee, "I",
614		    "Disable Energy Efficient Ethernet");
615		if (adapter->hw.phy.media_type == e1000_media_type_copper) {
616			if (adapter->hw.mac.type == e1000_i354)
617				e1000_set_eee_i354(&adapter->hw);
618			else
619				e1000_set_eee_i350(&adapter->hw);
620		}
621	}
622
623	/*
624	** Start from a known state; this is
625	** important for reading the NVM and
626	** the MAC address from it.
627	*/
628	e1000_reset_hw(&adapter->hw);
629
630	/* Make sure we have a good EEPROM before we read from it */
631	if (((adapter->hw.mac.type != e1000_i210) &&
632	    (adapter->hw.mac.type != e1000_i211)) &&
633	    (e1000_validate_nvm_checksum(&adapter->hw) < 0)) {
634		/*
635		** Some PCI-E parts fail the first check due to
636		** the link being in a sleep state; call it again.
637		** If it fails a second time, it is a real issue.
638		*/
639		if (e1000_validate_nvm_checksum(&adapter->hw) < 0) {
640			device_printf(dev,
641			    "The EEPROM Checksum Is Not Valid\n");
642			error = EIO;
643			goto err_late;
644		}
645	}
646
647	/*
648	** Copy the permanent MAC address out of the EEPROM
649	*/
650	if (e1000_read_mac_addr(&adapter->hw) < 0) {
651		device_printf(dev, "EEPROM read error while reading MAC"
652		    " address\n");
653		error = EIO;
654		goto err_late;
655	}
656	/* Check its sanity */
657	if (!igb_is_valid_ether_addr(adapter->hw.mac.addr)) {
658		device_printf(dev, "Invalid MAC address\n");
659		error = EIO;
660		goto err_late;
661	}
662
663	/* Setup OS specific network interface */
664	if (igb_setup_interface(dev, adapter) != 0)
665		goto err_late;
666
667	/* Now get a good starting state */
668	igb_reset(adapter);
669
670	/* Initialize statistics */
671	igb_update_stats_counters(adapter);
672
673	adapter->hw.mac.get_link_status = 1;
674	igb_update_link_status(adapter);
675
676	/* Indicate SOL/IDER usage */
677	if (e1000_check_reset_block(&adapter->hw))
678		device_printf(dev,
679		    "PHY reset is blocked due to SOL/IDER session.\n");
680
681	/* Determine if we have to control management hardware */
682	adapter->has_manage = e1000_enable_mng_pass_thru(&adapter->hw);
683
684	/*
685	 * Setup Wake-on-Lan
686	 */
687	/* APME bit in EEPROM is mapped to WUC.APME */
688	eeprom_data = E1000_READ_REG(&adapter->hw, E1000_WUC) & E1000_WUC_APME;
689	if (eeprom_data)
690		adapter->wol = E1000_WUFC_MAG;
691
692	/* Register for VLAN events */
693	adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
694	     igb_register_vlan, adapter, EVENTHANDLER_PRI_FIRST);
695	adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
696	     igb_unregister_vlan, adapter, EVENTHANDLER_PRI_FIRST);
697
698	igb_add_hw_stats(adapter);
699
700	/* Tell the stack that the interface is not active */
701	adapter->ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
702	adapter->ifp->if_drv_flags |=  IFF_DRV_OACTIVE;
703
704	adapter->led_dev = led_create(igb_led_func, adapter,
705	    device_get_nameunit(dev));
706
707	/*
708	** Configure Interrupts
709	*/
710	if ((adapter->msix > 1) && (igb_enable_msix))
711		error = igb_allocate_msix(adapter);
712	else /* MSI or Legacy */
713		error = igb_allocate_legacy(adapter);
714	if (error)
715		goto err_late;
716
717#ifdef DEV_NETMAP
718	igb_netmap_attach(adapter);
719#endif /* DEV_NETMAP */
720	INIT_DEBUGOUT("igb_attach: end");
721
722	return (0);
723
724err_late:
725	igb_detach(dev);
726	igb_free_transmit_structures(adapter);
727	igb_free_receive_structures(adapter);
728	igb_release_hw_control(adapter);
729err_pci:
730	igb_free_pci_resources(adapter);
731	if (adapter->ifp != NULL)
732		if_free(adapter->ifp);
733	free(adapter->mta, M_DEVBUF);
734	IGB_CORE_LOCK_DESTROY(adapter);
735
736	return (error);
737}
738
739/*********************************************************************
740 *  Device removal routine
741 *
742 *  The detach entry point is called when the driver is being removed.
743 *  This routine stops the adapter and deallocates all the resources
744 *  that were allocated for driver operation.
745 *
746 *  return 0 on success, positive on failure
747 *********************************************************************/
748
749static int
750igb_detach(device_t dev)
751{
752	struct adapter	*adapter = device_get_softc(dev);
753	struct ifnet	*ifp = adapter->ifp;
754
755	INIT_DEBUGOUT("igb_detach: begin");
756
757	/* Make sure VLANS are not using driver */
758	if (adapter->ifp->if_vlantrunk != NULL) {
759		device_printf(dev,"Vlan in use, detach first\n");
760		return (EBUSY);
761	}
762
763	ether_ifdetach(adapter->ifp);
764
765	if (adapter->led_dev != NULL)
766		led_destroy(adapter->led_dev);
767
768#ifdef DEVICE_POLLING
769	if (ifp->if_capenable & IFCAP_POLLING)
770		ether_poll_deregister(ifp);
771#endif
772
773	IGB_CORE_LOCK(adapter);
774	adapter->in_detach = 1;
775	igb_stop(adapter);
776	IGB_CORE_UNLOCK(adapter);
777
778	e1000_phy_hw_reset(&adapter->hw);
779
780	/* Give control back to firmware */
781	igb_release_manageability(adapter);
782	igb_release_hw_control(adapter);
783
784	if (adapter->wol) {
785		E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
786		E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
787		igb_enable_wakeup(dev);
788	}
789
790	/* Unregister VLAN events */
791	if (adapter->vlan_attach != NULL)
792		EVENTHANDLER_DEREGISTER(vlan_config, adapter->vlan_attach);
793	if (adapter->vlan_detach != NULL)
794		EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach);
795
796	callout_drain(&adapter->timer);
797
798#ifdef DEV_NETMAP
799	netmap_detach(adapter->ifp);
800#endif /* DEV_NETMAP */
801	igb_free_pci_resources(adapter);
802	bus_generic_detach(dev);
803	if_free(ifp);
804
805	igb_free_transmit_structures(adapter);
806	igb_free_receive_structures(adapter);
807	if (adapter->mta != NULL)
808		free(adapter->mta, M_DEVBUF);
809
810	IGB_CORE_LOCK_DESTROY(adapter);
811
812	return (0);
813}
814
815/*********************************************************************
816 *
817 *  Shutdown entry point
818 *
819 **********************************************************************/
820
821static int
822igb_shutdown(device_t dev)
823{
824	return igb_suspend(dev);
825}
826
827/*
828 * Suspend/resume device methods.
829 */
830static int
831igb_suspend(device_t dev)
832{
833	struct adapter *adapter = device_get_softc(dev);
834
835	IGB_CORE_LOCK(adapter);
836
837	igb_stop(adapter);
838
839        igb_release_manageability(adapter);
840	igb_release_hw_control(adapter);
841
842        if (adapter->wol) {
843                E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
844                E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
845                igb_enable_wakeup(dev);
846        }
847
848	IGB_CORE_UNLOCK(adapter);
849
850	return bus_generic_suspend(dev);
851}
852
853static int
854igb_resume(device_t dev)
855{
856	struct adapter *adapter = device_get_softc(dev);
857	struct tx_ring	*txr = adapter->tx_rings;
858	struct ifnet *ifp = adapter->ifp;
859
860	IGB_CORE_LOCK(adapter);
861	igb_init_locked(adapter);
862	igb_init_manageability(adapter);
863
864	if ((ifp->if_flags & IFF_UP) &&
865	    (ifp->if_drv_flags & IFF_DRV_RUNNING) && adapter->link_active) {
866		for (int i = 0; i < adapter->num_queues; i++, txr++) {
867			IGB_TX_LOCK(txr);
868#ifndef IGB_LEGACY_TX
869			/* Process the stack queue only if not depleted */
870			if (((txr->queue_status & IGB_QUEUE_DEPLETED) == 0) &&
871			    !drbr_empty(ifp, txr->br))
872				igb_mq_start_locked(ifp, txr);
873#else
874			if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
875				igb_start_locked(txr, ifp);
876#endif
877			IGB_TX_UNLOCK(txr);
878		}
879	}
880	IGB_CORE_UNLOCK(adapter);
881
882	return bus_generic_resume(dev);
883}
884
885
886#ifdef IGB_LEGACY_TX
887
888/*********************************************************************
889 *  Transmit entry point
890 *
891 *  igb_start is called by the stack to initiate a transmit.
892 *  The driver will remain in this routine as long as there are
893 *  packets to transmit and transmit resources are available.
 894 *  In case resources are not available, the stack is notified and
 895 *  the packet is requeued.
896 **********************************************************************/
897
898static void
899igb_start_locked(struct tx_ring *txr, struct ifnet *ifp)
900{
901	struct adapter	*adapter = ifp->if_softc;
902	struct mbuf	*m_head;
903
904	IGB_TX_LOCK_ASSERT(txr);
905
906	if ((ifp->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
907	    IFF_DRV_RUNNING)
908		return;
909	if (!adapter->link_active)
910		return;
911
912	/* Call cleanup if number of TX descriptors low */
913	if (txr->tx_avail <= IGB_TX_CLEANUP_THRESHOLD)
914		igb_txeof(txr);
915
916	while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
917		if (txr->tx_avail <= IGB_MAX_SCATTER) {
918			txr->queue_status |= IGB_QUEUE_DEPLETED;
919			break;
920		}
921		IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
922		if (m_head == NULL)
923			break;
924		/*
925		 *  Encapsulation can modify our pointer, and/or make it
926		 *  NULL on failure.  In that event, we can't requeue.
927		 */
928		if (igb_xmit(txr, &m_head)) {
929			if (m_head != NULL)
930				IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
931			if (txr->tx_avail <= IGB_MAX_SCATTER)
932				txr->queue_status |= IGB_QUEUE_DEPLETED;
933			break;
934		}
935
936		/* Send a copy of the frame to the BPF listener */
937		ETHER_BPF_MTAP(ifp, m_head);
938
939		/* Set watchdog on */
940		txr->watchdog_time = ticks;
941		txr->queue_status |= IGB_QUEUE_WORKING;
942	}
943}
944
945/*
 946 * Legacy TX driver routine, called from the
 947 * stack; it always uses the first TX ring and waits for its lock.
 948 * It should not be used with multiqueue TX.
949 */
950static void
951igb_start(struct ifnet *ifp)
952{
953	struct adapter	*adapter = ifp->if_softc;
954	struct tx_ring	*txr = adapter->tx_rings;
955
956	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
957		IGB_TX_LOCK(txr);
958		igb_start_locked(txr, ifp);
959		IGB_TX_UNLOCK(txr);
960	}
961	return;
962}
963
964#else /* ~IGB_LEGACY_TX */
965
966/*
967** Multiqueue Transmit Entry:
968**  enqueue the frame and return to the stack quickly; the frame is
969**  sent either under the TX lock now or later from a deferred task.
970*/
971static int
972igb_mq_start(struct ifnet *ifp, struct mbuf *m)
973{
974	struct adapter		*adapter = ifp->if_softc;
975	struct igb_queue	*que;
976	struct tx_ring		*txr;
977	int 			i, err = 0;
978
979	/* Which queue to use */
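	/*
	** If the mbuf carries a flow ID (typically the RSS hash assigned on
	** the receive path), use it so a given flow always maps to the same
	** TX ring; otherwise pick a ring based on the current CPU.
	*/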
980	if ((m->m_flags & M_FLOWID) != 0)
981		i = m->m_pkthdr.flowid % adapter->num_queues;
982	else
983		i = curcpu % adapter->num_queues;
984	txr = &adapter->tx_rings[i];
985	que = &adapter->queues[i];
986
987	err = drbr_enqueue(ifp, txr->br, m);
988	if (err)
989		return (err);
990	if (IGB_TX_TRYLOCK(txr)) {
991		err = igb_mq_start_locked(ifp, txr);
992		IGB_TX_UNLOCK(txr);
993	} else
994		taskqueue_enqueue(que->tq, &txr->txq_task);
995
996	return (err);
997}
998
999static int
1000igb_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr)
1001{
1002	struct adapter  *adapter = txr->adapter;
1003        struct mbuf     *next;
1004        int             err = 0, enq = 0;
1005
1006	IGB_TX_LOCK_ASSERT(txr);
1007
1008	if (((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) ||
1009	    adapter->link_active == 0)
1010		return (ENETDOWN);
1011
1012
1013	/* Process the queue */
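	/*
	** drbr_peek() leaves the mbuf on the ring until drbr_advance()
	** commits it, so on a transient transmit failure the (possibly
	** modified) mbuf can be returned with drbr_putback().
	*/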
1014	while ((next = drbr_peek(ifp, txr->br)) != NULL) {
1015		if ((err = igb_xmit(txr, &next)) != 0) {
1016			if (next == NULL) {
1017				/* It was freed, move forward */
1018				drbr_advance(ifp, txr->br);
1019			} else {
1020				/*
1021				 * Still have one left, it may not be
1022				 * the same since the transmit function
1023				 * may have changed it.
1024				 */
1025				drbr_putback(ifp, txr->br, next);
1026			}
1027			break;
1028		}
1029		drbr_advance(ifp, txr->br);
1030		enq++;
1031		ifp->if_obytes += next->m_pkthdr.len;
1032		if (next->m_flags & M_MCAST)
1033			ifp->if_omcasts++;
1034		ETHER_BPF_MTAP(ifp, next);
1035		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
1036			break;
1037	}
1038	if (enq > 0) {
1039		/* Set the watchdog */
1040		txr->queue_status |= IGB_QUEUE_WORKING;
1041		txr->watchdog_time = ticks;
1042	}
1043	if (txr->tx_avail <= IGB_TX_CLEANUP_THRESHOLD)
1044		igb_txeof(txr);
1045	if (txr->tx_avail <= IGB_MAX_SCATTER)
1046		txr->queue_status |= IGB_QUEUE_DEPLETED;
1047	return (err);
1048}
1049
1050/*
1051 * Called from a taskqueue to drain queued transmit packets.
1052 */
1053static void
1054igb_deferred_mq_start(void *arg, int pending)
1055{
1056	struct tx_ring *txr = arg;
1057	struct adapter *adapter = txr->adapter;
1058	struct ifnet *ifp = adapter->ifp;
1059
1060	IGB_TX_LOCK(txr);
1061	if (!drbr_empty(ifp, txr->br))
1062		igb_mq_start_locked(ifp, txr);
1063	IGB_TX_UNLOCK(txr);
1064}
1065
1066/*
1067** Flush all ring buffers
1068*/
1069static void
1070igb_qflush(struct ifnet *ifp)
1071{
1072	struct adapter	*adapter = ifp->if_softc;
1073	struct tx_ring	*txr = adapter->tx_rings;
1074	struct mbuf	*m;
1075
1076	for (int i = 0; i < adapter->num_queues; i++, txr++) {
1077		IGB_TX_LOCK(txr);
1078		while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
1079			m_freem(m);
1080		IGB_TX_UNLOCK(txr);
1081	}
1082	if_qflush(ifp);
1083}
1084#endif /* ~IGB_LEGACY_TX */
1085
1086/*********************************************************************
1087 *  Ioctl entry point
1088 *
1089 *  igb_ioctl is called when the user wants to configure the
1090 *  interface.
1091 *
1092 *  return 0 on success, positive on failure
1093 **********************************************************************/
1094
1095static int
1096igb_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
1097{
1098	struct adapter	*adapter = ifp->if_softc;
1099	struct ifreq	*ifr = (struct ifreq *)data;
1100#if defined(INET) || defined(INET6)
1101	struct ifaddr	*ifa = (struct ifaddr *)data;
1102#endif
1103	bool		avoid_reset = FALSE;
1104	int		error = 0;
1105
1106	if (adapter->in_detach)
1107		return (error);
1108
1109	switch (command) {
1110	case SIOCSIFADDR:
1111#ifdef INET
1112		if (ifa->ifa_addr->sa_family == AF_INET)
1113			avoid_reset = TRUE;
1114#endif
1115#ifdef INET6
1116		if (ifa->ifa_addr->sa_family == AF_INET6)
1117			avoid_reset = TRUE;
1118#endif
1119		/*
1120		** Calling init results in link renegotiation,
1121		** so we avoid doing it when possible.
1122		*/
1123		if (avoid_reset) {
1124			ifp->if_flags |= IFF_UP;
1125			if (!(ifp->if_drv_flags & IFF_DRV_RUNNING))
1126				igb_init(adapter);
1127#ifdef INET
1128			if (!(ifp->if_flags & IFF_NOARP))
1129				arp_ifinit(ifp, ifa);
1130#endif
1131		} else
1132			error = ether_ioctl(ifp, command, data);
1133		break;
1134	case SIOCSIFMTU:
1135	    {
1136		int max_frame_size;
1137
1138		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)");
1139
1140		IGB_CORE_LOCK(adapter);
1141		max_frame_size = 9234;
1142		if (ifr->ifr_mtu > max_frame_size - ETHER_HDR_LEN -
1143		    ETHER_CRC_LEN) {
1144			IGB_CORE_UNLOCK(adapter);
1145			error = EINVAL;
1146			break;
1147		}
1148
1149		ifp->if_mtu = ifr->ifr_mtu;
1150		adapter->max_frame_size =
1151		    ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
1152		igb_init_locked(adapter);
1153		IGB_CORE_UNLOCK(adapter);
1154		break;
1155	    }
1156	case SIOCSIFFLAGS:
1157		IOCTL_DEBUGOUT("ioctl rcv'd:\
1158		    SIOCSIFFLAGS (Set Interface Flags)");
1159		IGB_CORE_LOCK(adapter);
1160		if (ifp->if_flags & IFF_UP) {
1161			if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) {
1162				if ((ifp->if_flags ^ adapter->if_flags) &
1163				    (IFF_PROMISC | IFF_ALLMULTI)) {
1164					igb_disable_promisc(adapter);
1165					igb_set_promisc(adapter);
1166				}
1167			} else
1168				igb_init_locked(adapter);
1169		} else
1170			if (ifp->if_drv_flags & IFF_DRV_RUNNING)
1171				igb_stop(adapter);
1172		adapter->if_flags = ifp->if_flags;
1173		IGB_CORE_UNLOCK(adapter);
1174		break;
1175	case SIOCADDMULTI:
1176	case SIOCDELMULTI:
1177		IOCTL_DEBUGOUT("ioctl rcv'd: SIOC(ADD|DEL)MULTI");
1178		if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1179			IGB_CORE_LOCK(adapter);
1180			igb_disable_intr(adapter);
1181			igb_set_multi(adapter);
1182#ifdef DEVICE_POLLING
1183			if (!(ifp->if_capenable & IFCAP_POLLING))
1184#endif
1185				igb_enable_intr(adapter);
1186			IGB_CORE_UNLOCK(adapter);
1187		}
1188		break;
1189	case SIOCSIFMEDIA:
1190		/* Check SOL/IDER usage */
1191		IGB_CORE_LOCK(adapter);
1192		if (e1000_check_reset_block(&adapter->hw)) {
1193			IGB_CORE_UNLOCK(adapter);
1194			device_printf(adapter->dev, "Media change is"
1195			    " blocked due to SOL/IDER session.\n");
1196			break;
1197		}
1198		IGB_CORE_UNLOCK(adapter);
1199	case SIOCGIFMEDIA:
1200		IOCTL_DEBUGOUT("ioctl rcv'd: \
1201		    SIOCxIFMEDIA (Get/Set Interface Media)");
1202		error = ifmedia_ioctl(ifp, ifr, &adapter->media, command);
1203		break;
1204	case SIOCSIFCAP:
1205	    {
1206		int mask, reinit;
1207
1208		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFCAP (Set Capabilities)");
1209		reinit = 0;
1210		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
1211#ifdef DEVICE_POLLING
1212		if (mask & IFCAP_POLLING) {
1213			if (ifr->ifr_reqcap & IFCAP_POLLING) {
1214				error = ether_poll_register(igb_poll, ifp);
1215				if (error)
1216					return (error);
1217				IGB_CORE_LOCK(adapter);
1218				igb_disable_intr(adapter);
1219				ifp->if_capenable |= IFCAP_POLLING;
1220				IGB_CORE_UNLOCK(adapter);
1221			} else {
1222				error = ether_poll_deregister(ifp);
1223				/* Enable interrupt even in error case */
1224				IGB_CORE_LOCK(adapter);
1225				igb_enable_intr(adapter);
1226				ifp->if_capenable &= ~IFCAP_POLLING;
1227				IGB_CORE_UNLOCK(adapter);
1228			}
1229		}
1230#endif
1231		if (mask & IFCAP_HWCSUM) {
1232			ifp->if_capenable ^= IFCAP_HWCSUM;
1233			reinit = 1;
1234		}
1235		if (mask & IFCAP_TSO4) {
1236			ifp->if_capenable ^= IFCAP_TSO4;
1237			reinit = 1;
1238		}
1239		if (mask & IFCAP_TSO6) {
1240			ifp->if_capenable ^= IFCAP_TSO6;
1241			reinit = 1;
1242		}
1243		if (mask & IFCAP_VLAN_HWTAGGING) {
1244			ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
1245			reinit = 1;
1246		}
1247		if (mask & IFCAP_VLAN_HWFILTER) {
1248			ifp->if_capenable ^= IFCAP_VLAN_HWFILTER;
1249			reinit = 1;
1250		}
1251		if (mask & IFCAP_VLAN_HWTSO) {
1252			ifp->if_capenable ^= IFCAP_VLAN_HWTSO;
1253			reinit = 1;
1254		}
1255		if (mask & IFCAP_LRO) {
1256			ifp->if_capenable ^= IFCAP_LRO;
1257			reinit = 1;
1258		}
1259		if (reinit && (ifp->if_drv_flags & IFF_DRV_RUNNING))
1260			igb_init(adapter);
1261		VLAN_CAPABILITIES(ifp);
1262		break;
1263	    }
1264
1265	default:
1266		error = ether_ioctl(ifp, command, data);
1267		break;
1268	}
1269
1270	return (error);
1271}
1272
1273
1274/*********************************************************************
1275 *  Init entry point
1276 *
1277 *  This routine is used in two ways. It is used by the stack as the
1278 *  init entry point in the network interface structure. It is also
1279 *  used by the driver as a hw/sw initialization routine to get to a
1280 *  consistent state.
1281 *
1282 *  Returns nothing; failures are reported via device_printf.
1283 **********************************************************************/
1284
1285static void
1286igb_init_locked(struct adapter *adapter)
1287{
1288	struct ifnet	*ifp = adapter->ifp;
1289	device_t	dev = adapter->dev;
1290
1291	INIT_DEBUGOUT("igb_init: begin");
1292
1293	IGB_CORE_LOCK_ASSERT(adapter);
1294
1295	igb_disable_intr(adapter);
1296	callout_stop(&adapter->timer);
1297
1298	/* Get the latest mac address, User can use a LAA */
1299        bcopy(IF_LLADDR(adapter->ifp), adapter->hw.mac.addr,
1300              ETHER_ADDR_LEN);
1301
1302	/* Put the address into the Receive Address Array */
1303	e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);
1304
1305	igb_reset(adapter);
1306	igb_update_link_status(adapter);
1307
1308	E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);
1309
1310	/* Set hardware offload abilities */
1311	ifp->if_hwassist = 0;
1312	if (ifp->if_capenable & IFCAP_TXCSUM) {
1313		ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
1314#if __FreeBSD_version >= 800000
1315		if (adapter->hw.mac.type == e1000_82576)
1316			ifp->if_hwassist |= CSUM_SCTP;
1317#endif
1318	}
1319
1320	if (ifp->if_capenable & IFCAP_TSO)
1321		ifp->if_hwassist |= CSUM_TSO;
1322
1323	/* Configure for OS presence */
1324	igb_init_manageability(adapter);
1325
1326	/* Prepare transmit descriptors and buffers */
1327	igb_setup_transmit_structures(adapter);
1328	igb_initialize_transmit_units(adapter);
1329
1330	/* Setup Multicast table */
1331	igb_set_multi(adapter);
1332
1333	/*
1334	** Figure out the desired mbuf pool
1335	** for doing jumbo/packetsplit
1336	*/
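	/*
	** Cluster sizes: MCLBYTES is the standard 2K cluster, MJUMPAGESIZE
	** is a page-sized (typically 4K) cluster, and MJUM9BYTES is a 9K
	** jumbo cluster for full-sized jumbo frames.
	*/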
1337	if (adapter->max_frame_size <= 2048)
1338		adapter->rx_mbuf_sz = MCLBYTES;
1339	else if (adapter->max_frame_size <= 4096)
1340		adapter->rx_mbuf_sz = MJUMPAGESIZE;
1341	else
1342		adapter->rx_mbuf_sz = MJUM9BYTES;
1343
1344	/* Prepare receive descriptors and buffers */
1345	if (igb_setup_receive_structures(adapter)) {
1346		device_printf(dev, "Could not setup receive structures\n");
1347		return;
1348	}
1349	igb_initialize_receive_units(adapter);
1350
1351        /* Enable VLAN support */
1352	if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING)
1353		igb_setup_vlan_hw_support(adapter);
1354
1355	/* Don't lose promiscuous settings */
1356	igb_set_promisc(adapter);
1357
1358	ifp->if_drv_flags |= IFF_DRV_RUNNING;
1359	ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
1360
1361	callout_reset(&adapter->timer, hz, igb_local_timer, adapter);
1362	e1000_clear_hw_cntrs_base_generic(&adapter->hw);
1363
1364	if (adapter->msix > 1) /* Set up queue routing */
1365		igb_configure_queues(adapter);
1366
1367	/* this clears any pending interrupts */
1368	E1000_READ_REG(&adapter->hw, E1000_ICR);
1369#ifdef DEVICE_POLLING
1370	/*
1371	 * Only enable interrupts if we are not polling, make sure
1372	 * they are off otherwise.
1373	 */
1374	if (ifp->if_capenable & IFCAP_POLLING)
1375		igb_disable_intr(adapter);
1376	else
1377#endif /* DEVICE_POLLING */
1378	{
1379		igb_enable_intr(adapter);
1380		E1000_WRITE_REG(&adapter->hw, E1000_ICS, E1000_ICS_LSC);
1381	}
1382
1383	/* Set Energy Efficient Ethernet */
1384	if (adapter->hw.phy.media_type == e1000_media_type_copper) {
1385		if (adapter->hw.mac.type == e1000_i354)
1386			e1000_set_eee_i354(&adapter->hw);
1387		else
1388			e1000_set_eee_i350(&adapter->hw);
1389	}
1390}
1391
1392static void
1393igb_init(void *arg)
1394{
1395	struct adapter *adapter = arg;
1396
1397	IGB_CORE_LOCK(adapter);
1398	igb_init_locked(adapter);
1399	IGB_CORE_UNLOCK(adapter);
1400}
1401
1402
1403static void
1404igb_handle_que(void *context, int pending)
1405{
1406	struct igb_queue *que = context;
1407	struct adapter *adapter = que->adapter;
1408	struct tx_ring *txr = que->txr;
1409	struct ifnet	*ifp = adapter->ifp;
1410
1411	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1412		bool	more;
1413
1414		more = igb_rxeof(que, adapter->rx_process_limit, NULL);
1415
1416		IGB_TX_LOCK(txr);
1417		igb_txeof(txr);
1418#ifndef IGB_LEGACY_TX
1419		/* Process the stack queue only if not depleted */
1420		if (((txr->queue_status & IGB_QUEUE_DEPLETED) == 0) &&
1421		    !drbr_empty(ifp, txr->br))
1422			igb_mq_start_locked(ifp, txr);
1423#else
1424		if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1425			igb_start_locked(txr, ifp);
1426#endif
1427		IGB_TX_UNLOCK(txr);
1428		/* Do we need another? */
1429		if (more) {
1430			taskqueue_enqueue(que->tq, &que->que_task);
1431			return;
1432		}
1433	}
1434
1435#ifdef DEVICE_POLLING
1436	if (ifp->if_capenable & IFCAP_POLLING)
1437		return;
1438#endif
1439	/* Reenable this interrupt */
1440	if (que->eims)
1441		E1000_WRITE_REG(&adapter->hw, E1000_EIMS, que->eims);
1442	else
1443		igb_enable_intr(adapter);
1444}
1445
1446/* Deal with link in a sleepable context */
1447static void
1448igb_handle_link(void *context, int pending)
1449{
1450	struct adapter *adapter = context;
1451
1452	IGB_CORE_LOCK(adapter);
1453	igb_handle_link_locked(adapter);
1454	IGB_CORE_UNLOCK(adapter);
1455}
1456
1457static void
1458igb_handle_link_locked(struct adapter *adapter)
1459{
1460	struct tx_ring	*txr = adapter->tx_rings;
1461	struct ifnet *ifp = adapter->ifp;
1462
1463	IGB_CORE_LOCK_ASSERT(adapter);
1464	adapter->hw.mac.get_link_status = 1;
1465	igb_update_link_status(adapter);
1466	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) && adapter->link_active) {
1467		for (int i = 0; i < adapter->num_queues; i++, txr++) {
1468			IGB_TX_LOCK(txr);
1469#ifndef IGB_LEGACY_TX
1470			/* Process the stack queue only if not depleted */
1471			if (((txr->queue_status & IGB_QUEUE_DEPLETED) == 0) &&
1472			    !drbr_empty(ifp, txr->br))
1473				igb_mq_start_locked(ifp, txr);
1474#else
1475			if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1476				igb_start_locked(txr, ifp);
1477#endif
1478			IGB_TX_UNLOCK(txr);
1479		}
1480	}
1481}
1482
1483/*********************************************************************
1484 *
1485 *  MSI/Legacy Deferred
1486 *  Interrupt Service routine
1487 *
1488 *********************************************************************/
1489static int
1490igb_irq_fast(void *arg)
1491{
1492	struct adapter		*adapter = arg;
1493	struct igb_queue	*que = adapter->queues;
1494	u32			reg_icr;
1495
1496
1497	reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1498
1499	/* Hot eject?  */
1500	if (reg_icr == 0xffffffff)
1501		return FILTER_STRAY;
1502
1503	/* Definitely not our interrupt.  */
1504	if (reg_icr == 0x0)
1505		return FILTER_STRAY;
1506
1507	if ((reg_icr & E1000_ICR_INT_ASSERTED) == 0)
1508		return FILTER_STRAY;
1509
1510	/*
1511	 * Mask interrupts until the taskqueue is finished running.  This is
1512	 * cheap, just assume that it is needed.  This also works around the
1513	 * MSI message reordering errata on certain systems.
1514	 */
1515	igb_disable_intr(adapter);
1516	taskqueue_enqueue(que->tq, &que->que_task);
1517
1518	/* Link status change */
1519	if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC))
1520		taskqueue_enqueue(que->tq, &adapter->link_task);
1521
1522	if (reg_icr & E1000_ICR_RXO)
1523		adapter->rx_overruns++;
1524	return FILTER_HANDLED;
1525}
1526
1527#ifdef DEVICE_POLLING
1528#if __FreeBSD_version >= 800000
1529#define POLL_RETURN_COUNT(a) (a)
1530static int
1531#else
1532#define POLL_RETURN_COUNT(a)
1533static void
1534#endif
1535igb_poll(struct ifnet *ifp, enum poll_cmd cmd, int count)
1536{
1537	struct adapter		*adapter = ifp->if_softc;
1538	struct igb_queue	*que;
1539	struct tx_ring		*txr;
1540	u32			reg_icr, rx_done = 0;
1541	u32			loop = IGB_MAX_LOOP;
1542	bool			more;
1543
1544	IGB_CORE_LOCK(adapter);
1545	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
1546		IGB_CORE_UNLOCK(adapter);
1547		return POLL_RETURN_COUNT(rx_done);
1548	}
1549
1550	if (cmd == POLL_AND_CHECK_STATUS) {
1551		reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1552		/* Link status change */
1553		if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC))
1554			igb_handle_link_locked(adapter);
1555
1556		if (reg_icr & E1000_ICR_RXO)
1557			adapter->rx_overruns++;
1558	}
1559	IGB_CORE_UNLOCK(adapter);
1560
1561	for (int i = 0; i < adapter->num_queues; i++) {
1562		que = &adapter->queues[i];
1563		txr = que->txr;
1564
1565		igb_rxeof(que, count, &rx_done);
1566
1567		IGB_TX_LOCK(txr);
1568		do {
1569			more = igb_txeof(txr);
1570		} while (loop-- && more);
1571#ifndef IGB_LEGACY_TX
1572		if (!drbr_empty(ifp, txr->br))
1573			igb_mq_start_locked(ifp, txr);
1574#else
1575		if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1576			igb_start_locked(txr, ifp);
1577#endif
1578		IGB_TX_UNLOCK(txr);
1579	}
1580
1581	return POLL_RETURN_COUNT(rx_done);
1582}
1583#endif /* DEVICE_POLLING */
1584
1585/*********************************************************************
1586 *
1587 *  MSIX Que Interrupt Service routine
1588 *
1589 **********************************************************************/
1590static void
1591igb_msix_que(void *arg)
1592{
1593	struct igb_queue *que = arg;
1594	struct adapter *adapter = que->adapter;
1595	struct ifnet   *ifp = adapter->ifp;
1596	struct tx_ring *txr = que->txr;
1597	struct rx_ring *rxr = que->rxr;
1598	u32		newitr = 0;
1599	bool		more_rx;
1600
1601	/* Ignore spurious interrupts */
1602	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
1603		return;
1604
1605	E1000_WRITE_REG(&adapter->hw, E1000_EIMC, que->eims);
1606	++que->irqs;
1607
1608	IGB_TX_LOCK(txr);
1609	igb_txeof(txr);
1610#ifndef IGB_LEGACY_TX
1611	/* Process the stack queue only if not depleted */
1612	if (((txr->queue_status & IGB_QUEUE_DEPLETED) == 0) &&
1613	    !drbr_empty(ifp, txr->br))
1614		igb_mq_start_locked(ifp, txr);
1615#else
1616	if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1617		igb_start_locked(txr, ifp);
1618#endif
1619	IGB_TX_UNLOCK(txr);
1620
1621	more_rx = igb_rxeof(que, adapter->rx_process_limit, NULL);
1622
1623	if (adapter->enable_aim == FALSE)
1624		goto no_calc;
1625	/*
1626	** Do Adaptive Interrupt Moderation:
1627        **  - Write out last calculated setting
1628	**  - Calculate based on average size over
1629	**    the last interval.
1630	*/
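	/*
	** When the link is below gigabit speed the new setting is simply
	** half the default; otherwise it is derived from the average frame
	** size seen on this queue over the last interval, so larger frames
	** yield a longer interval (fewer interrupts per second) and smaller
	** frames a shorter one, capped at 3000 before the final scaling.
	*/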
1631        if (que->eitr_setting)
1632                E1000_WRITE_REG(&adapter->hw,
1633                    E1000_EITR(que->msix), que->eitr_setting);
1634
1635        que->eitr_setting = 0;
1636
1637        /* Idle, do nothing */
1638        if ((txr->bytes == 0) && (rxr->bytes == 0))
1639                goto no_calc;
1640
1641        /* Used half Default if sub-gig */
1642        /* Use half the default if below gigabit speed */
1643                newitr = IGB_DEFAULT_ITR / 2;
1644        else {
1645		if ((txr->bytes) && (txr->packets))
1646                	newitr = txr->bytes/txr->packets;
1647		if ((rxr->bytes) && (rxr->packets))
1648			newitr = max(newitr,
1649			    (rxr->bytes / rxr->packets));
1650                newitr += 24; /* account for hardware frame, crc */
1651		/* set an upper boundary */
1652		newitr = min(newitr, 3000);
1653		/* Be nice to the mid range */
1654                if ((newitr > 300) && (newitr < 1200))
1655                        newitr = (newitr / 3);
1656                else
1657                        newitr = (newitr / 2);
1658        }
1659        newitr &= 0x7FFC;  /* Mask invalid bits */
1660        if (adapter->hw.mac.type == e1000_82575)
1661                newitr |= newitr << 16;
1662        else
1663                newitr |= E1000_EITR_CNT_IGNR;
1664
1665        /* save for next interrupt */
1666        que->eitr_setting = newitr;
1667
1668        /* Reset state */
1669        txr->bytes = 0;
1670        txr->packets = 0;
1671        rxr->bytes = 0;
1672        rxr->packets = 0;
1673
1674no_calc:
1675	/* Schedule a clean task if needed */
1676	if (more_rx)
1677		taskqueue_enqueue(que->tq, &que->que_task);
1678	else
1679		/* Reenable this interrupt */
1680		E1000_WRITE_REG(&adapter->hw, E1000_EIMS, que->eims);
1681	return;
1682}
1683
1684
1685/*********************************************************************
1686 *
1687 *  MSIX Link Interrupt Service routine
1688 *
1689 **********************************************************************/
1690
1691static void
1692igb_msix_link(void *arg)
1693{
1694	struct adapter	*adapter = arg;
1695	u32       	icr;
1696
1697	++adapter->link_irq;
1698	icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1699	if (!(icr & E1000_ICR_LSC))
1700		goto spurious;
1701	igb_handle_link(adapter, 0);
1702
1703spurious:
1704	/* Rearm */
1705	E1000_WRITE_REG(&adapter->hw, E1000_IMS, E1000_IMS_LSC);
1706	E1000_WRITE_REG(&adapter->hw, E1000_EIMS, adapter->link_mask);
1707	return;
1708}
1709
1710
1711/*********************************************************************
1712 *
1713 *  Media Ioctl callback
1714 *
1715 *  This routine is called whenever the user queries the status of
1716 *  the interface using ifconfig.
1717 *
1718 **********************************************************************/
1719static void
1720igb_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
1721{
1722	struct adapter *adapter = ifp->if_softc;
1723
1724	INIT_DEBUGOUT("igb_media_status: begin");
1725
1726	IGB_CORE_LOCK(adapter);
1727	igb_update_link_status(adapter);
1728
1729	ifmr->ifm_status = IFM_AVALID;
1730	ifmr->ifm_active = IFM_ETHER;
1731
1732	if (!adapter->link_active) {
1733		IGB_CORE_UNLOCK(adapter);
1734		return;
1735	}
1736
1737	ifmr->ifm_status |= IFM_ACTIVE;
1738
1739	switch (adapter->link_speed) {
1740	case 10:
1741		ifmr->ifm_active |= IFM_10_T;
1742		break;
1743	case 100:
1744		/*
1745		** Support for 100Mb SFP - these are Fiber
1746		** but the media type appears as serdes
1747		*/
1748		if (adapter->hw.phy.media_type ==
1749		    e1000_media_type_internal_serdes)
1750			ifmr->ifm_active |= IFM_100_FX;
1751		else
1752			ifmr->ifm_active |= IFM_100_TX;
1753		break;
1754	case 1000:
1755		ifmr->ifm_active |= IFM_1000_T;
1756		break;
1757	case 2500:
1758		ifmr->ifm_active |= IFM_2500_SX;
1759		break;
1760	}
1761
1762	if (adapter->link_duplex == FULL_DUPLEX)
1763		ifmr->ifm_active |= IFM_FDX;
1764	else
1765		ifmr->ifm_active |= IFM_HDX;
1766
1767	IGB_CORE_UNLOCK(adapter);
1768}
1769
1770/*********************************************************************
1771 *
1772 *  Media Ioctl callback
1773 *
1774 *  This routine is called when the user changes speed/duplex using
1775 *  the media/mediaopt options with ifconfig.
1776 *
1777 **********************************************************************/
1778static int
1779igb_media_change(struct ifnet *ifp)
1780{
1781	struct adapter *adapter = ifp->if_softc;
1782	struct ifmedia  *ifm = &adapter->media;
1783
1784	INIT_DEBUGOUT("igb_media_change: begin");
1785
1786	if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
1787		return (EINVAL);
1788
1789	IGB_CORE_LOCK(adapter);
1790	switch (IFM_SUBTYPE(ifm->ifm_media)) {
1791	case IFM_AUTO:
1792		adapter->hw.mac.autoneg = DO_AUTO_NEG;
1793		adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
1794		break;
1795	case IFM_1000_LX:
1796	case IFM_1000_SX:
1797	case IFM_1000_T:
1798		adapter->hw.mac.autoneg = DO_AUTO_NEG;
1799		adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
1800		break;
1801	case IFM_100_TX:
1802		adapter->hw.mac.autoneg = FALSE;
1803		adapter->hw.phy.autoneg_advertised = 0;
1804		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1805			adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_FULL;
1806		else
1807			adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_HALF;
1808		break;
1809	case IFM_10_T:
1810		adapter->hw.mac.autoneg = FALSE;
1811		adapter->hw.phy.autoneg_advertised = 0;
1812		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1813			adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_FULL;
1814		else
1815			adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_HALF;
1816		break;
1817	default:
1818		device_printf(adapter->dev, "Unsupported media type\n");
1819	}
1820
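	/* Reinitialize the adapter so the new media settings take effect */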
1821	igb_init_locked(adapter);
1822	IGB_CORE_UNLOCK(adapter);
1823
1824	return (0);
1825}
1826
1827
1828/*********************************************************************
1829 *
1830 *  This routine maps the mbufs to Advanced TX descriptors.
1831 *
1832 **********************************************************************/
1833static int
1834igb_xmit(struct tx_ring *txr, struct mbuf **m_headp)
1835{
1836	struct adapter  *adapter = txr->adapter;
1837	u32		olinfo_status = 0, cmd_type_len;
1838	int             i, j, error, nsegs;
1839	int		first;
1840	bool		remap = TRUE;
1841	struct mbuf	*m_head;
1842	bus_dma_segment_t segs[IGB_MAX_SCATTER];
1843	bus_dmamap_t	map;
1844	struct igb_tx_buf *txbuf;
1845	union e1000_adv_tx_desc *txd = NULL;
1846
1847	m_head = *m_headp;
1848
1849	/* Basic descriptor defines */
1850        cmd_type_len = (E1000_ADVTXD_DTYP_DATA |
1851	    E1000_ADVTXD_DCMD_IFCS | E1000_ADVTXD_DCMD_DEXT);
1852
1853	if (m_head->m_flags & M_VLANTAG)
1854        	cmd_type_len |= E1000_ADVTXD_DCMD_VLE;
1855
1856        /*
1857         * It is important to capture the first descriptor
1858         * used, because its buffer entry will later record
1859         * the EOP descriptor we tell the hardware to report back on
1860         */
1861        first = txr->next_avail_desc;
1862	txbuf = &txr->tx_buffers[first];
1863	map = txbuf->map;
1864
1865	/*
1866	 * Map the packet for DMA.
1867	 */
1868retry:
1869	error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
1870	    *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
1871
1872	if (__predict_false(error)) {
1873		struct mbuf *m;
1874
1875		switch (error) {
1876		case EFBIG:
1877			/* Try it again? - one try */
1878			if (remap == TRUE) {
1879				remap = FALSE;
1880				m = m_defrag(*m_headp, M_NOWAIT);
1881				if (m == NULL) {
1882					adapter->mbuf_defrag_failed++;
1883					m_freem(*m_headp);
1884					*m_headp = NULL;
1885					return (ENOBUFS);
1886				}
1887				*m_headp = m;
1888				goto retry;
1889			} else
1890				return (error);
1891		case ENOMEM:
1892			txr->no_tx_dma_setup++;
1893			return (error);
1894		default:
1895			txr->no_tx_dma_setup++;
1896			m_freem(*m_headp);
1897			*m_headp = NULL;
1898			return (error);
1899		}
1900	}
1901
1902	/* Make certain there are enough descriptors (nsegs plus a context descriptor and one spare) */
1903	if (nsegs > txr->tx_avail - 2) {
1904		txr->no_desc_avail++;
1905		bus_dmamap_unload(txr->txtag, map);
1906		return (ENOBUFS);
1907	}
1908	m_head = *m_headp;
1909
1910	/*
1911	** Set up the appropriate offload context
1912	** this will consume the first descriptor
1913	*/
1914	error = igb_tx_ctx_setup(txr, m_head, &cmd_type_len, &olinfo_status);
1915	if (__predict_false(error)) {
1916		m_freem(*m_headp);
1917		*m_headp = NULL;
1918		return (error);
1919	}
1920
1921	/* 82575 needs the queue index added */
1922	if (adapter->hw.mac.type == e1000_82575)
1923		olinfo_status |= txr->me << 4;
1924
1925	i = txr->next_avail_desc;
1926	for (j = 0; j < nsegs; j++) {
1927		bus_size_t seglen;
1928		bus_addr_t segaddr;
1929
1930		txbuf = &txr->tx_buffers[i];
1931		txd = &txr->tx_base[i];
1932		seglen = segs[j].ds_len;
1933		segaddr = htole64(segs[j].ds_addr);
1934
1935		txd->read.buffer_addr = segaddr;
1936		txd->read.cmd_type_len = htole32(E1000_TXD_CMD_IFCS |
1937		    cmd_type_len | seglen);
1938		txd->read.olinfo_status = htole32(olinfo_status);
1939
1940		if (++i == txr->num_desc)
1941			i = 0;
1942	}
1943
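	/*
	** Mark the last descriptor: end of packet (EOP) and
	** request a completion report (RS) from the hardware.
	*/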
1944	txd->read.cmd_type_len |=
1945	    htole32(E1000_TXD_CMD_EOP | E1000_TXD_CMD_RS);
1946	txr->tx_avail -= nsegs;
1947	txr->next_avail_desc = i;
1948
1949	txbuf->m_head = m_head;
1950	/*
1951	** Here we swap the maps so that the last descriptor,
1952	** which gets the completion interrupt, holds the
1953	** real map, and the first descriptor gets the
1954	** unused map from this descriptor.
1955	*/
1956	txr->tx_buffers[first].map = txbuf->map;
1957	txbuf->map = map;
1958	bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);
1959
1960        /* Set the EOP descriptor that will be marked done */
1961        txbuf = &txr->tx_buffers[first];
1962	txbuf->eop = txd;
1963
1964        bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
1965            BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1966	/*
1967	 * Advance the Transmit Descriptor Tail (Tdt), this tells the
1968	 * hardware that this frame is available to transmit.
1969	 */
1970	++txr->total_packets;
1971	E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), i);
1972
1973	return (0);
1974}
1975static void
1976igb_set_promisc(struct adapter *adapter)
1977{
1978	struct ifnet	*ifp = adapter->ifp;
1979	struct e1000_hw *hw = &adapter->hw;
1980	u32		reg;
1981
1982	if (adapter->vf_ifp) {
1983		e1000_promisc_set_vf(hw, e1000_promisc_enabled);
1984		return;
1985	}
1986
1987	reg = E1000_READ_REG(hw, E1000_RCTL);
1988	if (ifp->if_flags & IFF_PROMISC) {
1989		reg |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
1990		E1000_WRITE_REG(hw, E1000_RCTL, reg);
1991	} else if (ifp->if_flags & IFF_ALLMULTI) {
1992		reg |= E1000_RCTL_MPE;
1993		reg &= ~E1000_RCTL_UPE;
1994		E1000_WRITE_REG(hw, E1000_RCTL, reg);
1995	}
1996}
1997
1998static void
1999igb_disable_promisc(struct adapter *adapter)
2000{
2001	struct e1000_hw *hw = &adapter->hw;
2002	struct ifnet	*ifp = adapter->ifp;
2003	u32		reg;
2004	int		mcnt = 0;
2005
2006	if (adapter->vf_ifp) {
2007		e1000_promisc_set_vf(hw, e1000_promisc_disabled);
2008		return;
2009	}
2010	reg = E1000_READ_REG(hw, E1000_RCTL);
2011	reg &=  (~E1000_RCTL_UPE);
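	/*
	** Count the active multicast groups; if the list would
	** overflow the hardware filter, MPE is left enabled below.
	*/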
2012	if (ifp->if_flags & IFF_ALLMULTI)
2013		mcnt = MAX_NUM_MULTICAST_ADDRESSES;
2014	else {
2015		struct  ifmultiaddr *ifma;
2016#if __FreeBSD_version < 800000
2017		IF_ADDR_LOCK(ifp);
2018#else
2019		if_maddr_rlock(ifp);
2020#endif
2021		TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
2022			if (ifma->ifma_addr->sa_family != AF_LINK)
2023				continue;
2024			if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
2025				break;
2026			mcnt++;
2027		}
2028#if __FreeBSD_version < 800000
2029		IF_ADDR_UNLOCK(ifp);
2030#else
2031		if_maddr_runlock(ifp);
2032#endif
2033	}
2034	/* Don't disable if in MAX groups */
2035	if (mcnt < MAX_NUM_MULTICAST_ADDRESSES)
2036		reg &=  (~E1000_RCTL_MPE);
2037	E1000_WRITE_REG(hw, E1000_RCTL, reg);
2038}
2039
2040
2041/*********************************************************************
2042 *  Multicast Update
2043 *
2044 *  This routine is called whenever the multicast address list is updated.
2045 *
2046 **********************************************************************/
2047
2048static void
2049igb_set_multi(struct adapter *adapter)
2050{
2051	struct ifnet	*ifp = adapter->ifp;
2052	struct ifmultiaddr *ifma;
2053	u32 reg_rctl = 0;
2054	u8  *mta;
2055
2056	int mcnt = 0;
2057
2058	IOCTL_DEBUGOUT("igb_set_multi: begin");
2059
2060	mta = adapter->mta;
2061	bzero(mta, sizeof(uint8_t) * ETH_ADDR_LEN *
2062	    MAX_NUM_MULTICAST_ADDRESSES);
2063
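	/*
	** Walk the interface's multicast list under the address lock
	** and pack the addresses into a flat array for the shared
	** code update routine.
	*/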
2064#if __FreeBSD_version < 800000
2065	IF_ADDR_LOCK(ifp);
2066#else
2067	if_maddr_rlock(ifp);
2068#endif
2069	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
2070		if (ifma->ifma_addr->sa_family != AF_LINK)
2071			continue;
2072
2073		if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
2074			break;
2075
2076		bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
2077		    &mta[mcnt * ETH_ADDR_LEN], ETH_ADDR_LEN);
2078		mcnt++;
2079	}
2080#if __FreeBSD_version < 800000
2081	IF_ADDR_UNLOCK(ifp);
2082#else
2083	if_maddr_runlock(ifp);
2084#endif
2085
2086	if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) {
2087		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2088		reg_rctl |= E1000_RCTL_MPE;
2089		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2090	} else
2091		e1000_update_mc_addr_list(&adapter->hw, mta, mcnt);
2092}
2093
2094
2095/*********************************************************************
2096 *  Timer routine:
2097 *  	This routine checks for link status,
2098 *	updates statistics, and does the watchdog.
2099 *
2100 **********************************************************************/
2101
2102static void
2103igb_local_timer(void *arg)
2104{
2105	struct adapter		*adapter = arg;
2106	device_t		dev = adapter->dev;
2107	struct ifnet		*ifp = adapter->ifp;
2108	struct tx_ring		*txr = adapter->tx_rings;
2109	struct igb_queue	*que = adapter->queues;
2110	int			hung = 0, busy = 0;
2111
2112
2113	IGB_CORE_LOCK_ASSERT(adapter);
2114
2115	igb_update_link_status(adapter);
2116	igb_update_stats_counters(adapter);
2117
2118	/*
2119	** Check the status of the TX queues:
2120	**	- central locked handling of OACTIVE
2121	**	- watchdog only if all queues show hung
2122	*/
2123	for (int i = 0; i < adapter->num_queues; i++, que++, txr++) {
2124		if ((txr->queue_status & IGB_QUEUE_HUNG) &&
2125		    (adapter->pause_frames == 0))
2126			++hung;
2127		if (txr->queue_status & IGB_QUEUE_DEPLETED)
2128			++busy;
2129		if ((txr->queue_status & IGB_QUEUE_IDLE) == 0)
2130			taskqueue_enqueue(que->tq, &que->que_task);
2131	}
2132	if (hung == adapter->num_queues)
2133		goto timeout;
2134	if (busy == adapter->num_queues)
2135		ifp->if_drv_flags |= IFF_DRV_OACTIVE;
2136	else if ((ifp->if_drv_flags & IFF_DRV_OACTIVE) &&
2137	    (busy < adapter->num_queues))
2138		ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
2139
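	/*
	** Pause frames seen in this interval suppress the hang check
	** above; clear the count so the next interval starts fresh.
	*/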
2140	adapter->pause_frames = 0;
2141	callout_reset(&adapter->timer, hz, igb_local_timer, adapter);
2142#ifndef DEVICE_POLLING
2143	/* Schedule all queue interrupts - deadlock protection */
2144	E1000_WRITE_REG(&adapter->hw, E1000_EICS, adapter->que_mask);
2145#endif
2146	return;
2147
2148timeout:
2149	device_printf(adapter->dev, "Watchdog timeout -- resetting\n");
2150	device_printf(dev, "Queue(%d) tdh = %d, hw tdt = %d\n", txr->me,
2151	    E1000_READ_REG(&adapter->hw, E1000_TDH(txr->me)),
2152	    E1000_READ_REG(&adapter->hw, E1000_TDT(txr->me)));
2153	device_printf(dev, "TX(%d) desc avail = %d, "
2154	    "Next TX to Clean = %d\n",
2155	    txr->me, txr->tx_avail, txr->next_to_clean);
2156	adapter->ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2157	adapter->watchdog_events++;
2158	igb_init_locked(adapter);
2159}
2160
2161static void
2162igb_update_link_status(struct adapter *adapter)
2163{
2164	struct e1000_hw		*hw = &adapter->hw;
2165	struct e1000_fc_info	*fc = &hw->fc;
2166	struct ifnet		*ifp = adapter->ifp;
2167	device_t		dev = adapter->dev;
2168	struct tx_ring		*txr = adapter->tx_rings;
2169	u32			link_check, thstat, ctrl;
2170	char			*flowctl = NULL;
2171
2172	link_check = thstat = ctrl = 0;
2173
2174	/* Get the cached link value or read for real */
2175        switch (hw->phy.media_type) {
2176        case e1000_media_type_copper:
2177                if (hw->mac.get_link_status) {
2178			/* Do the work to read phy */
2179                        e1000_check_for_link(hw);
2180                        link_check = !hw->mac.get_link_status;
2181                } else
2182                        link_check = TRUE;
2183                break;
2184        case e1000_media_type_fiber:
2185                e1000_check_for_link(hw);
2186                link_check = (E1000_READ_REG(hw, E1000_STATUS) &
2187                                 E1000_STATUS_LU);
2188                break;
2189        case e1000_media_type_internal_serdes:
2190                e1000_check_for_link(hw);
2191                link_check = adapter->hw.mac.serdes_has_link;
2192                break;
2193	/* VF device is type_unknown */
2194        case e1000_media_type_unknown:
2195                e1000_check_for_link(hw);
2196		link_check = !hw->mac.get_link_status;
2197		/* Fall thru */
2198        default:
2199                break;
2200        }
2201
2202	/* Check for thermal downshift or shutdown */
2203	if (hw->mac.type == e1000_i350) {
2204		thstat = E1000_READ_REG(hw, E1000_THSTAT);
2205		ctrl = E1000_READ_REG(hw, E1000_CTRL_EXT);
2206	}
2207
2208	/* Get the flow control for display */
2209	switch (fc->current_mode) {
2210	case e1000_fc_rx_pause:
2211		flowctl = "RX";
2212		break;
2213	case e1000_fc_tx_pause:
2214		flowctl = "TX";
2215		break;
2216	case e1000_fc_full:
2217		flowctl = "Full";
2218		break;
2219	case e1000_fc_none:
2220	default:
2221		flowctl = "None";
2222		break;
2223	}
2224
2225	/* Now we check if a transition has happened */
2226	if (link_check && (adapter->link_active == 0)) {
2227		e1000_get_speed_and_duplex(&adapter->hw,
2228		    &adapter->link_speed, &adapter->link_duplex);
2229		if (bootverbose)
2230			device_printf(dev, "Link is up %d Mbps %s,"
2231			    " Flow Control: %s\n",
2232			    adapter->link_speed,
2233			    ((adapter->link_duplex == FULL_DUPLEX) ?
2234			    "Full Duplex" : "Half Duplex"), flowctl);
2235		adapter->link_active = 1;
2236		ifp->if_baudrate = adapter->link_speed * 1000000;
2237		if ((ctrl & E1000_CTRL_EXT_LINK_MODE_GMII) &&
2238		    (thstat & E1000_THSTAT_LINK_THROTTLE))
2239			device_printf(dev, "Link: thermal downshift\n");
2240		/* Delay Link Up for Phy update */
2241		if (((hw->mac.type == e1000_i210) ||
2242		    (hw->mac.type == e1000_i211)) &&
2243		    (hw->phy.id == I210_I_PHY_ID))
2244			msec_delay(I210_LINK_DELAY);
2245		/* Reset if the media type changed. */
2246		if (hw->dev_spec._82575.media_changed) {
2247			hw->dev_spec._82575.media_changed = false;
2248			adapter->flags |= IGB_MEDIA_RESET;
2249			igb_reset(adapter);
2250		}
2251		/* This can sleep */
2252		if_link_state_change(ifp, LINK_STATE_UP);
2253	} else if (!link_check && (adapter->link_active == 1)) {
2254		ifp->if_baudrate = adapter->link_speed = 0;
2255		adapter->link_duplex = 0;
2256		if (bootverbose)
2257			device_printf(dev, "Link is Down\n");
2258		if ((ctrl & E1000_CTRL_EXT_LINK_MODE_GMII) &&
2259		    (thstat & E1000_THSTAT_PWR_DOWN))
2260			device_printf(dev, "Link: thermal shutdown\n");
2261		adapter->link_active = 0;
2262		/* This can sleep */
2263		if_link_state_change(ifp, LINK_STATE_DOWN);
2264		/* Reset queue state */
2265		for (int i = 0; i < adapter->num_queues; i++, txr++)
2266			txr->queue_status = IGB_QUEUE_IDLE;
2267	}
2268}
2269
2270/*********************************************************************
2271 *
2272 *  This routine disables all traffic on the adapter by issuing a
2273 *  global reset on the MAC and deallocates TX/RX buffers.
2274 *
2275 **********************************************************************/
2276
2277static void
2278igb_stop(void *arg)
2279{
2280	struct adapter	*adapter = arg;
2281	struct ifnet	*ifp = adapter->ifp;
2282	struct tx_ring *txr = adapter->tx_rings;
2283
2284	IGB_CORE_LOCK_ASSERT(adapter);
2285
2286	INIT_DEBUGOUT("igb_stop: begin");
2287
2288	igb_disable_intr(adapter);
2289
2290	callout_stop(&adapter->timer);
2291
2292	/* Tell the stack that the interface is no longer active */
2293	ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2294	ifp->if_drv_flags |= IFF_DRV_OACTIVE;
2295
2296	/* Disarm watchdog timer. */
2297	for (int i = 0; i < adapter->num_queues; i++, txr++) {
2298		IGB_TX_LOCK(txr);
2299		txr->queue_status = IGB_QUEUE_IDLE;
2300		IGB_TX_UNLOCK(txr);
2301	}
2302
2303	e1000_reset_hw(&adapter->hw);
2304	E1000_WRITE_REG(&adapter->hw, E1000_WUC, 0);
2305
2306	e1000_led_off(&adapter->hw);
2307	e1000_cleanup_led(&adapter->hw);
2308}
2309
2310
2311/*********************************************************************
2312 *
2313 *  Determine hardware revision.
2314 *
2315 **********************************************************************/
2316static void
2317igb_identify_hardware(struct adapter *adapter)
2318{
2319	device_t dev = adapter->dev;
2320
2321	/* Make sure our PCI config space has the necessary stuff set */
2322	pci_enable_busmaster(dev);
2323	adapter->hw.bus.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2);
2324
2325	/* Save off the information about this board */
2326	adapter->hw.vendor_id = pci_get_vendor(dev);
2327	adapter->hw.device_id = pci_get_device(dev);
2328	adapter->hw.revision_id = pci_read_config(dev, PCIR_REVID, 1);
2329	adapter->hw.subsystem_vendor_id =
2330	    pci_read_config(dev, PCIR_SUBVEND_0, 2);
2331	adapter->hw.subsystem_device_id =
2332	    pci_read_config(dev, PCIR_SUBDEV_0, 2);
2333
2334	/* Set MAC type early for PCI setup */
2335	e1000_set_mac_type(&adapter->hw);
2336
2337	/* Are we a VF device? */
2338	if ((adapter->hw.mac.type == e1000_vfadapt) ||
2339	    (adapter->hw.mac.type == e1000_vfadapt_i350))
2340		adapter->vf_ifp = 1;
2341	else
2342		adapter->vf_ifp = 0;
2343}
2344
2345static int
2346igb_allocate_pci_resources(struct adapter *adapter)
2347{
2348	device_t	dev = adapter->dev;
2349	int		rid;
2350
2351	rid = PCIR_BAR(0);
2352	adapter->pci_mem = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
2353	    &rid, RF_ACTIVE);
2354	if (adapter->pci_mem == NULL) {
2355		device_printf(dev, "Unable to allocate bus resource: memory\n");
2356		return (ENXIO);
2357	}
2358	adapter->osdep.mem_bus_space_tag =
2359	    rman_get_bustag(adapter->pci_mem);
2360	adapter->osdep.mem_bus_space_handle =
2361	    rman_get_bushandle(adapter->pci_mem);
2362	adapter->hw.hw_addr = (u8 *)&adapter->osdep.mem_bus_space_handle;
2363
2364	adapter->num_queues = 1; /* Defaults for Legacy or MSI */
2365
2366	/* This will setup either MSI/X or MSI */
2367	adapter->msix = igb_setup_msix(adapter);
2368	adapter->hw.back = &adapter->osdep;
2369
2370	return (0);
2371}
2372
2373/*********************************************************************
2374 *
2375 *  Setup the Legacy or MSI Interrupt handler
2376 *
2377 **********************************************************************/
2378static int
2379igb_allocate_legacy(struct adapter *adapter)
2380{
2381	device_t		dev = adapter->dev;
2382	struct igb_queue	*que = adapter->queues;
2383	struct tx_ring		*txr = adapter->tx_rings;
2384	int			error, rid = 0;
2385
2386	/* Turn off all interrupts */
2387	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2388
2389	/* MSI RID is 1 */
2390	if (adapter->msix == 1)
2391		rid = 1;
2392
2393	/* We allocate a single interrupt resource */
2394	adapter->res = bus_alloc_resource_any(dev,
2395	    SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2396	if (adapter->res == NULL) {
2397		device_printf(dev, "Unable to allocate bus resource: "
2398		    "interrupt\n");
2399		return (ENXIO);
2400	}
2401
2402#ifndef IGB_LEGACY_TX
2403	TASK_INIT(&txr->txq_task, 0, igb_deferred_mq_start, txr);
2404#endif
2405
2406	/*
2407	 * Try allocating a fast interrupt and the associated deferred
2408	 * processing contexts.
2409	 */
2410	TASK_INIT(&que->que_task, 0, igb_handle_que, que);
2411	/* Make tasklet for deferred link handling */
2412	TASK_INIT(&adapter->link_task, 0, igb_handle_link, adapter);
2413	que->tq = taskqueue_create_fast("igb_taskq", M_NOWAIT,
2414	    taskqueue_thread_enqueue, &que->tq);
2415	taskqueue_start_threads(&que->tq, 1, PI_NET, "%s taskq",
2416	    device_get_nameunit(adapter->dev));
2417	if ((error = bus_setup_intr(dev, adapter->res,
2418	    INTR_TYPE_NET | INTR_MPSAFE, igb_irq_fast, NULL,
2419	    adapter, &adapter->tag)) != 0) {
2420		device_printf(dev, "Failed to register fast interrupt "
2421			    "handler: %d\n", error);
2422		taskqueue_free(que->tq);
2423		que->tq = NULL;
2424		return (error);
2425	}
2426
2427	return (0);
2428}
2429
2430
2431/*********************************************************************
2432 *
2433 *  Setup the MSIX Queue Interrupt handlers:
2434 *
2435 **********************************************************************/
2436static int
2437igb_allocate_msix(struct adapter *adapter)
2438{
2439	device_t		dev = adapter->dev;
2440	struct igb_queue	*que = adapter->queues;
2441	int			error, rid, vector = 0;
2442
2443	/* Be sure to start with all interrupts disabled */
2444	E1000_WRITE_REG(&adapter->hw, E1000_IMC, ~0);
2445	E1000_WRITE_FLUSH(&adapter->hw);
2446
2447	for (int i = 0; i < adapter->num_queues; i++, vector++, que++) {
2448		rid = vector + 1;
2449		que->res = bus_alloc_resource_any(dev,
2450		    SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2451		if (que->res == NULL) {
2452			device_printf(dev,
2453			    "Unable to allocate bus resource: "
2454			    "MSIX Queue Interrupt\n");
2455			return (ENXIO);
2456		}
2457		error = bus_setup_intr(dev, que->res,
2458	    	    INTR_TYPE_NET | INTR_MPSAFE, NULL,
2459		    igb_msix_que, que, &que->tag);
2460		if (error) {
2461			que->res = NULL;
2462			device_printf(dev, "Failed to register Queue handler");
2463			return (error);
2464		}
2465#if __FreeBSD_version >= 800504
2466		bus_describe_intr(dev, que->res, que->tag, "que %d", i);
2467#endif
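		/*
		** Record this queue's MSIX vector and the EIMS bit used
		** to enable it: the 82575 has fixed per-queue bits,
		** later MACs use one bit per vector.
		*/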
2468		que->msix = vector;
2469		if (adapter->hw.mac.type == e1000_82575)
2470			que->eims = E1000_EICR_TX_QUEUE0 << i;
2471		else
2472			que->eims = 1 << vector;
2473		/*
2474		** Bind the msix vector, and thus the
2475		** rings to the corresponding cpu.
2476		*/
2477		if (adapter->num_queues > 1) {
2478			if (igb_last_bind_cpu < 0)
2479				igb_last_bind_cpu = CPU_FIRST();
2480			bus_bind_intr(dev, que->res, igb_last_bind_cpu);
2481			device_printf(dev,
2482				"Bound queue %d to cpu %d\n",
2483				i, igb_last_bind_cpu);
2484			igb_last_bind_cpu = CPU_NEXT(igb_last_bind_cpu);
2485		}
2486#ifndef IGB_LEGACY_TX
2487		TASK_INIT(&que->txr->txq_task, 0, igb_deferred_mq_start,
2488		    que->txr);
2489#endif
2490		/* Make tasklet for deferred handling */
2491		TASK_INIT(&que->que_task, 0, igb_handle_que, que);
2492		que->tq = taskqueue_create("igb_que", M_NOWAIT,
2493		    taskqueue_thread_enqueue, &que->tq);
2494		taskqueue_start_threads(&que->tq, 1, PI_NET, "%s que",
2495		    device_get_nameunit(adapter->dev));
2496	}
2497
2498	/* And Link */
2499	rid = vector + 1;
2500	adapter->res = bus_alloc_resource_any(dev,
2501	    SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2502	if (adapter->res == NULL) {
2503		device_printf(dev,
2504		    "Unable to allocate bus resource: "
2505		    "MSIX Link Interrupt\n");
2506		return (ENXIO);
2507	}
2508	if ((error = bus_setup_intr(dev, adapter->res,
2509	    INTR_TYPE_NET | INTR_MPSAFE, NULL,
2510	    igb_msix_link, adapter, &adapter->tag)) != 0) {
2511		device_printf(dev, "Failed to register Link handler");
2512		return (error);
2513	}
2514#if __FreeBSD_version >= 800504
2515	bus_describe_intr(dev, adapter->res, adapter->tag, "link");
2516#endif
2517	adapter->linkvec = vector;
2518
2519	return (0);
2520}
2521
2522
2523static void
2524igb_configure_queues(struct adapter *adapter)
2525{
2526	struct	e1000_hw	*hw = &adapter->hw;
2527	struct	igb_queue	*que;
2528	u32			tmp, ivar = 0, newitr = 0;
2529
2530	/* First turn on RSS capability */
2531	if (adapter->hw.mac.type != e1000_82575)
2532		E1000_WRITE_REG(hw, E1000_GPIE,
2533		    E1000_GPIE_MSIX_MODE | E1000_GPIE_EIAME |
2534		    E1000_GPIE_PBA | E1000_GPIE_NSICR);
2535
2536	/* Turn on MSIX */
2537	switch (adapter->hw.mac.type) {
2538	case e1000_82580:
2539	case e1000_i350:
2540	case e1000_i354:
2541	case e1000_i210:
2542	case e1000_i211:
2543	case e1000_vfadapt:
2544	case e1000_vfadapt_i350:
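		/*
		** Each IVAR register holds the vector assignments for a
		** pair of queues: RX vectors go in bytes 0 and 2, TX
		** vectors in bytes 1 and 3.
		*/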
2545		/* RX entries */
2546		for (int i = 0; i < adapter->num_queues; i++) {
2547			u32 index = i >> 1;
2548			ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2549			que = &adapter->queues[i];
2550			if (i & 1) {
2551				ivar &= 0xFF00FFFF;
2552				ivar |= (que->msix | E1000_IVAR_VALID) << 16;
2553			} else {
2554				ivar &= 0xFFFFFF00;
2555				ivar |= que->msix | E1000_IVAR_VALID;
2556			}
2557			E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2558		}
2559		/* TX entries */
2560		for (int i = 0; i < adapter->num_queues; i++) {
2561			u32 index = i >> 1;
2562			ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2563			que = &adapter->queues[i];
2564			if (i & 1) {
2565				ivar &= 0x00FFFFFF;
2566				ivar |= (que->msix | E1000_IVAR_VALID) << 24;
2567			} else {
2568				ivar &= 0xFFFF00FF;
2569				ivar |= (que->msix | E1000_IVAR_VALID) << 8;
2570			}
2571			E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2572			adapter->que_mask |= que->eims;
2573		}
2574
2575		/* And for the link interrupt */
2576		ivar = (adapter->linkvec | E1000_IVAR_VALID) << 8;
2577		adapter->link_mask = 1 << adapter->linkvec;
2578		E1000_WRITE_REG(hw, E1000_IVAR_MISC, ivar);
2579		break;
2580	case e1000_82576:
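		/*
		** The 82576 also pairs queues in each IVAR, but indexes
		** by queue number modulo 8: queues 0-7 use the low bytes
		** and queues 8-15 the high bytes.
		*/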
2581		/* RX entries */
2582		for (int i = 0; i < adapter->num_queues; i++) {
2583			u32 index = i & 0x7; /* Each IVAR has two entries */
2584			ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2585			que = &adapter->queues[i];
2586			if (i < 8) {
2587				ivar &= 0xFFFFFF00;
2588				ivar |= que->msix | E1000_IVAR_VALID;
2589			} else {
2590				ivar &= 0xFF00FFFF;
2591				ivar |= (que->msix | E1000_IVAR_VALID) << 16;
2592			}
2593			E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2594			adapter->que_mask |= que->eims;
2595		}
2596		/* TX entries */
2597		for (int i = 0; i < adapter->num_queues; i++) {
2598			u32 index = i & 0x7; /* Each IVAR has two entries */
2599			ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2600			que = &adapter->queues[i];
2601			if (i < 8) {
2602				ivar &= 0xFFFF00FF;
2603				ivar |= (que->msix | E1000_IVAR_VALID) << 8;
2604			} else {
2605				ivar &= 0x00FFFFFF;
2606				ivar |= (que->msix | E1000_IVAR_VALID) << 24;
2607			}
2608			E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2609			adapter->que_mask |= que->eims;
2610		}
2611
2612		/* And for the link interrupt */
2613		ivar = (adapter->linkvec | E1000_IVAR_VALID) << 8;
2614		adapter->link_mask = 1 << adapter->linkvec;
2615		E1000_WRITE_REG(hw, E1000_IVAR_MISC, ivar);
2616		break;
2617
2618	case e1000_82575:
2619                /* enable MSI-X support */
2620		tmp = E1000_READ_REG(hw, E1000_CTRL_EXT);
2621                tmp |= E1000_CTRL_EXT_PBA_CLR;
2622                /* Auto-Mask interrupts upon ICR read. */
2623                tmp |= E1000_CTRL_EXT_EIAME;
2624                tmp |= E1000_CTRL_EXT_IRCA;
2625                E1000_WRITE_REG(hw, E1000_CTRL_EXT, tmp);
2626
2627		/* Queues */
2628		for (int i = 0; i < adapter->num_queues; i++) {
2629			que = &adapter->queues[i];
2630			tmp = E1000_EICR_RX_QUEUE0 << i;
2631			tmp |= E1000_EICR_TX_QUEUE0 << i;
2632			que->eims = tmp;
2633			E1000_WRITE_REG_ARRAY(hw, E1000_MSIXBM(0),
2634			    i, que->eims);
2635			adapter->que_mask |= que->eims;
2636		}
2637
2638		/* Link */
2639		E1000_WRITE_REG(hw, E1000_MSIXBM(adapter->linkvec),
2640		    E1000_EIMS_OTHER);
2641		adapter->link_mask |= E1000_EIMS_OTHER;
2642	default:
2643		break;
2644	}
2645
2646	/* Set the starting interrupt rate */
2647	if (igb_max_interrupt_rate > 0)
2648		newitr = (4000000 / igb_max_interrupt_rate) & 0x7FFC;
2649
2650        if (hw->mac.type == e1000_82575)
2651                newitr |= newitr << 16;
2652        else
2653                newitr |= E1000_EITR_CNT_IGNR;
2654
2655	for (int i = 0; i < adapter->num_queues; i++) {
2656		que = &adapter->queues[i];
2657		E1000_WRITE_REG(hw, E1000_EITR(que->msix), newitr);
2658	}
2659
2660	return;
2661}
2662
2663
2664static void
2665igb_free_pci_resources(struct adapter *adapter)
2666{
2667	struct		igb_queue *que = adapter->queues;
2668	device_t	dev = adapter->dev;
2669	int		rid;
2670
2671	/*
2672	** There is a slight possibility of a failure mode
2673	** in attach that will result in entering this function
2674	** before interrupt resources have been initialized, and
2675	** in that case we do not want to execute the loops below.
2676	** We can detect this reliably by the state of the adapter
2677	** res pointer.
2678	*/
2679	if (adapter->res == NULL)
2680		goto mem;
2681
2682	/*
2683	 * First release all the interrupt resources:
2684	 */
2685	for (int i = 0; i < adapter->num_queues; i++, que++) {
2686		rid = que->msix + 1;
2687		if (que->tag != NULL) {
2688			bus_teardown_intr(dev, que->res, que->tag);
2689			que->tag = NULL;
2690		}
2691		if (que->res != NULL)
2692			bus_release_resource(dev,
2693			    SYS_RES_IRQ, rid, que->res);
2694	}
2695
2696	/* Clean the Legacy or Link interrupt last */
2697	if (adapter->linkvec) /* we are doing MSIX */
2698		rid = adapter->linkvec + 1;
2699	else
2700		rid = (adapter->msix != 0) ? 1 : 0;
2701
2702	que = adapter->queues;
2703	if (adapter->tag != NULL) {
2704		taskqueue_drain(que->tq, &adapter->link_task);
2705		bus_teardown_intr(dev, adapter->res, adapter->tag);
2706		adapter->tag = NULL;
2707	}
2708	if (adapter->res != NULL)
2709		bus_release_resource(dev, SYS_RES_IRQ, rid, adapter->res);
2710
2711	for (int i = 0; i < adapter->num_queues; i++, que++) {
2712		if (que->tq != NULL) {
2713#ifndef IGB_LEGACY_TX
2714			taskqueue_drain(que->tq, &que->txr->txq_task);
2715#endif
2716			taskqueue_drain(que->tq, &que->que_task);
2717			taskqueue_free(que->tq);
2718		}
2719	}
2720mem:
2721	if (adapter->msix)
2722		pci_release_msi(dev);
2723
2724	if (adapter->msix_mem != NULL)
2725		bus_release_resource(dev, SYS_RES_MEMORY,
2726		    adapter->memrid, adapter->msix_mem);
2727
2728	if (adapter->pci_mem != NULL)
2729		bus_release_resource(dev, SYS_RES_MEMORY,
2730		    PCIR_BAR(0), adapter->pci_mem);
2731
2732}
2733
2734/*
2735 * Setup Either MSI/X or MSI
2736 */
2737static int
2738igb_setup_msix(struct adapter *adapter)
2739{
2740	device_t	dev = adapter->dev;
2741	int		bar, want, queues, msgs, maxqueues;
2742
2743	/* tuneable override */
2744	if (igb_enable_msix == 0)
2745		goto msi;
2746
2747	/* First try MSI/X */
2748	msgs = pci_msix_count(dev);
2749	if (msgs == 0)
2750		goto msi;
2751	/*
2752	** Some new devices, as with ixgbe, now may
2753	** use a different BAR, so we need to keep
2754	** track of which is used.
2755	*/
2756	adapter->memrid = PCIR_BAR(IGB_MSIX_BAR);
2757	bar = pci_read_config(dev, adapter->memrid, 4);
2758	if (bar == 0) /* use next bar */
2759		adapter->memrid += 4;
2760	adapter->msix_mem = bus_alloc_resource_any(dev,
2761	    SYS_RES_MEMORY, &adapter->memrid, RF_ACTIVE);
2762       	if (adapter->msix_mem == NULL) {
2763		/* May not be enabled */
2764		device_printf(adapter->dev,
2765		    "Unable to map MSIX table\n");
2766		goto msi;
2767	}
2768
2769	/* Figure out a reasonable auto config value */
2770	queues = (mp_ncpus > (msgs-1)) ? (msgs-1) : mp_ncpus;
2771
2772	/* Manual override */
2773	if (igb_num_queues != 0)
2774		queues = igb_num_queues;
2775
2776	/* Sanity check based on HW */
2777	switch (adapter->hw.mac.type) {
2778		case e1000_82575:
2779			maxqueues = 4;
2780			break;
2781		case e1000_82576:
2782		case e1000_82580:
2783		case e1000_i350:
2784		case e1000_i354:
2785			maxqueues = 8;
2786			break;
2787		case e1000_i210:
2788			maxqueues = 4;
2789			break;
2790		case e1000_i211:
2791			maxqueues = 2;
2792			break;
2793		default:  /* VF interfaces */
2794			maxqueues = 1;
2795			break;
2796	}
2797	if (queues > maxqueues)
2798		queues = maxqueues;
2799
2800	/* Manual override */
2801	if (igb_num_queues != 0)
2802		queues = igb_num_queues;
2803
2804	/*
2805	** One vector (RX/TX pair) per queue,
2806	** plus an additional one for the link interrupt
2807	*/
2808	want = queues + 1;
2809	if (msgs >= want)
2810		msgs = want;
2811	else {
2812               	device_printf(adapter->dev,
2813		    "MSIX Configuration Problem, "
2814		    "%d vectors configured, but %d queues wanted!\n",
2815		    msgs, want);
2816		goto msi;
2817	}
2818	if ((pci_alloc_msix(dev, &msgs) == 0) && (msgs == want)) {
2819               	device_printf(adapter->dev,
2820		    "Using MSIX interrupts with %d vectors\n", msgs);
2821		adapter->num_queues = queues;
2822		return (msgs);
2823	}
2824	/*
2825	** If MSIX alloc failed or provided us with
2826	** less than needed, free and fall through to MSI
2827	*/
2828	pci_release_msi(dev);
2829
2830msi:
2831       	if (adapter->msix_mem != NULL) {
2832		bus_release_resource(dev, SYS_RES_MEMORY,
2833		    PCIR_BAR(IGB_MSIX_BAR), adapter->msix_mem);
2834		adapter->msix_mem = NULL;
2835	}
2836       	msgs = 1;
2837	if (pci_alloc_msi(dev, &msgs) == 0) {
2838		device_printf(adapter->dev, "Using an MSI interrupt\n");
2839		return (msgs);
2840	}
2841	device_printf(adapter->dev, "Using a Legacy interrupt\n");
2842	return (0);
2843}
2844
2845/*********************************************************************
2846 *
2847 *  Initialize the DMA Coalescing feature
2848 *
2849 **********************************************************************/
2850static void
2851igb_init_dmac(struct adapter *adapter, u32 pba)
2852{
2853	device_t	dev = adapter->dev;
2854	struct e1000_hw *hw = &adapter->hw;
2855	u32 		dmac, reg = ~E1000_DMACR_DMAC_EN;
2856	u16		hwm;
2857
2858	if (hw->mac.type == e1000_i211)
2859		return;
2860
2861	if (hw->mac.type > e1000_82580) {
2862
2863		if (adapter->dmac == 0) { /* Disabling it */
2864			E1000_WRITE_REG(hw, E1000_DMACR, reg);
2865			return;
2866		} else
2867			device_printf(dev, "DMA Coalescing enabled\n");
2868
2869		/* Set starting threshold */
2870		E1000_WRITE_REG(hw, E1000_DMCTXTH, 0);
2871
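		/*
		** pba is in KB, so 64 * pba is the buffer size in 16-byte
		** units; back the watermark off from the top by one frame,
		** but by no more than 6KB.
		*/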
2872		hwm = 64 * pba - adapter->max_frame_size / 16;
2873		if (hwm < 64 * (pba - 6))
2874			hwm = 64 * (pba - 6);
2875		reg = E1000_READ_REG(hw, E1000_FCRTC);
2876		reg &= ~E1000_FCRTC_RTH_COAL_MASK;
2877		reg |= ((hwm << E1000_FCRTC_RTH_COAL_SHIFT)
2878		    & E1000_FCRTC_RTH_COAL_MASK);
2879		E1000_WRITE_REG(hw, E1000_FCRTC, reg);
2880
2881
2882		dmac = pba - adapter->max_frame_size / 512;
2883		if (dmac < pba - 10)
2884			dmac = pba - 10;
2885		reg = E1000_READ_REG(hw, E1000_DMACR);
2886		reg &= ~E1000_DMACR_DMACTHR_MASK;
2887		reg |= ((dmac << E1000_DMACR_DMACTHR_SHIFT)
2888		    & E1000_DMACR_DMACTHR_MASK);
2889
2890		/* Transition to L0x or L1 if available */
2891		reg |= (E1000_DMACR_DMAC_EN | E1000_DMACR_DMAC_LX_MASK);
2892
2893		/* Check whether this is a 2.5Gb backplane connection
2894		* before configuring the watchdog timer: the timer
2895		* value is in msec, counted in 12.8usec intervals
2896		* on a 2.5Gb connection and in 32usec intervals
2897		* otherwise.
2898		*/
2899		if (hw->mac.type == e1000_i354) {
2900			int status = E1000_READ_REG(hw, E1000_STATUS);
2901			if ((status & E1000_STATUS_2P5_SKU) &&
2902			    (!(status & E1000_STATUS_2P5_SKU_OVER)))
2903				reg |= ((adapter->dmac * 5) >> 6);
2904			else
2905				reg |= (adapter->dmac >> 5);
2906		} else {
2907			reg |= (adapter->dmac >> 5);
2908		}
2909
2910		E1000_WRITE_REG(hw, E1000_DMACR, reg);
2911
2912#ifdef I210_OBFF_SUPPORT
2913		/*
2914		 * Set the OBFF Rx threshold to DMA Coalescing Rx
2915		 * threshold - 2KB and enable the feature in the
2916		 * hardware for I210.
2917		 */
2918		if (hw->mac.type == e1000_i210) {
2919			int obff = dmac - 2;
2920			reg = E1000_READ_REG(hw, E1000_DOBFFCTL);
2921			reg &= ~E1000_DOBFFCTL_OBFFTHR_MASK;
2922			reg |= (obff & E1000_DOBFFCTL_OBFFTHR_MASK)
2923			    | E1000_DOBFFCTL_EXIT_ACT_MASK;
2924			E1000_WRITE_REG(hw, E1000_DOBFFCTL, reg);
2925		}
2926#endif
2927		E1000_WRITE_REG(hw, E1000_DMCRTRH, 0);
2928
2929		/* Set the interval before transition */
2930		reg = E1000_READ_REG(hw, E1000_DMCTLX);
2931		if (hw->mac.type == e1000_i350)
2932			reg |= IGB_DMCTLX_DCFLUSH_DIS;
2933		/*
2934		** On a 2.5Gb connection the TTLX unit is 0.4 usec,
2935		** so the same 4 usec delay is written as 0xA instead of 0x4.
2936		*/
2937		if (hw->mac.type == e1000_i354) {
2938			int status = E1000_READ_REG(hw, E1000_STATUS);
2939			if ((status & E1000_STATUS_2P5_SKU) &&
2940			    (!(status & E1000_STATUS_2P5_SKU_OVER)))
2941				reg |= 0xA;
2942			else
2943				reg |= 0x4;
2944		} else {
2945			reg |= 0x4;
2946		}
2947
2948		E1000_WRITE_REG(hw, E1000_DMCTLX, reg);
2949
2950		/* free space in tx packet buffer to wake from DMA coal */
2951		E1000_WRITE_REG(hw, E1000_DMCTXTH, (IGB_TXPBSIZE -
2952		    (2 * adapter->max_frame_size)) >> 6);
2953
2954		/* make low power state decision controlled by DMA coal */
2955		reg = E1000_READ_REG(hw, E1000_PCIEMISC);
2956		reg &= ~E1000_PCIEMISC_LX_DECISION;
2957		E1000_WRITE_REG(hw, E1000_PCIEMISC, reg);
2958
2959	} else if (hw->mac.type == e1000_82580) {
2960		u32 reg = E1000_READ_REG(hw, E1000_PCIEMISC);
2961		E1000_WRITE_REG(hw, E1000_PCIEMISC,
2962		    reg & ~E1000_PCIEMISC_LX_DECISION);
2963		E1000_WRITE_REG(hw, E1000_DMACR, 0);
2964	}
2965}
2966
2967
2968/*********************************************************************
2969 *
2970 *  Set up a fresh starting state
2971 *
2972 **********************************************************************/
2973static void
2974igb_reset(struct adapter *adapter)
2975{
2976	device_t	dev = adapter->dev;
2977	struct e1000_hw *hw = &adapter->hw;
2978	struct e1000_fc_info *fc = &hw->fc;
2979	struct ifnet	*ifp = adapter->ifp;
2980	u32		pba = 0;
2981	u16		hwm;
2982
2983	INIT_DEBUGOUT("igb_reset: begin");
2984
2985	/* Let the firmware know the OS is in control */
2986	igb_get_hw_control(adapter);
2987
2988	/*
2989	 * Packet Buffer Allocation (PBA)
2990	 * Writing PBA sets the receive portion of the buffer
2991	 * the remainder is used for the transmit buffer.
2992	 */
2993	switch (hw->mac.type) {
2994	case e1000_82575:
2995		pba = E1000_PBA_32K;
2996		break;
2997	case e1000_82576:
2998	case e1000_vfadapt:
2999		pba = E1000_READ_REG(hw, E1000_RXPBS);
3000		pba &= E1000_RXPBS_SIZE_MASK_82576;
3001		break;
3002	case e1000_82580:
3003	case e1000_i350:
3004	case e1000_i354:
3005	case e1000_vfadapt_i350:
3006		pba = E1000_READ_REG(hw, E1000_RXPBS);
3007		pba = e1000_rxpbs_adjust_82580(pba);
3008		break;
3009	case e1000_i210:
3010	case e1000_i211:
3011		pba = E1000_PBA_34K;
3012	default:
3013		break;
3014	}
3015
3016	/* Special needs in case of Jumbo frames */
3017	if ((hw->mac.type == e1000_82575) && (ifp->if_mtu > ETHERMTU)) {
3018		u32 tx_space, min_tx, min_rx;
3019		pba = E1000_READ_REG(hw, E1000_PBA);
3020		tx_space = pba >> 16;
3021		pba &= 0xffff;
3022		min_tx = (adapter->max_frame_size +
3023		    sizeof(struct e1000_tx_desc) - ETHERNET_FCS_SIZE) * 2;
3024		min_tx = roundup2(min_tx, 1024);
3025		min_tx >>= 10;
3026                min_rx = adapter->max_frame_size;
3027                min_rx = roundup2(min_rx, 1024);
3028                min_rx >>= 10;
3029		if (tx_space < min_tx &&
3030		    ((min_tx - tx_space) < pba)) {
3031			pba = pba - (min_tx - tx_space);
3032			/*
3033			 * if short on rx space, rx wins
3034			 * and must trump tx adjustment
3035			 */
3036                        if (pba < min_rx)
3037                                pba = min_rx;
3038		}
3039		E1000_WRITE_REG(hw, E1000_PBA, pba);
3040	}
3041
3042	INIT_DEBUGOUT1("igb_reset: pba=%dK", pba);
3043
3044	/*
3045	 * These parameters control the automatic generation (Tx) and
3046	 * response (Rx) to Ethernet PAUSE frames.
3047	 * - High water mark should allow for at least two frames to be
3048	 *   received after sending an XOFF.
3049	 * - Low water mark works best when it is very near the high water mark.
3050	 *   This allows the receiver to restart by sending XON when it has
3051	 *   drained a bit.
3052	 */
3053	hwm = min(((pba << 10) * 9 / 10),
3054	    ((pba << 10) - 2 * adapter->max_frame_size));
3055
3056	if (hw->mac.type < e1000_82576) {
3057		fc->high_water = hwm & 0xFFF8;  /* 8-byte granularity */
3058		fc->low_water = fc->high_water - 8;
3059	} else {
3060		fc->high_water = hwm & 0xFFF0;  /* 16-byte granularity */
3061		fc->low_water = fc->high_water - 16;
3062	}
3063
3064	fc->pause_time = IGB_FC_PAUSE_TIME;
3065	fc->send_xon = TRUE;
3066	if (adapter->fc)
3067		fc->requested_mode = adapter->fc;
3068	else
3069		fc->requested_mode = e1000_fc_default;
3070
3071	/* Issue a global reset */
3072	e1000_reset_hw(hw);
3073	E1000_WRITE_REG(hw, E1000_WUC, 0);
3074
3075	/* Reset for AutoMediaDetect */
3076	if (adapter->flags & IGB_MEDIA_RESET) {
3077		e1000_setup_init_funcs(hw, TRUE);
3078		e1000_get_bus_info(hw);
3079		adapter->flags &= ~IGB_MEDIA_RESET;
3080	}
3081
3082	if (e1000_init_hw(hw) < 0)
3083		device_printf(dev, "Hardware Initialization Failed\n");
3084
3085	/* Setup DMA Coalescing */
3086	igb_init_dmac(adapter, pba);
3087
3088	E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);
3089	e1000_get_phy_info(hw);
3090	e1000_check_for_link(hw);
3091	return;
3092}
3093
3094/*********************************************************************
3095 *
3096 *  Setup networking device structure and register an interface.
3097 *
3098 **********************************************************************/
3099static int
3100igb_setup_interface(device_t dev, struct adapter *adapter)
3101{
3102	struct ifnet   *ifp;
3103
3104	INIT_DEBUGOUT("igb_setup_interface: begin");
3105
3106	ifp = adapter->ifp = if_alloc(IFT_ETHER);
3107	if (ifp == NULL) {
3108		device_printf(dev, "can not allocate ifnet structure\n");
3109		return (-1);
3110	}
3111	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
3112	ifp->if_init =  igb_init;
3113	ifp->if_softc = adapter;
3114	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
3115	ifp->if_ioctl = igb_ioctl;
3116#ifndef IGB_LEGACY_TX
3117	ifp->if_transmit = igb_mq_start;
3118	ifp->if_qflush = igb_qflush;
3119#else
3120	ifp->if_start = igb_start;
3121	IFQ_SET_MAXLEN(&ifp->if_snd, adapter->num_tx_desc - 1);
3122	ifp->if_snd.ifq_drv_maxlen = adapter->num_tx_desc - 1;
3123	IFQ_SET_READY(&ifp->if_snd);
3124#endif
3125
3126	ether_ifattach(ifp, adapter->hw.mac.addr);
3127
3128	ifp->if_capabilities = ifp->if_capenable = 0;
3129
3130	ifp->if_capabilities = IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM;
3131	ifp->if_capabilities |= IFCAP_TSO;
3132	ifp->if_capabilities |= IFCAP_JUMBO_MTU;
3133	ifp->if_capenable = ifp->if_capabilities;
3134
3135	/* Advertise LRO capability, but leave it disabled by default */
3136	ifp->if_capabilities |= IFCAP_LRO;
3137
3138#ifdef DEVICE_POLLING
3139	ifp->if_capabilities |= IFCAP_POLLING;
3140#endif
3141
3142	/*
3143	 * Tell the upper layer(s) we
3144	 * support full VLAN capability.
3145	 */
3146	ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);
3147	ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING
3148			     |  IFCAP_VLAN_HWTSO
3149			     |  IFCAP_VLAN_MTU;
3150	ifp->if_capenable |= IFCAP_VLAN_HWTAGGING
3151			  |  IFCAP_VLAN_HWTSO
3152			  |  IFCAP_VLAN_MTU;
3153
3154	/*
3155	** Don't turn this on by default: if vlans are
3156	** created on another pseudo device (e.g. lagg),
3157	** vlan events are not passed through, breaking
3158	** operation, whereas with HW FILTER off it works.
3159	** If using vlans directly on the igb driver you can
3160	** enable this and get full hardware tag filtering.
3161	*/
3162	ifp->if_capabilities |= IFCAP_VLAN_HWFILTER;
3163
3164	/*
3165	 * Specify the media types supported by this adapter and register
3166	 * callbacks to update media and link information
3167	 */
3168	ifmedia_init(&adapter->media, IFM_IMASK,
3169	    igb_media_change, igb_media_status);
3170	if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
3171	    (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
3172		ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_SX | IFM_FDX,
3173			    0, NULL);
3174		ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_SX, 0, NULL);
3175	} else {
3176		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T, 0, NULL);
3177		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T | IFM_FDX,
3178			    0, NULL);
3179		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX,
3180			    0, NULL);
3181		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
3182			    0, NULL);
3183		if (adapter->hw.phy.type != e1000_phy_ife) {
3184			ifmedia_add(&adapter->media,
3185				IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
3186			ifmedia_add(&adapter->media,
3187				IFM_ETHER | IFM_1000_T, 0, NULL);
3188		}
3189	}
3190	ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
3191	ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
3192	return (0);
3193}
3194
3195
3196/*
3197 * Manage DMA'able memory.
3198 */
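/*
** bus_dmamap_load() callback: record the bus address of the single
** segment for the caller.
*/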
3199static void
3200igb_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
3201{
3202	if (error)
3203		return;
3204	*(bus_addr_t *) arg = segs[0].ds_addr;
3205}
3206
3207static int
3208igb_dma_malloc(struct adapter *adapter, bus_size_t size,
3209        struct igb_dma_alloc *dma, int mapflags)
3210{
3211	int error;
3212
3213	error = bus_dma_tag_create(bus_get_dma_tag(adapter->dev), /* parent */
3214				IGB_DBA_ALIGN, 0,	/* alignment, bounds */
3215				BUS_SPACE_MAXADDR,	/* lowaddr */
3216				BUS_SPACE_MAXADDR,	/* highaddr */
3217				NULL, NULL,		/* filter, filterarg */
3218				size,			/* maxsize */
3219				1,			/* nsegments */
3220				size,			/* maxsegsize */
3221				0,			/* flags */
3222				NULL,			/* lockfunc */
3223				NULL,			/* lockarg */
3224				&dma->dma_tag);
3225	if (error) {
3226		device_printf(adapter->dev,
3227		    "%s: bus_dma_tag_create failed: %d\n",
3228		    __func__, error);
3229		goto fail_0;
3230	}
3231
3232	error = bus_dmamem_alloc(dma->dma_tag, (void**) &dma->dma_vaddr,
3233	    BUS_DMA_NOWAIT | BUS_DMA_COHERENT, &dma->dma_map);
3234	if (error) {
3235		device_printf(adapter->dev,
3236		    "%s: bus_dmamem_alloc(%ju) failed: %d\n",
3237		    __func__, (uintmax_t)size, error);
3238		goto fail_2;
3239	}
3240
3241	dma->dma_paddr = 0;
3242	error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
3243	    size, igb_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT);
3244	if (error || dma->dma_paddr == 0) {
3245		device_printf(adapter->dev,
3246		    "%s: bus_dmamap_load failed: %d\n",
3247		    __func__, error);
3248		goto fail_3;
3249	}
3250
3251	return (0);
3252
3253fail_3:
3254	bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3255fail_2:
3256	bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
3257	bus_dma_tag_destroy(dma->dma_tag);
3258fail_0:
3259	dma->dma_map = NULL;
3260	dma->dma_tag = NULL;
3261
3262	return (error);
3263}
3264
3265static void
3266igb_dma_free(struct adapter *adapter, struct igb_dma_alloc *dma)
3267{
3268	if (dma->dma_tag == NULL)
3269		return;
3270	if (dma->dma_map != NULL) {
3271		bus_dmamap_sync(dma->dma_tag, dma->dma_map,
3272		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
3273		bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3274		bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
3275		dma->dma_map = NULL;
3276	}
3277	bus_dma_tag_destroy(dma->dma_tag);
3278	dma->dma_tag = NULL;
3279}
3280
3281
3282/*********************************************************************
3283 *
3284 *  Allocate memory for the transmit and receive rings, and then
3285 *  the descriptors associated with each, called only once at attach.
3286 *
3287 **********************************************************************/
3288static int
3289igb_allocate_queues(struct adapter *adapter)
3290{
3291	device_t dev = adapter->dev;
3292	struct igb_queue	*que = NULL;
3293	struct tx_ring		*txr = NULL;
3294	struct rx_ring		*rxr = NULL;
3295	int rsize, tsize, error = E1000_SUCCESS;
3296	int txconf = 0, rxconf = 0;
3297
3298	/* First allocate the top level queue structs */
3299	if (!(adapter->queues =
3300	    (struct igb_queue *) malloc(sizeof(struct igb_queue) *
3301	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3302		device_printf(dev, "Unable to allocate queue memory\n");
3303		error = ENOMEM;
3304		goto fail;
3305	}
3306
3307	/* Next allocate the TX ring struct memory */
3308	if (!(adapter->tx_rings =
3309	    (struct tx_ring *) malloc(sizeof(struct tx_ring) *
3310	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3311		device_printf(dev, "Unable to allocate TX ring memory\n");
3312		error = ENOMEM;
3313		goto tx_fail;
3314	}
3315
3316	/* Now allocate the RX */
3317	if (!(adapter->rx_rings =
3318	    (struct rx_ring *) malloc(sizeof(struct rx_ring) *
3319	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3320		device_printf(dev, "Unable to allocate RX ring memory\n");
3321		error = ENOMEM;
3322		goto rx_fail;
3323	}
3324
3325	tsize = roundup2(adapter->num_tx_desc *
3326	    sizeof(union e1000_adv_tx_desc), IGB_DBA_ALIGN);
3327	/*
3328	 * Now set up the TX queues, txconf is needed to handle the
3329	 * possibility that things fail midcourse and we need to
3330	 * undo memory gracefully
3331	 */
3332	for (int i = 0; i < adapter->num_queues; i++, txconf++) {
3333		/* Set up some basics */
3334		txr = &adapter->tx_rings[i];
3335		txr->adapter = adapter;
3336		txr->me = i;
3337		txr->num_desc = adapter->num_tx_desc;
3338
3339		/* Initialize the TX lock */
3340		snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
3341		    device_get_nameunit(dev), txr->me);
3342		mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF);
3343
3344		if (igb_dma_malloc(adapter, tsize,
3345			&txr->txdma, BUS_DMA_NOWAIT)) {
3346			device_printf(dev,
3347			    "Unable to allocate TX Descriptor memory\n");
3348			error = ENOMEM;
3349			goto err_tx_desc;
3350		}
3351		txr->tx_base = (union e1000_adv_tx_desc *)txr->txdma.dma_vaddr;
3352		bzero((void *)txr->tx_base, tsize);
3353
3354        	/* Now allocate transmit buffers for the ring */
3355        	if (igb_allocate_transmit_buffers(txr)) {
3356			device_printf(dev,
3357			    "Critical Failure setting up transmit buffers\n");
3358			error = ENOMEM;
3359			goto err_tx_desc;
3360        	}
3361#ifndef IGB_LEGACY_TX
3362		/* Allocate a buf ring */
3363		txr->br = buf_ring_alloc(igb_buf_ring_size, M_DEVBUF,
3364		    M_WAITOK, &txr->tx_mtx);
3365#endif
3366	}
3367
3368	/*
3369	 * Next the RX queues...
3370	 */
3371	rsize = roundup2(adapter->num_rx_desc *
3372	    sizeof(union e1000_adv_rx_desc), IGB_DBA_ALIGN);
3373	for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
3374		rxr = &adapter->rx_rings[i];
3375		rxr->adapter = adapter;
3376		rxr->me = i;
3377
3378		/* Initialize the RX lock */
3379		snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
3380		    device_get_nameunit(dev), rxr->me);
3381		mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF);
3382
3383		if (igb_dma_malloc(adapter, rsize,
3384			&rxr->rxdma, BUS_DMA_NOWAIT)) {
3385			device_printf(dev,
3386			    "Unable to allocate RX Descriptor memory\n");
3387			error = ENOMEM;
3388			goto err_rx_desc;
3389		}
3390		rxr->rx_base = (union e1000_adv_rx_desc *)rxr->rxdma.dma_vaddr;
3391		bzero((void *)rxr->rx_base, rsize);
3392
3393        	/* Allocate receive buffers for the ring*/
3394		if (igb_allocate_receive_buffers(rxr)) {
3395			device_printf(dev,
3396			    "Critical Failure setting up receive buffers\n");
3397			error = ENOMEM;
3398			goto err_rx_desc;
3399		}
3400	}
3401
3402	/*
3403	** Finally set up the queue holding structs
3404	*/
3405	for (int i = 0; i < adapter->num_queues; i++) {
3406		que = &adapter->queues[i];
3407		que->adapter = adapter;
3408		que->txr = &adapter->tx_rings[i];
3409		que->rxr = &adapter->rx_rings[i];
3410	}
3411
3412	return (0);
3413
3414err_rx_desc:
3415	for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
3416		igb_dma_free(adapter, &rxr->rxdma);
3417err_tx_desc:
3418	for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
3419		igb_dma_free(adapter, &txr->txdma);
3420	free(adapter->rx_rings, M_DEVBUF);
3421rx_fail:
3422#ifndef IGB_LEGACY_TX
3423	buf_ring_free(txr->br, M_DEVBUF);
3424#endif
3425	free(adapter->tx_rings, M_DEVBUF);
3426tx_fail:
3427	free(adapter->queues, M_DEVBUF);
3428fail:
3429	return (error);
3430}
3431
3432/*********************************************************************
3433 *
3434 *  Allocate memory for tx_buffer structures. The tx_buffer stores all
3435 *  the information needed to transmit a packet on the wire. This is
3436 *  called only once at attach; setup is done on every reset.
3437 *
3438 **********************************************************************/
3439static int
3440igb_allocate_transmit_buffers(struct tx_ring *txr)
3441{
3442	struct adapter *adapter = txr->adapter;
3443	device_t dev = adapter->dev;
3444	struct igb_tx_buf *txbuf;
3445	int error, i;
3446
3447	/*
3448	 * Setup DMA descriptor areas.
3449	 */
3450	if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
3451			       1, 0,			/* alignment, bounds */
3452			       BUS_SPACE_MAXADDR,	/* lowaddr */
3453			       BUS_SPACE_MAXADDR,	/* highaddr */
3454			       NULL, NULL,		/* filter, filterarg */
3455			       IGB_TSO_SIZE,		/* maxsize */
3456			       IGB_MAX_SCATTER,		/* nsegments */
3457			       PAGE_SIZE,		/* maxsegsize */
3458			       0,			/* flags */
3459			       NULL,			/* lockfunc */
3460			       NULL,			/* lockfuncarg */
3461			       &txr->txtag))) {
3462		device_printf(dev,"Unable to allocate TX DMA tag\n");
3463		goto fail;
3464	}
3465
3466	if (!(txr->tx_buffers =
3467	    (struct igb_tx_buf *) malloc(sizeof(struct igb_tx_buf) *
3468	    adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3469		device_printf(dev, "Unable to allocate tx_buffer memory\n");
3470		error = ENOMEM;
3471		goto fail;
3472	}
3473
3474        /* Create the descriptor buffer dma maps */
3475	txbuf = txr->tx_buffers;
3476	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3477		error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
3478		if (error != 0) {
3479			device_printf(dev, "Unable to create TX DMA map\n");
3480			goto fail;
3481		}
3482	}
3483
3484	return 0;
3485fail:
3486	/* We free all, it handles case where we are in the middle */
3487	igb_free_transmit_structures(adapter);
3488	return (error);
3489}
3490
3491/*********************************************************************
3492 *
3493 *  Initialize a transmit ring.
3494 *
3495 **********************************************************************/
3496static void
3497igb_setup_transmit_ring(struct tx_ring *txr)
3498{
3499	struct adapter *adapter = txr->adapter;
3500	struct igb_tx_buf *txbuf;
3501	int i;
3502#ifdef DEV_NETMAP
3503	struct netmap_adapter *na = NA(adapter->ifp);
3504	struct netmap_slot *slot;
3505#endif /* DEV_NETMAP */
3506
3507	/* Clear the old descriptor contents */
3508	IGB_TX_LOCK(txr);
3509#ifdef DEV_NETMAP
3510	slot = netmap_reset(na, NR_TX, txr->me, 0);
3511#endif /* DEV_NETMAP */
3512	bzero((void *)txr->tx_base,
3513	      (sizeof(union e1000_adv_tx_desc)) * adapter->num_tx_desc);
3514	/* Reset indices */
3515	txr->next_avail_desc = 0;
3516	txr->next_to_clean = 0;
3517
3518	/* Free any existing tx buffers. */
3519        txbuf = txr->tx_buffers;
3520	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3521		if (txbuf->m_head != NULL) {
3522			bus_dmamap_sync(txr->txtag, txbuf->map,
3523			    BUS_DMASYNC_POSTWRITE);
3524			bus_dmamap_unload(txr->txtag, txbuf->map);
3525			m_freem(txbuf->m_head);
3526			txbuf->m_head = NULL;
3527		}
3528#ifdef DEV_NETMAP
3529		if (slot) {
3530			int si = netmap_idx_n2k(&na->tx_rings[txr->me], i);
3531			/* no need to set the address */
3532			netmap_load_map(txr->txtag, txbuf->map, NMB(slot + si));
3533		}
3534#endif /* DEV_NETMAP */
3535		/* clear the watch index */
3536		txbuf->eop = NULL;
3537        }
3538
3539	/* Set number of descriptors available */
3540	txr->tx_avail = adapter->num_tx_desc;
3541
3542	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3543	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3544	IGB_TX_UNLOCK(txr);
3545}
3546
3547/*********************************************************************
3548 *
3549 *  Initialize all transmit rings.
3550 *
3551 **********************************************************************/
3552static void
3553igb_setup_transmit_structures(struct adapter *adapter)
3554{
3555	struct tx_ring *txr = adapter->tx_rings;
3556
3557	for (int i = 0; i < adapter->num_queues; i++, txr++)
3558		igb_setup_transmit_ring(txr);
3559
3560	return;
3561}
3562
3563/*********************************************************************
3564 *
3565 *  Enable transmit unit.
3566 *
3567 **********************************************************************/
3568static void
3569igb_initialize_transmit_units(struct adapter *adapter)
3570{
3571	struct tx_ring	*txr = adapter->tx_rings;
3572	struct e1000_hw *hw = &adapter->hw;
3573	u32		tctl, txdctl;
3574
3575	INIT_DEBUGOUT("igb_initialize_transmit_units: begin");
3576	tctl = txdctl = 0;
3577
3578	/* Setup the Tx Descriptor Rings */
3579	for (int i = 0; i < adapter->num_queues; i++, txr++) {
3580		u64 bus_addr = txr->txdma.dma_paddr;
3581
3582		E1000_WRITE_REG(hw, E1000_TDLEN(i),
3583		    adapter->num_tx_desc * sizeof(struct e1000_tx_desc));
3584		E1000_WRITE_REG(hw, E1000_TDBAH(i),
3585		    (uint32_t)(bus_addr >> 32));
3586		E1000_WRITE_REG(hw, E1000_TDBAL(i),
3587		    (uint32_t)bus_addr);
3588
3589		/* Setup the HW Tx Head and Tail descriptor pointers */
3590		E1000_WRITE_REG(hw, E1000_TDT(i), 0);
3591		E1000_WRITE_REG(hw, E1000_TDH(i), 0);
3592
3593		HW_DEBUGOUT2("Base = %x, Length = %x\n",
3594		    E1000_READ_REG(hw, E1000_TDBAL(i)),
3595		    E1000_READ_REG(hw, E1000_TDLEN(i)));
3596
3597		txr->queue_status = IGB_QUEUE_IDLE;
3598
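		/*
		** Program the descriptor prefetch (PTHRESH), host
		** (HTHRESH) and write-back (WTHRESH) thresholds,
		** then enable this transmit queue.
		*/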
3599		txdctl |= IGB_TX_PTHRESH;
3600		txdctl |= IGB_TX_HTHRESH << 8;
3601		txdctl |= IGB_TX_WTHRESH << 16;
3602		txdctl |= E1000_TXDCTL_QUEUE_ENABLE;
3603		E1000_WRITE_REG(hw, E1000_TXDCTL(i), txdctl);
3604	}
3605
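	/*
	** A VF interface has no access to the global transmit
	** control registers below; those belong to the PF.
	*/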
3606	if (adapter->vf_ifp)
3607		return;
3608
3609	e1000_config_collision_dist(hw);
3610
3611	/* Program the Transmit Control Register */
3612	tctl = E1000_READ_REG(hw, E1000_TCTL);
3613	tctl &= ~E1000_TCTL_CT;
3614	tctl |= (E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN |
3615		   (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT));
3616
3617	/* This write will effectively turn on the transmit unit. */
3618	E1000_WRITE_REG(hw, E1000_TCTL, tctl);
3619}
3620
3621/*********************************************************************
3622 *
3623 *  Free all transmit rings.
3624 *
3625 **********************************************************************/
3626static void
3627igb_free_transmit_structures(struct adapter *adapter)
3628{
3629	struct tx_ring *txr = adapter->tx_rings;
3630
3631	for (int i = 0; i < adapter->num_queues; i++, txr++) {
3632		IGB_TX_LOCK(txr);
3633		igb_free_transmit_buffers(txr);
3634		igb_dma_free(adapter, &txr->txdma);
3635		IGB_TX_UNLOCK(txr);
3636		IGB_TX_LOCK_DESTROY(txr);
3637	}
3638	free(adapter->tx_rings, M_DEVBUF);
3639}
3640
3641/*********************************************************************
3642 *
3643 *  Free transmit ring related data structures.
3644 *
3645 **********************************************************************/
3646static void
3647igb_free_transmit_buffers(struct tx_ring *txr)
3648{
3649	struct adapter *adapter = txr->adapter;
3650	struct igb_tx_buf *tx_buffer;
3651	int             i;
3652
3653	INIT_DEBUGOUT("free_transmit_ring: begin");
3654
3655	if (txr->tx_buffers == NULL)
3656		return;
3657
3658	tx_buffer = txr->tx_buffers;
3659	for (i = 0; i < adapter->num_tx_desc; i++, tx_buffer++) {
3660		if (tx_buffer->m_head != NULL) {
3661			bus_dmamap_sync(txr->txtag, tx_buffer->map,
3662			    BUS_DMASYNC_POSTWRITE);
3663			bus_dmamap_unload(txr->txtag,
3664			    tx_buffer->map);
3665			m_freem(tx_buffer->m_head);
3666			tx_buffer->m_head = NULL;
3667			if (tx_buffer->map != NULL) {
3668				bus_dmamap_destroy(txr->txtag,
3669				    tx_buffer->map);
3670				tx_buffer->map = NULL;
3671			}
3672		} else if (tx_buffer->map != NULL) {
3673			bus_dmamap_unload(txr->txtag,
3674			    tx_buffer->map);
3675			bus_dmamap_destroy(txr->txtag,
3676			    tx_buffer->map);
3677			tx_buffer->map = NULL;
3678		}
3679	}
3680#ifndef IGB_LEGACY_TX
3681	if (txr->br != NULL)
3682		buf_ring_free(txr->br, M_DEVBUF);
3683#endif
3684	if (txr->tx_buffers != NULL) {
3685		free(txr->tx_buffers, M_DEVBUF);
3686		txr->tx_buffers = NULL;
3687	}
3688	if (txr->txtag != NULL) {
3689		bus_dma_tag_destroy(txr->txtag);
3690		txr->txtag = NULL;
3691	}
3692	return;
3693}
3694
3695/**********************************************************************
3696 *
3697 *  Setup work for hardware segmentation offload (TSO) on
3698 *  adapters using advanced tx descriptors
3699 *
3700 **********************************************************************/
3701static int
3702igb_tso_setup(struct tx_ring *txr, struct mbuf *mp,
3703    u32 *cmd_type_len, u32 *olinfo_status)
3704{
3705	struct adapter *adapter = txr->adapter;
3706	struct e1000_adv_tx_context_desc *TXD;
3707	u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0;
3708	u32 mss_l4len_idx = 0, paylen;
3709	u16 vtag = 0, eh_type;
3710	int ctxd, ehdrlen, ip_hlen, tcp_hlen;
3711	struct ether_vlan_header *eh;
3712#ifdef INET6
3713	struct ip6_hdr *ip6;
3714#endif
3715#ifdef INET
3716	struct ip *ip;
3717#endif
3718	struct tcphdr *th;
3719
3720
3721	/*
3722	 * Determine where frame payload starts.
3723	 * Jump over vlan headers if already present
3724	 */
3725	eh = mtod(mp, struct ether_vlan_header *);
3726	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
3727		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3728		eh_type = eh->evl_proto;
3729	} else {
3730		ehdrlen = ETHER_HDR_LEN;
3731		eh_type = eh->evl_encap_proto;
3732	}
3733
3734	switch (ntohs(eh_type)) {
3735#ifdef INET6
3736	case ETHERTYPE_IPV6:
3737		ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
3738		/* XXX-BZ For now we do not pretend to support ext. hdrs. */
3739		if (ip6->ip6_nxt != IPPROTO_TCP)
3740			return (ENXIO);
3741		ip_hlen = sizeof(struct ip6_hdr);
3742		ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
3743		th = (struct tcphdr *)((caddr_t)ip6 + ip_hlen);
3744		th->th_sum = in6_cksum_pseudo(ip6, 0, IPPROTO_TCP, 0);
3745		type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV6;
3746		break;
3747#endif
3748#ifdef INET
3749	case ETHERTYPE_IP:
3750		ip = (struct ip *)(mp->m_data + ehdrlen);
3751		if (ip->ip_p != IPPROTO_TCP)
3752			return (ENXIO);
3753		ip->ip_sum = 0;
3754		ip_hlen = ip->ip_hl << 2;
3755		th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
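		/*
		** Seed the TCP pseudo-header checksum (without the
		** length) so the hardware can finish the checksum
		** for each TSO segment.
		*/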
3756		th->th_sum = in_pseudo(ip->ip_src.s_addr,
3757		    ip->ip_dst.s_addr, htons(IPPROTO_TCP));
3758		type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4;
3759		/* Tell transmit desc to also do IPv4 checksum. */
3760		*olinfo_status |= E1000_TXD_POPTS_IXSM << 8;
3761		break;
3762#endif
3763	default:
3764		panic("%s: CSUM_TSO but no supported IP version (0x%04x)",
3765		    __func__, ntohs(eh_type));
3766		break;
3767	}
3768
3769	ctxd = txr->next_avail_desc;
3770	TXD = (struct e1000_adv_tx_context_desc *) &txr->tx_base[ctxd];
3771
3772	tcp_hlen = th->th_off << 2;
3773
3774	/* This is used in the transmit desc in encap */
3775	paylen = mp->m_pkthdr.len - ehdrlen - ip_hlen - tcp_hlen;
3776
3777	/* VLAN MACLEN IPLEN */
3778	if (mp->m_flags & M_VLANTAG) {
3779		vtag = htole16(mp->m_pkthdr.ether_vtag);
3780                vlan_macip_lens |= (vtag << E1000_ADVTXD_VLAN_SHIFT);
3781	}
3782
3783	vlan_macip_lens |= ehdrlen << E1000_ADVTXD_MACLEN_SHIFT;
3784	vlan_macip_lens |= ip_hlen;
3785	TXD->vlan_macip_lens = htole32(vlan_macip_lens);
3786
3787	/* ADV DTYPE TUCMD */
3788	type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
3789	type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP;
3790	TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
3791
3792	/* MSS L4LEN IDX */
3793	mss_l4len_idx |= (mp->m_pkthdr.tso_segsz << E1000_ADVTXD_MSS_SHIFT);
3794	mss_l4len_idx |= (tcp_hlen << E1000_ADVTXD_L4LEN_SHIFT);
3795	/* 82575 needs the queue index added */
3796	if (adapter->hw.mac.type == e1000_82575)
3797		mss_l4len_idx |= txr->me << 4;
3798	TXD->mss_l4len_idx = htole32(mss_l4len_idx);
3799
3800	TXD->seqnum_seed = htole32(0);
3801
3802	if (++ctxd == txr->num_desc)
3803		ctxd = 0;
3804
3805	txr->tx_avail--;
3806	txr->next_avail_desc = ctxd;
3807	*cmd_type_len |= E1000_ADVTXD_DCMD_TSE;
3808	*olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
3809	*olinfo_status |= paylen << E1000_ADVTXD_PAYLEN_SHIFT;
3810	++txr->tso_tx;
3811	return (0);
3812}
3813
3814/*********************************************************************
3815 *
3816 *  Advanced Context Descriptor setup for VLAN, CSUM or TSO
3817 *
3818 **********************************************************************/
3819
3820static int
3821igb_tx_ctx_setup(struct tx_ring *txr, struct mbuf *mp,
3822    u32 *cmd_type_len, u32 *olinfo_status)
3823{
3824	struct e1000_adv_tx_context_desc *TXD;
3825	struct adapter *adapter = txr->adapter;
3826	struct ether_vlan_header *eh;
3827	struct ip *ip;
3828	struct ip6_hdr *ip6;
3829	u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0, mss_l4len_idx = 0;
3830	int	ehdrlen, ip_hlen = 0;
3831	u16	etype;
3832	u8	ipproto = 0;
3833	int	offload = TRUE;
3834	int	ctxd = txr->next_avail_desc;
3835	u16	vtag = 0;
3836
3837	/* First check if TSO is to be used */
3838	if (mp->m_pkthdr.csum_flags & CSUM_TSO)
3839		return (igb_tso_setup(txr, mp, cmd_type_len, olinfo_status));
3840
3841	if ((mp->m_pkthdr.csum_flags & CSUM_OFFLOAD) == 0)
3842		offload = FALSE;
3843
3844	/* Indicate the whole packet as payload when not doing TSO */
3845       	*olinfo_status |= mp->m_pkthdr.len << E1000_ADVTXD_PAYLEN_SHIFT;
3846
3847	/* Now ready a context descriptor */
3848	TXD = (struct e1000_adv_tx_context_desc *) &txr->tx_base[ctxd];
3849
3850	/*
3851	** In advanced descriptors the vlan tag must
3852	** be placed into the context descriptor. Hence
3853	** we need to make one even if not doing offloads.
3854	*/
3855	if (mp->m_flags & M_VLANTAG) {
3856		vtag = htole16(mp->m_pkthdr.ether_vtag);
3857		vlan_macip_lens |= (vtag << E1000_ADVTXD_VLAN_SHIFT);
3858	} else if (offload == FALSE) /* ... no offload to do */
3859		return (0);
3860
3861	/*
3862	 * Determine where frame payload starts.
3863	 * Jump over vlan headers if already present,
3864	 * helpful for QinQ too.
3865	 */
3866	eh = mtod(mp, struct ether_vlan_header *);
3867	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
3868		etype = ntohs(eh->evl_proto);
3869		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3870	} else {
3871		etype = ntohs(eh->evl_encap_proto);
3872		ehdrlen = ETHER_HDR_LEN;
3873	}
3874
3875	/* Set the ether header length */
3876	vlan_macip_lens |= ehdrlen << E1000_ADVTXD_MACLEN_SHIFT;
3877
3878	switch (etype) {
3879		case ETHERTYPE_IP:
3880			ip = (struct ip *)(mp->m_data + ehdrlen);
3881			ip_hlen = ip->ip_hl << 2;
3882			ipproto = ip->ip_p;
3883			type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4;
3884			break;
3885		case ETHERTYPE_IPV6:
3886			ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
3887			ip_hlen = sizeof(struct ip6_hdr);
3888			/* XXX-BZ this will go badly in case of ext hdrs. */
3889			ipproto = ip6->ip6_nxt;
3890			type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV6;
3891			break;
3892		default:
3893			offload = FALSE;
3894			break;
3895	}
3896
3897	vlan_macip_lens |= ip_hlen;
3898	type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
3899
3900	switch (ipproto) {
3901		case IPPROTO_TCP:
3902			if (mp->m_pkthdr.csum_flags & CSUM_TCP)
3903				type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP;
3904			break;
3905		case IPPROTO_UDP:
3906			if (mp->m_pkthdr.csum_flags & CSUM_UDP)
3907				type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_UDP;
3908			break;
3909
3910#if __FreeBSD_version >= 800000
3911		case IPPROTO_SCTP:
3912			if (mp->m_pkthdr.csum_flags & CSUM_SCTP)
3913				type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_SCTP;
3914			break;
3915#endif
3916		default:
3917			offload = FALSE;
3918			break;
3919	}
3920
3921	if (offload) /* For the TX descriptor setup */
3922		*olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
3923
3924	/* 82575 needs the queue index added */
3925	if (adapter->hw.mac.type == e1000_82575)
3926		mss_l4len_idx = txr->me << 4;
3927
3928	/* Now copy bits into descriptor */
3929	TXD->vlan_macip_lens = htole32(vlan_macip_lens);
3930	TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
3931	TXD->seqnum_seed = htole32(0);
3932	TXD->mss_l4len_idx = htole32(mss_l4len_idx);
3933
3934	/* We've consumed the first desc, adjust counters */
3935	if (++ctxd == txr->num_desc)
3936		ctxd = 0;
3937	txr->next_avail_desc = ctxd;
3938	--txr->tx_avail;
3939
3940        return (0);
3941}
3942
3943/**********************************************************************
3944 *
3945 *  Examine each tx_buffer in the used queue. If the hardware is done
3946 *  processing the packet then free associated resources. The
3947 *  tx_buffer is put back on the free queue.
3948 *
3949 *  A TRUE return means there is still work in the ring to clean; FALSE means it is empty.
3950 **********************************************************************/
3951static bool
3952igb_txeof(struct tx_ring *txr)
3953{
3954	struct adapter		*adapter = txr->adapter;
3955	struct ifnet		*ifp = adapter->ifp;
3956	u32			work, processed = 0;
3957	u16			limit = txr->process_limit;
3958	struct igb_tx_buf	*buf;
3959	union e1000_adv_tx_desc *txd;
3960
3961	mtx_assert(&txr->tx_mtx, MA_OWNED);
3962
3963#ifdef DEV_NETMAP
3964	if (netmap_tx_irq(ifp, txr->me))
3965		return (FALSE);
3966#endif /* DEV_NETMAP */
3967
3968	if (txr->tx_avail == txr->num_desc) {
3969		txr->queue_status = IGB_QUEUE_IDLE;
3970		return FALSE;
3971	}
3972
3973	/* Get work starting point */
3974	work = txr->next_to_clean;
3975	buf = &txr->tx_buffers[work];
3976	txd = &txr->tx_base[work];
3977	work -= txr->num_desc; /* The distance to ring end */
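	/*
	** 'work' is kept as a negative offset from the end of
	** the ring; it reaches zero exactly when the index wraps.
	*/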
3978        bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3979            BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
3980	do {
3981		union e1000_adv_tx_desc *eop = buf->eop;
3982		if (eop == NULL) /* No work */
3983			break;
3984
3985		if ((eop->wb.status & E1000_TXD_STAT_DD) == 0)
3986			break;	/* I/O not complete */
3987
3988		if (buf->m_head) {
3989			txr->bytes +=
3990			    buf->m_head->m_pkthdr.len;
3991			bus_dmamap_sync(txr->txtag,
3992			    buf->map,
3993			    BUS_DMASYNC_POSTWRITE);
3994			bus_dmamap_unload(txr->txtag,
3995			    buf->map);
3996			m_freem(buf->m_head);
3997			buf->m_head = NULL;
3998		}
3999		buf->eop = NULL;
4000		++txr->tx_avail;
4001
4002		/* We clean the range if multi segment */
4003		while (txd != eop) {
4004			++txd;
4005			++buf;
4006			++work;
4007			/* wrap the ring? */
4008			if (__predict_false(!work)) {
4009				work -= txr->num_desc;
4010				buf = txr->tx_buffers;
4011				txd = txr->tx_base;
4012			}
4013			if (buf->m_head) {
4014				txr->bytes +=
4015				    buf->m_head->m_pkthdr.len;
4016				bus_dmamap_sync(txr->txtag,
4017				    buf->map,
4018				    BUS_DMASYNC_POSTWRITE);
4019				bus_dmamap_unload(txr->txtag,
4020				    buf->map);
4021				m_freem(buf->m_head);
4022				buf->m_head = NULL;
4023			}
4024			++txr->tx_avail;
4025			buf->eop = NULL;
4026
4027		}
4028		++txr->packets;
4029		++processed;
4030		++ifp->if_opackets;
4031		txr->watchdog_time = ticks;
4032
4033		/* Try the next packet */
4034		++txd;
4035		++buf;
4036		++work;
4037		/* reset with a wrap */
4038		if (__predict_false(!work)) {
4039			work -= txr->num_desc;
4040			buf = txr->tx_buffers;
4041			txd = txr->tx_base;
4042		}
4043		prefetch(txd);
4044	} while (__predict_true(--limit));
4045
4046	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
4047	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4048
4049	work += txr->num_desc;
4050	txr->next_to_clean = work;
4051
4052	/*
4053	** Watchdog calculation: we know there is
4054	** work outstanding or the first return above
4055	** would have been taken, so nothing processed
4056	** for too long indicates a hang.
4057	*/
4058	if ((!processed) && ((ticks - txr->watchdog_time) > IGB_WATCHDOG))
4059		txr->queue_status |= IGB_QUEUE_HUNG;
4060
4061	if (txr->tx_avail >= IGB_QUEUE_THRESHOLD)
4062		txr->queue_status &= ~IGB_QUEUE_DEPLETED;
4063
4064	if (txr->tx_avail == txr->num_desc) {
4065		txr->queue_status = IGB_QUEUE_IDLE;
4066		return (FALSE);
4067	}
4068
4069	return (TRUE);
4070}
4071
4072/*********************************************************************
4073 *
4074 *  Refresh mbuf buffers for RX descriptor rings
4075 *   - the routine keeps its own state, so discards due to resource
4076 *     exhaustion are unnecessary; if an mbuf cannot be obtained
4077 *     it simply returns, keeping its placeholder, and can be
4078 *     called again later to retry.
4079 *
4080 **********************************************************************/
4081static void
4082igb_refresh_mbufs(struct rx_ring *rxr, int limit)
4083{
4084	struct adapter		*adapter = rxr->adapter;
4085	bus_dma_segment_t	hseg[1];
4086	bus_dma_segment_t	pseg[1];
4087	struct igb_rx_buf	*rxbuf;
4088	struct mbuf		*mh, *mp;
4089	int			i, j, nsegs, error;
4090	bool			refreshed = FALSE;
4091
4092	i = j = rxr->next_to_refresh;
4093	/*
4094	** Get one descriptor beyond
4095	** our work mark to control
4096	** the loop.
4097        */
4098	if (++j == adapter->num_rx_desc)
4099		j = 0;
4100
4101	while (j != limit) {
4102		rxbuf = &rxr->rx_buffers[i];
4103		/* No hdr mbuf used with header split off */
4104		if (rxr->hdr_split == FALSE)
4105			goto no_split;
4106		if (rxbuf->m_head == NULL) {
4107			mh = m_gethdr(M_NOWAIT, MT_DATA);
4108			if (mh == NULL)
4109				goto update;
4110		} else
4111			mh = rxbuf->m_head;
4112
4113		mh->m_pkthdr.len = mh->m_len = MHLEN;
4114		mh->m_len = MHLEN;
4115		mh->m_flags |= M_PKTHDR;
4116		/* Get the memory mapping */
4117		error = bus_dmamap_load_mbuf_sg(rxr->htag,
4118		    rxbuf->hmap, mh, hseg, &nsegs, BUS_DMA_NOWAIT);
4119		if (error != 0) {
4120			printf("Refresh mbufs: hdr dmamap load"
4121			    " failure - %d\n", error);
4122			m_free(mh);
4123			rxbuf->m_head = NULL;
4124			goto update;
4125		}
4126		rxbuf->m_head = mh;
4127		bus_dmamap_sync(rxr->htag, rxbuf->hmap,
4128		    BUS_DMASYNC_PREREAD);
4129		rxr->rx_base[i].read.hdr_addr =
4130		    htole64(hseg[0].ds_addr);
4131no_split:
4132		if (rxbuf->m_pack == NULL) {
4133			mp = m_getjcl(M_NOWAIT, MT_DATA,
4134			    M_PKTHDR, adapter->rx_mbuf_sz);
4135			if (mp == NULL)
4136				goto update;
4137		} else
4138			mp = rxbuf->m_pack;
4139
4140		mp->m_pkthdr.len = mp->m_len = adapter->rx_mbuf_sz;
4141		/* Get the memory mapping */
4142		error = bus_dmamap_load_mbuf_sg(rxr->ptag,
4143		    rxbuf->pmap, mp, pseg, &nsegs, BUS_DMA_NOWAIT);
4144		if (error != 0) {
4145			printf("Refresh mbufs: payload dmamap load"
4146			    " failure - %d\n", error);
4147			m_free(mp);
4148			rxbuf->m_pack = NULL;
4149			goto update;
4150		}
4151		rxbuf->m_pack = mp;
4152		bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
4153		    BUS_DMASYNC_PREREAD);
4154		rxr->rx_base[i].read.pkt_addr =
4155		    htole64(pseg[0].ds_addr);
4156		refreshed = TRUE; /* I feel wefreshed :) */
4157
4158		i = j; /* our next is precalculated */
4159		rxr->next_to_refresh = i;
4160		if (++j == adapter->num_rx_desc)
4161			j = 0;
4162	}
4163update:
4164	if (refreshed) /* update tail */
4165		E1000_WRITE_REG(&adapter->hw,
4166		    E1000_RDT(rxr->me), rxr->next_to_refresh);
4167	return;
4168}
4169
4170
4171/*********************************************************************
4172 *
4173 *  Allocate memory for rx_buffer structures. Since we use one
4174 *  rx_buffer per received packet, the maximum number of rx_buffer's
4175 *  that we'll need is equal to the number of receive descriptors
4176 *  that we've allocated.
4177 *
4178 **********************************************************************/
4179static int
4180igb_allocate_receive_buffers(struct rx_ring *rxr)
4181{
4182	struct	adapter 	*adapter = rxr->adapter;
4183	device_t 		dev = adapter->dev;
4184	struct igb_rx_buf	*rxbuf;
4185	int             	i, bsize, error;
4186
4187	bsize = sizeof(struct igb_rx_buf) * adapter->num_rx_desc;
4188	if (!(rxr->rx_buffers =
4189	    (struct igb_rx_buf *) malloc(bsize,
4190	    M_DEVBUF, M_NOWAIT | M_ZERO))) {
4191		device_printf(dev, "Unable to allocate rx_buffer memory\n");
4192		error = ENOMEM;
4193		goto fail;
4194	}
4195
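	/*
	** Two DMA tags are used: a small one (MSIZE) for the
	** header buffers used by header split, and a large one
	** (up to 9KB jumbo clusters) for the packet payload.
	*/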
4196	if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
4197				   1, 0,		/* alignment, bounds */
4198				   BUS_SPACE_MAXADDR,	/* lowaddr */
4199				   BUS_SPACE_MAXADDR,	/* highaddr */
4200				   NULL, NULL,		/* filter, filterarg */
4201				   MSIZE,		/* maxsize */
4202				   1,			/* nsegments */
4203				   MSIZE,		/* maxsegsize */
4204				   0,			/* flags */
4205				   NULL,		/* lockfunc */
4206				   NULL,		/* lockfuncarg */
4207				   &rxr->htag))) {
4208		device_printf(dev, "Unable to create RX DMA tag\n");
4209		goto fail;
4210	}
4211
4212	if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
4213				   1, 0,		/* alignment, bounds */
4214				   BUS_SPACE_MAXADDR,	/* lowaddr */
4215				   BUS_SPACE_MAXADDR,	/* highaddr */
4216				   NULL, NULL,		/* filter, filterarg */
4217				   MJUM9BYTES,		/* maxsize */
4218				   1,			/* nsegments */
4219				   MJUM9BYTES,		/* maxsegsize */
4220				   0,			/* flags */
4221				   NULL,		/* lockfunc */
4222				   NULL,		/* lockfuncarg */
4223				   &rxr->ptag))) {
4224		device_printf(dev, "Unable to create RX payload DMA tag\n");
4225		goto fail;
4226	}
4227
4228	for (i = 0; i < adapter->num_rx_desc; i++) {
4229		rxbuf = &rxr->rx_buffers[i];
4230		error = bus_dmamap_create(rxr->htag, 0, &rxbuf->hmap);
4231		if (error) {
4232			device_printf(dev,
4233			    "Unable to create RX head DMA maps\n");
4234			goto fail;
4235		}
4236		error = bus_dmamap_create(rxr->ptag, 0, &rxbuf->pmap);
4237		if (error) {
4238			device_printf(dev,
4239			    "Unable to create RX packet DMA maps\n");
4240			goto fail;
4241		}
4242	}
4243
4244	return (0);
4245
4246fail:
4247	/* Frees all, but can handle partial completion */
4248	igb_free_receive_structures(adapter);
4249	return (error);
4250}
4251
4252
4253static void
4254igb_free_receive_ring(struct rx_ring *rxr)
4255{
4256	struct	adapter		*adapter = rxr->adapter;
4257	struct igb_rx_buf	*rxbuf;
4258
4259
4260	for (int i = 0; i < adapter->num_rx_desc; i++) {
4261		rxbuf = &rxr->rx_buffers[i];
4262		if (rxbuf->m_head != NULL) {
4263			bus_dmamap_sync(rxr->htag, rxbuf->hmap,
4264			    BUS_DMASYNC_POSTREAD);
4265			bus_dmamap_unload(rxr->htag, rxbuf->hmap);
4266			rxbuf->m_head->m_flags |= M_PKTHDR;
4267			m_freem(rxbuf->m_head);
4268		}
4269		if (rxbuf->m_pack != NULL) {
4270			bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
4271			    BUS_DMASYNC_POSTREAD);
4272			bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
4273			rxbuf->m_pack->m_flags |= M_PKTHDR;
4274			m_freem(rxbuf->m_pack);
4275		}
4276		rxbuf->m_head = NULL;
4277		rxbuf->m_pack = NULL;
4278	}
4279}
4280
4281
4282/*********************************************************************
4283 *
4284 *  Initialize a receive ring and its buffers.
4285 *
4286 **********************************************************************/
4287static int
4288igb_setup_receive_ring(struct rx_ring *rxr)
4289{
4290	struct	adapter		*adapter;
4291	struct  ifnet		*ifp;
4292	device_t		dev;
4293	struct igb_rx_buf	*rxbuf;
4294	bus_dma_segment_t	pseg[1], hseg[1];
4295	struct lro_ctrl		*lro = &rxr->lro;
4296	int			rsize, nsegs, error = 0;
4297#ifdef DEV_NETMAP
4298	struct netmap_adapter *na = NA(rxr->adapter->ifp);
4299	struct netmap_slot *slot;
4300#endif /* DEV_NETMAP */
4301
4302	adapter = rxr->adapter;
4303	dev = adapter->dev;
4304	ifp = adapter->ifp;
4305
4306	/* Clear the ring contents */
4307	IGB_RX_LOCK(rxr);
4308#ifdef DEV_NETMAP
4309	slot = netmap_reset(na, NR_RX, rxr->me, 0);
4310#endif /* DEV_NETMAP */
4311	rsize = roundup2(adapter->num_rx_desc *
4312	    sizeof(union e1000_adv_rx_desc), IGB_DBA_ALIGN);
4313	bzero((void *)rxr->rx_base, rsize);
4314
4315	/*
4316	** Free current RX buffer structures and their mbufs
4317	*/
4318	igb_free_receive_ring(rxr);
4319
4320	/* Configure for header split? */
4321	if (igb_header_split)
4322		rxr->hdr_split = TRUE;
4323
4324        /* Now replenish the ring mbufs */
4325	for (int j = 0; j < adapter->num_rx_desc; ++j) {
4326		struct mbuf	*mh, *mp;
4327
4328		rxbuf = &rxr->rx_buffers[j];
4329#ifdef DEV_NETMAP
4330		if (slot) {
4331			/* slot sj is mapped to the i-th NIC-ring entry */
4332			int sj = netmap_idx_n2k(&na->rx_rings[rxr->me], j);
4333			uint64_t paddr;
4334			void *addr;
4335
4336			addr = PNMB(slot + sj, &paddr);
4337			netmap_load_map(rxr->ptag, rxbuf->pmap, addr);
4338			/* Update descriptor */
4339			rxr->rx_base[j].read.pkt_addr = htole64(paddr);
4340			continue;
4341		}
4342#endif /* DEV_NETMAP */
4343		if (rxr->hdr_split == FALSE)
4344			goto skip_head;
4345
4346		/* First the header */
4347		rxbuf->m_head = m_gethdr(M_NOWAIT, MT_DATA);
4348		if (rxbuf->m_head == NULL) {
4349			error = ENOBUFS;
4350                        goto fail;
4351		}
4352		m_adj(rxbuf->m_head, ETHER_ALIGN);
4353		mh = rxbuf->m_head;
4354		mh->m_len = mh->m_pkthdr.len = MHLEN;
4355		mh->m_flags |= M_PKTHDR;
4356		/* Get the memory mapping */
4357		error = bus_dmamap_load_mbuf_sg(rxr->htag,
4358		    rxbuf->hmap, rxbuf->m_head, hseg,
4359		    &nsegs, BUS_DMA_NOWAIT);
4360		if (error != 0) /* Nothing elegant to do here */
4361                        goto fail;
4362		bus_dmamap_sync(rxr->htag,
4363		    rxbuf->hmap, BUS_DMASYNC_PREREAD);
4364		/* Update descriptor */
4365		rxr->rx_base[j].read.hdr_addr = htole64(hseg[0].ds_addr);
4366
4367skip_head:
4368		/* Now the payload cluster */
4369		rxbuf->m_pack = m_getjcl(M_NOWAIT, MT_DATA,
4370		    M_PKTHDR, adapter->rx_mbuf_sz);
4371		if (rxbuf->m_pack == NULL) {
4372			error = ENOBUFS;
4373                        goto fail;
4374		}
4375		mp = rxbuf->m_pack;
4376		mp->m_pkthdr.len = mp->m_len = adapter->rx_mbuf_sz;
4377		/* Get the memory mapping */
4378		error = bus_dmamap_load_mbuf_sg(rxr->ptag,
4379		    rxbuf->pmap, mp, pseg,
4380		    &nsegs, BUS_DMA_NOWAIT);
4381		if (error != 0)
4382                        goto fail;
4383		bus_dmamap_sync(rxr->ptag,
4384		    rxbuf->pmap, BUS_DMASYNC_PREREAD);
4385		/* Update descriptor */
4386		rxr->rx_base[j].read.pkt_addr = htole64(pseg[0].ds_addr);
4387        }
4388
4389	/* Setup our descriptor indices */
4390	rxr->next_to_check = 0;
4391	rxr->next_to_refresh = adapter->num_rx_desc - 1;
4392	rxr->lro_enabled = FALSE;
4393	rxr->rx_split_packets = 0;
4394	rxr->rx_bytes = 0;
4395
4396	rxr->fmp = NULL;
4397	rxr->lmp = NULL;
4398	rxr->discard = FALSE;
4399
4400	bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4401	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4402
4403	/*
4404	** Now set up the LRO interface; we also
4405	** only do header split when LRO is
4406	** enabled, since it is often undesirable
4407	** in other setups.
4408	*/
4409	if (ifp->if_capenable & IFCAP_LRO) {
4410		error = tcp_lro_init(lro);
4411		if (error) {
4412			device_printf(dev, "LRO Initialization failed!\n");
4413			goto fail;
4414		}
4415		INIT_DEBUGOUT("RX LRO Initialized\n");
4416		rxr->lro_enabled = TRUE;
4417		lro->ifp = adapter->ifp;
4418	}
4419
4420	IGB_RX_UNLOCK(rxr);
4421	return (0);
4422
4423fail:
4424	igb_free_receive_ring(rxr);
4425	IGB_RX_UNLOCK(rxr);
4426	return (error);
4427}
4428
4429
4430/*********************************************************************
4431 *
4432 *  Initialize all receive rings.
4433 *
4434 **********************************************************************/
4435static int
4436igb_setup_receive_structures(struct adapter *adapter)
4437{
4438	struct rx_ring *rxr = adapter->rx_rings;
4439	int i;
4440
4441	for (i = 0; i < adapter->num_queues; i++, rxr++)
4442		if (igb_setup_receive_ring(rxr))
4443			goto fail;
4444
4445	return (0);
4446fail:
4447	/*
4448	 * Free the RX buffers allocated so far; we only handle
4449	 * the rings that completed, since the failing ring has
4450	 * already cleaned up after itself. 'i' is the endpoint.
4451	 */
4452	for (int j = 0; j < i; ++j) {
4453		rxr = &adapter->rx_rings[j];
4454		IGB_RX_LOCK(rxr);
4455		igb_free_receive_ring(rxr);
4456		IGB_RX_UNLOCK(rxr);
4457	}
4458
4459	return (ENOBUFS);
4460}
4461
4462/*********************************************************************
4463 *
4464 *  Enable receive unit.
4465 *
4466 **********************************************************************/
4467static void
4468igb_initialize_receive_units(struct adapter *adapter)
4469{
4470	struct rx_ring	*rxr = adapter->rx_rings;
4471	struct ifnet	*ifp = adapter->ifp;
4472	struct e1000_hw *hw = &adapter->hw;
4473	u32		rctl, rxcsum, psize, srrctl = 0;
4474
4475	INIT_DEBUGOUT("igb_initialize_receive_unit: begin");
4476
4477	/*
4478	 * Make sure receives are disabled while setting
4479	 * up the descriptor ring
4480	 */
4481	rctl = E1000_READ_REG(hw, E1000_RCTL);
4482	E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
4483
4484	/*
4485	** Set up for header split
4486	*/
4487	if (igb_header_split) {
4488		/* Use a standard mbuf for the header */
4489		srrctl |= IGB_HDR_BUF << E1000_SRRCTL_BSIZEHDRSIZE_SHIFT;
4490		srrctl |= E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
4491	} else
4492		srrctl |= E1000_SRRCTL_DESCTYPE_ADV_ONEBUF;
4493
4494	/*
4495	** Set up for jumbo frames
4496	*/
4497	if (ifp->if_mtu > ETHERMTU) {
4498		rctl |= E1000_RCTL_LPE;
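		/* SRRCTL.BSIZEPKT is expressed in 1KB units, hence the shift */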
4499		if (adapter->rx_mbuf_sz == MJUMPAGESIZE) {
4500			srrctl |= 4096 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
4501			rctl |= E1000_RCTL_SZ_4096 | E1000_RCTL_BSEX;
4502		} else if (adapter->rx_mbuf_sz > MJUMPAGESIZE) {
4503			srrctl |= 8192 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
4504			rctl |= E1000_RCTL_SZ_8192 | E1000_RCTL_BSEX;
4505		}
4506		/* Set maximum packet len */
4507		psize = adapter->max_frame_size;
4508		/* are we on a vlan? */
4509		if (adapter->ifp->if_vlantrunk != NULL)
4510			psize += VLAN_TAG_SIZE;
4511		E1000_WRITE_REG(&adapter->hw, E1000_RLPML, psize);
4512	} else {
4513		rctl &= ~E1000_RCTL_LPE;
4514		srrctl |= 2048 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
4515		rctl |= E1000_RCTL_SZ_2048;
4516	}
4517
4518	/* Setup the Base and Length of the Rx Descriptor Rings */
4519	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4520		u64 bus_addr = rxr->rxdma.dma_paddr;
4521		u32 rxdctl;
4522
4523		E1000_WRITE_REG(hw, E1000_RDLEN(i),
4524		    adapter->num_rx_desc * sizeof(struct e1000_rx_desc));
4525		E1000_WRITE_REG(hw, E1000_RDBAH(i),
4526		    (uint32_t)(bus_addr >> 32));
4527		E1000_WRITE_REG(hw, E1000_RDBAL(i),
4528		    (uint32_t)bus_addr);
4529		E1000_WRITE_REG(hw, E1000_SRRCTL(i), srrctl);
4530		/* Enable this Queue */
4531		rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(i));
4532		rxdctl |= E1000_RXDCTL_QUEUE_ENABLE;
4533		rxdctl &= 0xFFF00000;
4534		rxdctl |= IGB_RX_PTHRESH;
4535		rxdctl |= IGB_RX_HTHRESH << 8;
4536		rxdctl |= IGB_RX_WTHRESH << 16;
4537		E1000_WRITE_REG(hw, E1000_RXDCTL(i), rxdctl);
4538	}
4539
4540	/*
4541	** Setup for RX MultiQueue
4542	*/
4543	rxcsum = E1000_READ_REG(hw, E1000_RXCSUM);
4544	if (adapter->num_queues > 1) {
4545		u32 random[10], mrqc, shift = 0;
4546		union igb_reta {
4547			u32 dword;
4548			u8  bytes[4];
4549		} reta;
4550
4551		arc4rand(&random, sizeof(random), 0);
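		/*
		** The 82575 stores the queue index in the upper bits
		** of each RETA byte, hence the shift of 6 below.
		*/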
4552		if (adapter->hw.mac.type == e1000_82575)
4553			shift = 6;
4554		/* Warning FM follows: fill the 128-entry RSS redirection table */
4555		for (int i = 0; i < 128; i++) {
4556			reta.bytes[i & 3] =
4557			    (i % adapter->num_queues) << shift;
4558			if ((i & 3) == 3)
4559				E1000_WRITE_REG(hw,
4560				    E1000_RETA(i >> 2), reta.dword);
4561		}
4562		/* Now fill in hash table */
4563		mrqc = E1000_MRQC_ENABLE_RSS_4Q;
4564		for (int i = 0; i < 10; i++)
4565			E1000_WRITE_REG_ARRAY(hw,
4566			    E1000_RSSRK(0), i, random[i]);
4567
4568		mrqc |= (E1000_MRQC_RSS_FIELD_IPV4 |
4569		    E1000_MRQC_RSS_FIELD_IPV4_TCP);
4570		mrqc |= (E1000_MRQC_RSS_FIELD_IPV6 |
4571		    E1000_MRQC_RSS_FIELD_IPV6_TCP);
4572		mrqc |= (E1000_MRQC_RSS_FIELD_IPV4_UDP |
4573		    E1000_MRQC_RSS_FIELD_IPV6_UDP);
4574		mrqc |= (E1000_MRQC_RSS_FIELD_IPV6_UDP_EX |
4575		    E1000_MRQC_RSS_FIELD_IPV6_TCP_EX);
4576
4577		E1000_WRITE_REG(hw, E1000_MRQC, mrqc);
4578
4579		/*
4580		** NOTE: Receive Full-Packet Checksum Offload
4581		** is mutually exclusive with Multiqueue; however,
4582		** this is not the same as the TCP/IP checksum
4583		** offloads, which still work.
4584		*/
4585		rxcsum |= E1000_RXCSUM_PCSD;
4586#if __FreeBSD_version >= 800000
4587		/* For SCTP Offload */
4588		if ((hw->mac.type == e1000_82576)
4589		    && (ifp->if_capenable & IFCAP_RXCSUM))
4590			rxcsum |= E1000_RXCSUM_CRCOFL;
4591#endif
4592	} else {
4593		/* Non RSS setup */
4594		if (ifp->if_capenable & IFCAP_RXCSUM) {
4595			rxcsum |= E1000_RXCSUM_IPPCSE;
4596#if __FreeBSD_version >= 800000
4597			if (adapter->hw.mac.type == e1000_82576)
4598				rxcsum |= E1000_RXCSUM_CRCOFL;
4599#endif
4600		} else
4601			rxcsum &= ~E1000_RXCSUM_TUOFL;
4602	}
4603	E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum);
4604
4605	/* Setup the Receive Control Register */
4606	rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
4607	rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_LBM_NO |
4608		   E1000_RCTL_RDMTS_HALF |
4609		   (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
4610	/* Strip CRC bytes. */
4611	rctl |= E1000_RCTL_SECRC;
4612	/* Make sure VLAN Filters are off */
4613	rctl &= ~E1000_RCTL_VFE;
4614	/* Don't store bad packets */
4615	rctl &= ~E1000_RCTL_SBP;
4616
4617	/* Enable Receives */
4618	E1000_WRITE_REG(hw, E1000_RCTL, rctl);
4619
4620	/*
4621	 * Setup the HW Rx Head and Tail Descriptor Pointers
4622	 *   - needs to be after enable
4623	 */
4624	for (int i = 0; i < adapter->num_queues; i++) {
4625		rxr = &adapter->rx_rings[i];
4626		E1000_WRITE_REG(hw, E1000_RDH(i), rxr->next_to_check);
4627#ifdef DEV_NETMAP
4628		/*
4629		 * an init() while a netmap client is active must
4630		 * preserve the rx buffers passed to userspace.
4631		 * In this driver it means we adjust RDT to
4632		 * something different from next_to_refresh
4633		 * (which is not used in netmap mode).
4634		 */
4635		if (ifp->if_capenable & IFCAP_NETMAP) {
4636			struct netmap_adapter *na = NA(adapter->ifp);
4637			struct netmap_kring *kring = &na->rx_rings[i];
4638			int t = rxr->next_to_refresh - nm_kr_rxspace(kring);
4639
4640			if (t >= adapter->num_rx_desc)
4641				t -= adapter->num_rx_desc;
4642			else if (t < 0)
4643				t += adapter->num_rx_desc;
4644			E1000_WRITE_REG(hw, E1000_RDT(i), t);
4645		} else
4646#endif /* DEV_NETMAP */
4647		E1000_WRITE_REG(hw, E1000_RDT(i), rxr->next_to_refresh);
4648	}
4649	return;
4650}
4651
4652/*********************************************************************
4653 *
4654 *  Free receive rings.
4655 *
4656 **********************************************************************/
4657static void
4658igb_free_receive_structures(struct adapter *adapter)
4659{
4660	struct rx_ring *rxr = adapter->rx_rings;
4661
4662	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4663		struct lro_ctrl	*lro = &rxr->lro;
4664		igb_free_receive_buffers(rxr);
4665		tcp_lro_free(lro);
4666		igb_dma_free(adapter, &rxr->rxdma);
4667	}
4668
4669	free(adapter->rx_rings, M_DEVBUF);
4670}
4671
4672/*********************************************************************
4673 *
4674 *  Free receive ring data structures.
4675 *
4676 **********************************************************************/
4677static void
4678igb_free_receive_buffers(struct rx_ring *rxr)
4679{
4680	struct adapter		*adapter = rxr->adapter;
4681	struct igb_rx_buf	*rxbuf;
4682	int i;
4683
4684	INIT_DEBUGOUT("free_receive_structures: begin");
4685
4686	/* Cleanup any existing buffers */
4687	if (rxr->rx_buffers != NULL) {
4688		for (i = 0; i < adapter->num_rx_desc; i++) {
4689			rxbuf = &rxr->rx_buffers[i];
4690			if (rxbuf->m_head != NULL) {
4691				bus_dmamap_sync(rxr->htag, rxbuf->hmap,
4692				    BUS_DMASYNC_POSTREAD);
4693				bus_dmamap_unload(rxr->htag, rxbuf->hmap);
4694				rxbuf->m_head->m_flags |= M_PKTHDR;
4695				m_freem(rxbuf->m_head);
4696			}
4697			if (rxbuf->m_pack != NULL) {
4698				bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
4699				    BUS_DMASYNC_POSTREAD);
4700				bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
4701				rxbuf->m_pack->m_flags |= M_PKTHDR;
4702				m_freem(rxbuf->m_pack);
4703			}
4704			rxbuf->m_head = NULL;
4705			rxbuf->m_pack = NULL;
4706			if (rxbuf->hmap != NULL) {
4707				bus_dmamap_destroy(rxr->htag, rxbuf->hmap);
4708				rxbuf->hmap = NULL;
4709			}
4710			if (rxbuf->pmap != NULL) {
4711				bus_dmamap_destroy(rxr->ptag, rxbuf->pmap);
4712				rxbuf->pmap = NULL;
4713			}
4714		}
4715		if (rxr->rx_buffers != NULL) {
4716			free(rxr->rx_buffers, M_DEVBUF);
4717			rxr->rx_buffers = NULL;
4718		}
4719	}
4720
4721	if (rxr->htag != NULL) {
4722		bus_dma_tag_destroy(rxr->htag);
4723		rxr->htag = NULL;
4724	}
4725	if (rxr->ptag != NULL) {
4726		bus_dma_tag_destroy(rxr->ptag);
4727		rxr->ptag = NULL;
4728	}
4729}
4730
4731static __inline void
4732igb_rx_discard(struct rx_ring *rxr, int i)
4733{
4734	struct igb_rx_buf	*rbuf;
4735
4736	rbuf = &rxr->rx_buffers[i];
4737
4738	/* Partially received? Free the chain */
4739	if (rxr->fmp != NULL) {
4740		rxr->fmp->m_flags |= M_PKTHDR;
4741		m_freem(rxr->fmp);
4742		rxr->fmp = NULL;
4743		rxr->lmp = NULL;
4744	}
4745
4746	/*
4747	** With advanced descriptors the writeback
4748	** clobbers the buffer addresses, so it's easier
4749	** to just free the existing mbufs and take
4750	** the normal refresh path to get new buffers
4751	** and mappings.
4752	*/
4753	if (rbuf->m_head) {
4754		m_free(rbuf->m_head);
4755		rbuf->m_head = NULL;
4756		bus_dmamap_unload(rxr->htag, rbuf->hmap);
4757	}
4758
4759	if (rbuf->m_pack) {
4760		m_free(rbuf->m_pack);
4761		rbuf->m_pack = NULL;
4762		bus_dmamap_unload(rxr->ptag, rbuf->pmap);
4763	}
4764
4765	return;
4766}
4767
4768static __inline void
4769igb_rx_input(struct rx_ring *rxr, struct ifnet *ifp, struct mbuf *m, u32 ptype)
4770{
4771
4772	/*
4773	 * At the moment LRO is only for IPv4/TCP packets whose TCP checksum
4774	 * has been verified by the hardware, and which carry no VLAN tag in
4775	 * the ethernet header.
4776	 */
4777	if (rxr->lro_enabled &&
4778	    (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 &&
4779	    (ptype & E1000_RXDADV_PKTTYPE_ETQF) == 0 &&
4780	    (ptype & (E1000_RXDADV_PKTTYPE_IPV4 | E1000_RXDADV_PKTTYPE_TCP)) ==
4781	    (E1000_RXDADV_PKTTYPE_IPV4 | E1000_RXDADV_PKTTYPE_TCP) &&
4782	    (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) ==
4783	    (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) {
4784		/*
4785		 * Send to the stack if:
4786		 *  - LRO not enabled, or
4787		 *  - no LRO resources, or
4788		 *  - LRO enqueue fails
4789		 */
4790		if (rxr->lro.lro_cnt != 0)
4791			if (tcp_lro_rx(&rxr->lro, m, 0) == 0)
4792				return;
4793	}
4794	IGB_RX_UNLOCK(rxr);
4795	(*ifp->if_input)(ifp, m);
4796	IGB_RX_LOCK(rxr);
4797}
4798
4799/*********************************************************************
4800 *
4801 *  This routine executes in interrupt context. It replenishes
4802 *  the mbufs in the descriptor and sends data which has been
4803 *  dma'ed into host memory to upper layer.
4804 *
4805 *  We loop at most count times if count is > 0, or until done if
4806 *  count < 0.
4807 *
4808 *  Return TRUE if more to clean, FALSE otherwise
4809 *********************************************************************/
4810static bool
4811igb_rxeof(struct igb_queue *que, int count, int *done)
4812{
4813	struct adapter		*adapter = que->adapter;
4814	struct rx_ring		*rxr = que->rxr;
4815	struct ifnet		*ifp = adapter->ifp;
4816	struct lro_ctrl		*lro = &rxr->lro;
4817	struct lro_entry	*queued;
4818	int			i, processed = 0, rxdone = 0;
4819	u32			ptype, staterr = 0;
4820	union e1000_adv_rx_desc	*cur;
4821
4822	IGB_RX_LOCK(rxr);
4823	/* Sync the ring. */
4824	bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4825	    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
4826
4827#ifdef DEV_NETMAP
4828	if (netmap_rx_irq(ifp, rxr->me, &processed)) {
4829		IGB_RX_UNLOCK(rxr);
4830		return (FALSE);
4831	}
4832#endif /* DEV_NETMAP */
4833
4834	/* Main clean loop */
4835	for (i = rxr->next_to_check; count != 0;) {
4836		struct mbuf		*sendmp, *mh, *mp;
4837		struct igb_rx_buf	*rxbuf;
4838		u16			hlen, plen, hdr, vtag;
4839		bool			eop = FALSE;
4840
4841		cur = &rxr->rx_base[i];
4842		staterr = le32toh(cur->wb.upper.status_error);
4843		if ((staterr & E1000_RXD_STAT_DD) == 0)
4844			break;
4845		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
4846			break;
4847		count--;
4848		sendmp = mh = mp = NULL;
4849		cur->wb.upper.status_error = 0;
4850		rxbuf = &rxr->rx_buffers[i];
4851		plen = le16toh(cur->wb.upper.length);
4852		ptype = le32toh(cur->wb.lower.lo_dword.data) & IGB_PKTTYPE_MASK;
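		/*
		** On i350/i354 the VLAN tag of a loopback (LB) packet
		** is written back big-endian, otherwise little-endian.
		*/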
4853		if (((adapter->hw.mac.type == e1000_i350) ||
4854		    (adapter->hw.mac.type == e1000_i354)) &&
4855		    (staterr & E1000_RXDEXT_STATERR_LB))
4856			vtag = be16toh(cur->wb.upper.vlan);
4857		else
4858			vtag = le16toh(cur->wb.upper.vlan);
4859		hdr = le16toh(cur->wb.lower.lo_dword.hs_rss.hdr_info);
4860		eop = ((staterr & E1000_RXD_STAT_EOP) == E1000_RXD_STAT_EOP);
4861
4862		/* Make sure all segments of a bad packet are discarded */
4863		if (((staterr & E1000_RXDEXT_ERR_FRAME_ERR_MASK) != 0) ||
4864		    (rxr->discard)) {
4865			adapter->dropped_pkts++;
4866			++rxr->rx_discarded;
4867			if (!eop) /* Catch subsequent segs */
4868				rxr->discard = TRUE;
4869			else
4870				rxr->discard = FALSE;
4871			igb_rx_discard(rxr, i);
4872			goto next_desc;
4873		}
4874
4875		/*
4876		** The hardware uses the header buffer ONLY
4877		** when header split is enabled; otherwise we
4878		** get the normal behavior, i.e. both the header
4879		** and the payload are DMA'd into the payload
4880		** buffer.
4881		**
4882		** The fmp test catches the case where a packet
4883		** spans multiple descriptors; in that case only
4884		** the first header is valid.
4885		*/
4886		if (rxr->hdr_split && rxr->fmp == NULL) {
4887			bus_dmamap_unload(rxr->htag, rxbuf->hmap);
4888			hlen = (hdr & E1000_RXDADV_HDRBUFLEN_MASK) >>
4889			    E1000_RXDADV_HDRBUFLEN_SHIFT;
4890			if (hlen > IGB_HDR_BUF)
4891				hlen = IGB_HDR_BUF;
4892			mh = rxr->rx_buffers[i].m_head;
4893			mh->m_len = hlen;
4894			/* clear buf pointer for refresh */
4895			rxbuf->m_head = NULL;
4896			/*
4897			** Get the payload length; this
4898			** could be zero if it's a small
4899			** packet.
4900			*/
4901			if (plen > 0) {
4902				mp = rxr->rx_buffers[i].m_pack;
4903				mp->m_len = plen;
4904				mh->m_next = mp;
4905				/* clear buf pointer */
4906				rxbuf->m_pack = NULL;
4907				rxr->rx_split_packets++;
4908			}
4909		} else {
4910			/*
4911			** Either no header split, or a
4912			** secondary piece of a fragmented
4913			** split packet.
4914			*/
4915			mh = rxr->rx_buffers[i].m_pack;
4916			mh->m_len = plen;
4917			/* clear buf info for refresh */
4918			rxbuf->m_pack = NULL;
4919		}
4920		bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
4921
4922		++processed; /* So we know when to refresh */
4923
4924		/* Initial frame - setup */
4925		if (rxr->fmp == NULL) {
4926			mh->m_pkthdr.len = mh->m_len;
4927			/* Save the head of the chain */
4928			rxr->fmp = mh;
4929			rxr->lmp = mh;
4930			if (mp != NULL) {
4931				/* Add payload if split */
4932				mh->m_pkthdr.len += mp->m_len;
4933				rxr->lmp = mh->m_next;
4934			}
4935		} else {
4936			/* Chain mbuf's together */
4937			rxr->lmp->m_next = mh;
4938			rxr->lmp = rxr->lmp->m_next;
4939			rxr->fmp->m_pkthdr.len += mh->m_len;
4940		}
4941
4942		if (eop) {
4943			rxr->fmp->m_pkthdr.rcvif = ifp;
4944			ifp->if_ipackets++;
4945			rxr->rx_packets++;
4946			/* capture data for AIM */
4947			rxr->packets++;
4948			rxr->bytes += rxr->fmp->m_pkthdr.len;
4949			rxr->rx_bytes += rxr->fmp->m_pkthdr.len;
4950
4951			if ((ifp->if_capenable & IFCAP_RXCSUM) != 0)
4952				igb_rx_checksum(staterr, rxr->fmp, ptype);
4953
4954			if ((ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 &&
4955			    (staterr & E1000_RXD_STAT_VP) != 0) {
4956				rxr->fmp->m_pkthdr.ether_vtag = vtag;
4957				rxr->fmp->m_flags |= M_VLANTAG;
4958			}
4959#ifndef IGB_LEGACY_TX
4960			rxr->fmp->m_pkthdr.flowid = que->msix;
4961			rxr->fmp->m_flags |= M_FLOWID;
4962#endif
4963			sendmp = rxr->fmp;
4964			/* Make sure to set M_PKTHDR. */
4965			sendmp->m_flags |= M_PKTHDR;
4966			rxr->fmp = NULL;
4967			rxr->lmp = NULL;
4968		}
4969
4970next_desc:
4971		bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4972		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4973
4974		/* Advance our pointers to the next descriptor. */
4975		if (++i == adapter->num_rx_desc)
4976			i = 0;
4977		/*
4978		** Send to the stack or LRO
4979		*/
4980		if (sendmp != NULL) {
4981			rxr->next_to_check = i;
4982			igb_rx_input(rxr, ifp, sendmp, ptype);
4983			i = rxr->next_to_check;
4984			rxdone++;
4985		}
4986
4987		/* Every 8 descriptors we go to refresh mbufs */
4988		if (processed == 8) {
4989                        igb_refresh_mbufs(rxr, i);
4990                        processed = 0;
4991		}
4992	}
4993
4994	/* Catch any remainders */
4995	if (igb_rx_unrefreshed(rxr))
4996		igb_refresh_mbufs(rxr, i);
4997
4998	rxr->next_to_check = i;
4999
5000	/*
5001	 * Flush any outstanding LRO work
5002	 */
5003	while ((queued = SLIST_FIRST(&lro->lro_active)) != NULL) {
5004		SLIST_REMOVE_HEAD(&lro->lro_active, next);
5005		tcp_lro_flush(lro, queued);
5006	}
5007
5008	if (done != NULL)
5009		*done += rxdone;
5010
5011	IGB_RX_UNLOCK(rxr);
5012	return ((staterr & E1000_RXD_STAT_DD) ? TRUE : FALSE);
5013}
5014
5015/*********************************************************************
5016 *
5017 *  Verify that the hardware indicated that the checksum is valid.
5018 *  Inform the stack about the status of the checksum so that the
5019 *  stack doesn't spend time verifying it.
5020 *
5021 *********************************************************************/
5022static void
5023igb_rx_checksum(u32 staterr, struct mbuf *mp, u32 ptype)
5024{
5025	u16 status = (u16)staterr;
5026	u8  errors = (u8) (staterr >> 24);
5027	int sctp;
5028
5029	/* Ignore Checksum bit is set */
5030	if (status & E1000_RXD_STAT_IXSM) {
5031		mp->m_pkthdr.csum_flags = 0;
5032		return;
5033	}
5034
5035	if ((ptype & E1000_RXDADV_PKTTYPE_ETQF) == 0 &&
5036	    (ptype & E1000_RXDADV_PKTTYPE_SCTP) != 0)
5037		sctp = 1;
5038	else
5039		sctp = 0;
5040	if (status & E1000_RXD_STAT_IPCS) {
5041		/* Did it pass? */
5042		if (!(errors & E1000_RXD_ERR_IPE)) {
5043			/* IP Checksum Good */
5044			mp->m_pkthdr.csum_flags = CSUM_IP_CHECKED;
5045			mp->m_pkthdr.csum_flags |= CSUM_IP_VALID;
5046		} else
5047			mp->m_pkthdr.csum_flags = 0;
5048	}
5049
5050	if (status & (E1000_RXD_STAT_TCPCS | E1000_RXD_STAT_UDPCS)) {
5051		u64 type = (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
5052#if __FreeBSD_version >= 800000
5053		if (sctp) /* reassign */
5054			type = CSUM_SCTP_VALID;
5055#endif
5056		/* Did it pass? */
5057		if (!(errors & E1000_RXD_ERR_TCPE)) {
5058			mp->m_pkthdr.csum_flags |= type;
5059			if (sctp == 0)
5060				mp->m_pkthdr.csum_data = htons(0xffff);
5061		}
5062	}
5063	return;
5064}
5065
5066/*
5067 * This routine is run via a vlan
5068 * config EVENT
5069 */
5070static void
5071igb_register_vlan(void *arg, struct ifnet *ifp, u16 vtag)
5072{
5073	struct adapter	*adapter = ifp->if_softc;
5074	u32		index, bit;
5075
5076	if (ifp->if_softc !=  arg)   /* Not our event */
5077		return;
5078
5079	if ((vtag == 0) || (vtag > 4095))       /* Invalid */
5080                return;
5081
5082	IGB_CORE_LOCK(adapter);
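	/*
	** The shadow VFTA is 128 32-bit words: bits 11:5 of the
	** VLAN id select the word, bits 4:0 the bit within it.
	*/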
5083	index = (vtag >> 5) & 0x7F;
5084	bit = vtag & 0x1F;
5085	adapter->shadow_vfta[index] |= (1 << bit);
5086	++adapter->num_vlans;
5087	/* Change hw filter setting */
5088	if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
5089		igb_setup_vlan_hw_support(adapter);
5090	IGB_CORE_UNLOCK(adapter);
5091}
5092
5093/*
5094 * This routine is run via a vlan
5095 * unconfig EVENT
5096 */
5097static void
5098igb_unregister_vlan(void *arg, struct ifnet *ifp, u16 vtag)
5099{
5100	struct adapter	*adapter = ifp->if_softc;
5101	u32		index, bit;
5102
5103	if (ifp->if_softc !=  arg)
5104		return;
5105
5106	if ((vtag == 0) || (vtag > 4095))       /* Invalid */
5107                return;
5108
5109	IGB_CORE_LOCK(adapter);
5110	index = (vtag >> 5) & 0x7F;
5111	bit = vtag & 0x1F;
5112	adapter->shadow_vfta[index] &= ~(1 << bit);
5113	--adapter->num_vlans;
5114	/* Change hw filter setting */
5115	if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
5116		igb_setup_vlan_hw_support(adapter);
5117	IGB_CORE_UNLOCK(adapter);
5118}
5119
5120static void
5121igb_setup_vlan_hw_support(struct adapter *adapter)
5122{
5123	struct e1000_hw *hw = &adapter->hw;
5124	struct ifnet	*ifp = adapter->ifp;
5125	u32             reg;
5126
5127	if (adapter->vf_ifp) {
5128		e1000_rlpml_set_vf(hw,
5129		    adapter->max_frame_size + VLAN_TAG_SIZE);
5130		return;
5131	}
5132
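	/* Enable hardware VLAN tag handling (VME) */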
5133	reg = E1000_READ_REG(hw, E1000_CTRL);
5134	reg |= E1000_CTRL_VME;
5135	E1000_WRITE_REG(hw, E1000_CTRL, reg);
5136
5137	/* Enable the Filter Table */
5138	if (ifp->if_capenable & IFCAP_VLAN_HWFILTER) {
5139		reg = E1000_READ_REG(hw, E1000_RCTL);
5140		reg &= ~E1000_RCTL_CFIEN;
5141		reg |= E1000_RCTL_VFE;
5142		E1000_WRITE_REG(hw, E1000_RCTL, reg);
5143	}
5144
5145	/* Update the frame size */
5146	E1000_WRITE_REG(&adapter->hw, E1000_RLPML,
5147	    adapter->max_frame_size + VLAN_TAG_SIZE);
5148
5149	/* Don't bother with table if no vlans */
5150	if ((adapter->num_vlans == 0) ||
5151	    ((ifp->if_capenable & IFCAP_VLAN_HWFILTER) == 0))
5152                return;
5153	/*
5154	** A soft reset zeroes out the VFTA, so
5155	** we need to repopulate it now.
5156	*/
5157	for (int i = 0; i < IGB_VFTA_SIZE; i++)
5158                if (adapter->shadow_vfta[i] != 0) {
5159			if (adapter->vf_ifp)
5160				e1000_vfta_set_vf(hw,
5161				    adapter->shadow_vfta[i], TRUE);
5162			else
5163				e1000_write_vfta(hw,
5164				    i, adapter->shadow_vfta[i]);
5165		}
5166}
5167
5168static void
5169igb_enable_intr(struct adapter *adapter)
5170{
5171	/* With MSI-X/RSS, set up what to auto clear */
5172	if (adapter->msix_mem) {
5173		u32 mask = (adapter->que_mask | adapter->link_mask);
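		/*
		** EIAC auto-clears these MSI-X causes, EIAM auto-masks
		** them, EIMS enables them; link state changes (LSC)
		** still arrive via the legacy interrupt cause.
		*/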
5174		E1000_WRITE_REG(&adapter->hw, E1000_EIAC, mask);
5175		E1000_WRITE_REG(&adapter->hw, E1000_EIAM, mask);
5176		E1000_WRITE_REG(&adapter->hw, E1000_EIMS, mask);
5177		E1000_WRITE_REG(&adapter->hw, E1000_IMS,
5178		    E1000_IMS_LSC);
5179	} else {
5180		E1000_WRITE_REG(&adapter->hw, E1000_IMS,
5181		    IMS_ENABLE_MASK);
5182	}
5183	E1000_WRITE_FLUSH(&adapter->hw);
5184
5185	return;
5186}
5187
5188static void
5189igb_disable_intr(struct adapter *adapter)
5190{
5191	if (adapter->msix_mem) {
5192		E1000_WRITE_REG(&adapter->hw, E1000_EIMC, ~0);
5193		E1000_WRITE_REG(&adapter->hw, E1000_EIAC, 0);
5194	}
5195	E1000_WRITE_REG(&adapter->hw, E1000_IMC, ~0);
5196	E1000_WRITE_FLUSH(&adapter->hw);
5197	return;
5198}
5199
5200/*
5201 * Bit of a misnomer: what this really means is
5202 * to enable OS management of the system, i.e.
5203 * to disable special hardware management features.
5204 */
5205static void
5206igb_init_manageability(struct adapter *adapter)
5207{
5208	if (adapter->has_manage) {
5209		int manc2h = E1000_READ_REG(&adapter->hw, E1000_MANC2H);
5210		int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
5211
5212		/* disable hardware interception of ARP */
5213		manc &= ~(E1000_MANC_ARP_EN);
5214
5215                /* enable receiving management packets to the host */
5216		manc |= E1000_MANC_EN_MNG2HOST;
5217		manc2h |= 1 << 5;  /* Mng Port 623 */
5218		manc2h |= 1 << 6;  /* Mng Port 664 */
5219		E1000_WRITE_REG(&adapter->hw, E1000_MANC2H, manc2h);
5220		E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
5221	}
5222}
5223
5224/*
5225 * Give control back to hardware management
5226 * controller if there is one.
5227 */
5228static void
5229igb_release_manageability(struct adapter *adapter)
5230{
5231	if (adapter->has_manage) {
5232		int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
5233
5234		/* re-enable hardware interception of ARP */
5235		manc |= E1000_MANC_ARP_EN;
5236		manc &= ~E1000_MANC_EN_MNG2HOST;
5237
5238		E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
5239	}
5240}
5241
5242/*
5243 * igb_get_hw_control sets the CTRL_EXT:DRV_LOAD bit.
5244 * For ASF and Pass Through versions of f/w this means that
5245 * the driver is loaded.
5246 *
5247 */
5248static void
5249igb_get_hw_control(struct adapter *adapter)
5250{
5251	u32 ctrl_ext;
5252
5253	if (adapter->vf_ifp)
5254		return;
5255
5256	/* Let firmware know the driver has taken over */
5257	ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
5258	E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
5259	    ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
5260}
5261
5262/*
5263 * igb_release_hw_control resets the CTRL_EXT:DRV_LOAD bit.
5264 * For ASF and Pass Through versions of f/w this means that the
5265 * driver is no longer loaded.
5266 *
5267 */
5268static void
5269igb_release_hw_control(struct adapter *adapter)
5270{
5271	u32 ctrl_ext;
5272
5273	if (adapter->vf_ifp)
5274		return;
5275
5276	/* Let firmware take over control of h/w */
5277	ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
5278	E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
5279	    ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
5280}
5281
5282static int
5283igb_is_valid_ether_addr(uint8_t *addr)
5284{
5285	char zero_addr[6] = { 0, 0, 0, 0, 0, 0 };
5286
5287	if ((addr[0] & 1) || (!bcmp(addr, zero_addr, ETHER_ADDR_LEN))) {
5288		return (FALSE);
5289	}
5290
5291	return (TRUE);
5292}
5293
5294
5295/*
5296 * Enable PCI Wake On Lan capability
5297 */
5298static void
5299igb_enable_wakeup(device_t dev)
5300{
5301	u16     cap, status;
5302	u8      id;
5303
5304	/* First find the capabilities pointer*/
5305	cap = pci_read_config(dev, PCIR_CAP_PTR, 2);
5306	/* Read the PM Capabilities */
5307	id = pci_read_config(dev, cap, 1);
5308	if (id != PCIY_PMG)     /* Something wrong */
5309		return;
5310	/* OK, we have the power capabilities, so
5311	   now get the status register */
5312	cap += PCIR_POWER_STATUS;
5313	status = pci_read_config(dev, cap, 2);
5314	status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE;
5315	pci_write_config(dev, cap, status, 2);
5316	return;
5317}
5318
5319static void
5320igb_led_func(void *arg, int onoff)
5321{
5322	struct adapter	*adapter = arg;
5323
5324	IGB_CORE_LOCK(adapter);
5325	if (onoff) {
5326		e1000_setup_led(&adapter->hw);
5327		e1000_led_on(&adapter->hw);
5328	} else {
5329		e1000_led_off(&adapter->hw);
5330		e1000_cleanup_led(&adapter->hw);
5331	}
5332	IGB_CORE_UNLOCK(adapter);
5333}
5334
5335/**********************************************************************
5336 *
5337 *  Update the board statistics counters.
5338 *
5339 **********************************************************************/
5340static void
5341igb_update_stats_counters(struct adapter *adapter)
5342{
5343	struct ifnet		*ifp;
5344        struct e1000_hw		*hw = &adapter->hw;
5345	struct e1000_hw_stats	*stats;
5346
5347	/*
5348	** The virtual function adapter has only a
5349	** small, controlled set of stats, so update
5350	** only those and return.
5351	*/
5352	if (adapter->vf_ifp) {
5353		igb_update_vf_stats_counters(adapter);
5354		return;
5355	}
5356
5357	stats = (struct e1000_hw_stats	*)adapter->stats;
5358
5359	if (adapter->hw.phy.media_type == e1000_media_type_copper ||
5360	   (E1000_READ_REG(hw, E1000_STATUS) & E1000_STATUS_LU)) {
5361		stats->symerrs +=
5362		    E1000_READ_REG(hw,E1000_SYMERRS);
5363		stats->sec += E1000_READ_REG(hw, E1000_SEC);
5364	}
5365
5366	stats->crcerrs += E1000_READ_REG(hw, E1000_CRCERRS);
5367	stats->mpc += E1000_READ_REG(hw, E1000_MPC);
5368	stats->scc += E1000_READ_REG(hw, E1000_SCC);
5369	stats->ecol += E1000_READ_REG(hw, E1000_ECOL);
5370
5371	stats->mcc += E1000_READ_REG(hw, E1000_MCC);
5372	stats->latecol += E1000_READ_REG(hw, E1000_LATECOL);
5373	stats->colc += E1000_READ_REG(hw, E1000_COLC);
5374	stats->dc += E1000_READ_REG(hw, E1000_DC);
5375	stats->rlec += E1000_READ_REG(hw, E1000_RLEC);
5376	stats->xonrxc += E1000_READ_REG(hw, E1000_XONRXC);
5377	stats->xontxc += E1000_READ_REG(hw, E1000_XONTXC);
5378	/*
5379	** For watchdog management we need to know if we have been
5380	** paused during the last interval, so capture that here.
5381	*/
5382        adapter->pause_frames = E1000_READ_REG(&adapter->hw, E1000_XOFFRXC);
5383        stats->xoffrxc += adapter->pause_frames;
5384	stats->xofftxc += E1000_READ_REG(hw, E1000_XOFFTXC);
5385	stats->fcruc += E1000_READ_REG(hw, E1000_FCRUC);
5386	stats->prc64 += E1000_READ_REG(hw, E1000_PRC64);
5387	stats->prc127 += E1000_READ_REG(hw, E1000_PRC127);
5388	stats->prc255 += E1000_READ_REG(hw, E1000_PRC255);
5389	stats->prc511 += E1000_READ_REG(hw, E1000_PRC511);
5390	stats->prc1023 += E1000_READ_REG(hw, E1000_PRC1023);
5391	stats->prc1522 += E1000_READ_REG(hw, E1000_PRC1522);
5392	stats->gprc += E1000_READ_REG(hw, E1000_GPRC);
5393	stats->bprc += E1000_READ_REG(hw, E1000_BPRC);
5394	stats->mprc += E1000_READ_REG(hw, E1000_MPRC);
5395	stats->gptc += E1000_READ_REG(hw, E1000_GPTC);
5396
5397	/* For the 64-bit byte counters the low dword must be read first. */
5398	/* Both registers clear on the read of the high dword */
5399
5400	stats->gorc += E1000_READ_REG(hw, E1000_GORCL) +
5401	    ((u64)E1000_READ_REG(hw, E1000_GORCH) << 32);
5402	stats->gotc += E1000_READ_REG(hw, E1000_GOTCL) +
5403	    ((u64)E1000_READ_REG(hw, E1000_GOTCH) << 32);
5404
5405	stats->rnbc += E1000_READ_REG(hw, E1000_RNBC);
5406	stats->ruc += E1000_READ_REG(hw, E1000_RUC);
5407	stats->rfc += E1000_READ_REG(hw, E1000_RFC);
5408	stats->roc += E1000_READ_REG(hw, E1000_ROC);
5409	stats->rjc += E1000_READ_REG(hw, E1000_RJC);
5410
5411	stats->tor += E1000_READ_REG(hw, E1000_TORH);
5412	stats->tot += E1000_READ_REG(hw, E1000_TOTH);
5413
5414	stats->tpr += E1000_READ_REG(hw, E1000_TPR);
5415	stats->tpt += E1000_READ_REG(hw, E1000_TPT);
5416	stats->ptc64 += E1000_READ_REG(hw, E1000_PTC64);
5417	stats->ptc127 += E1000_READ_REG(hw, E1000_PTC127);
5418	stats->ptc255 += E1000_READ_REG(hw, E1000_PTC255);
5419	stats->ptc511 += E1000_READ_REG(hw, E1000_PTC511);
5420	stats->ptc1023 += E1000_READ_REG(hw, E1000_PTC1023);
5421	stats->ptc1522 += E1000_READ_REG(hw, E1000_PTC1522);
5422	stats->mptc += E1000_READ_REG(hw, E1000_MPTC);
5423	stats->bptc += E1000_READ_REG(hw, E1000_BPTC);
5424
5425	/* Interrupt Counts */
5426
5427	stats->iac += E1000_READ_REG(hw, E1000_IAC);
5428	stats->icrxptc += E1000_READ_REG(hw, E1000_ICRXPTC);
5429	stats->icrxatc += E1000_READ_REG(hw, E1000_ICRXATC);
5430	stats->ictxptc += E1000_READ_REG(hw, E1000_ICTXPTC);
5431	stats->ictxatc += E1000_READ_REG(hw, E1000_ICTXATC);
5432	stats->ictxqec += E1000_READ_REG(hw, E1000_ICTXQEC);
5433	stats->ictxqmtc += E1000_READ_REG(hw, E1000_ICTXQMTC);
5434	stats->icrxdmtc += E1000_READ_REG(hw, E1000_ICRXDMTC);
5435	stats->icrxoc += E1000_READ_REG(hw, E1000_ICRXOC);
5436
5437	/* Host to Card Statistics */
5438
5439	stats->cbtmpc += E1000_READ_REG(hw, E1000_CBTMPC);
5440	stats->htdpmc += E1000_READ_REG(hw, E1000_HTDPMC);
5441	stats->cbrdpc += E1000_READ_REG(hw, E1000_CBRDPC);
5442	stats->cbrmpc += E1000_READ_REG(hw, E1000_CBRMPC);
5443	stats->rpthc += E1000_READ_REG(hw, E1000_RPTHC);
5444	stats->hgptc += E1000_READ_REG(hw, E1000_HGPTC);
5445	stats->htcbdpc += E1000_READ_REG(hw, E1000_HTCBDPC);
5446	stats->hgorc += (E1000_READ_REG(hw, E1000_HGORCL) +
5447	    ((u64)E1000_READ_REG(hw, E1000_HGORCH) << 32));
5448	stats->hgotc += (E1000_READ_REG(hw, E1000_HGOTCL) +
5449	    ((u64)E1000_READ_REG(hw, E1000_HGOTCH) << 32));
5450	stats->lenerrs += E1000_READ_REG(hw, E1000_LENERRS);
5451	stats->scvpc += E1000_READ_REG(hw, E1000_SCVPC);
5452	stats->hrmpc += E1000_READ_REG(hw, E1000_HRMPC);
5453
5454	stats->algnerrc += E1000_READ_REG(hw, E1000_ALGNERRC);
5455	stats->rxerrc += E1000_READ_REG(hw, E1000_RXERRC);
5456	stats->tncrs += E1000_READ_REG(hw, E1000_TNCRS);
5457	stats->cexterr += E1000_READ_REG(hw, E1000_CEXTERR);
5458	stats->tsctc += E1000_READ_REG(hw, E1000_TSCTC);
5459	stats->tsctfc += E1000_READ_REG(hw, E1000_TSCTFC);
5460
5461	ifp = adapter->ifp;
5462	ifp->if_collisions = stats->colc;
5463
5464	/* Rx Errors */
5465	ifp->if_ierrors = adapter->dropped_pkts + stats->rxerrc +
5466	    stats->crcerrs + stats->algnerrc +
5467	    stats->ruc + stats->roc + stats->mpc + stats->cexterr;
5468
5469	/* Tx Errors */
5470	ifp->if_oerrors = stats->ecol +
5471	    stats->latecol + adapter->watchdog_events;
5472
5473	/* Driver specific counters */
5474	adapter->device_control = E1000_READ_REG(hw, E1000_CTRL);
5475	adapter->rx_control = E1000_READ_REG(hw, E1000_RCTL);
5476	adapter->int_mask = E1000_READ_REG(hw, E1000_IMS);
5477	adapter->eint_mask = E1000_READ_REG(hw, E1000_EIMS);
5478	adapter->packet_buf_alloc_tx =
5479	    ((E1000_READ_REG(hw, E1000_PBA) & 0xffff0000) >> 16);
5480	adapter->packet_buf_alloc_rx =
5481	    (E1000_READ_REG(hw, E1000_PBA) & 0xffff);
5482}
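/*
** For reference, the PBA split read above packs the TX allocation in the
** upper 16 bits and the RX allocation in the lower 16 bits.  Worked example
** (hypothetical register value, units as reported by the hardware,
** typically KB): a PBA reading of 0x00100022 would yield
** packet_buf_alloc_tx = 0x0010 (16) and packet_buf_alloc_rx = 0x0022 (34).
*/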
5483
5484
5485/**********************************************************************
5486 *
5487 *  Initialize the VF board statistics counters.
5488 *
5489 **********************************************************************/
5490static void
5491igb_vf_init_stats(struct adapter *adapter)
5492{
5493	struct e1000_hw *hw = &adapter->hw;
5494	struct e1000_vf_stats	*stats;
5495
5496	stats = (struct e1000_vf_stats	*)adapter->stats;
5497	if (stats == NULL)
5498		return;
5499	stats->last_gprc = E1000_READ_REG(hw, E1000_VFGPRC);
5500	stats->last_gorc = E1000_READ_REG(hw, E1000_VFGORC);
5501	stats->last_gptc = E1000_READ_REG(hw, E1000_VFGPTC);
5502	stats->last_gotc = E1000_READ_REG(hw, E1000_VFGOTC);
5503	stats->last_mprc = E1000_READ_REG(hw, E1000_VFMPRC);
5504}
5505
5506/**********************************************************************
5507 *
5508 *  Update the VF board statistics counters.
5509 *
5510 **********************************************************************/
5511static void
5512igb_update_vf_stats_counters(struct adapter *adapter)
5513{
5514	struct e1000_hw *hw = &adapter->hw;
5515	struct e1000_vf_stats	*stats;
5516
5517	if (adapter->link_speed == 0)
5518		return;
5519
5520	stats = (struct e1000_vf_stats	*)adapter->stats;
5521
5522	UPDATE_VF_REG(E1000_VFGPRC,
5523	    stats->last_gprc, stats->gprc);
5524	UPDATE_VF_REG(E1000_VFGORC,
5525	    stats->last_gorc, stats->gorc);
5526	UPDATE_VF_REG(E1000_VFGPTC,
5527	    stats->last_gptc, stats->gptc);
5528	UPDATE_VF_REG(E1000_VFGOTC,
5529	    stats->last_gotc, stats->gotc);
5530	UPDATE_VF_REG(E1000_VFMPRC,
5531	    stats->last_mprc, stats->mprc);
5532}
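/*
** UPDATE_VF_REG (defined outside this excerpt) is expected to fold a 32-bit
** hardware counter into a 64-bit accumulator while tolerating rollover.  A
** minimal sketch of that pattern, assuming 'last' holds the previous raw
** reading and 'cur' the running 64-bit total (illustrative only, not the
** macro itself):
**
**	u32 now = E1000_READ_REG(hw, reg);
**	cur += (u32)(now - last);	/* modular subtraction absorbs wrap */
**	last = now;
*/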
5533
5534/* Export a single 32-bit register via a read-only sysctl. */
5535static int
5536igb_sysctl_reg_handler(SYSCTL_HANDLER_ARGS)
5537{
5538	struct adapter *adapter;
5539	u_int val;
5540
5541	adapter = oidp->oid_arg1;
5542	val = E1000_READ_REG(&adapter->hw, oidp->oid_arg2);
5543	return (sysctl_handle_int(oidp, &val, 0, req));
5544}
5545
5546/*
5547**  Tuneable interrupt rate handler
5548*/
5549static int
5550igb_sysctl_interrupt_rate_handler(SYSCTL_HANDLER_ARGS)
5551{
5552	struct igb_queue	*que = ((struct igb_queue *)oidp->oid_arg1);
5553	int			error;
5554	u32			reg, usec, rate;
5555
5556	reg = E1000_READ_REG(&que->adapter->hw, E1000_EITR(que->msix));
5557	usec = ((reg & 0x7FFC) >> 2);
5558	if (usec > 0)
5559		rate = 1000000 / usec;
5560	else
5561		rate = 0;
5562	error = sysctl_handle_int(oidp, &rate, 0, req);
5563	if (error || !req->newptr)
5564		return (error);
5565	return (0);
5566}
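/*
** The EITR interval field occupies bits 2-14, so the handler above recovers
** the interval with (reg & 0x7FFC) >> 2, treats it as microseconds, and
** reports the equivalent interrupt rate.  Worked example (hypothetical
** register value): reg = 0x01F4 gives usec = 0x1F4 >> 2 = 125, so
** rate = 1000000 / 125 = 8000 interrupts per second.
*/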
5567
5568/*
5569 * Add sysctl variables, one per statistic, to the system.
5570 */
5571static void
5572igb_add_hw_stats(struct adapter *adapter)
5573{
5574	device_t dev = adapter->dev;
5575
5576	struct tx_ring *txr = adapter->tx_rings;
5577	struct rx_ring *rxr = adapter->rx_rings;
5578
5579	struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev);
5580	struct sysctl_oid *tree = device_get_sysctl_tree(dev);
5581	struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree);
5582	struct e1000_hw_stats *stats = adapter->stats;
5583
5584	struct sysctl_oid *stat_node, *queue_node, *int_node, *host_node;
5585	struct sysctl_oid_list *stat_list, *queue_list, *int_list, *host_list;
5586
5587#define QUEUE_NAME_LEN 32
5588	char namebuf[QUEUE_NAME_LEN];
5589
5590	/* Driver Statistics */
5591	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "link_irq",
5592			CTLFLAG_RD, &adapter->link_irq, 0,
5593			"Link MSIX IRQ Handled");
5594	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "dropped",
5595			CTLFLAG_RD, &adapter->dropped_pkts,
5596			"Driver dropped packets");
5597	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_dma_fail",
5598			CTLFLAG_RD, &adapter->no_tx_dma_setup,
5599			"Driver tx dma failure in xmit");
5600	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_overruns",
5601			CTLFLAG_RD, &adapter->rx_overruns,
5602			"RX overruns");
5603	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "watchdog_timeouts",
5604			CTLFLAG_RD, &adapter->watchdog_events,
5605			"Watchdog timeouts");
5606
5607	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "device_control",
5608			CTLFLAG_RD, &adapter->device_control,
5609			"Device Control Register");
5610	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_control",
5611			CTLFLAG_RD, &adapter->rx_control,
5612			"Receiver Control Register");
5613	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "interrupt_mask",
5614			CTLFLAG_RD, &adapter->int_mask,
5615			"Interrupt Mask");
5616	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "extended_int_mask",
5617			CTLFLAG_RD, &adapter->eint_mask,
5618			"Extended Interrupt Mask");
5619	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_buf_alloc",
5620			CTLFLAG_RD, &adapter->packet_buf_alloc_tx,
5621			"Transmit Buffer Packet Allocation");
5622	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_buf_alloc",
5623			CTLFLAG_RD, &adapter->packet_buf_alloc_rx,
5624			"Receive Buffer Packet Allocation");
5625	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_high_water",
5626			CTLFLAG_RD, &adapter->hw.fc.high_water, 0,
5627			"Flow Control High Watermark");
5628	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_low_water",
5629			CTLFLAG_RD, &adapter->hw.fc.low_water, 0,
5630			"Flow Control Low Watermark");
5631
5632	for (int i = 0; i < adapter->num_queues; i++, rxr++, txr++) {
5633		struct lro_ctrl *lro = &rxr->lro;
5634
5635		snprintf(namebuf, QUEUE_NAME_LEN, "queue%d", i);
5636		queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
5637					    CTLFLAG_RD, NULL, "Queue Name");
5638		queue_list = SYSCTL_CHILDREN(queue_node);
5639
5640		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "interrupt_rate",
5641				CTLFLAG_RD, &adapter->queues[i],
5642				sizeof(adapter->queues[i]),
5643				igb_sysctl_interrupt_rate_handler,
5644				"IU", "Interrupt Rate");
5645
5646		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_head",
5647				CTLFLAG_RD, adapter, E1000_TDH(txr->me),
5648				igb_sysctl_reg_handler, "IU",
5649				"Transmit Descriptor Head");
5650		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_tail",
5651				CTLFLAG_RD, adapter, E1000_TDT(txr->me),
5652				igb_sysctl_reg_handler, "IU",
5653				"Transmit Descriptor Tail");
5654		SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "no_desc_avail",
5655				CTLFLAG_RD, &txr->no_desc_avail,
5656				"Queue No Descriptor Available");
5657		SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "tx_packets",
5658				CTLFLAG_RD, &txr->total_packets,
5659				"Queue Packets Transmitted");
5660
5661		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_head",
5662				CTLFLAG_RD, adapter, E1000_RDH(rxr->me),
5663				igb_sysctl_reg_handler, "IU",
5664				"Receive Descriptor Head");
5665		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_tail",
5666				CTLFLAG_RD, adapter, E1000_RDT(rxr->me),
5667				igb_sysctl_reg_handler, "IU",
5668				"Receive Descriptor Tail");
5669		SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "rx_packets",
5670				CTLFLAG_RD, &rxr->rx_packets,
5671				"Queue Packets Received");
5672		SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "rx_bytes",
5673				CTLFLAG_RD, &rxr->rx_bytes,
5674				"Queue Bytes Received");
5675		SYSCTL_ADD_UINT(ctx, queue_list, OID_AUTO, "lro_queued",
5676				CTLFLAG_RD, &lro->lro_queued, 0,
5677				"LRO Queued");
5678		SYSCTL_ADD_UINT(ctx, queue_list, OID_AUTO, "lro_flushed",
5679				CTLFLAG_RD, &lro->lro_flushed, 0,
5680				"LRO Flushed");
5681	}
5682
5683	/* MAC stats get their own sub node */
5684
5685	stat_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "mac_stats",
5686				    CTLFLAG_RD, NULL, "MAC Statistics");
5687	stat_list = SYSCTL_CHILDREN(stat_node);
5688
5689	/*
5690	** VF adapter has a very limited set of stats
5691	** since it's not managing the metal, so to speak.
5692	*/
5693	if (adapter->vf_ifp) {
5694		SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd",
5695				CTLFLAG_RD, &stats->gprc,
5696				"Good Packets Received");
5697		SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
5698				CTLFLAG_RD, &stats->gptc,
5699				"Good Packets Transmitted");
5700		SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd",
5701				CTLFLAG_RD, &stats->gorc,
5702				"Good Octets Received");
5703		SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_txd",
5704				CTLFLAG_RD, &stats->gotc,
5705				"Good Octets Transmitted");
5706		SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd",
5707				CTLFLAG_RD, &stats->mprc,
5708				"Multicast Packets Received");
5709		return;
5710	}
5711
5712	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "excess_coll",
5713			CTLFLAG_RD, &stats->ecol,
5714			"Excessive collisions");
5715	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "single_coll",
5716			CTLFLAG_RD, &stats->scc,
5717			"Single collisions");
5718	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "multiple_coll",
5719			CTLFLAG_RD, &stats->mcc,
5720			"Multiple collisions");
5721	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "late_coll",
5722			CTLFLAG_RD, &stats->latecol,
5723			"Late collisions");
5724	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "collision_count",
5725			CTLFLAG_RD, &stats->colc,
5726			"Collision Count");
5727	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "symbol_errors",
5728			CTLFLAG_RD, &stats->symerrs,
5729			"Symbol Errors");
5730	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "sequence_errors",
5731			CTLFLAG_RD, &stats->sec,
5732			"Sequence Errors");
5733	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "defer_count",
5734			CTLFLAG_RD, &stats->dc,
5735			"Defer Count");
5736	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "missed_packets",
5737			CTLFLAG_RD, &stats->mpc,
5738			"Missed Packets");
5739	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_no_buff",
5740			CTLFLAG_RD, &stats->rnbc,
5741			"Receive No Buffers");
5742	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_undersize",
5743			CTLFLAG_RD, &stats->ruc,
5744			"Receive Undersize");
5745	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_fragmented",
5746			CTLFLAG_RD, &stats->rfc,
5747			"Fragmented Packets Received");
5748	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_oversize",
5749			CTLFLAG_RD, &stats->roc,
5750			"Oversized Packets Received");
5751	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_jabber",
5752			CTLFLAG_RD, &stats->rjc,
5753			"Received Jabber");
5754	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_errs",
5755			CTLFLAG_RD, &stats->rxerrc,
5756			"Receive Errors");
5757	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "crc_errs",
5758			CTLFLAG_RD, &stats->crcerrs,
5759			"CRC errors");
5760	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "alignment_errs",
5761			CTLFLAG_RD, &stats->algnerrc,
5762			"Alignment Errors");
5763	/* On 82575 these are collision counts */
5764	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "coll_ext_errs",
5765			CTLFLAG_RD, &stats->cexterr,
5766			"Collision/Carrier extension errors");
5767	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xon_recvd",
5768			CTLFLAG_RD, &stats->xonrxc,
5769			"XON Received");
5770	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xon_txd",
5771			CTLFLAG_RD, &stats->xontxc,
5772			"XON Transmitted");
5773	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xoff_recvd",
5774			CTLFLAG_RD, &stats->xoffrxc,
5775			"XOFF Received");
5776	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xoff_txd",
5777			CTLFLAG_RD, &stats->xofftxc,
5778			"XOFF Transmitted");
5779	/* Packet Reception Stats */
5780	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "total_pkts_recvd",
5781			CTLFLAG_RD, &stats->tpr,
5782			"Total Packets Received");
5783	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd",
5784			CTLFLAG_RD, &stats->gprc,
5785			"Good Packets Received");
5786	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_recvd",
5787			CTLFLAG_RD, &stats->bprc,
5788			"Broadcast Packets Received");
5789	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd",
5790			CTLFLAG_RD, &stats->mprc,
5791			"Multicast Packets Received");
5792	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_64",
5793			CTLFLAG_RD, &stats->prc64,
5794			"64 byte frames received");
5795	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_65_127",
5796			CTLFLAG_RD, &stats->prc127,
5797			"65-127 byte frames received");
5798	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_128_255",
5799			CTLFLAG_RD, &stats->prc255,
5800			"128-255 byte frames received");
5801	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_256_511",
5802			CTLFLAG_RD, &stats->prc511,
5803			"256-511 byte frames received");
5804	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_512_1023",
5805			CTLFLAG_RD, &stats->prc1023,
5806			"512-1023 byte frames received");
5807	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_1024_1522",
5808			CTLFLAG_RD, &stats->prc1522,
5809			"1024-1522 byte frames received");
5810	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd",
5811			CTLFLAG_RD, &stats->gorc,
5812			"Good Octets Received");
5813
5814	/* Packet Transmission Stats */
5815	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_txd",
5816			CTLFLAG_RD, &stats->gotc,
5817			"Good Octets Transmitted");
5818	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "total_pkts_txd",
5819			CTLFLAG_RD, &stats->tpt,
5820			"Total Packets Transmitted");
5821	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
5822			CTLFLAG_RD, &stats->gptc,
5823			"Good Packets Transmitted");
5824	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_txd",
5825			CTLFLAG_RD, &stats->bptc,
5826			"Broadcast Packets Transmitted");
5827	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_txd",
5828			CTLFLAG_RD, &stats->mptc,
5829			"Multicast Packets Transmitted");
5830	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_64",
5831			CTLFLAG_RD, &stats->ptc64,
5832			"64 byte frames transmitted");
5833	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_65_127",
5834			CTLFLAG_RD, &stats->ptc127,
5835			"65-127 byte frames transmitted");
5836	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_128_255",
5837			CTLFLAG_RD, &stats->ptc255,
5838			"128-255 byte frames transmitted");
5839	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_256_511",
5840			CTLFLAG_RD, &stats->ptc511,
5841			"256-511 byte frames transmitted");
5842	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_512_1023",
5843			CTLFLAG_RD, &stats->ptc1023,
5844			"512-1023 byte frames transmitted");
5845	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_1024_1522",
5846			CTLFLAG_RD, &stats->ptc1522,
5847			"1024-1522 byte frames transmitted");
5848	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tso_txd",
5849			CTLFLAG_RD, &stats->tsctc,
5850			"TSO Contexts Transmitted");
5851	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tso_ctx_fail",
5852			CTLFLAG_RD, &stats->tsctfc,
5853			"TSO Contexts Failed");
5854
5855
5856	/* Interrupt Stats */
5857
5858	int_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "interrupts",
5859				    CTLFLAG_RD, NULL, "Interrupt Statistics");
5860	int_list = SYSCTL_CHILDREN(int_node);
5861
5862	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "asserts",
5863			CTLFLAG_RD, &stats->iac,
5864			"Interrupt Assertion Count");
5865
5866	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_pkt_timer",
5867			CTLFLAG_RD, &stats->icrxptc,
5868			"Interrupt Cause Rx Pkt Timer Expire Count");
5869
5870	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_abs_timer",
5871			CTLFLAG_RD, &stats->icrxatc,
5872			"Interrupt Cause Rx Abs Timer Expire Count");
5873
5874	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_pkt_timer",
5875			CTLFLAG_RD, &stats->ictxptc,
5876			"Interrupt Cause Tx Pkt Timer Expire Count");
5877
5878	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_abs_timer",
5879			CTLFLAG_RD, &stats->ictxatc,
5880			"Interrupt Cause Tx Abs Timer Expire Count");
5881
5882	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_queue_empty",
5883			CTLFLAG_RD, &stats->ictxqec,
5884			"Interrupt Cause Tx Queue Empty Count");
5885
5886	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_queue_min_thresh",
5887			CTLFLAG_RD, &stats->ictxqmtc,
5888			"Interrupt Cause Tx Queue Min Thresh Count");
5889
5890	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_desc_min_thresh",
5891			CTLFLAG_RD, &stats->icrxdmtc,
5892			"Interrupt Cause Rx Desc Min Thresh Count");
5893
5894	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_overrun",
5895			CTLFLAG_RD, &stats->icrxoc,
5896			"Interrupt Cause Receiver Overrun Count");
5897
5898	/* Host to Card Stats */
5899
5900	host_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "host",
5901				    CTLFLAG_RD, NULL,
5902				    "Host to Card Statistics");
5903
5904	host_list = SYSCTL_CHILDREN(host_node);
5905
5906	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_tx_pkt",
5907			CTLFLAG_RD, &stats->cbtmpc,
5908			"Circuit Breaker Tx Packet Count");
5909
5910	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "host_tx_pkt_discard",
5911			CTLFLAG_RD, &stats->htdpmc,
5912			"Host Transmit Discarded Packets");
5913
5914	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "rx_pkt",
5915			CTLFLAG_RD, &stats->rpthc,
5916			"Rx Packets To Host");
5917
5918	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_rx_pkts",
5919			CTLFLAG_RD, &stats->cbrmpc,
5920			"Circuit Breaker Rx Packet Count");
5921
5922	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_rx_pkt_drop",
5923			CTLFLAG_RD, &stats->cbrdpc,
5924			"Circuit Breaker Rx Dropped Count");
5925
5926	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "tx_good_pkt",
5927			CTLFLAG_RD, &stats->hgptc,
5928			"Host Good Packets Tx Count");
5929
5930	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_tx_pkt_drop",
5931			CTLFLAG_RD, &stats->htcbdpc,
5932			"Host Tx Circuit Breaker Dropped Count");
5933
5934	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "rx_good_bytes",
5935			CTLFLAG_RD, &stats->hgorc,
5936			"Host Good Octets Received Count");
5937
5938	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "tx_good_bytes",
5939			CTLFLAG_RD, &stats->hgotc,
5940			"Host Good Octets Transmit Count");
5941
5942	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "length_errors",
5943			CTLFLAG_RD, &stats->lenerrs,
5944			"Length Errors");
5945
5946	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "serdes_violation_pkt",
5947			CTLFLAG_RD, &stats->scvpc,
5948			"SerDes/SGMII Code Violation Pkt Count");
5949
5950	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "header_redir_missed",
5951			CTLFLAG_RD, &stats->hrmpc,
5952			"Header Redirection Missed Packet Count");
5953}
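/*
** The nodes created above all hang off the device's sysctl tree, so once the
** interface is attached they can be browsed with sysctl(8).  For example
** (assuming unit 0; subtree and statistic names as registered above):
**
**	sysctl dev.igb.0.mac_stats
**	sysctl dev.igb.0.queue0.rx_packets
**	sysctl dev.igb.0.interrupts.asserts
*/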
5954
5955
5956/**********************************************************************
5957 *
5958 *  This routine provides a way to dump out the adapter's EEPROM,
5959 *  often a useful debug/service tool. Only the first 32 words are
5960 *  dumped; the data that matters lives within that range.
5961 *
5962 **********************************************************************/
5963static int
5964igb_sysctl_nvm_info(SYSCTL_HANDLER_ARGS)
5965{
5966	struct adapter *adapter;
5967	int error;
5968	int result;
5969
5970	result = -1;
5971	error = sysctl_handle_int(oidp, &result, 0, req);
5972
5973	if (error || !req->newptr)
5974		return (error);
5975
5976	/*
5977	 * This value will cause a hex dump of the
5978	 * first 32 16-bit words of the EEPROM to
5979	 * the screen.
5980	 */
5981	if (result == 1) {
5982		adapter = (struct adapter *)arg1;
5983		igb_print_nvm_info(adapter);
5984	}
5985
5986	return (error);
5987}
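/*
** Writing 1 to the sysctl backed by this handler triggers the EEPROM dump
** on the console.  Example, assuming the handler is attached as
** dev.igb.<unit>.nvm elsewhere in the driver (node name not shown in this
** excerpt):
**
**	sysctl dev.igb.0.nvm=1
*/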
5988
5989static void
5990igb_print_nvm_info(struct adapter *adapter)
5991{
5992	u16	eeprom_data;
5993	int	i, j, row = 0;
5994
5995	/* It's a bit crude, but it gets the job done */
5996	printf("\nInterface EEPROM Dump:\n");
5997	printf("Offset\n0x0000  ");
5998	for (i = 0, j = 0; i < 32; i++, j++) {
5999		if (j == 8) { /* Make the offset block */
6000			j = 0; ++row;
6001			printf("\n0x00%x0  ", row);
6002		}
6003		e1000_read_nvm(&adapter->hw, i, 1, &eeprom_data);
6004		printf("%04x ", eeprom_data);
6005	}
6006	printf("\n");
6007}
6008
6009static void
6010igb_set_sysctl_value(struct adapter *adapter, const char *name,
6011	const char *description, int *limit, int value)
6012{
6013	*limit = value;
6014	SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
6015	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
6016	    OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW, limit, value, description);
6017}
6018
6019/*
6020** Set flow control using sysctl:
6021** Flow control values:
6022** 	0 - off
6023**	1 - rx pause
6024**	2 - tx pause
6025**	3 - full
6026*/
6027static int
6028igb_set_flowcntl(SYSCTL_HANDLER_ARGS)
6029{
6030	int		error;
6031	static int	input = 3; /* default is full */
6032	struct adapter	*adapter = (struct adapter *) arg1;
6033
6034	error = sysctl_handle_int(oidp, &input, 0, req);
6035
6036	if ((error) || (req->newptr == NULL))
6037		return (error);
6038
6039	switch (input) {
6040		case e1000_fc_rx_pause:
6041		case e1000_fc_tx_pause:
6042		case e1000_fc_full:
6043		case e1000_fc_none:
6044			adapter->hw.fc.requested_mode = input;
6045			adapter->fc = input;
6046			break;
6047		default:
6048			/* Do nothing */
6049			return (error);
6050	}
6051
6052	adapter->hw.fc.current_mode = adapter->hw.fc.requested_mode;
6053	e1000_force_mac_fc(&adapter->hw);
6054	return (error);
6055}
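/*
** Example of changing flow control at runtime, assuming this handler is
** attached as dev.igb.<unit>.fc elsewhere in the driver (node name not
** shown in this excerpt); the values map to
** e1000_fc_none/rx_pause/tx_pause/full as listed above:
**
**	sysctl dev.igb.0.fc=3		# full flow control
**	sysctl dev.igb.0.fc=0		# disable flow control
*/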
6056
6057/*
6058** Manage DMA Coalesce:
6059** Control values:
6060** 	0/1 - off/on
6061**	Legal timer values are:
6062**	250, 500, then 1000 to 10000 in steps of 1000
6063*/
6064static int
6065igb_sysctl_dmac(SYSCTL_HANDLER_ARGS)
6066{
6067	struct adapter *adapter = (struct adapter *) arg1;
6068	int		error;
6069
6070	error = sysctl_handle_int(oidp, &adapter->dmac, 0, req);
6071
6072	if ((error) || (req->newptr == NULL))
6073		return (error);
6074
6075	switch (adapter->dmac) {
6076		case 0:
6077			/* Disabling */
6078			break;
6079		case 1: /* Just enable and use default */
6080			adapter->dmac = 1000;
6081			break;
6082		case 250:
6083		case 500:
6084		case 1000:
6085		case 2000:
6086		case 3000:
6087		case 4000:
6088		case 5000:
6089		case 6000:
6090		case 7000:
6091		case 8000:
6092		case 9000:
6093		case 10000:
6094			/* Legal values - allow */
6095			break;
6096		default:
6097			/* Do nothing, illegal value */
6098			/* Illegal value, reset and reject */
6099			return (EINVAL);
6100	}
6101	/* Reinit the interface */
6102	igb_init(adapter);
6103	return (error);
6104}
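/*
** Example of tuning DMA coalescing, assuming this handler is attached as
** dev.igb.<unit>.dmac elsewhere in the driver (node name not shown in this
** excerpt).  Any accepted value causes the interface to be reinitialized:
**
**	sysctl dev.igb.0.dmac=1		# enable with the default (1000) timer
**	sysctl dev.igb.0.dmac=250	# enable with the smallest legal timer
**	sysctl dev.igb.0.dmac=0		# disable
*/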
6105
6106/*
6107** Manage Energy Efficient Ethernet:
6108** Control values:
6109**     0/1 - enabled/disabled
6110*/
6111static int
6112igb_sysctl_eee(SYSCTL_HANDLER_ARGS)
6113{
6114	struct adapter	*adapter = (struct adapter *) arg1;
6115	int		error, value;
6116
6117	value = adapter->hw.dev_spec._82575.eee_disable;
6118	error = sysctl_handle_int(oidp, &value, 0, req);
6119	if (error || req->newptr == NULL)
6120		return (error);
6121	IGB_CORE_LOCK(adapter);
6122	adapter->hw.dev_spec._82575.eee_disable = (value != 0);
6123	igb_init_locked(adapter);
6124	IGB_CORE_UNLOCK(adapter);
6125	return (0);
6126}
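/*
** Example of toggling EEE, assuming this handler is attached under
** dev.igb.<unit> elsewhere in the driver (the node name is not shown in
** this excerpt).  A non-zero value sets eee_disable and the interface is
** reinitialized with the new setting:
**
**	sysctl dev.igb.0.<eee-node>=1	# disable EEE
**	sysctl dev.igb.0.<eee-node>=0	# enable EEE
*/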
6127